module BioCMerger

Public Instance Methods

adjust_annotation_offset(obj) click to toggle source
# File lib/simple_bioc/bioc_merger.rb, line 83
def adjust_annotation_offset(obj)
  obj.annotations.each do |a|
    positions = find_all_locations(obj, a.text)
    a.locations.each do |l|
      l.offset = choose_offset_candidate(l.offset, positions)
    end
  end
end
adjust_relation_refid(obj, id_map) click to toggle source
# File lib/simple_bioc/bioc_merger.rb, line 102
def adjust_relation_refid(obj, id_map) 
  obj.relations.each do |r|
    next if r.original.nil?
    r.nodes.each do |n|
      new_id = id_map[n.refid]
      n.refid = new_id unless new_id.nil?
      n.adjust_ref
    end
  end
end
adjust_relation_refids(doc, id_map) click to toggle source
# File lib/simple_bioc/bioc_merger.rb, line 92
def adjust_relation_refids(doc, id_map)
  adjust_relation_refid(doc, id_map)
  doc.passages.each do |p|
    adjust_relation_refid(p, id_map)
    p.sentences.each do |s|
      adjust_relation_refid(s, id_map)
    end
  end
end
blank?(text) click to toggle source
# File lib/simple_bioc/bioc_merger.rb, line 172
def blank?(text)
  return text.nil? || text.empty?
end
choose_id(doc, id, id_map) click to toggle source
# File lib/simple_bioc/bioc_merger.rb, line 151
def choose_id(doc, id, id_map) 
  new_id = id
  node = doc.find_node(new_id)

  until node.nil? do
    new_id = new_id + "_c"
    node = doc.find_node(new_id)
  end

  if new_id != id
    id_map[id] = new_id
  end
  return new_id
end
choose_offset_candidate(offset, positions) click to toggle source
# File lib/simple_bioc/bioc_merger.rb, line 196
def choose_offset_candidate(offset, positions)
  min_diff = 99999
  ret = offset
  offset = offset.to_i
  positions.each do |p|
    diff = (offset - p).abs
    if diff < min_diff
      offset = p 
      min_diff = diff
    end
  end
  return ret
end
copy_annotation(doc, dest, annotation, id_map) click to toggle source
# File lib/simple_bioc/bioc_merger.rb, line 137
def copy_annotation(doc, dest, annotation, id_map)
  new_a = SimpleBioC::Annotation.new(dest)
  new_a.id = choose_id(doc, annotation.id, id_map)
  new_a.text = annotation.text
  new_a.locations = []
  annotation.locations.each do |l|
    new_l = SimpleBioC::Location.new(new_a)
    new_l.offset = l.offset
    new_l.length = l.length 
    new_a.locations << new_l
  end
  dest.annotations << new_a
end
copy_annotations(doc, dest, src, id_map) click to toggle source
# File lib/simple_bioc/bioc_merger.rb, line 119
def copy_annotations(doc, dest, src, id_map)
  src.annotations.each do |a|
    copy_annotation(doc, dest, a, id_map)
  end
end
copy_infons(dest, src, warnings) click to toggle source
# File lib/simple_bioc/bioc_merger.rb, line 176
def copy_infons(dest, src, warnings)
  src.infons.each do |k, v|
    if dest.infons[k].nil?
      dest.infons[k] = v
    elsif dest.infons[k] != v
      warnings << "Failed to copy <#{k}:#{v}>. the key is already exist in infons."
    end
  end
end
copy_relation(doc, dest, relation, id_map) click to toggle source
# File lib/simple_bioc/bioc_merger.rb, line 124
def copy_relation(doc, dest, relation, id_map)
  new_r = SimpleBioC::Relation.new(dest)
  new_r.id = choose_id(doc, relation.id, id_map)
  relation.nodes.each do |n|
    node = SimpleBioC::Node.new(new_r)
    node.refid = relation.refid
    node.role = relation.role
    new_r.nodes << node
  end
  new_r.original = relation
  dest.relations << new_r
end
copy_relations(doc, dest, src, id_map) click to toggle source
# File lib/simple_bioc/bioc_merger.rb, line 113
def copy_relations(doc, dest, src, id_map)
  src.relations.each do |r|
    copy_relation(doc, dest, r, id_map)
  end
end
copy_text(dest, src) click to toggle source
# File lib/simple_bioc/bioc_merger.rb, line 166
def copy_text(dest, src)
  if blank?(dest.text) && !blank?(src.text)
    dest.text = src.text
  end
end
find_all_locations(obj, text) click to toggle source
# File lib/simple_bioc/bioc_merger.rb, line 186
def find_all_locations(obj, text)
  positions = []
  pos = obj.text.index(text)
  until pos.nil? 
    positions << (pos + obj.offset)
    pos = obj.text.index(text, pos + 1)
  end
  return positions
end
merge(dest_collection, src_collection) click to toggle source
# File lib/simple_bioc/bioc_merger.rb, line 8
def merge(dest_collection, src_collection)
  errors = []
  warnings = []
  id_map = {}

  if dest_collection.documents.size != 1 || src_collection.documents.size != 1
    warnings << 'Only the first documents will be merged'
  end

  doc_d = dest_collection.documents[0]
  doc_s = src_collection.documents[0]

  copy_infons(dest_collection, src_collection, warnings)
  dest_collection.source = src_collection.source if dest_collection.source.nil? || dest_collection.source.empty?
  dest_collection.date = src_collection.date if dest_collection.date.nil? || dest_collection.date.empty?
  dest_collection.key = src_collection.key if dest_collection.key.nil? || dest_collection.key.empty?

  copy_infons(doc_d, doc_s, warnings)
  copy_relations(doc_d, doc_d, doc_s, id_map)

  if doc_d.passages.size != doc_s.passages.size
    warnings << 'Passages will not be merged because the numbers of passages in documents are different'
  end

  doc_d.passages.each_with_index do |p_d, index|
    p_s = doc_s.passages[index]
    if blank?(p_d.text) && blank?(p_s.text) && p_d.sentences.size != p_s.sentences.size
      warnings << 'The number of sentences in pages should be the same'
    end
  end

  doc_d.passages.each_with_index do |p_d, index|
    p_s = doc_s.passages[index]
    copy_relations(doc_d, p_d, p_s, id_map)

    if p_d.sentences.size == p_s.sentences.size
      p_d.sentences.each_with_index do |s_d, index|
        s_s = p_s.sentences[index]
        copy_infons(s_d, s_s, warnings)
        copy_text(s_d, s_s)
        copy_relations(doc_d, s_d, s_s, id_map)
        copy_annotations(doc_d, s_d, s_s, id_map)
        adjust_annotation_offset(s_d)
      end
    elsif p_d.sentences.size == 0
      p_d.text = p_s.sentences.map{|s| s.text}.join(" ") if blank?(p_d.text)
      p_s.sentences.each do |s|
        copy_relations(doc_d, p_d, s, id_map)
        copy_annotations(doc_d, p_d, s, id_map)
      end
    elsif p_s.sentences.size == 0
      if p_d.sentences.size > 0
        # dest has sentences, but src has only passages. 
        p_d.text = p_d.sentences.map{|s| s.text}.join(" ") if blank?(p_d.text)
        p_d.sentences.each do |s|
          s.annotations.each do |a|
            a.sentence = nil
            p_d.annotations << a
          end
          s.relations.each do |r|
            r.sentence = nil
            p_d.relations << r
          end
        end
        p_d.sentences.clear
      else
        copy_text(p_d, p_s)
      end
    end
    copy_annotations(doc_d, p_d, p_s, id_map)
    adjust_annotation_offset(p_d)
  end
  puts warnings
end