class MyersDiff::WordDiff

Constants

BOUNDARIES_REGEX
EXTENDED_WORD_CHARS_REGEX

Public Instance Methods

build_values(components, new_string, old_string, use_longest_token = true) click to toggle source

new_string - tokenized string i.e. array of strings

# File lib/myers_diff/word_diff.rb, line 146
def build_values(components, new_string, old_string, use_longest_token = true)
  component_pos = 0
  component_len = components.size
  new_pos = 0
  old_pos = 0

  while component_pos < component_len
    component = components[component_pos]
    if !component[:removed]
      if !component[:added] && use_longest_token
        value = new_string[new_pos, component[:count]]
        value = value.map.with_index do |val, i|
          old_val = old_string[old_pos + i]
          old_val.size > val.size ? old_val : val
        end

        component[:value] = join(value)
      else
        component[:value] = join(new_string[new_pos, component[:count]])
      end

      new_pos += component[:count]
      old_pos += component[:count] unless component[:added]
    else
      component[:value] = join(old_string[old_pos, component[:count]])
      old_pos += component[:count]

      if component_pos && 0 <= component_pos - 1 && components[component_pos - 1][:added]
        tmp = components[component_pos - 1]
        components[component_pos - 1] = components[component_pos]
        components[component_pos] = tmp
      end
    end

    component_pos += 1
  end

  last_component = components[component_len - 1]
  if component_len > 1 &&
      last_component[:value].is_a?(String) &&
      (last_component[:added] || last_component[:removed]) &&
      equals('', last_component[:value])
    components[component_len - 2][:value] += last_component[:value]
    components.pop
  end

  recount(components)

  components
end
cast_input(str) click to toggle source
# File lib/myers_diff/word_diff.rb, line 118
def cast_input(str)
  str
end
clone_path(path_hash) click to toggle source
# File lib/myers_diff/word_diff.rb, line 197
def clone_path(path_hash)
  { new_pos: path_hash[:new_pos], components: path_hash[:components].dup }
end
diff(s1, s2, **options) click to toggle source
# File lib/myers_diff/word_diff.rb, line 6
def diff(s1, s2, **options)
  old_string = cast_input(s1)
  new_string = cast_input(s2)

  old_string = remove_empty(tokenize(old_string))
  new_string = remove_empty(tokenize(new_string))

  new_len = new_string.size
  old_len = old_string.size
  edit_length = 1
  max_edit_length = new_len + old_len
  best_path = { }
  best_path[0] = { new_pos: -1, components: [] }

  old_pos = extract_common(best_path[0], new_string, old_string, 0)
  if best_path[0][:new_pos] + 1 >= new_len && old_pos + 1 >= old_len
    return [ { value: join(new_string), count: new_string.size } ]
  end

  exec_edit_length = lambda do
    diagonal_path = -1 * edit_length
    while diagonal_path <= edit_length
      add_path = best_path[diagonal_path - 1]
      remove_path = best_path[diagonal_path + 1]
      old_pos = (remove_path ? remove_path[:new_pos] : 0) - diagonal_path
      best_path[diagonal_path - 1] = nil if add_path

      can_add = add_path && add_path[:new_pos] + 1 < new_len
      can_remove = remove_path && 0 <= old_pos && old_pos < old_len
      if !can_add && !can_remove
        best_path[diagonal_path] = nil
        diagonal_path += 2
        next
      end

      base_path = if !can_add || (can_remove && add_path[:new_pos] < remove_path[:new_pos])
        p = clone_path(remove_path)
        push_component(p[:components], nil, true)
        p
      else
        p = add_path
        p[:new_pos] += 1
        push_component(p[:components], true, nil)
        p
      end

      old_pos = extract_common(base_path, new_string, old_string, diagonal_path)

      if base_path[:new_pos] + 1 >= new_len && old_pos + 1 >= old_len
        return build_values(base_path[:components], new_string, old_string)
      else
        best_path[diagonal_path] = base_path
      end

      diagonal_path += 2
    end

    edit_length += 1
    nil
  end

  while edit_length <= max_edit_length
    if res = exec_edit_length.call
      return res
    end
  end

  'death'
end
equals(l, r) click to toggle source
# File lib/myers_diff/word_diff.rb, line 108
def equals(l, r)
  l == r
  # TODO: support custom comparator
  # TODO: support case-insensitive
end
extract_common(base_path, new_string, old_string, diagonal_path) click to toggle source

base_path : { new_pos: int, components: [] } diagonal_path : int

# File lib/myers_diff/word_diff.rb, line 87
def extract_common(base_path, new_string, old_string, diagonal_path)
  new_len = new_string.size
  old_len = old_string.size
  new_pos = base_path[:new_pos]
  old_pos = new_pos - diagonal_path
  common_count = 0

  while new_pos + 1 < new_len && old_pos + 1 < old_len && equals(new_string[new_pos + 1], old_string[old_pos + 1])
    new_pos += 1
    old_pos += 1
    common_count += 1
  end

  if common_count > 0
    base_path[:components].push(count: common_count)
  end

  base_path[:new_pos] = new_pos
  old_pos
end
join(chars) click to toggle source
# File lib/myers_diff/word_diff.rb, line 141
def join(chars)
  chars.join('')
end
push_component(components, added, removed) click to toggle source
# File lib/myers_diff/word_diff.rb, line 76
def push_component(components, added, removed)
  last = components.last
  if last && last[:added] == added && last[:removed] == removed
    components[-1] = { added: last[:added], removed: last[:removed], count: last[:count] + 1 }
  else
    components.push(count: 1, added: added, removed: removed)
  end
end
recount(components) click to toggle source
# File lib/myers_diff/word_diff.rb, line 201
def recount(components)
  components.each { |component| component[:count] = component[:value].size }
end
remove_empty(array) click to toggle source
# File lib/myers_diff/word_diff.rb, line 114
def remove_empty(array)
  array.compact
end
tokenize(str) click to toggle source
# File lib/myers_diff/word_diff.rb, line 122
def tokenize(str)
  tokens = str.split(BOUNDARIES_REGEX)

  i = 0
  while i < tokens.size - 1
    if tokens[i + 1].empty? && !tokens[i + 2].empty? &&
        EXTENDED_WORD_CHARS_REGEX.match?(tokens[i]) &&
        EXTENDED_WORD_CHARS_REGEX.match?(tokens[i + 2])
      tokens[i] += tokens[i + 2]
      tokens.delete_at(i + 1)
      tokens.delete_at(i + 1)
      i -= 1
    end
    i += 1
  end

  tokens
end