class StringDiff::Diff
Attributes
string1[R]
string2[R]
Public Class Methods
new(string1, string2)
click to toggle source
# File lib/string_diff.rb, line 8
# Remembers the two strings that will be compared. Both values are later read
# back through the string1 / string2 readers.
#
# string1 - the first string given to the diff.
# string2 - the second string given to the diff.
def initialize(string1, string2)
  @string1, @string2 = string1, string2
end
Public Instance Methods
diff()
click to toggle source
# File lib/string_diff.rb, line 13
# Runs the whole pipeline: tokenizes string1 and string2 (each with a fresh
# tokenizer created with downcase: false), passes each token list through
# process_parens, hands both lists to compare, and renders the annotated
# result with construct_string.
#
# Returns whatever construct_string returns for the annotated token list.
def diff
  tokens1, tokens2 = [string1, string2].map do |text|
    PragmaticTokenizer::Tokenizer.new(downcase: false).tokenize(text)
  end
  merged1 = process_parens(tokens1)
  merged2 = process_parens(tokens2)
  construct_string(compare(merged1, merged2))
end
Private Instance Methods
annotate_deletions(deletions, array1)
click to toggle source
# File lib/string_diff.rb, line 64
# Wraps deleted tokens in a deletion span, modifying array1 in place.
#
# deletions - tokens to mark as deleted.
# array1    - token list to annotate; for every deletion the first element
#             equal to it is replaced by "<span class='deletion'>…</span>".
#             Deletions that are not found in array1 are skipped.
#
# Returns the deletions collection (the receiver of each).
def annotate_deletions(deletions, array1)
  deletions.each do |removed|
    position = array1.index(removed)
    array1[position] = "<span class='deletion'>#{removed}</span>" unless position.nil?
  end
end
annotate_insertions(insertions, array1, array2)
click to toggle source
# File lib/string_diff.rb, line 72 def annotate_insertions(insertions, array1, array2) insertions.each_with_index do |v, i| if array2.find_index(v) == 0 index = 0 else insertion_position = array2.find_index(v) + i - 1 index = array1.find_index(array2[insertion_position]) end if index.nil? # Check whether or not we're dealing with an annotated deletion/insertion, or plain token contains_span = array1.last.include?("<span") ? true : false contains_punct_in_span = !(array1.last.scan(/(?<='>).*(?=<\/)/)[0] =~ (/[[:punct:]]/)).nil? if contains_span stand_alone_punct = array1.last =~ (/[[:punct:]]/) if !contains_span # If there is punctuation after a deletion, we need to make sure the # insertion is added before the punctuation. if (contains_punct_in_span || stand_alone_punct) && array1[-2].include?("<span class='deletion'") array1.insert(-2, "<span class='insertion'>#{v}</span>") elsif array2.find_index(v) < (PragmaticTokenizer::Tokenizer.new(downcase: false).tokenize(string1).count) # Count how many insertions up to the original position insertions_count = 0 deletions_count = 0 for i in 0..(array2.find_index(v)+1) do insertions_count += 1 if array1[i].include?("<span class='insertion'") deletions_count += 1 if array1[i].include?("<span class='deletion'") end array1.insert(((find_correct_index(v, array1, array2) + insertions_count + deletions_count) - 1), "<span class='insertion'>#{v}</span>") else # Otherwise we put it on the end. array1.insert(-1, "<span class='insertion'>#{v}</span>") end else array1.insert(index + 1, "<span class='insertion'>#{v}</span>") end end array1 end
compare(array1, array2)
click to toggle source
# File lib/string_diff.rb, line 53
# Annotates array1 with the differences between the two token lists.
#
# The deletions (array1 - array2) and insertions (array2 - array1) are
# computed first, before process_duplicates and annotate_deletions start
# replacing elements of array1 with span markup.
#
# array1 - tokens of the first string; modified in place.
# array2 - tokens of the second string.
#
# Returns the value of annotate_insertions (the annotated array1).
def compare(array1, array2)
  # No debug output here: this is library code and must not write to stdout.
  deletions = array1 - array2
  insertions = array2 - array1
  process_duplicates(array1, array2)
  annotate_deletions(deletions, array1)
  annotate_insertions(insertions, array1, array2)
end
construct_string(array1)
click to toggle source
# File lib/string_diff.rb, line 162 def construct_string(array1) string = "" array1.each_with_index do |token, i| if i == 0 string += token else if token.include?("<span") if token.scan(/(?<='>).*(?=<\/)/)[0] !~ /[[:punct:]]/ || string1.include?(" #{token.scan(/(?<='>).*(?=<\/)/)[0]}") string += " #{token}" elsif !( token.scan(/(?<='>).*(?=<\/)/)[0] =~ (/[']/) ).nil? if string.scan(/[']/).empty? || string.scan(/[(]/).empty? string += " #{token}#{array1[i+1]}" array1.slice!(i+1) else string += token end elsif !( token.scan(/(?<='>).*(?=<\/)/)[0] =~ (/[(]/) ).nil? if string.scan(/[(]/).empty? string += " #{token}#{array1[i+1]}" array1.slice!(i+1) else string += token end else string += token end else if token !~ /[[:punct:]]/ string += " #{token}" elsif !( token =~ (/[']/) ).nil? if string.scan(/[']/).empty? string += " #{token}#{array1[i+1]}" array1.slice!(i+1) else string += token end elsif !( token =~ (/[(]/) ).nil? if string.scan(/[(]/).empty? string += " #{token}#{array1[i+1]}" array1.slice!(i+1) else string += token end else string += token end end end end string end
find_correct_index(token, array1, array2)
click to toggle source
# File lib/string_diff.rb, line 144
# Picks the base index at which an insertion span for token should go.
#
# token  - the token being inserted.
# array1 - annotated token list; scanned for insertion spans that already
#          start with this token.
# array2 - token list of the second string; only used when no additional
#          duplicate indexes have been recorded.
#
# When @additional_indexes is unset, returns the first position of token in
# array2. Otherwise returns the entry of @additional_indexes selected by how
# many insertion spans for this token array1 already holds (nil when there
# is no such entry).
def find_correct_index(token, array1, array2)
  return array2.find_index(token) if @additional_indexes.nil?

  # We need to find if the word has already been added, if so, use a later index
  already_inserted = array1.count { |item| item.include?("<span class='insertion'>#{token}") }
  @additional_indexes[already_inserted]
end
process_duplicates(array1, array2)
click to toggle source
# File lib/string_diff.rb, line 111
# Handles tokens that are repeated in one list but not in the other.
#
# * Tokens repeated only in array2 ("additional" words) are passed to
#   set_additional_duplicates_indexes so their positions can be used later.
# * For each token repeated only in array1 ("missing" words), the first
#   position where it occurs in array1 but not at the same position in
#   array2 is replaced by a deletion span.
#
# array1 - tokens of the first string; modified in place.
# array2 - tokens of the second string.
#
# Returns the list of missing words (the receiver of each).
def process_duplicates(array1, array2)
  dup1 = array1.select { |e| array1.count(e) > 1 }
  dup2 = array2.select { |e| array2.count(e) > 1 }
  missing_words = (dup1 - dup2).uniq
  additional_words = (dup2 - dup1).uniq
  set_additional_duplicates_indexes(array2, additional_words) unless additional_words.empty?

  missing_words.each do |word|
    # Positions are collected per word. Sharing one accumulator across words
    # would let positions of earlier words leak into this difference and
    # cause the wrong element of array1 to be overwritten.
    positions_in_array1 = array1.each_index.select { |i| array1[i] == word }
    positions_in_array2 = array2.each_index.select { |i| array2[i] == word }
    surplus_position = (positions_in_array1 - positions_in_array2).first
    next if surplus_position.nil?

    array1[surplus_position] = "<span class='deletion'>#{word}</span>"
  end
end
process_parens(array)
click to toggle source
# File lib/string_diff.rb, line 22
# Attaches stand-alone parenthesis tokens to their neighbours: every "(" is
# glued to the front of the token that follows it and every ")" to the end
# of the token that precedes it, e.g.
#   ["a", "(", "b", "c", ")"] => ["a", "(b", "c)"]
#
# Nothing is changed unless the list contains both kinds of parenthesis in
# equal numbers.
#
# The list is rebuilt in a single left-to-right pass so that several
# separate groups ("a ( b ) c ( d )") are merged correctly and a dangling
# "(" at the end or ")" at the start is kept as its own token instead of
# raising.
#
# array - list of String tokens; modified in place.
#
# Returns the same array object.
def process_parens(array)
  return array unless array.include?('(') && array.include?(')')
  return array unless array.count('(') == array.count(')')

  merged = []
  pending_open = nil # "(" characters waiting for the next token
  array.each do |token|
    case token
    when '('
      pending_open = "#{pending_open}("
    when ')'
      if pending_open
        # "(" directly followed by ")": keep them together as one token.
        merged << "#{pending_open})"
        pending_open = nil
      elsif merged.empty?
        merged << token
      else
        merged[-1] = "#{merged[-1]})"
      end
    else
      merged << (pending_open ? "#{pending_open}#{token}" : token)
      pending_open = nil
    end
  end
  merged << pending_open if pending_open

  # Callers may rely on the argument itself being updated.
  array.replace(merged)
end
set_additional_duplicates_indexes(array, dup)
click to toggle source
# File lib/string_diff.rb, line 140
# Records, in @additional_indexes, every position of array whose element
# equals the first entry of dup. Only dup[0] is considered; any further
# entries of dup are ignored.
#
# array - token list to search.
# dup   - list of words; its first element is the one looked up.
#
# Returns the Array of matching positions that was stored.
def set_additional_duplicates_indexes(array, dup)
  matches = array.each_with_index.select { |item, _position| item == dup[0] }
  @additional_indexes = matches.map { |_item, position| position }
end