class Typogrowth::Parser

Parses and corrects the typography in strings. It supports different language rules and easy user rules customization.

Constants

DEFAULT_SET
DEFAULT_SHADOWS
HTML_TAG_RE

Attributes

shadows[R]
yaml[R]

Public Class Methods

defuse(str, elements, shadows: []) click to toggle source

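Surrounds every occurrence of the given `elements` in `str` with spaces, leaving shadowed fragments (HTML tags, URLs, custom patterns) untouched, and returns the result as a new string. Delegates to the instance method defuse of a freshly created parser.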

# File lib/typogrowth.rb, line 167
# Class-level shortcut for the instance method +defuse+: builds a fresh
# Parser and forwards every argument to it unchanged.
#
# @param str the text handed to the parser
# @param elements the fragments handed to the parser
# @param shadows extra shadow patterns handed to the parser
# @return whatever the instance-level +defuse+ returns
def self.defuse(str, elements, shadows: [])
  parser = Parser.new
  parser.defuse(str, elements, shadows: shadows)
end
is_ru?(str, shadows: []) click to toggle source

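Tells whether `str` is predominantly Russian: true when Cyrillic letters make up more than roughly a third of its characters, shadowed fragments not counted. Delegates to the instance method is_ru? of a shared parser instance.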

# File lib/typogrowth.rb, line 162
# Class-level shortcut for the instance method +is_ru?+. It does not build a
# new parser; it asks the one stored in the +@@instance+ class variable
# (assigned outside this method).
#
# @param str the text handed to the parser
# @param shadows extra shadow patterns handed to the parser
# @return whatever the instance-level +is_ru?+ returns
def self.is_ru?(str, shadows: [])
  shared_parser = @@instance
  shared_parser.is_ru?(str, shadows: shadows)
end
new(file = nil, shadows = nil) click to toggle source
# File lib/typogrowth.rb, line 175
# Loads the rule set and the shadow patterns from the YAML files that live in
# the +config+ directory next to this source file.
#
# @param file name (without the +.yaml+ extension) of the rules file;
#   DEFAULT_SET is used when omitted
# @param shadows name (without the +.yaml+ extension) of the shadows file;
#   DEFAULT_SHADOWS is used when omitted
#
# NOTE(review): the config files appear to use symbol keys; YAML.load_file
# rejects symbols by default on Psych 4+ — confirm against supported Rubies.
# NOTE(review): URI.regexp is marked obsolete in current Ruby versions.
def initialize(file = nil, shadows = nil)
  config_dir = "#{File.dirname(__FILE__)}/config"

  file ||= DEFAULT_SET
  @yaml = YAML.load_file("#{config_dir}/#{file}.yaml")
  # The :placeholder entry is removed from the loaded rules.
  @yaml.delete(:placeholder)

  shadows ||= DEFAULT_SHADOWS
  shadow_config = YAML.load_file("#{config_dir}/#{shadows}.yaml")
  custom = shadow_config[:custom]
  grip = shadow_config[:grip]

  patterns = []
  # :custom entries are used as regular expressions verbatim.
  patterns << custom.map { |g| /#{g}/ } if custom
  # :grip entries match the text enclosed between two occurrences of the entry.
  patterns << grip.map { |g| /(?<=#{g})([^#{g}]*)(?=#{g})/m } if grip
  patterns << HTML_TAG_RE
  patterns << URI.regexp(['ftp', 'http', 'https', 'mailto'])
  @shadows = patterns.flatten
end
parse(str, lang: :default, shadows: [], sections: nil) click to toggle source

Out-of-place version of `String` typographing. See parse!

# File lib/typogrowth.rb, line 152
# Class-level shortcut for the instance method +parse+: builds a fresh Parser
# and forwards every argument to it unchanged.
#
# @param str the text handed to the parser
# @param lang the language whose rules are requested
# @param shadows extra shadow patterns handed to the parser
# @param sections the rule sections to apply, or nil
# @return whatever the instance-level +parse+ returns
def self.parse(str, lang: :default, shadows: [], sections: nil)
  options = { lang: lang, shadows: shadows, sections: sections }
  Parser.new.parse(str, **options)
end
parse!(str, lang: :default, shadows: [], sections: nil) click to toggle source

In-place version of `String` typographing: replaces the content of `str` with the typographed text. See parse.

# File lib/typogrowth.rb, line 157
# In-place variant of the class-level +parse+: the content of +str+ is
# replaced with the typographed text.
#
# @param str [String] the string to overwrite; it must be mutable
# @param lang the language whose rules are requested
# @param shadows extra shadow patterns handed to the parser
# @param sections the rule sections to apply, or nil
# @return [String] +str+ itself, now holding the typographed text
def self.parse!(str, lang: :default, shadows: [], sections: nil)
  typographed = self.parse(str, lang: lang, shadows: shadows, sections: sections)
  str.replace(typographed)
end

Public Instance Methods

add_shadows(re) click to toggle source
# File lib/typogrowth.rb, line 143
# Appends one or more shadow patterns to this parser's list.
#
# @param re a single pattern or an array of patterns
# @return [Array] the parser's updated list of shadow patterns
def add_shadows(re)
  @shadows.push(*re)
end
defuse(str, elements, shadows: []) click to toggle source
# File lib/typogrowth.rb, line 130
    # Pads every occurrence of the given +elements+ with one space on each
    # side and returns the result as a new string; +str+ is not modified.
    #
    # Fragments matching a shadow pattern (the +shadows+ argument plus the
    # parser's own +@shadows+) are Base64-encoded and wrapped in the delimiters
    # returned by +str.safe_delimiters+ before the padding takes place, and are
    # decoded back afterwards, so the elements inside them are left alone.
    #
    # NOTE(review): +safe_delimiters+ is defined outside this view; it is
    # assumed to return a pair of marker strings absent from +str+ — confirm.
    #
    # @param str [String] the source text
    # @param elements [Array<String>] literal fragments to surround with spaces
    # @param shadows a pattern or an array of patterns to protect in addition
    #   to the parser's own ones
    # @return [String] the padded copy of +str+
    def defuse(str, elements, shadows: [])
      delims = str.safe_delimiters
      opener = delims.first.to_s
      closer = delims.last.to_s
      s = str.dup
      [*shadows].concat(@shadows).uniq.each { |re|
        s.gsub!(re) { |m| "#{opener}#{Base64.encode64 m}#{closer}" }
      }

      # An empty alternation would match at every position and pad each
      # character, so blank alternatives are dropped and the step is skipped
      # when nothing is left.
      alternatives = elements.map { |e| Regexp.escape e }.reject(&:empty?)
      s.gsub!(/(#{alternatives.join('|')})/, ' \1 ') unless alternatives.empty?

      # Delimiters are matched literally, and only the captured payload (not
      # the delimiters around it) is handed to the decoder.
      s.gsub(/#{Regexp.escape opener}(.*?)#{Regexp.escape closer}/m) {
        Base64.decode64(Regexp.last_match(1)).force_encoding('UTF-8')
      }
    end
del_shadows(re) click to toggle source
# File lib/typogrowth.rb, line 147
# Removes one or more shadow patterns from this parser's list.
#
# @param re a single pattern or an array of patterns to drop
# @return [Array] the parser's updated list of shadow patterns
def del_shadows(re)
  unwanted = [*re]
  @shadows.delete_if do |stored|
    unwanted.include?(stored)
  end
end
is_ru?(str, shadows: []) click to toggle source
# File lib/typogrowth.rb, line 123
# Tells whether +str+ looks predominantly Russian.
#
# Every fragment matching a shadow pattern (the +shadows+ argument plus the
# parser's own +@shadows+) is removed first; the remaining text counts as
# Russian when its Cyrillic letters outnumber one third of its length
# (integer division).
#
# @param str [String] the text to examine; it is not modified
# @param shadows a pattern or an array of patterns to ignore in addition to
#   the parser's own ones
# @return [Boolean]
def is_ru?(str, shadows: [])
  visible = [*shadows].concat(@shadows).uniq.inject(str) { |text, re|
    text.gsub(re, '')
  }
  # Ё and ё lie outside the А-я code point range and are listed explicitly.
  visible.scan(/[А-Яа-яЁё]/).size > visible.length / 3
end
merge(custom) click to toggle source

Recursively merges the initial settings with custom.

To supply your own rules to processing:

  • create a hash of additional rules in the same form as in the standard `typogrowth.yaml` file shipped with the project

  • merge the hash with the standard one using this function

For instance, to add French rules, merge in the following YAML:

:quotes :
  :punctuation :
    :fr : "\\k<quote>\\k<punct>"
…
# File lib/typogrowth.rb, line 52
# Merges +custom+ into this parser's rules by calling +rmerge!+ on the hash
# exposed by +yaml+.
#
# NOTE(review): +rmerge!+ is defined outside this view; presumably a
# recursive, in-place merge — confirm.
#
# @param custom [Hash] additional rules shaped like the standard rules file
# @return whatever +rmerge!+ returns
def merge(custom)
  rules = yaml
  rules.rmerge!(custom)
end
parse(str, lang: :default, shadows: [], sections: nil) click to toggle source

Typographs a string and returns the result as a new string.

Takes the string and changes all the typewriter quotes (doubles and singles) to inches, minutes, seconds, or proper quotation marks.

While the input strings are e.g.

And God said "Baz heard "Bar" once" , and there was light.
That's a 6.3" man, he sees sunsets at 10°20'30" E.

It will produce:

And God said “Baz heard ‘Bar’ once,” and there was light.
That’s a 6.3″ man, he sees sunsets at 10°20′30″ E.

The utility handles dashes as well.

@param str [String] the string to be typographed
@param lang the language to use rules for

# File lib/typogrowth.rb, line 77
    # Typographs +str+ and returns the result as a new string; +str+ itself is
    # left untouched.
    #
    # The text is split into paragraphs on runs of two or more line breaks and
    # every paragraph is processed on its own:
    #
    # 1. each fragment matching a shadow pattern (the +shadows+ argument plus
    #    the parser's own +@shadows+) is replaced by its Base64 form wrapped in
    #    the delimiters from +str.safe_delimiters+, so the rules operate on the
    #    encoded text instead of the fragment;
    # 2. each rule that has an +:re+ key, in every selected section of +@yaml+,
    #    is applied with the first substitute for +lang+;
    # 3. for rules with several substitutes, every inserted first substitute is
    #    re-picked from the list depending on the characters preceding it.
    #
    # The paragraphs are then joined with one blank line and the shadowed
    # fragments are decoded back.
    #
    # Side effect: a rule lacking substitutes for +lang+ gets its +:default+
    # list stored under +lang+ in +@yaml+.
    #
    # NOTE(review): +safe_delimiters+ is defined outside this view; it is
    # assumed to return a pair of marker strings absent from +str+ — confirm.
    #
    # @param str [String] the text to typograph
    # @param lang [Symbol, String] the language whose substitutes are used
    # @param shadows a pattern or an array of patterns to protect in addition
    #   to the parser's own ones
    # @param sections a section key or an array of section keys to restrict
    #   processing to; nil applies every section
    # @return [String] the typographed text
    # @raise [MalformedRulesFile] when a rule has neither substitutes for
    #   +lang+ nor default ones
    def parse(str, lang: :default, shadows: [], sections: nil)
      lang = lang.to_sym
      delims = str.safe_delimiters
      opener = delims.first.to_s
      closer = delims.last.to_s
      hidden = [*shadows].concat(@shadows).uniq
      wanted = sections && [*sections]

      paragraphs = str.split(/\R{2,}/).map { |para|
        hidden.each { |re|
          para.gsub!(re) { |m| "#{opener}#{Base64.encode64 m}#{closer}" }
        }
        @yaml.each { |key, values|
          next if wanted && !wanted.include?(key)
          values.each { |_name, v|
            next unless v[:re]

            v[lang] = v[:default] if !v[lang] || v[lang].size.zero?
            raise MalformedRulesFile.new "Malformed rules file (no subst for #{v})" \
              if !v[lang] || v[lang].size.zero?
            substitutes = v[lang]

            # TODO: rules flagged :alert were meant to log a warning about
            # unsafe substitutions; that logging is not wired in.
            if v[:pattern]
              para.gsub!(/#{v[:re]}/) { |m| m.gsub(/#{v[:pattern]}/, substitutes.first) }
            else
              para.gsub!(/#{v[:re]}/, substitutes.first)
            end
            next unless substitutes.size > 1

            para.gsub!(/#{substitutes.first}/) {
              prev = Regexp.last_match.pre_match
              obsoletes = prev.count(substitutes.join)
              # The counterpart rule is looked up only when one is declared;
              # dereferencing it unconditionally raised for rules without
              # a :compliant key.
              if v[:compliant]
                partner = values[v[:compliant].to_sym]
                compliants = partner[lang] || partner[:default]
                obsoletes -= prev.count(compliants.join)
              end
              if v[:slave]
                obsoletes -= prev.count(v[:original]) + 1
              else
                obsoletes += prev.count(v[:original])
              end

              substitutes[obsoletes % substitutes.size]
            }
          }
        }
        para
      }

      # Delimiters are matched literally, and only the captured payload (not
      # the delimiters around it) is handed to the decoder.
      paragraphs.join("\n\n").gsub(/#{Regexp.escape opener}(.*?)#{Regexp.escape closer}/m) {
        Base64.decode64(Regexp.last_match(1)).force_encoding('UTF-8')
      }
    end