class String
Constants
- BASE_DIGITS
Cached constants for base conversion.
- COLOR_REGEXP
A Regexp to recognize ANSI escape sequences
- LOWERCASE_WORDS
For ‘titlecase`
- STOP_WORDS
For ‘words_without_stopwords`
Public Instance Methods
Cache a `URI::RFC2396_Parser` instance, because it is slow to initialize.
# File lib/epitools/core_ext/string.rb, line 290
#
# Returns the shared URI::RFC2396_Parser. The parser is built on the first
# call and stored in a class variable so later calls reuse the same object.
def _rfc2396_parser
  unless defined?(@@rfc2396_parser) && @@rfc2396_parser
    @@rfc2396_parser = URI::RFC2396_Parser.new
  end
  @@rfc2396_parser
end
Convert this string into a string describing this many of the string. (Note: Doesn’t know anything about proper grammar.)
Example:
"cookie".amount(0) #=> "0 cookies" "shirt".amount(17) #=> "17 shirts" "dollar".amount(-10) #=> "-10 dollars" "love".amount(1) #=> "1 love"
# File lib/epitools/core_ext/string.rb, line 507
#
# Describes `n` of this string by naively appending an "s" for every count
# except 1 and -1. No grammar rules are applied.
#
#   "cookie".amount(0)   #=> "0 cookies"
#   "love".amount(1)     #=> "1 love"
#
# @param n [Numeric] how many of this thing there are
# @return [String]
def amount(n)
  # A zero count is always rendered as the literal "0".
  return "0 #{self}s" if 0 == n

  plural = (1 == n || -1 == n) ? "" : "s"
  "#{n} #{self}#{plural}"
end
Are there any non-whitespace characters in the string?
# File lib/epitools/core_ext/string.rb, line 20
#
# True when the string is not blank (see #blank?).
def any?
  !blank?
end
‘true’ if the string’s length is 0 (after whitespace has been stripped from the ends)
# File lib/epitools/core_ext/truthiness.rb, line 126
#
# True when nothing is left after stripping whitespace from both ends.
def blank?
  strip.empty?
end
This string contains ANSI (VT100) control codes
# File lib/epitools/core_ext/string.rb, line 98
#
# Looks for the first COLOR_REGEXP match in the string.
#
# @return [String, nil] the matched text (truthy), or nil when there is none
def contains_color?
  slice(COLOR_REGEXP)
end
deflate the string
# File lib/epitools/core_ext/string.rb, line 449
#
# Compresses the string with zlib's deflate.
#
# @param level [Integer, nil] compression level handed straight to Zlib
# @return [String] the deflated bytes
def deflate(level=nil)
  compressor = Zlib::Deflate
  compressor.deflate(self, level)
end
Remove redundant whitespace AND newlines.
# File lib/epitools/core_ext/string.rb, line 63
#
# Collapses every run of whitespace (newlines included) into a single space,
# then trims both ends.
def dewhitespace
  collapsed = gsub(/\s+/, ' ')
  collapsed.strip
end
Like each_line, but removes the trailing \n from each line.
# File lib/epitools/core_ext/string.rb, line 126
#
# Walks the string line by line, yielding each line with its line ending
# chomped off.
def each_chomped
  each_line do |raw_line|
    yield(raw_line.chomp)
  end
end
Iterate over slices of the string of size ‘slice_width`.
# File lib/epitools/core_ext/string.rb, line 270
#
# Yields consecutive substrings of at most `slice_width` characters; the last
# slice may be shorter.
#
# @param slice_width [Integer] number of characters per slice (must be > 0)
# @yieldparam slice [String]
# @return [nil, Enumerator] nil when a block is given, otherwise an Enumerator
#   over the slices
# @raise [ArgumentError] if slice_width is not positive (a zero or negative
#   step can never reach the end of the string)
def each_slice(slice_width, &block)
  unless slice_width > 0
    raise ArgumentError, "slice_width must be positive (got #{slice_width.inspect})"
  end
  return enum_for(:each_slice, slice_width) unless block

  0.step(size - 1, slice_width) { |offset| block.call(self[offset, slice_width]) }
  nil
end
‘true` if this string ends with the substring
# File lib/epitools/core_ext/string.rb, line 470
#
# True if this string ends with `substring`.
#
# @param substring [String]
# @return [Boolean]
def endswith?(substring)
  # An empty suffix always matches. Without this guard the slice below would
  # be self[0..-1] (because -0 == 0) and compare the whole string against "".
  return true if substring.size.zero?

  self[-substring.size..-1] == substring
end
# File lib/epitools/core_ext/string.rb, line 769
#
# Evaluates `code` with Kernel#eval, using the binding of `block` when one
# is supplied.
#
# NOTE: this executes arbitrary Ruby; never pass untrusted text as `code`.
#
# @param code [String] Ruby source to evaluate
# @param block [Proc, nil] proc whose binding supplies the evaluation context
def eval_block(code, block)
  context = block && block.binding
  eval(code, context)
end
Could this string be cast to a float?
# File lib/epitools/core_ext/truthiness.rb, line 112
#
# True when the whole string (ignoring surrounding whitespace) looks like a
# decimal float: optional minus sign, digits, a dot, digits.
#
# @return [Boolean]
def float?
  # \A and \z anchor the entire string; ^ and $ would accept any single
  # matching line of a multi-line string.
  strip.match?(/\A-?\d+\.\d+\z/)
end
Convert a string encoded in some base <= 64 into an integer. (See Integer#to_base
for more info.)
# File lib/epitools/core_ext/string.rb, line 363
#
# Reads the string as a number written in `base` and returns the integer.
# Each character is translated to its digit value through BASE_DIGITS; the
# rightmost character is the least significant digit.
#
# @param base [Integer] the radix the string is written in
# @return [Integer]
def from_base(base=10)
  chars.reverse_each.with_index.sum(0) do |char, power|
    (base**power) * BASE_DIGITS[char]
  end
end
# File lib/epitools/core_ext/string.rb, line 372
#
# Shortcut for from_base with a radix of 62.
def from_base62
  from_base 62
end
Decode a mime64/base64 encoded string
# File lib/epitools/core_ext/string.rb, line 387
#
# Decodes the string from base64 using the "m" unpack directive.
#
# @return [String] the decoded bytes
def from_base64
  unpack1("m")
end
Convert a bencoded string (bencode is the serialization format used by BitTorrent, not Python's pickle) to Ruby objects
# File lib/epitools/core_ext/string.rb, line 404
#
# Hands the string to BEncode.load and returns whatever it produces.
def from_bencode
  BEncode.load self
end
Converts time duration strings (mm:ss, mm:ss.dd, hh:mm:ss, or dd:hh:mm:ss) to seconds. (The reverse of Integer#to_hms)
# File lib/epitools/core_ext/string.rb, line 522
#
# Turns a colon-separated duration ("mm:ss", "hh:mm:ss", "dd:hh:mm:ss",
# optionally with fractional seconds) into a single total.
#
# Fields are paired right-to-left with seconds, minutes, hours and days, and
# each number is converted by calling the method of that name on it.
def from_hms
  fields = split(':')
  last_index = fields.size - 1

  numbers = fields.each_with_index.map do |field, i|
    # Only the final field may carry fractional seconds; it becomes a Float.
    if i == last_index && field =~ /\d+\.\d+/
      field.to_f
    else
      field.to_i
    end
  end

  numbers.reverse.zip(%w[seconds minutes hours days]).sum { |num, units| num.send(units) }
end
Parse this string as JSON
# File lib/epitools/core_ext/string.rb, line 478
#
# Parses the string as a JSON document via JSON.parse.
#
# @return [Object] the parsed Ruby structure
def from_json
  JSON.parse self
end
Parse this string as YAML
# File lib/epitools/core_ext/string.rb, line 485
#
# Parses the string as a YAML document via YAML.load.
#
# NOTE(review): YAML.load can instantiate arbitrary objects on some Psych
# versions; do not feed it untrusted input without checking which loader is
# in use.
def from_yaml
  YAML.load self
end
gunzip the string
# File lib/epitools/core_ext/string.rb, line 441
#
# Decompresses gzip data held in this string.
#
# @return [String] the decompressed bytes
def gunzip
  # The block form of wrap closes the reader once the block finishes, so the
  # zlib stream is released here rather than left for the garbage collector.
  Zlib::GzipReader.wrap(StringIO.new(self)) { |reader| reader.read }
end
gzip the string
# File lib/epitools/core_ext/string.rb, line 432
#
# Compresses the string into gzip format, writing into an in-memory buffer.
#
# @param level [Integer, nil] compression level handed to Zlib::GzipWriter
# @return [String] the gzip-encoded bytes
def gzip(level=nil)
  buffer = StringIO.new
  writer = Zlib::GzipWriter.new(buffer, level)
  begin
    writer.write(self)
  ensure
    # Closing flushes the gzip trailer into the buffer.
    writer.close
  end
  buffer.string
end
Print a hexdump of the string to STDOUT (coloured, if the terminal supports it)
# File lib/epitools/core_ext/string.rb, line 597
#
# Passes the string to Hex.dump, which does the actual rendering.
def hexdump
  Hex.dump self
end
Indent all the lines. If “prefix” is a string, prepend that string to each line. If it’s an integer, prepend that many spaces.
# File lib/epitools/core_ext/string.rb, line 167
#
# Prefixes every line of the string.
#
# @param prefix [String, Integer] text to prepend, or a number of spaces
# @yieldparam line [String] each prefixed line, when a block is given
# @return [String, Array<String>] the re-joined string; with a block, the
#   array of original lines
def indent(prefix=" ")
  prefix = " " * prefix if prefix.is_an?(Integer)

  if block_given?
    return lines.each { |line| yield prefix + line }
  end

  indented = lines.map { |line| prefix + line }
  indented.join('')
end
inflate the string
# File lib/epitools/core_ext/string.rb, line 456
#
# Decompresses zlib-deflated data held in this string.
#
# @return [String] the inflated bytes
def inflate
  decompressor = Zlib::Inflate
  decompressor.inflate(self)
end
Could this string be cast to an integer?
# File lib/epitools/core_ext/truthiness.rb, line 105
#
# True when the whole string (ignoring surrounding whitespace) is an
# optionally negative run of digits.
#
# @return [Boolean]
def integer?
  # \A and \z anchor the entire string; ^ and $ would accept any single
  # matching line of a multi-line string.
  strip.match?(/\A-?\d+\z/)
end
MD5 the string
# File lib/epitools/core_ext/string.rb, line 411
#
# Hex-encoded MD5 digest of the string.
#
# @return [String] 32 lowercase hex characters
def md5
  Digest::MD5.hexdigest(self)
end
Use Nokogiri to parse this string as HTML, and return an indented version
# File lib/epitools/core_ext/string.rb, line 180
#
# Parses the string as an HTML fragment with Nokogiri and serializes it back
# as XHTML using the given indent.
#
# @param indent [Integer] value passed as the `indent:` option of to_xhtml
def nice_html(indent=2)
  fragment = Nokogiri::HTML.fragment(self)
  fragment.to_xhtml(indent: indent)
end
Like each_line, but skips blank lines and removes the \n's.
# File lib/epitools/core_ext/string.rb, line 115
#
# Splits the string on $/ (Ruby's input record separator) and drops every
# piece that is blank.
#
# @return [Array<String>]
def nice_lines
  pieces = split($/)
  pieces.reject { |piece| piece.blank? }
end
Could this string be cast to a number?
# File lib/epitools/core_ext/truthiness.rb, line 119
#
# True when the whole string (ignoring surrounding whitespace) is a decimal
# number: optional minus sign, one or more digits, then optionally a dot and
# more digits.
#
# @return [Boolean]
def number?
  # \d+ (rather than a single \d) lets numbers such as "12.5" through, and
  # \A/\z anchor the entire string instead of any one line of it.
  strip.match?(/\A-?\d+\.?\d*\z/)
end
Translate numbers with units (like 25k, 150GB, 15%, 5 hours) into their expanded numeric value
# File lib/epitools/core_ext/string.rb, line 536
#
# Finds the first "<number><unit>" pair in the string (for example "25k",
# "150GB", "15%", "5 hours") and returns the number scaled by that unit.
#
# Unit suffixes are case-insensitive, with one exception: an uppercase "Q"
# means quintillion while a lowercase "q" means quadrillion.
#
# @return [Numeric]
# @raise [RuntimeError] if no number+unit pair is found, or the unit is unknown
def parse_units
  # A "%" only counts as a unit when followed by whitespace or end of line.
  found = match(/(\d[\d_]*(?:\.\d+)?)\s*([a-zA-Z]+\b|%(?=\s|$))/)
  unless found
    raise "Couldn't find any units to parse! (expecting: '<a number><some letters>')"
  end

  digits, suffix = found.captures
  num = digits.include?(".") ? digits.to_f : digits.to_i

  # Keep a lone capital "Q" as-is so it can be told apart from "q" below.
  units = suffix == "Q" ? suffix : suffix.downcase

  case units
  when "%" # 0.01
    num / 100.0
  when "k" # 10**3
    num.thousand
  when "m", "mm" # 10**6
    num.million
  when "b", "bn" # 10**9
    num.billion
  when "gib", "gb", "g"
    num * 2**30
  when "mib", "mb"
    num * 2**20
  when "kib", "kb"
    num * 2**10
  when "t", "tb" # 10**12
    num.trillion
  when "q" # 10**15
    num.quadrillion
  when "Q" # 10**18
    num.quintillion
  when "min" # 1.minute
    num.minutes
  when "hours", "h", "hr", "hrs" # 1.hour
    num.hours
  when "d", "days", "dy"
    num.days
  else
    raise "Invalid units: #{units.inspect}, in: #{self.inspect}"
  end
end
Is there anything in the string? (ignoring whitespace/newlines)
# File lib/epitools/core_ext/truthiness.rb, line 133
#
# The opposite of #blank?.
def present?
  !blank?
end
The Infamous Caesar-Cipher. Unbreakable to this day.
# File lib/epitools/core_ext/string.rb, line 283
#
# Rotates every ASCII letter by 13 places, keeping its case; all other
# characters pass through unchanged.
#
# @return [String]
def rot13
  rotated  = 'n-za-mN-ZA-M'
  alphabet = 'a-zA-Z'
  tr(rotated, alphabet)
end
# File lib/epitools/core_ext/string.rb, line 254
#
# Lazily enumerates the sentences in the string. The text is split after each
# run of ".", "!" or "?", and every piece is stripped with its internal
# whitespace collapsed to single spaces.
#
# @return [Enumerator::Lazy]
def sentences
  pieces = split_after(/[\.\!\?]+/)
  pieces.lazy.map do |piece|
    piece.strip.gsub(/\s+/, " ")
  end
end
SHA1 the string
# File lib/epitools/core_ext/string.rb, line 418
#
# Hex-encoded SHA-1 digest of the string.
#
# @return [String] 40 lowercase hex characters
def sha1
  Digest::SHA1.hexdigest(self)
end
SHA256 the string
# File lib/epitools/core_ext/string.rb, line 425
#
# Hex-encoded SHA-256 digest of the string.
#
# @return [String] 64 lowercase hex characters
def sha256
  Digest::SHA256.hexdigest(self)
end
Escape shell characters (globs, quotes, parens, etc.)
# File lib/epitools/core_ext/string.rb, line 34
#
# Escapes the string for use as a single shell word, via Shellwords.escape.
#
# @return [String]
def shellescape
  Shellwords.escape self
end
Lowercase the string and smash all of its word characters together (removing whitespace and punctuation)
# File lib/epitools/core_ext/string.rb, line 56
#
# Lowercases the string and concatenates its runs of word characters,
# dropping everything in between.
#
# @return [String]
def smash
  word_runs = downcase.scan(/\w+/)
  word_runs.join
end
# File lib/epitools/core_ext/string.rb, line 155
#
# Like split_at, but each piece keeps the boundary text that ended it.
#
# @param boundary [String, Regexp]
def split_after(boundary)
  split_at boundary, include_boundary: true
end
# File lib/epitools/core_ext/string.rb, line 133
#
# Splits the string at every occurrence of `boundary`, returning the pieces
# as an Enumerator.
#
# @param boundary [String, Regexp] a String is matched literally
# @param options [Hash] `include_boundary: true` keeps the matched boundary
#   at the end of each piece (default: false)
# @return [Enumerator<String>]
def split_at(boundary, **options)
  include_boundary = options[:include_boundary] || false
  boundary = Regexp.new(Regexp.escape(boundary)) if boundary.is_a?(String)

  Enumerator.new do |yielder|
    # A fresh scanner per enumeration makes the Enumerator re-iterable; a
    # single shared scanner would be exhausted after the first pass.
    scanner = StringScanner.new(self)

    loop do
      piece = scanner.scan_until(boundary)

      unless piece
        # No more boundaries: whatever is left is the final piece.
        yielder << scanner.rest if scanner.rest?
        break
      end

      yielder << (include_boundary ? piece : piece[0..-(scanner.matched_size + 1)])
    end
  end
end
# File lib/epitools/core_ext/string.rb, line 159
#
# Placeholder: always raises, because this operation is not implemented.
#
# @raise [RuntimeError] always
def split_before(boundary)
  raise RuntimeError, "Why would you want this? Sorry, unimplemented. Send patches."
end
‘true` if this string starts with the substring
# File lib/epitools/core_ext/string.rb, line 462
#
# True if this string begins with `substring`.
#
# @param substring [String]
# @return [Boolean]
def startswith?(substring)
  head = slice(0, substring.size)
  head == substring
end
Remove ANSI color codes.
# File lib/epitools/core_ext/string.rb, line 107
#
# Returns a copy of the string with every COLOR_REGEXP match deleted.
#
# @return [String]
def strip_color
  gsub(COLOR_REGEXP) { '' }
end
Remove redundant whitespaces (not including newlines).
# File lib/epitools/core_ext/string.rb, line 49
#
# Collapses each run of tabs and spaces into one space and trims both ends.
# Newlines inside the string are left alone.
#
# @return [String]
def tighten
  squeezed = gsub(/[\t ]+/, ' ')
  squeezed.strip
end
Return a new string converted to “Title Case” (first letter of each word capitalized)
# File lib/epitools/core_ext/string.rb, line 70
#
# Returns a new, "Title Cased" copy of the string: everything is lowercased,
# then the first letter of each word is capitalized. Words listed in
# LOWERCASE_WORDS stay lowercase unless they are the very first token.
#
# @return [String]
def titlecase
  # Split at word boundaries, except right after a <letter><apostrophe> pair,
  # so the separators between words survive as tokens of their own.
  tokens = downcase.split(/(?<!\w')\b/)

  tokens.each_with_index.map do |word, i|
    if i > 0 && LOWERCASE_WORDS.include?(word)
      word
    else
      word.gsub(/^\w/) { |c| c.upcase } # capitalize first letter
    end
  end.join('')
end
Convert string to “Title Case” (first letter of each word capitalized)
# File lib/epitools/core_ext/string.rb, line 86
#
# In-place variant of #titlecase: overwrites this string's contents with the
# title-cased text.
def titlecase!
  title_cased = titlecase
  replace(title_cased)
end
Convert the string to a Path
object (for representing files/directories).
# File lib/epitools/minimal.rb, line 269
#
# Looks the string up through Path[] and returns the result.
def to_Path
  Path[self]
end
Convert a string (encoded in base16 “hex” – for example, an MD5 or SHA1 hash) into “base62” format. (See Integer#to_base62
for more info.)
# File lib/epitools/core_ext/string.rb, line 380
#
# Reads the string as a hexadecimal number and re-encodes that integer with
# Integer#to_base62.
def to_base62
  hex_value = to_i(16)
  hex_value.to_base62
end
Encode into a mime64/base64 string
# File lib/epitools/core_ext/string.rb, line 395
#
# Encodes the string as base64 using the "m" pack directive.
#
# @return [String]
def to_base64
  Array(self).pack("m")
end
# Converts the string to a BigDecimal by calling `BigDecimal(self)`.
#
# NOTE(review): the rendered source shows a bare `BigDecimal` immediately
# before the `BigDecimal(self)` call. It presumably references the constant
# first so an autoload can fire before the conversion; confirm against the
# real file before reformatting this method.
# File lib/epitools/core_ext/string.rb, line 312 def to_bigdecimal BigDecimal BigDecimal(self) end
Raw bytes to an integer (as big as necessary)
# File lib/epitools/core_ext/string.rb, line 349
#
# Interprets the string's raw bytes as one unsigned integer of arbitrary
# size.
#
# @param big_endian [Boolean] when false (the default) the first byte is the
#   least significant; when true the last byte is
# @return [Integer]
def to_i_from_bytes(big_endian=false)
  ordered = big_endian ? bytes.reverse : bytes
  # Byte i contributes its value shifted left by i whole bytes.
  ordered.each_with_index.sum { |byte, i| byte << (8 * i) }
end
Convert a query string to a hash of params
# File lib/epitools/core_ext/string.rb, line 331
#
# Parses a query string ("a=1&b=2", with "&" or ";" as separators) into a
# Hash. Keys and values are CGI-unescaped. A key that appears once maps to a
# single String; a key that appears several times maps to an Array of its
# values in order. Segments without an "=" are ignored.
#
# @return [Hash{String => String, Array<String>}]
def to_params
  collected = {}

  split(/[&;]/).each do |pair|
    key, value = pair.split('=', 2).map { |part| CGI.unescape(part) }
    next unless key && value

    (collected[key] ||= []) << value
  end

  # Unwrap single-element lists so unique keys map directly to their value.
  collected.transform_values { |values| values.size > 1 ? values : values.first }
end
See: weblog.raganwald.com/2007/10/stringtoproc.html
Ported from the String
Lambdas in Oliver Steele’s Functional Javascript osteele.com/sources/javascript/functional/
This work is licensed under the MIT License:
© 2007 Reginald Braithwaite Portions Copyright © 2006 Oliver Steele
## Basic Usage
'x+1'.to_proc[2];
→ 3
‘x+2*y’.to_proc[2, 3];
→ 8
or (more usefully) later:
square = ‘x*x’.to_proc; square(3);
→ 9
square(4);
→ 16
## Explicit parameters
If the string contains a ->, this separates the parameters from the body.
‘x y -> x+2*y’.to_proc[2, 3];
→ 8
‘y x -> x+2*y’.to_proc[2, 3];
→ 7
Otherwise, if the string contains a _, it’s a unary function and _ is name of the parameter:
'_+1'.to_proc[2];
→ 3
'_*_'.to_proc[3];
→ 9
## Implicit parameters
If the string doesn’t specify explicit parameters, they are implicit.
If the string starts with an operator or relation besides -, or ends with an operator or relation, then its implicit arguments are placed at the beginning and/or end:
'*2'.to_proc[2];
→ 4
'/2'.to_proc[4];
→ 2
'2/'.to_proc[4];
→ 0.5
‘/’.to_proc[2, 4];
→ 0.5
’.’ counts as a right operator:
'.abs'.to_proc[-1];
→ 1
Otherwise, the variables in the string, in order of occurrence, are its parameters.
'x+1'.to_proc[2];
→ 3
'x*x'.to_proc[3];
→ 9
‘x + 2*y’.to_proc[1, 2];
→ 5
‘y + 2*x’.to_proc[1, 2];
→ 5
## Chaining
Chain -> to create curried functions.
‘x y -> x+y’.to_proc[2, 3];
→ 5
'x -> y -> x+y'.to_proc[2][3];
→ 5
plus_two = 'x -> y -> x+y'.to_proc[2]; plus_two[3]
→ 5
Using String#to_proc
in Idiomatic Ruby
Ruby on Rails popularized Symbol#to_proc, so much so that it will be part of Ruby 1.9.
If you like:
%w[dsf fgdg fg].map(&:capitalize)
→ ["Dsf", "Fgdg", "Fg"]
then %w[dsf fgdg fg].map(&‘.capitalize’) isn’t much of an improvement.
But what about doubling every value in a list:
(1..5).map &‘*2’
→ [2, 4, 6, 8, 10]
Or folding a list:
(1..5).inject &‘+’
→ 15
Or having fun with factorial:
factorial = "(1.._).inject &'*'".to_proc; factorial[5]
→ 120
LICENSE: Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# File lib/epitools/core_ext/string.rb, line 734
#
# Compiles this string into a Proc. Three forms are recognized, checked in
# this order:
#
# 1. Explicit parameters: "x y -> x+2*y". Each "->" introduces another level,
#    so "x -> y -> x+y" yields a proc that returns a proc.
# 2. Underscore: an expression containing a standalone "_" becomes a
#    one-argument proc whose parameter is "_".
# 3. Implicit parameters: an expression that starts and/or ends with an
#    operator gets an argument inserted on that side ("*2", "2/", "+").
#    Otherwise the identifiers left in the expression, in order of first
#    appearance, become the parameters.
#
# The generated source is evaluated through eval_block, in the binding of
# `block` when a block is given.
#
# NOTE: this evaluates the string as Ruby code; never call it on untrusted
# input.
#
# @return [Proc]
def to_proc(&block)
  params = []
  expr = self
  sections = expr.split(/\s*->\s*/m)

  if sections.length > 1
    # Fold from the innermost body outwards, wrapping each level in a Proc.
    nested = sections.reverse!.inject do |body, names|
      "(Proc.new { |#{names.split(/\s/).join(', ')}| #{body} })"
    end
    eval_block(nested, block)
  elsif expr.match(/\b_\b/)
    eval_block("Proc.new { |_| #{expr} }", block)
  else
    left_section  = expr.match(/^\s*(?:[+*\/%&|\^\.=<>\[]|!=)/m)
    right_section = expr.match(/[+\-*\/%&|\^\.=<>!]\s*$/m)

    if left_section || right_section
      # The implicit operands must be ordinary local names: a "$"-prefixed
      # name is a global variable in Ruby and is rejected as a block
      # parameter.
      if left_section
        params.push('__left')
        expr = '__left' + expr
      end
      if right_section
        params.push('__right')
        expr = expr + '__right'
      end
    else
      # Remove everything that is not a free variable (capitalized names,
      # ".method" calls, "key:" labels, self/arguments, string literals),
      # then collect the remaining identifiers in order of appearance.
      self.gsub(
        /(?:\b[A-Z]|\.[a-zA-Z_$])[a-zA-Z_$\d]*|[a-zA-Z_$][a-zA-Z_$\d]*:|self|arguments|'(?:[^'\\]|\\.)*'|"(?:[^"\\]|\\.)*"/, ''
      ).scan(
        /([a-z_$][a-z_$\d]*)/i
      ) do |v|
        params.push(v) unless params.include?(v)
      end
    end

    eval_block("Proc.new { |#{params.join(', ')}| #{expr} }", block)
  end
end
Convert \r\n to \n
# File lib/epitools/core_ext/string.rb, line 27
#
# Returns a copy with every CRLF line ending replaced by a bare LF.
#
# @return [String]
def to_unix
  crlf = "\r\n"
  lf = "\n"
  gsub(crlf, lf)
end
URI.parse the string and return an URI
object
# File lib/epitools/core_ext/string.rb, line 323
#
# Parses the string with URI.parse.
#
# @return [URI::Generic] the parsed URI object
def to_uri
  URI.parse(self)
end
Does this string contain something that means roughly “true”?
# File lib/epitools/core_ext/truthiness.rb, line 140
#
# True when the string, stripped and lowercased, is one of the recognized
# affirmative words.
#
# @return [Boolean]
def truthy?
  affirmatives = ["1", "true", "yes", "on", "enabled", "affirmative"]
  affirmatives.include?(strip.downcase)
end
Unmarshal the string (transform it into Ruby datatypes).
# File lib/epitools/core_ext/string.rb, line 492
#
# Rebuilds the Ruby object serialized in this string by Marshal.
#
# NOTE(review): unmarshalling can instantiate arbitrary objects; only use it
# on data from a trusted source.
def unmarshal
  Marshal.restore(self)
end
Convert an URI’s %XXes into regular characters.
# File lib/epitools/core_ext/string.rb, line 305
#
# Decodes %XX sequences using the shared RFC 2396 parser's `unescape`.
#
# @return [String]
def urldecode
  parser = _rfc2396_parser
  parser.unescape(self)
end
Convert non-URI characters into %XXes.
# File lib/epitools/core_ext/string.rb, line 297
#
# Encodes the string using the shared RFC 2396 parser's `escape` (the
# original source notes it stands in for URI.escape).
#
# @return [String]
def urlencode
  parser = _rfc2396_parser
  parser.escape(self)
end
Do what a browser would do when you type something into the address bar
# File lib/epitools/core_ext/string.rb, line 41
#
# Escapes the string with the RFC 2396 parser's `escape`.
#
# @return [String]
def urlescape
  # Reuse the parser cached by _rfc2396_parser instead of keeping a second
  # copy in another class variable.
  _rfc2396_parser.escape(self)
end
# File lib/epitools/core_ext/string.rb, line 258
#
# Extracts every run of alphanumeric characters from the string.
#
# @return [Array<String>]
def words
  scan(/[[:alnum:]]+/)
end
# File lib/epitools/core_ext/string.rb, line 262
#
# Lowercases the string, extracts its words, and removes every entry that
# appears in STOP_WORDS.
#
# @return [Array<String>]
def words_without_stopwords
  lowered_words = downcase.words
  lowered_words - STOP_WORDS
end
Word-wrap the string so each line is at most ‘width` wide. Returns a string, or, if a block is given, yields each word-wrapped line to the block.
If `width` is nil, find the current width of the terminal and use that. If `width` is negative, it is added to the terminal’s current width (that is, its absolute value is subtracted).
# File lib/epitools/core_ext/string.rb, line 194
#
# Word-wraps the string so each line is at most `width` characters wide.
# Lines break at whitespace where possible; a word longer than `width` is cut
# at the width limit.
#
# @param width [Integer, nil] maximum line width. nil means one less than the
#   terminal width; a negative value is added to that figure.
# @yieldparam line [String] each wrapped line, when a block is given
# @return [String, Array<String>] the wrapped text joined with "\n" (or the
#   string itself when it already fits); with a block, the lines are yielded
# @raise [ArgumentError] if the effective width is below 1 and the string
#   does not already fit
def wrap(width=nil)
  if width.nil? || width < 0
    term_width, _ = Term.size
    usable = term_width - 1
    width = width ? usable + width : usable
  end

  if size <= width
    # Already fits: still hand the single line to the block, if there is one.
    yield self if block_given?
    return self
  end

  # A window of zero or fewer characters could never advance through the text.
  raise ArgumentError, "wrap width must be at least 1 (got #{width})" if width < 1

  wrapped = []
  start_pos = 0
  end_pos = width

  loop do
    split_pos = rindex(/\s/, end_pos)
    # No whitespace inside the current window (the nearest one lies before
    # this line's start): hard-break the over-long word at the width limit.
    split_pos = end_pos if split_pos.nil? || split_pos < start_pos

    wrapped << self[start_pos...split_pos]

    start_pos = index(/\S/, split_pos)
    break if start_pos.nil?

    end_pos = start_pos + width
    if end_pos >= size
      # Everything that is left fits on one line.
      wrapped << self[start_pos..-1]
      break
    end
  end

  if block_given?
    wrapped.each { |line| yield line }
  else
    wrapped.join("\n")
  end
end
Wrap all lines at window size, and indent
# File lib/epitools/core_ext/string.rb, line 239
#
# Word-wraps the string (see #wrap) and prepends `prefix` to every resulting
# line. The prefix's visible size (its size after strip_color) is deducted
# from the width so that prefixed lines still fit.
#
# @param prefix [String, Numeric] text to prepend, or a number of spaces
# @param width [Integer, nil] overall width; nil means the terminal width
# @return [String]
def wrap_and_indent(prefix, width=nil)
  prefix = " " * prefix if prefix.is_a?(Numeric)
  prefix_size = prefix.strip_color.size

  if width
    width -= prefix_size
  elsif prefix_size > 0
    # A negative width tells wrap to shrink the terminal width by that much.
    width = -prefix_size
  end
  # With no width and an empty prefix, width stays nil so wrap uses the full
  # terminal width; -0 would be passed on as a literal width of 0.

  wrap(width).each_line.map { |line| prefix + line }.join
end