class Ronin::Support::Network::PublicSuffix::List

Represents the [public suffix list].

[public suffix list]: publicsuffix.org/

@api public

@since 1.0.0

Constants

FILE_NAME

File name of the public suffix list.

ONE_DAY

One day in seconds.

PATH

The path to the `~/.cache/ronin/ronin-support/public_suffix_list.dat` list file.

URL

The `https://publicsuffix.org/list/public_suffix_list.dat` URL.

Attributes

path[R]

The path to the list file.

@return [String]

tree[R]

The tree of all public suffix TLDs.

@return [Hash{String => Hash}]

Public Class Methods

download(url: URL, path: PATH) click to toggle source

Downloads the list file.

@param [String] url

An optional alternate URL to download the `public_suffix_list.dat`
file from.

@param [String] path

An optional alternate path to the list file.
# File lib/ronin/support/network/public_suffix/list.rb, line 115
#
# Downloads the list file.
#
# The response body is streamed into a temporary `<path>.part` file, which
# is renamed to `path` only after the whole body has been written. The
# temporary file is removed if the transfer fails part-way.
#
# @param [String] url
#   An optional alternate URL to download the list file from. TLS is only
#   used when the URL's scheme is `https`.
#
# @param [String] path
#   An optional alternate path to the list file.
#
# @raise [Net::HTTPExceptions]
#   The server did not answer with a 2xx response. Nothing is written to
#   `path` in that case.
#
def self.download(url: URL, path: PATH)
  uri       = URI(url)
  part_path = "#{path}.part"

  # only negotiate TLS when the URL actually asks for it
  Net::HTTP.start(uri.host,uri.port, use_ssl: uri.scheme == 'https') do |http|
    # request_uri keeps the query string and is "/" for a bare host
    request = Net::HTTP::Get.new(uri.request_uri)

    http.request(request) do |response|
      # raises unless the status is 2xx, so an error page can never
      # replace a previously downloaded list file
      response.value

      FileUtils.mkdir_p(File.dirname(path))

      begin
        File.open(part_path,'wb') do |file|
          response.read_body do |chunk|
            file.write(chunk)
          end
        end

        FileUtils.mv(part_path,path)
      ensure
        # removes a half-written file; a no-op after a successful rename
        FileUtils.rm_f(part_path)
      end
    end
  end
end
downloaded?(path=PATH) click to toggle source

Determines whether the list file has been previously downloaded.

@param [String] path

An optional alternate path to the list file.

@return [Boolean]

# File lib/ronin/support/network/public_suffix/list.rb, line 86
#
# Checks whether a list file is already present on disk.
#
# @param [String] path
#   An optional alternate path to the list file.
#
# @return [Boolean]
#   `true` only when `path` exists and is a regular file.
#
def self.downloaded?(path=PATH)
  return File.file?(path)
end
load_file(path=PATH) click to toggle source

Loads the public suffix list from the given file.

@param [String] path

The path to the public suffix list file.

@return [List]

The parsed public suffix list file.
# File lib/ronin/support/network/public_suffix/list.rb, line 200
#
# Builds a list object from the contents of a public suffix list file.
#
# @param [String] path
#   The path to the public suffix list file.
#
# @return [List]
#   A new list containing every suffix yielded by {parse}.
#
def self.load_file(path=PATH)
  new(path).tap do |loaded|
    # feed each parsed suffix into the freshly created list
    parse(path) { |entry| loaded << entry }
  end
end
new(path=PATH) click to toggle source

Initializes the public suffix list.

@param [String] path

The path to the list file.

@api private

Calls superclass method
# File lib/ronin/support/network/public_suffix/list.rb, line 71
#
# Creates an empty public suffix list.
#
# @param [String] path
#   The path to the list file. It is only stored, not read.
#
# @api private
#
def initialize(path=PATH)
  super()

  # no suffixes have been added yet
  @tree = {}
  @path = path
end
parse(path=PATH) { |suffix(line, type: type)| ... } click to toggle source

Parses the contents of the list file.

@param [String] path

An optional alternate path to the list file.

@yield [suffix]

If a block is given, it will be passed each parsed suffix from the
list file.

@yieldparam [Suffix] suffix

A parsed suffix in the list file.

@return [Enumerator]

If no block is given, an Enumerator object will be returned.
# File lib/ronin/support/network/public_suffix/list.rb, line 173
#
# Parses the contents of the list file.
#
# Comment lines (starting with `//`) and blank lines are skipped. The
# `// ===BEGIN ICANN DOMAINS===` and `// ===BEGIN PRIVATE DOMAINS===`
# marker lines switch the type (`:icann` or `:private`) that is passed to
# every suffix that follows them. Suffixes that appear before the first
# marker get a `nil` type.
#
# @param [String] path
#   An optional alternate path to the list file.
#
# @yield [suffix]
#   If a block is given, it will be passed each parsed suffix from the
#   list file.
#
# @yieldparam [Suffix] suffix
#   A parsed suffix in the list file.
#
# @return [Enumerator]
#   If no block is given, an Enumerator object will be returned.
#
def self.parse(path=PATH)
  return enum_for(__method__,path) unless block_given?

  type = nil

  # the list is UTF-8 encoded; do not depend on the locale's default
  File.open(path, encoding: Encoding::UTF_8) do |file|
    file.each_line(chomp: true) do |line|
      if line == '// ===BEGIN ICANN DOMAINS==='
        type = :icann
      elsif line == '// ===BEGIN PRIVATE DOMAINS==='
        type = :private
      elsif !line.start_with?('//')
        # a rule only extends up to the first whitespace character, so
        # empty and whitespace-led lines contain no rule at all
        rule = line[/\A\S+/]

        yield Suffix.new(rule, type: type) if rule
      end
    end
  end
end
stale?(path=PATH) click to toggle source

Determines if the downloaded list file is older than one day.

@param [String] path

An optional alternate path to the list file.

@return [Boolean]

# File lib/ronin/support/network/public_suffix/list.rb, line 101
#
# Checks whether the list file needs to be downloaded again.
#
# @param [String] path
#   An optional alternate path to the list file.
#
# @return [Boolean]
#   `true` when `path` is not a regular file, or when its modification
#   time is more than `ONE_DAY` seconds in the past.
#
def self.stale?(path=PATH)
  # a missing file is always considered stale
  return true unless File.file?(path)

  File.mtime(path) < (Time.now - ONE_DAY)
end
update(url: URL, path: PATH) click to toggle source

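Downloads the list file if it has not been downloaded yet, or refreshes the cached copy if it is older than one day. Failures while refreshing a stale copy are ignored.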

@param [String] url

An optional alternate URL to download the `public_suffix_list.dat`
file from.

@param [String] path

An optional alternate path to the list file.
# File lib/ronin/support/network/public_suffix/list.rb, line 145
#
# Makes sure a reasonably fresh list file is available.
#
# A missing file is downloaded and any download error propagates to the
# caller. A stale file is downloaded again on a best-effort basis.
#
# @param [String] url
#   An optional alternate URL to download the list file from.
#
# @param [String] path
#   An optional alternate path to the list file.
#
def self.update(url: URL, path: PATH)
  # first download: there is nothing to fall back on, so let errors surface
  return download(url: url, path: path) unless downloaded?(path)
  return unless stale?(path)

  begin
    download(url: url, path: path)
  rescue
    # ignore any network failures
  end
end

Public Instance Methods

<<(suffix) click to toggle source

Adds a public suffix to the list.

@param [Suffix] suffix

The suffix String to add.

@return [self]

@api private

Calls superclass method
# File lib/ronin/support/network/public_suffix/list.rb, line 220
#
# Adds a public suffix to the list and records it in the lookup tree.
#
# The tree is keyed from the right-most label to the left-most one: each
# label maps to a Hash of the labels that may precede it, or to `nil` when
# nothing further has been recorded below it.
#
# @param [Suffix] suffix
#   The suffix to add. Only its `name` is used to update the tree.
#
# @return [self]
#
# @api private
#
def <<(suffix)
  super(suffix)

  name = suffix.name

  if name.include?('.')
    node = @tree

    # walk adjacent label pairs from right to left, creating levels on demand
    name.split('.').reverse.each_cons(2) do |parent,child|
      node = (node[parent] ||= {})
      node[child] ||= nil
    end
  else
    # single-label suffix: only make sure the key exists
    @tree[name] ||= nil
  end

  self
end
inspect() click to toggle source

Inspects the public suffix list.

@return [String]

The inspected list object.
# File lib/ronin/support/network/public_suffix/list.rb, line 296
#
# Inspects the public suffix list.
#
# @return [String]
#   The class name followed by the path of the list file.
#
def inspect
  class_name = self.class

  "#<#{class_name}: #{@path}>"
end
split(host_name) click to toggle source

Splits a hostname into its name and public suffix components.

@param [String] host_name

The host name to split.

@return [(String, String)]

The host name's name and public suffix components.

@raise [InvalidHostname]

The given hostname does not end with a valid suffix.
# File lib/ronin/support/network/public_suffix/list.rb, line 252
#
# Splits a hostname into its name and public suffix components.
#
# The labels are matched right-to-left against the suffix tree. At each
# level an exact label match is preferred over a `*` wildcard entry, so a
# longer explicit rule is never hidden by a sibling wildcard. Matching
# stops as soon as the labels run out, so a wildcard can never consume a
# label that does not exist.
#
# @param [String] host_name
#   The host name to split.
#
# @return [(String, String)]
#   The host name's name and public suffix components. The name is an
#   empty String when the whole host name is itself a public suffix.
#
# @raise [InvalidHostname]
#   The given hostname does not end with a valid suffix.
#
def split(host_name)
  components = host_name.split('.')
  suffixes   = []
  tree       = @tree

  until tree.nil? || components.empty?
    component = components.last

    # exact match first, wildcard second
    if    tree.key?(component) then subtree = tree[component]
    elsif tree.key?('*')       then subtree = tree['*']
    else
      break
    end

    suffixes.prepend(components.pop)
    tree = subtree
  end

  if suffixes.empty?
    raise(InvalidHostname,"hostname does not have a valid suffix: #{host_name.inspect}")
  end

  return components.join('.'), suffixes.join('.')
end
to_regexp() click to toggle source

Creates a regular expression that can match every domain suffix in the list.

@return [Regexp]

# File lib/ronin/support/network/public_suffix/list.rb, line 282
def to_regexp
  regexp = Regexp.union(@tree.map { |tld,subtree|
    tld_regexp(tld,subtree)
  })

  return /(?<=[^a-zA-Z0-9_-]|^)#{regexp}(?=[^\.a-z0-9-]|$)/
end

Private Instance Methods

tld_regexp(tld,subtree) click to toggle source

Create a regexp to match the given Top-Level-Domain (TLD) and all sub-TLDs below it.

@param [String] tld

The Top-Level-Domain (TLD).

@param [Hash{String => Hash}] subtree

The other TLDs below the given TLD.

@return [Regexp]

The compiled regular expression.
# File lib/ronin/support/network/public_suffix/list.rb, line 315
#
# Builds the pattern for one label of the suffix tree together with
# everything recorded below it.
#
# @param [String] tld
#   The label to match. A `*` label without a subtree becomes a pattern
#   for any run of lowercase letters and digits separated by single
#   hyphens.
#
# @param [Hash{String => Hash}, nil] subtree
#   The labels recorded below `tld`, or `nil` when there are none.
#
# @return [Regexp, String]
#   A Regexp when there is a subtree or the label is `*`, otherwise the
#   label String itself.
#
def tld_regexp(tld,subtree)
  # leaf entry: either the wildcard pattern or the literal label
  unless subtree
    return tld unless tld == '*'

    return /[a-z0-9]+(?:-[a-z0-9]+)*/
  end

  children = if subtree.length == 1
               only_tld, only_subtree = subtree.first

               tld_regexp(only_tld,only_subtree)
             else
               Regexp.union(
                 subtree.map { |sub_tld,sub_subtree|
                   tld_regexp(sub_tld,sub_subtree)
                 }
               )
             end

  # the part in front of the label is optional, so the bare label matches too
  /(?:#{children}\.)?#{tld}/
end