class Fluent::Plugin::SanitizerFilter

Public Instance Methods

configure(conf) click to toggle source
Calls superclass method
# File lib/fluent/plugin/filter_sanitizer.rb, line 48
# Validates every entry of @rules and precomputes @sanitizerules, the list of
# rule tuples that #filter iterates over. Also stores the hashing salt.
#
# conf - configuration object; only conf['hash_salt'] is read directly here.
#
# Raises Fluent::ConfigError when a rule has no keys, when pattern_ipv4 or
# pattern_fqdn is not a real boolean, when pattern_regex is not a Regexp, or
# when a rule enables no pattern at all.
def configure(conf)
  super

  # conf['hash_salt'] may be nil when the option is not set; normalise it to a
  # String so the digest helpers can always concatenate it with the value.
  @salt = conf['hash_salt'].to_s

  @sanitizerules = @rules.map do |rule|
    keys = rule.keys
    if keys.empty?
      raise Fluent::ConfigError, "You need to specify at least one key in rule statement."
    end

    # Only literal true/false are accepted; anything else (e.g. nil) is a
    # configuration mistake and must not be silently treated as "disabled".
    pattern_ipv4 = rule.pattern_ipv4
    unless [true, false].include?(pattern_ipv4)
      raise Fluent::ConfigError, "true or false is available for pattern_ipv4 option."
    end

    pattern_fqdn = rule.pattern_fqdn
    unless [true, false].include?(pattern_fqdn)
      raise Fluent::ConfigError, "true or false is available for pattern_fqdn option."
    end

    pattern_regex = rule.pattern_regex
    unless pattern_regex.is_a?(Regexp)
      raise Fluent::ConfigError, "You need to specify Regexp for pattern_regex option."
    end
    regex_capture_group = rule.regex_capture_group

    pattern_keywords = rule.pattern_keywords

    # /^$/ is treated as "no regex given": a rule with that regex, both
    # boolean patterns off and no keywords would never sanitize anything.
    if !pattern_ipv4 && !pattern_fqdn && pattern_regex == /^$/ && pattern_keywords.empty?
      raise Fluent::ConfigError, "You need to specify at least one pattern option in the rule statement."
    end

    [
      keys, pattern_ipv4, pattern_fqdn, pattern_regex, regex_capture_group,
      pattern_keywords, rule.pattern_regex_prefix, rule.pattern_keywords_prefix
    ]
  end
end
filter(tag, time, record) click to toggle source
# File lib/fluent/plugin/filter_sanitizer.rb, line 97
# Applies every configured sanitize rule to +record+ in place and returns it.
# For each key of each rule the enabled patterns run in a fixed order
# (ipv4, fqdn, regex, keywords); each step re-reads the field, so later steps
# see the output of earlier ones. Fields whose value is nil/false are skipped.
def filter(tag, time, record)
  @sanitizerules.each do |rule|
    keys, ipv4_on, fqdn_on, regex, capture_group, keywords, regex_prefix, keywords_prefix = rule

    keys.each do |key|
      accessor = record_accessor_create("$.#{key}")

      accessor.set(record, sanitize_ipv4_val(accessor.call(record).to_s)) if ipv4_on && accessor.call(record)
      accessor.set(record, sanitize_fqdn_val(accessor.call(record).to_s)) if fqdn_on && accessor.call(record)

      if regex && accessor.call(record)
        masked =
          if capture_group.empty?
            sanitize_regex_val(accessor.call(record).to_s, regex_prefix, regex)
          else
            sanitize_regex_val_capture(accessor.call(record).to_s, regex_prefix, regex, capture_group)
          end
        accessor.set(record, masked)
      end

      next if keywords.empty?
      next unless accessor.call(record)

      accessor.set(record, sanitize_keywords_val(accessor.call(record).to_s, keywords, keywords_prefix))
    end
  end
  record
end
include_fqdn?(str) click to toggle source
# File lib/fluent/plugin/filter_sanitizer.rb, line 134
# True when some line of +str+ contains an FQDN-shaped substring: at least two
# dot-terminated labels followed by an alphabetic suffix of two or more chars.
def include_fqdn?(str)
  fqdn_somewhere = /^.*\b(([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9\-]*[a-zA-Z0-9])\.){2,}([A-Za-z]|[A-Za-z][A-Za-z\-]*[A-Za-z]){2,}.*$/
  str.match?(fqdn_somewhere)
end
include_ipv4?(str) click to toggle source
# File lib/fluent/plugin/filter_sanitizer.rb, line 122
# True when some line of +str+ contains four dot-separated groups of one to
# three digits. The numeric range of each group is not checked.
def include_ipv4?(str)
  ipv4_somewhere = /^.*\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}.*$/
  str.match?(ipv4_somewhere)
end
is_fqdn?(str) click to toggle source
# File lib/fluent/plugin/filter_sanitizer.rb, line 138
# True when a whole line of +str+ is FQDN-shaped: at least two dot-terminated
# labels followed by an alphabetic suffix of two or more characters.
def is_fqdn?(str)
  fqdn_only = /^\b(([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9\-]*[a-zA-Z0-9])\.){2,}([A-Za-z]|[A-Za-z][A-Za-z\-]*[A-Za-z]){2,}$/
  str.match?(fqdn_only)
end
is_fqdn_port?(str) click to toggle source
# File lib/fluent/plugin/filter_sanitizer.rb, line 142
# True when a whole line of +str+ is an FQDN-shaped name followed by ":" and a
# port of one to five digits.
def is_fqdn_port?(str)
  fqdn_with_port = /^\b(([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9\-]*[a-zA-Z0-9])\.){2,}([A-Za-z]|[A-Za-z][A-Za-z\-]*[A-Za-z]){2,}:[0-9]{1,5}$/
  str.match?(fqdn_with_port)
end
is_ipv4?(str) click to toggle source
# File lib/fluent/plugin/filter_sanitizer.rb, line 126
# True when a whole line of +str+ is four dot-separated groups of one to three
# digits. The numeric range of each group is not checked.
def is_ipv4?(str)
  ipv4_only = /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/
  str.match?(ipv4_only)
end
is_ipv4_port?(str) click to toggle source
# File lib/fluent/plugin/filter_sanitizer.rb, line 130
# True when a whole line of +str+ is an IPv4-shaped address followed by ":" and
# a port of one to five digits.
def is_ipv4_port?(str)
  ipv4_with_port = /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:[0-9]{1,5}$/
  str.match?(ipv4_with_port)
end
is_url?(str) click to toggle source
# File lib/fluent/plugin/filter_sanitizer.rb, line 146
# True when a line of +str+ starts with a scheme of two or more alphanumeric
# characters followed by "://".
def is_url?(str)
  scheme_prefix = /^[a-zA-Z0-9]{2,}:\/\/.*$/
  str.match?(scheme_prefix)
end
sanitize_fqdn(str) click to toggle source
# File lib/fluent/plugin/filter_sanitizer.rb, line 158
# Returns the mask for an FQDN: "FQDN_" followed by the MD5 hex digest of the
# salt concatenated with +str+.
def sanitize_fqdn(str)
  "FQDN_#{Digest::MD5.hexdigest(@salt + str)}"
end
sanitize_fqdn_port(str) click to toggle source
# File lib/fluent/plugin/filter_sanitizer.rb, line 200
# Splits +str+ on ":" and masks every piece that is an FQDN, leaving the other
# pieces (such as the port) as they are, then joins the pieces with ":" again.
def sanitize_fqdn_port(str)
  masked = str.split(":").map do |piece|
    is_fqdn?(piece) ? sanitize_fqdn(piece) : piece
  end
  masked.join(":")
end
sanitize_fqdn_url(str) click to toggle source
# File lib/fluent/plugin/filter_sanitizer.rb, line 229
# Masks the host part of a URL-like string. The string is cut on "://" and each
# part on "/"; every resulting segment that is an FQDN, or an FQDN with a port,
# is replaced by its mask and the string is reassembled with the same
# separators.
#
# The -1 limit keeps trailing empty fields, so a trailing "/" (or "://") in the
# input is preserved instead of being silently dropped.
def sanitize_fqdn_url(str)
  parts = str.split("://", -1).map do |part|
    segments = part.split("/", -1).map do |segment|
      segment = sanitize_fqdn(segment) if is_fqdn?(segment)
      segment = sanitize_fqdn_port(segment) if is_fqdn_port?(segment)
      segment
    end
    segments.join("/")
  end
  parts.join("://")
end
sanitize_fqdn_val(v) click to toggle source
# File lib/fluent/plugin/filter_sanitizer.rb, line 283
# Masks FQDNs inside a field value. A value containing "," is treated as a
# comma-separated list, anything else as whitespace-separated tokens. Quotes
# are stripped from every token; tokens containing an FQDN are then masked as
# a URL, a bare FQDN or an FQDN with port. Only the whitespace branch writes a
# debug log line when the value changed.
def sanitize_fqdn_val(v)
  mask_token = lambda do |raw|
    token = subtract_quotations(raw)
    if include_fqdn?(token)
      if is_url?(token)
        token = sanitize_fqdn_url(token)
      else
        token = sanitize_fqdn(token) if is_fqdn?(token)
        token = sanitize_fqdn_port(token) if is_fqdn_port?(token)
      end
    end
    token
  end

  return v.split(",").map(&mask_token).join(",") if v.include?(",")

  sanitized = v.split.map(&mask_token).join(" ")
  $log.debug "[pattern_fqdn] sanitize '#{v}' to '#{sanitized}'" if v != sanitized
  sanitized
end
sanitize_ipv4(str) click to toggle source
# File lib/fluent/plugin/filter_sanitizer.rb, line 154
# Returns the mask for an IPv4 address: "IPv4_" followed by the MD5 hex digest
# of the salt concatenated with +str+.
def sanitize_ipv4(str)
  "IPv4_#{Digest::MD5.hexdigest(@salt + str)}"
end
sanitize_ipv4_port(str) click to toggle source
# File lib/fluent/plugin/filter_sanitizer.rb, line 191
# Splits +str+ on ":" and masks every piece that is an IPv4 address, leaving
# the other pieces (such as the port) as they are, then joins with ":" again.
def sanitize_ipv4_port(str)
  masked = str.split(":").map do |piece|
    is_ipv4?(piece) ? sanitize_ipv4(piece) : piece
  end
  masked.join(":")
end
sanitize_ipv4_url(str) click to toggle source
# File lib/fluent/plugin/filter_sanitizer.rb, line 209
# Masks the host part of a URL-like string. The string is cut on "://" and each
# part on "/"; every resulting segment that is an IPv4 address, or an IPv4
# address with a port, is replaced by its mask and the string is reassembled
# with the same separators.
#
# A bare address directly after the scheme (no "/" and no port, e.g.
# "http://1.2.3.4") is masked like any other segment. The -1 limit keeps
# trailing empty fields, so a trailing "/" (or "://") is preserved.
def sanitize_ipv4_url(str)
  parts = str.split("://", -1).map do |part|
    segments = part.split("/", -1).map do |segment|
      segment = sanitize_ipv4(segment) if is_ipv4?(segment)
      segment = sanitize_ipv4_port(segment) if is_ipv4_port?(segment)
      segment
    end
    segments.join("/")
  end
  parts.join("://")
end
sanitize_ipv4_val(v) click to toggle source
# File lib/fluent/plugin/filter_sanitizer.rb, line 249
# Masks IPv4 addresses inside a field value. A value containing "," is treated
# as a comma-separated list, anything else as whitespace-separated tokens.
# Quotes are stripped from every token; tokens containing an IPv4 address are
# then masked as a URL, a bare address or an address with port. Only the
# whitespace branch writes a debug log line when the value changed.
def sanitize_ipv4_val(v)
  mask_token = lambda do |raw|
    token = subtract_quotations(raw)
    if include_ipv4?(token)
      if is_url?(token)
        token = sanitize_ipv4_url(token)
      else
        token = sanitize_ipv4(token) if is_ipv4?(token)
        token = sanitize_ipv4_port(token) if is_ipv4_port?(token)
      end
    end
    token
  end

  return v.split(",").map(&mask_token).join(",") if v.include?(",")

  sanitized = v.split.map(&mask_token).join(" ")
  $log.debug "[pattern_ipv4] sanitize '#{v}' to '#{sanitized}'" if v != sanitized
  sanitized
end
sanitize_keyword(str, prefix) click to toggle source
# File lib/fluent/plugin/filter_sanitizer.rb, line 187
# Returns the mask for a keyword: +prefix+, an underscore, and the MD5 hex
# digest of the salt concatenated with +str+.
def sanitize_keyword(str, prefix)
  label = prefix + "_"
  label + Digest::MD5.hexdigest(@salt + str)
end
sanitize_keywords_val(v, keywords, prefix) click to toggle source
# File lib/fluent/plugin/filter_sanitizer.rb, line 329
# Splits +v+ on whitespace, replaces every token that appears in +keywords+
# with its keyword mask and joins the tokens with single spaces. Writes a debug
# log line when the result differs from the input.
def sanitize_keywords_val(v, keywords, prefix)
  tokens = v.split.map do |word|
    keywords.include?(word) ? sanitize_keyword(word, prefix) : word
  end
  sanitized = tokens.join(" ")
  $log.debug "[pattern_keywords] sanitize '#{v}' to '#{sanitized}'" if v != sanitized
  sanitized
end
sanitize_regex(str, prefix, regex) click to toggle source
# File lib/fluent/plugin/filter_sanitizer.rb, line 162
# Masks the whole value when it matches +regex+: returns +prefix+, an
# underscore and the MD5 hex digest of the salt concatenated with +str+.
# A non-matching value is returned untouched after a debug log line.
def sanitize_regex(str, prefix, regex)
  unless str.to_s.match?(regex)
    $log.debug "[pattern_regex] #{str} does not match given regex #{regex}. skip this rule."
    return str
  end

  prefix + "_" + Digest::MD5.hexdigest(@salt + str)
end
sanitize_regex_capture(str, prefix, regex, capture_group) click to toggle source
# File lib/fluent/plugin/filter_sanitizer.rb, line 171
# Masks only the part of +str+ captured by the named group +capture_group+ of
# +regex+. The captured text is replaced by +prefix+, an underscore and the MD5
# hex digest of the salt concatenated with the captured text; everything
# before and after the capture is kept verbatim.
#
# Returns +str+ unchanged (after a debug log line) when the regex does not
# match, when the group name is not defined in the regex, or when the group
# did not take part in the match.
def sanitize_regex_capture(str, prefix, regex, capture_group)
  match = str.match(regex)
  unless match
    $log.debug "[pattern_regex] #{str} does not match given regex #{regex}. Skip this rule."
    return str
  end

  unless match.names.include?(capture_group)
    $log.debug "[pattern_regex] regex pattern matched but capture group '#{capture_group}' does not exist. Skip this rule."
    return str
  end

  captured = match[capture_group]
  if captured.nil?
    $log.debug "[pattern_regex] regex pattern matched but capture group '#{capture_group}' captured nothing. Skip this rule."
    return str
  end

  # Rebuild the string from the capture's character offsets rather than by
  # splitting on the captured text: splitting breaks when the capture sits at
  # the end of the string or when the same text occurs more than once.
  start_at, end_at = match.offset(capture_group)
  mask = prefix + "_" + Digest::MD5.hexdigest(@salt + captured)
  str[0...start_at] + mask + str[end_at..-1]
end
sanitize_regex_val(v, prefix, regex) click to toggle source
# File lib/fluent/plugin/filter_sanitizer.rb, line 317
# Runs the whole-value regex mask on +v+ and returns the result, writing a
# debug log line when the value was changed.
def sanitize_regex_val(v, prefix, regex)
  sanitize_regex(v, prefix, regex).tap do |masked|
    $log.debug "[pattern_regex] sanitize '#{v}' to '#{masked}'" if v != masked
  end
end
sanitize_regex_val_capture(v, prefix, regex, capture_group) click to toggle source
# File lib/fluent/plugin/filter_sanitizer.rb, line 323
# Runs the capture-group regex mask on +v+ and returns the result, writing a
# debug log line when the value was changed.
def sanitize_regex_val_capture(v, prefix, regex, capture_group)
  sanitize_regex_capture(v, prefix, regex, capture_group).tap do |masked|
    $log.debug "[pattern_regex] sanitize '#{v}' to '#{masked}'" if v != masked
  end
end
subtract_quotations(str) click to toggle source
# File lib/fluent/plugin/filter_sanitizer.rb, line 150
# Returns a copy of +str+ with every single or double quote removed, together
# with the backslash directly in front of it when there is one.
def subtract_quotations(str)
  quote_marks = /\\\"|\'|\"|\\\'/
  str.gsub(quote_marks, '')
end