class HTMLPipeline
Constants
- VERSION
Attributes
Public: Default instrumentation service for new pipeline objects.
Public: String name for this Pipeline. Defaults to Class name.
Public: Instrumentation service for the pipeline. Set an ActiveSupport::Notifications compatible object to enable.
Public: Returns an Array of Filter
objects for this Pipeline.
Public: A hash representing the sanitization configuration settings.
Public: Returns an Array of Filter
objects for this Pipeline.
Public Class Methods
Source
# File lib/html_pipeline.rb, line 86
#
# Internal: Defines a `<name>_loaded?` predicate that reports whether a
# dependency could be required.
#
# The predicate is defined on the receiver itself (its singleton class).
# Previously it was defined via `self.class.define_method`; when the receiver
# is a class, `self.class` is `Class`, so the predicate was added to every
# class in the process rather than only to the receiver.
#
# name  - The String name of the dependency; used as the method-name prefix.
# value - The value the generated predicate returns.
#
# Returns the Symbol name of the method that was defined.
def define_dependency_loaded_method(name, value)
  define_singleton_method(:"#{name}_loaded?", -> { value })
end
Source
# File lib/html_pipeline.rb, line 114
#
# Public: Builds a pipeline from its filters and configuration.
#
# text_filters        - An Array (possibly nested) of filters; each must pass
#                       validation against HTMLPipeline::TextFilter. nil is
#                       treated as an empty list.
# convert_filter      - An optional filter validated against
#                       HTMLPipeline::ConvertFilter.
# sanitization_config - Configuration handed to Selma::Sanitizer.new, or nil
#                       to leave @sanitization_config unset.
# node_filters        - An Array (possibly nested) of filters; each must pass
#                       validation against HTMLPipeline::NodeFilter. nil is
#                       treated as an empty list.
# default_context     - A Hash stored (frozen) as the default context. Note
#                       that the object passed in is itself frozen.
# result_class        - Accepted for compatibility; not used by this method.
#
# Raises ArgumentError if default_context is nil.
# Raises InvalidFilterError if both text and node filters are given without a
#   convert filter, or if a filter fails validation.
def initialize(text_filters: [], convert_filter: nil, sanitization_config: SanitizationFilter::DEFAULT_CONFIG, node_filters: [], default_context: {}, result_class: Hash)
  raise ArgumentError, "default_context cannot be nil" if default_context.nil?

  # Array() tolerates nil (and a lone filter). The previous
  # `flatten.freeze || []` fallback could never trigger, because `freeze`
  # never returns nil and a nil argument failed earlier on `flatten`.
  @text_filters = Array(text_filters).flatten.freeze
  validate_filters(@text_filters, HTMLPipeline::TextFilter)

  @node_filters = Array(node_filters).flatten.freeze
  validate_filters(@node_filters, HTMLPipeline::NodeFilter)

  @convert_filter = convert_filter

  if @convert_filter.nil? && !@text_filters.empty? && !@node_filters.empty?
    raise InvalidFilterError, "Must provide `convert_filter` if `text_filters` and `node_filters` are also provided"
  elsif !@convert_filter.nil?
    validate_filter(@convert_filter, HTMLPipeline::ConvertFilter)
  end

  @sanitization_config = sanitization_config.nil? ? nil : Selma::Sanitizer.new(sanitization_config)
  @default_context = default_context.freeze
  @instrumentation_service = self.class.default_instrumentation_service
end
Source
# File lib/html_pipeline.rb, line 45
#
# Internal: Tries to require a dependency and ignores the failure when the
# library cannot be loaded.
#
# name     - The String feature name passed to `require`.
# requirer - Accepted but not used by this method.
#
# Returns whatever `require` returns, or nil when a LoadError was raised.
def optional_dependency(name, requirer)
  begin
    require(name)
  rescue LoadError
    nil # the dependency is optional, so a missing library is not an error
  end
end
Source
# File lib/html_pipeline.rb, line 57
#
# Internal: Requires the first loadable dependency out of a list of
# alternatives, recording a `<name>_loaded?` predicate for each one tried.
#
# Every name except the last is attempted through require_dependency; the
# last one is required directly so that a failure can report that *all*
# alternatives are missing.
#
# names    - An Array of String feature names, in order of preference. It is
#            no longer modified (earlier code shifted entries off it).
# requirer - A String naming what needs the dependency; used in the message.
#
# Returns nil when an earlier alternative loaded, otherwise the result of
#   define_dependency_loaded_method for the last alternative.
# Raises ArgumentError if names is empty.
# Raises MissingDependencyError if none of the names can be required.
def require_dependencies(names, requirer)
  # Work on a copy so the caller's array is left untouched.
  candidates = names.dup
  raise ArgumentError, "names cannot be empty" if candidates.empty?

  last_resort = candidates.pop

  candidates.each do |name|
    require_dependency(name, requirer)
    define_dependency_loaded_method(name, true)
    return # we got a dependency
  rescue MissingDependencyError
    # try the next dependency
    define_dependency_loaded_method(name, false)
  end

  begin
    require last_resort
  rescue LoadError => e
    # Record the failure for the last alternative too, as for the others.
    define_dependency_loaded_method(last_resort, false)
    raise MissingDependencyError, "Missing all dependencies '#{names.join(", ")}' for #{requirer}. See README.md for details.\n#{e.class.name}: #{e}"
  end

  define_dependency_loaded_method(last_resort, true)
end
Source
# File lib/html_pipeline.rb, line 50
#
# Internal: Requires a mandatory dependency, translating a load failure into
# a MissingDependencyError.
#
# name     - The String feature name passed to `require`.
# requirer - A String naming what needs the dependency; used in the message.
#
# Returns whatever `require` returns.
# Raises MissingDependencyError if `require` raises LoadError.
def require_dependency(name, requirer)
  begin
    require(name)
  rescue LoadError => failure
    raise MissingDependencyError, "Missing dependency '#{name}' for #{requirer}. See README.md for details.\n#{failure.class.name}: #{failure}"
  end
end
Public Instance Methods
Source
# File lib/html_pipeline.rb, line 149
#
# Public: Runs the input through the text filters, the convert filter, and
# then the node filters / sanitizer, in that order.
#
# text    - The input String handed to the first stage.
# context - A Hash merged over the pipeline's default context; the merged
#           Hash is frozen before being passed to filters.
# result  - A Hash that filters may write to; :output holds the output.
#
# Returns a new Hash: `result` merged with each node filter's `result`.
def call(text, context: {}, result: {})
  context = @default_context.merge(context).freeze
  result ||= {}

  if @text_filters.any?
    payload = default_payload({
      text_filters: @text_filters.map { |f| f.class.name },
      context:,
      result:,
    })

    instrument("call_text_filters.html_pipeline", payload) do
      # Each text filter receives the previous filter's output.
      result[:output] = @text_filters.inject(text) do |doc, filter|
        perform_filter(filter, doc, context: (filter.context || {}).merge(context), result:)
      end
    end
  end

  text = result[:output] || text

  html =
    if @convert_filter.nil?
      text
    else
      instrument("call_convert_filter.html_pipeline", payload) do
        @convert_filter.call(text, context: (@convert_filter.context || {}).merge(context))
      end
    end

  rewriter_options = {
    memory: {
      max_allowed_memory_usage: 5_242_880, # arbitrary limit of 5MB
    },
  }

  if @node_filters.empty? && !@sanitization_config.nil?
    instrument("sanitization.html_pipeline", payload) do
      result[:output] = Selma::Rewriter.new(sanitizer: @sanitization_config, options: rewriter_options).rewrite(html)
    end
  elsif @node_filters.any?
    # Build the payload before instrumenting. It used to be assigned inside
    # the block, after `instrument` had already been given the stale payload,
    # so the event never carried the node filter names.
    node_payload = default_payload({
      node_filters: @node_filters.map { |f| f.class.name },
      context:,
      result:,
    })

    instrument("call_node_filters.html_pipeline", node_payload) do
      @node_filters.each { |filter| filter.context = (filter.context || {}).merge(context) }
      result[:output] = Selma::Rewriter.new(sanitizer: @sanitization_config, handlers: @node_filters, options: rewriter_options).rewrite(html)
    end
  else
    # No node filters and no sanitizer: the text/converted output is final.
    # Without this branch the convert filter's output was silently dropped.
    result[:output] = html
  end

  result = result.merge(@node_filters.map(&:result).reduce({}, :merge))
  @node_filters.each(&:reset!)

  result
end
Apply all filters in the pipeline to the given HTML.
text - A UTF-8 String of text or HTML to run through the pipeline.
context - The context hash passed to each filter. See the Filter docs for more info on possible values. This object MUST NOT be modified in place by filters. Use the Result for passing state back.
result - The result Hash passed to each filter for modification. This
is where Filters store extracted information from the content.
Returns the result Hash after being filtered by this Pipeline. Contains an :output key with the String HTML markup based on the output of the last filter in the pipeline.
Source
# File lib/html_pipeline.rb, line 259
#
# Internal: Builds an instrumentation payload seeded with the pipeline name.
#
# payload - A Hash of extra entries; its values win over the seeded
#           :pipeline entry when the keys collide.
#
# Returns a new Hash.
def default_payload(payload = {})
  seeded = { pipeline: instrumentation_name }
  seeded.merge!(payload)
end
Internal: Default payload for instrumentation.
Accepts a Hash of additional payload data to be merged.
Returns a Hash.
Source
# File lib/html_pipeline.rb, line 247
#
# Internal: Runs the block, reporting it to the instrumentation service when
# one is set.
#
# event   - The String event name handed to the service.
# payload - The payload Hash; a nil/false value is replaced by
#           default_payload.
# block   - The work to run. It is yielded the payload when no service is
#           set, and forwarded to the service otherwise.
#
# Returns the block's value when no service is set, otherwise whatever the
#   service's `instrument` returns.
def instrument(event, payload = {}, &block)
  payload = default_payload unless payload

  if instrumentation_service
    instrumentation_service.instrument(event, payload, &block)
  else
    yield(payload)
  end
end
Internal: If the `instrumentation_service` object is set, instruments the block; otherwise the block is run without instrumentation.
Returns the result of the provided block.
Source
# File lib/html_pipeline.rb, line 103
#
# Public: The name used for this pipeline in instrumentation.
#
# Falls back to the receiver's class name the first time it is read while
# @instrumentation_name has not been assigned; an explicitly assigned value
# (including nil) is returned as-is.
#
# Returns the stored name.
def instrumentation_name
  unless instance_variable_defined?(:@instrumentation_name)
    @instrumentation_name = self.class.name
  end

  @instrumentation_name
end
Source
# File lib/html_pipeline.rb, line 211
#
# Internal: Calls a single filter on the document inside a
# "call_filter.html_pipeline" instrumentation event.
#
# filter  - An object responding to `call(doc, context:, result:)`.
# doc     - The document passed to the filter.
# context - The context Hash passed to the filter and put in the payload.
# result  - The result Hash passed to the filter and put in the payload.
#
# Returns whatever `instrument` returns for the block that calls the filter.
def perform_filter(filter, doc, context: {}, result: {})
  details = { filter: filter.class.name, context: context, result: result }

  instrument("call_filter.html_pipeline", default_payload(details)) do
    filter.call(doc, context: context, result: result)
  end
end
Internal: Applies a specific filter to the supplied doc.
The filter is instrumented.
Returns the result of the filter.
Source
# File lib/html_pipeline.rb, line 237
#
# Public: Configures the instrumentation name and service for this pipeline.
#
# name    - The name assigned to instrumentation_name.
# service - The service to use; when nil/false, the class-level
#           default_instrumentation_service is used instead.
#
# The return value is the service that was assigned; callers should not
# rely on it.
def setup_instrumentation(name, service: nil)
  self.instrumentation_name = name

  fallback_or_given = service || self.class.default_instrumentation_service
  self.instrumentation_service = fallback_or_given
end
Public: setup instrumentation for this pipeline.
Returns nothing.
Source
# File lib/html_pipeline.rb, line 224
#
# Public: Runs `call` and converts its :output to a String.
#
# input   - The input passed through to `call`.
# context - The context Hash passed through to `call`.
# result  - The result Hash passed through to `call`.
#
# Returns the output's `to_html` when it responds to that method, otherwise
#   its `to_s`.
def to_html(input, context: {}, result: {})
  output = call(input, context: context, result: result)[:output]

  output.respond_to?(:to_html) ? output.to_html : output.to_s
end
Like call, but guarantees the value returned is a string of HTML markup.
Private Instance Methods
Source
# File lib/html_pipeline.rb, line 280
#
# Internal: Checks whether a filter descends from the given class.
#
# filter - Either a class/module (anything responding to `ancestors`) or an
#          instance, in which case its class is inspected.
# klass  - The ancestor to look for.
#
# Returns true if klass appears in the ancestor chain, false otherwise.
def correctly_ancestored?(filter, klass)
  lineage_owner = filter.respond_to?(:ancestors) ? filter : filter.class
  lineage_owner.ancestors.include?(klass)
end
Source
# File lib/html_pipeline.rb, line 263
#
# Internal: Ensures a single filter descends from the expected class.
#
# filter - The filter class or instance to check.
# klass  - The class the filter must inherit from.
#
# Returns nil when the filter is valid.
# Raises InvalidFilterError otherwise.
def validate_filter(filter, klass)
  return if correctly_ancestored?(filter, klass)

  raise InvalidFilterError, "Filter must inherit from `#{klass}`; #{filter} does not"
end
Source
# File lib/html_pipeline.rb, line 269
#
# Internal: Ensures every filter in a list descends from the expected class.
#
# filters - An Array of filter classes or instances; nil or empty is valid.
# klass   - The class each filter must inherit from.
#
# Returns nil when all filters are valid.
# Raises InvalidFilterError naming every offending filter otherwise.
def validate_filters(filters, klass)
  return if filters.nil? || filters.empty?

  offenders = filters.reject { |candidate| correctly_ancestored?(candidate, klass) }
  return if offenders.empty?

  # Keep the message grammatical for one offender versus several.
  verb =
    if offenders.count == 1
      "does"
    else
      "do"
    end

  raise InvalidFilterError, "All filters must inherit from `#{klass}`; #{offenders.join(", ")} #{verb} not"
end