class Jekyll::J1LunrSearch::Indexer

noinspection RubyTooManyInstanceVariablesInspection

Public Class Methods

new(config = {}) click to toggle source
Calls superclass method
# File lib/starter_web/_plugins/lunr_index.rb, line 24
# Sets up the indexer: loads the quicksearch module configuration (defaults
# merged with user settings), layers it over the built-in lunr defaults,
# locates lunr.min.js and prepares the state later used by #generate.
#
# @param config [Hash] site configuration; the keys 'environment', 'theme'
#   and 'data_dir' are read here
# @raise [RuntimeError] if lunr.min.js cannot be found in the module directory
def initialize(config = {})
  super(config)

  @mode = config['environment']
  @template = config['theme']

  # project root = directory of this plugin file without the "_plugins" part
  @module_path = File.dirname(__FILE__).sub('_plugins', '')
  @module_config_path = File.join(@module_path, config['data_dir'], 'modules')

  # block form of File.open closes the handle once the YAML has been parsed
  load_yaml = ->(path) { File.open(path) { |io| YAML.load(io) } }

  @module_config_default = load_yaml.call(File.join(@module_config_path, 'defaults', 'quicksearch.yml'))
  @module_config_user = load_yaml.call(File.join(@module_config_path, 'quicksearch.yml'))

  # a missing or empty section is treated as "no settings" instead of
  # failing on nil
  @module_config_default_settings = (@module_config_default || {})['defaults'] || {}
  @module_config_user_settings = (@module_config_user || {})['settings'] || {}

  # NOTE: merge! is kept on purpose, so @module_config and
  # @module_config_default_settings remain the same Hash object
  @module_config = @module_config_default_settings.merge!(@module_config_user_settings)

  @lunr_config = {
    'excludes' => [],
    'strip_index_html' => false,
    'min_length' => 3,
    'stopwords' => 'stopwords.txt',
    'fields' => {
      'title' => 10,
      'tagline' => 10,
      'tags' => 20,
      'categories' => 20,
      'description' => 20,
      'body' => 1
    }
  }.merge!(@module_config)

  @module_dir = @lunr_config['module_dir']
  @index_dir  = @lunr_config['index_dir']
  @index_name = @lunr_config['index_name']

  # calculate the module path
  # if NO template GEM is used (dev system), @module_path points
  # in the project folder, otherwise (runtime system) to the Ruby
  # GEM installation folder!
  #
  if @template.nil?
    @module_path = File.join(File.dirname(__FILE__), '../', @module_dir)
  else
    @gem_path    = `bundle info --path j1-template`
    @module_path = File.join(@gem_path.chomp, @module_dir)
  end

  @lunr_path = File.join(@module_path, 'lunr.min.js')
  raise "Could not find #{@lunr_path}" unless File.exist?(@lunr_path)

  # The compiled context is discarded; presumably this only serves as an
  # early check that the lunr source can be compiled - TODO confirm.
  ExecJS.compile(File.read(@lunr_path))

  @docs = {}
  @excludes = @lunr_config['excludes']

  # if web host supports index.html as default doc, then
  # optionally exclude it from the url
  #
  @strip_index_html = @lunr_config['strip_index_html']

  # @strip_categories configuration
  # Array() turns a missing setting into an empty list; the non-bang tr is
  # used because gsub! returns nil when nothing was replaced (0 or 1 entry)
  #
  @strip_categories = Array(@lunr_config['strip_categories'])
  @stripped_categories = @strip_categories.join(',').tr(',', ' ')

  # stop word exclusion configuration
  #
  @min_length = @lunr_config['min_length']
  @stopwords_file = @lunr_config['stopwords']
end

Public Instance Methods

generate(site) click to toggle source

Indexes all pages and documents, except those whose URL matches one of the configured `excludes` patterns or whose front matter sets `exclude_from_search: true`.

# File lib/starter_web/_plugins/lunr_index.rb, line 101
# Builds the lunr search index for the site and writes it, together with
# the per-document metadata, as a JSON file below site.dest.
#
# When the module setting 'rebuild' is explicitly false and the index file
# already exists, the existing file is kept and only registered as a static
# file so that Jekyll does not clean it up.
#
# @param site [Jekyll::Site] the site being generated
def generate(site)
  @site = site
  index_file = File.join(site.dest, @module_config['index_file'])

  # only an explicit false disables the rebuild; a missing setting rebuilds
  if @module_config['rebuild'] == false && File.exist?(index_file)
    Jekyll.logger.info 'J1 QuickSearch:', 'recreate index disabled.'
    # Keep the index file from being cleaned by Jekyll
    #
    site.static_files << SearchIndexFile.new(site, site.dest, '/', @module_config['index_file'])
    return
  end

  Jekyll.logger.info 'J1 QuickSearch:', 'creating search index ...'

  # gather posts and pages
  #
  items = pages_to_index(site)
  content_renderer = PageRenderer.new(site)

  # checked once up front; File.exist? is used because File.exists? is no
  # longer available in current Ruby versions
  use_stopwords = !@stopwords_file.nil? && File.exist?(@stopwords_file)
  unwanted_categories = Array(@strip_categories)

  index_js = File.read(@lunr_path)

  # NOTE: all settings must be added within the index function
  #
  index_js << 'var idx = lunr(function() {'

  @lunr_config['fields'].each_pair do |name, boost|
    index_js << "this.field('#{name}', {'boost': #{boost}});"
  end

  items.each_with_index do |item, i|
    entry = SearchEntry.create(item, content_renderer)

    entry.strip_index_suffix_from_url! if @strip_index_html
    entry.strip_stopwords!(stopwords, @min_length) if use_stopwords

    doc = {
      'id'          => i,
      'title'       => entry.title,
      'tagline'     => entry.tagline,
      'url'         => entry.url,
      'date'        => entry.date,
      'tags'        => entry.tags,
      'categories'  => entry.categories,
      'description' => entry.description,
      'is_post'     => entry.is_post,
      'body'        => entry.body
    }

    # remove unwanted categories (if any)
    #
    doc['categories'] -= unwanted_categories if doc['categories']

    index_js << 'this.add(' << ::JSON.generate(doc, quirks_mode: true) << ');'

    # reduce the size of the doc array by deleting the body key
    #
    doc.delete('body')
    @docs[i] = doc
  end
  index_js << '});'

  filename = File.join(@index_dir, @index_name.to_s)
  ctx      = ExecJS.compile(index_js)
  index    = ctx.eval('JSON.stringify(idx)')

  total = {
    'docs'  => @docs,
    'index' => ::JSON.parse(index, {:max_nesting => false})
  }
  filepath = File.join(site.dest, filename)

  # create data path if not already exists
  #
  FileUtils.mkdir_p(File.dirname(filepath))
  File.open(filepath, 'w') { |f| f.write(JSON.dump(total)) }
  Jekyll.logger.info 'J1 QuickSearch:', "finished, index ready."

  # Keep the written file from being cleaned by Jekyll
  #
  site.static_files << SearchIndexFile.new(site, site.dest, '/', filename)
end

Private Instance Methods

output_ext(doc) click to toggle source
# File lib/starter_web/_plugins/lunr_index.rb, line 202
# Returns the output file extension of an indexable item.
#
# Jekyll::Document instances are asked through a Jekyll::Renderer built for
# the current site; every other item is asked directly.
#
# @param doc [Object] a page or document responding to #output_ext
# @return the value of the respective #output_ext call
def output_ext(doc)
  return doc.output_ext unless doc.is_a?(Jekyll::Document)

  Jekyll::Renderer.new(@site, doc).output_ext
end
pages_to_index(site) click to toggle source
# File lib/starter_web/_plugins/lunr_index.rb, line 210
# Collects the pages and documents that go into the search index.
#
# An item is kept when it responds to #output_ext, its output extension is
# '.html', its URL matches none of the @excludes patterns and its data does
# not set 'exclude_from_search'.
#
# @param site [Jekyll::Site] the site whose pages and documents are scanned
# @return [Array] copies (via #dup) of the items to index
def pages_to_index(site)
  # compile every exclude pattern once instead of once per item
  exclude_patterns = @excludes.map { |pattern| Regexp.new(pattern) }

  # copy pages and documents (all collections, including posts);
  # NOTE: #dup creates shallow copies, nested data is still shared
  #
  candidates = site.pages.map(&:dup) + site.documents.map(&:dup)

  # process only files that will be converted to .html and only
  # non excluded files
  #
  # jadams, 2012-08-04
  # TODO: List of excludes should (alternatively) be taken from
  #       quicksearch.yml configuration
  candidates.select do |item|
    item.respond_to?(:output_ext) &&
      output_ext(item) == '.html' &&
      exclude_patterns.none? { |pattern| pattern.match?(item.url) } &&
      !item.data['exclude_from_search']
  end
end
stopwords() click to toggle source

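Loads the stopwords from the configured stopwords file, one entry per line with surrounding whitespace removed. The list is read once and cached.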

# File lib/starter_web/_plugins/lunr_index.rb, line 198
# Returns the stopword list, reading @stopwords_file on first use.
#
# Every line of the file becomes one entry with leading and trailing
# whitespace stripped; the list is kept in @stopwords for later calls.
#
# @return [Array<String>] the stripped lines of the stopwords file
def stopwords
  return @stopwords if @stopwords

  @stopwords = IO.readlines(@stopwords_file).map(&:strip)
end