class Jekyll::J1LunrSearch::Indexer
noinspection RubyTooManyInstanceVariablesInspection
Public Class Methods
new(config = {})
click to toggle source
Calls superclass method
# File lib/starter_web/_plugins/lunr_index.rb, line 24
#
# Sets up the indexer from the site configuration.
#
# Reads the quicksearch module settings (defaults plus user overrides) from
# YAML files below <data_dir>/modules, merges them over the built-in lunr
# defaults, resolves the location of lunr.min.js and stores the options used
# later when the index is generated.
#
# config - Hash-like site configuration; the keys 'environment', 'theme'
#          and 'data_dir' are read here.
#
# Raises RuntimeError if lunr.min.js cannot be found at the resolved path.
def initialize(config = {})
  super(config)

  @mode     = config['environment']
  @template = config['theme']

  # directory of this file with the first "_plugins" occurrence removed
  @module_path        = File.dirname(__FILE__).sub('_plugins', '')
  @module_config_path = File.join(@module_path, config['data_dir'], 'modules')

  # load_file closes the file handle again (File.open without a block did not)
  @module_config_default = YAML.load_file(File.join(@module_config_path, 'defaults', 'quicksearch.yml'))
  @module_config_user    = YAML.load_file(File.join(@module_config_path, 'quicksearch.yml'))

  # a missing 'defaults' / 'settings' section is treated as empty instead
  # of failing inside merge!
  @module_config_default_settings = @module_config_default['defaults'] || {}
  @module_config_user_settings    = @module_config_user['settings'] || {}

  # merge! on purpose: @module_config and @module_config_default_settings
  # stay the same (merged) Hash object, as before
  @module_config = @module_config_default_settings.merge!(@module_config_user_settings)

  @lunr_config = {
    'excludes'         => [],
    'strip_index_html' => false,
    'min_length'       => 3,
    'stopwords'        => 'stopwords.txt',
    'fields'           => {
      'title'       => 10,
      'tagline'     => 10,
      'tags'        => 20,
      'categories'  => 20,
      'description' => 20,
      'body'        => 1
    }
  }.merge(@module_config)

  @module_dir = @lunr_config['module_dir']
  @index_dir  = @lunr_config['index_dir']
  @index_name = @lunr_config['index_name']

  # calculate the module path
  # if NO template GEM is used (dev system), @module_path points
  # in the project folder, otherwise (runtime system) to the Ruby
  # GEM installation folder!
  #
  if @template.nil?
    @module_path = File.join(File.dirname(__FILE__), '../', @module_dir)
  else
    @gem_path    = `bundle info --path j1-template`
    @module_path = File.join(@gem_path.chomp, @module_dir)
  end

  @lunr_path = File.join(@module_path, 'lunr.min.js')
  raise "Could not find #{@lunr_path}" unless File.exist?(@lunr_path)

  # The compiled context is not kept; the call only surfaces an error at
  # construction time (presumably a sanity check of lunr / the JS runtime).
  ExecJS.compile(File.read(@lunr_path))

  @docs     = {}
  @excludes = @lunr_config['excludes']

  # if web host supports index.html as default doc, then
  # optionally exclude it from the url
  #
  @strip_index_html = @lunr_config['strip_index_html']

  # @strip_categories configuration
  # An unset value becomes an empty list, and tr (unlike gsub!) never
  # returns nil when there is nothing to replace.
  #
  @strip_categories    = Array(@lunr_config['strip_categories'])
  @stripped_categories = @strip_categories.join(',').tr(',', ' ')

  # stop word exclusion configuration
  #
  @min_length     = @lunr_config['min_length']
  @stopwords_file = @lunr_config['stopwords']
end
Public Instance Methods
generate(site)
click to toggle source
Indexes all pages and documents, except those whose URL matches one of the configured exclude patterns or whose front matter sets exclude_from_search: true.
# File lib/starter_web/_plugins/lunr_index.rb, line 101
#
# Builds the lunr search index for +site+ and writes it, together with the
# per-document metadata, as one JSON file below site.dest.
#
# When the module setting 'rebuild' is exactly false and the configured
# index file already exists, nothing is rebuilt; the existing file is only
# registered as a static file.
#
# site - the Jekyll site being generated.
#
# Returns nothing of interest.
def generate(site)
  @site = site

  rebuild    = @module_config['rebuild']
  index_file = site.dest + @module_config['index_file']

  # explicit comparison: an unset (nil) 'rebuild' must still rebuild
  if rebuild == false && File.exist?(index_file)
    Jekyll.logger.info 'J1 QuickSearch:', 'recreate index disabled.'
    # Keep the index file from being cleaned by Jekyll
    #
    site.static_files << SearchIndexFile.new(site, site.dest, '/', @module_config['index_file'])
    return
  end

  Jekyll.logger.info 'J1 QuickSearch:', 'creating search index ...'

  # gather posts and pages
  #
  items            = pages_to_index(site)
  content_renderer = PageRenderer.new(site)

  # the generated script starts with the lunr library itself;
  # File.read closes the file handle again
  index_js = File.read(@lunr_path)

  # NOTE: all settings must be added within the index function
  #
  index_js << 'var idx = lunr(function() {'

  @lunr_config['fields'].each_pair do |name, boost|
    index_js << "this.field('#{name}', {'boost': #{boost}});"
  end

  # loop-invariant settings, evaluated once
  use_stopwords    = @stopwords_file && File.exist?(@stopwords_file)
  strip_categories = Array(@strip_categories)

  # start from an empty set so that entries of an earlier run on the
  # same instance cannot leak into this index
  @docs = {}

  items.each_with_index do |item, i|
    entry = SearchEntry.create(item, content_renderer)

    entry.strip_index_suffix_from_url! if @strip_index_html
    entry.strip_stopwords!(stopwords, @min_length) if use_stopwords

    doc = {
      'id'          => i,
      'title'       => entry.title,
      'tagline'     => entry.tagline,
      'url'         => entry.url,
      'date'        => entry.date,
      'tags'        => entry.tags,
      'categories'  => entry.categories,
      'description' => entry.description,
      'is_post'     => entry.is_post,
      'body'        => entry.body
    }

    # remove unwanted categories (if any)
    #
    doc['categories'] -= strip_categories unless doc['categories'].nil?

    index_js << 'this.add(' << ::JSON.generate(doc, quirks_mode: true) << ');'

    # reduce the size of the doc array by deleting the body key
    #
    doc.delete('body')

    @docs[i] = doc
  end

  index_js << '});'

  filename = File.join(@index_dir, @index_name.to_s)

  # run the generated script and read the serialized index back
  ctx   = ExecJS.compile(index_js)
  index = ctx.eval('JSON.stringify(idx)')

  total = {
    'docs'  => @docs,
    'index' => ::JSON.parse(index, max_nesting: false)
  }

  filepath = File.join(site.dest, filename)

  # create data path if not already exists
  #
  FileUtils.mkdir_p(File.dirname(filepath))
  File.write(filepath, ::JSON.dump(total))

  Jekyll.logger.info 'J1 QuickSearch:', "finished, index ready."

  # Keep the written file from being cleaned by Jekyll
  #
  site.static_files << SearchIndexFile.new(site, site.dest, '/', filename)
end
Private Instance Methods
output_ext(doc)
click to toggle source
# File lib/starter_web/_plugins/lunr_index.rb, line 202
#
# Returns the output file extension of +doc+.
#
# A Jekyll::Document is asked through a Jekyll::Renderer built for the
# current site; any other object is asked directly via its own #output_ext.
#
# doc - a Jekyll::Document or any object responding to #output_ext.
def output_ext(doc)
  return doc.output_ext unless doc.is_a?(Jekyll::Document)

  Jekyll::Renderer.new(@site, doc).output_ext
end
pages_to_index(site)
click to toggle source
# File lib/starter_web/_plugins/lunr_index.rb, line 210
#
# Collects the pages and documents of +site+ that go into the search index.
#
# An item is kept when it responds to #output_ext, its output extension is
# '.html', its URL matches none of the @excludes patterns and its data does
# not set 'exclude_from_search'.
#
# site - object providing #pages and #documents.
#
# Returns an Array of copies (#dup) of the selected items.
def pages_to_index(site)
  # compile each exclude pattern once instead of once per item;
  # an unset @excludes means "exclude nothing"
  exclude_patterns = Array(@excludes).map { |pattern| Regexp.new(pattern) }

  # copy pages and documents (all collections, including posts);
  # note that #dup is a shallow copy
  #
  items = site.pages.map(&:dup) + site.documents.map(&:dup)

  # process only files that will be converted to .html and only
  # non excluded files
  #
  items.select! do |item|
    item.respond_to?(:output_ext) &&
      output_ext(item) == '.html' &&
      exclude_patterns.none? { |pattern| pattern.match?(item.url) }
  end
  items.reject! { |item| item.data['exclude_from_search'] }

  # jadams, 2012-08-04
  # TODO: List of excludes should (alternatively) be taken from
  # quicksearch.yml configuration
  items
end
stopwords()
click to toggle source
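Loads the stopwords from the stopwords file, one per line with surrounding whitespace stripped; the list is read once and cached.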
# File lib/starter_web/_plugins/lunr_index.rb, line 198
#
# Returns the stopword list: one entry per line of @stopwords_file, with
# surrounding whitespace stripped. The file is read on first use only; the
# result is cached in @stopwords.
def stopwords
  # File.readlines always treats the argument as a file path, whereas
  # IO.readlines runs a path starting with "|" as a command.
  @stopwords ||= File.readlines(@stopwords_file).map(&:strip)
end