class RuboCop::ResultCache
Provides functionality for caching RuboCop runs.
@api private
Constants
- DL_EXTENSIONS
- NON_CHANGING
Attributes
@api private
Public Class Methods
# File lib/rubocop/result_cache.rb, line 81
# Looks up the 'AllowSymlinksInCacheRootDirectory' entry in the all-cops
# configuration that applies to the current working directory.
#
# @param config_store [#for_pwd] source of the configuration
# @return [Object] whatever value is stored under that key
def self.allow_symlinks_in_cache_location?(config_store)
  all_cops = config_store.for_pwd.for_all_cops
  all_cops['AllowSymlinksInCacheRootDirectory']
end
# File lib/rubocop/result_cache.rb, line 75
# Asks CacheConfig for the cache root directory, handing it a block that
# yields the 'CacheRootDirectory' all-cops setting for the current working
# directory.
#
# @param config_store [#for_pwd] source of the configuration
# @return whatever CacheConfig.root_dir returns
def self.cache_root(config_store)
  CacheConfig.root_dir { config_store.for_pwd.for_all_cops['CacheRootDirectory'] }
end
Remove old files so that the cache doesn’t grow too big. When the threshold MaxFilesInCache has been exceeded, the oldest 50% of all the files in the cache are removed. The reason for removing so much is that cleaning should be done relatively seldom, since there is a slight risk that some other RuboCop process was just about to read the file when there’s parallel execution and the cache is shared.
# File lib/rubocop/result_cache.rb, line 28
# Prunes the cache directory when it holds more files than allowed.
#
# Does nothing when cleanup is inhibited, when the cache root does not
# exist, or when requires_file_removal? says no removal is needed.
#
# @param config_store [#for_pwd] source of the configuration
# @param verbose [Boolean] forwarded to remove_oldest_files
# @param cache_root [String, nil] directory to clean; looked up from the
#   configuration when not given
def self.cleanup(config_store, verbose, cache_root = nil)
  return if inhibit_cleanup # OPTIMIZE: For faster testing

  # The parenthesised call reaches the class method, not the local variable.
  cache_root = cache_root(config_store) unless cache_root
  return unless File.exist?(cache_root)

  entries = Find.find(cache_root)
  files, dirs = entries.partition { |entry| File.file?(entry) }

  if requires_file_removal?(files.length, config_store)
    remove_oldest_files(files, dirs, cache_root, verbose)
  end
end
# File lib/rubocop/result_cache.rb, line 87
# Computes the cache file path for +file+ and stores the state the
# instance methods rely on.
#
# The cache root is resolved in this order: the explicit +cache_root+
# argument, then options[:cache_root] joined with 'rubocop_cache', then
# ResultCache.cache_root(config_store).
#
# @param file [String] path of the inspected file
# @param team passed to context_checksum
# @param options [Hash] invocation options (:cache_root and :debug are read here)
# @param config_store source of the configuration
# @param cache_root [String, nil] optional explicit cache root
def initialize(file, team, options, config_store, cache_root = nil)
  root_from_options = options[:cache_root]
  cache_root ||= File.join(root_from_options, 'rubocop_cache') if root_from_options
  cache_root ||= ResultCache.cache_root(config_store)

  @allow_symlinks_in_cache_location =
    ResultCache.allow_symlinks_in_cache_location?(config_store)

  path_segments = [
    cache_root,
    rubocop_checksum,
    context_checksum(team, options),
    file_checksum(file, config_store)
  ]
  @path = File.join(*path_segments)

  @cached_data = CachedData.new(file)
  @debug = options[:debug]
end
Private Class Methods
# File lib/rubocop/result_cache.rb, line 65
# Deletes the first +remove_count+ entries of +files+, then removes every
# directory in +dirs+ that has no visible entries left.
#
# @param files [Array<String>] file paths, in the order they should be removed
# @param dirs [Array<String>] candidate directories to remove if empty
# @param remove_count [Integer] how many leading files to delete
def remove_files(files, dirs, remove_count)
  # Batch file deletions, deleting over 130,000+ files will crash
  # File.delete.
  files[0, remove_count].each_slice(10_000) { |batch| File.delete(*batch) }

  dirs.each do |dir|
    next unless Dir["#{dir}/*"].empty?

    Dir.rmdir(dir)
  end
end
# File lib/rubocop/result_cache.rb, line 52
# Removes the oldest files (by modification time) from the cache.
#
# @param files [Array<String>] all cache file paths
# @param dirs [Array<String>] cache directories, forwarded to remove_files
# @param cache_root [String] only used in the verbose message
# @param verbose [Boolean] whether to print progress and swallowed errors
def remove_oldest_files(files, dirs, cache_root, verbose)
  # One more than half of the files, so that a single remaining file is
  # removed as well.
  remove_count = 1 + (files.length / 2)
  puts "Removing the #{remove_count} oldest files from #{cache_root}" if verbose

  oldest_first = files.sort_by { |file| File.mtime(file) }
  remove_files(oldest_first, dirs, remove_count)
rescue Errno::ENOENT => e
  # This can happen if parallel RuboCop invocations try to remove the
  # same files. No problem.
  puts e if verbose
end
# File lib/rubocop/result_cache.rb, line 48
# Tells whether the cache holds too many files.
#
# @param file_count [Integer] number of files currently in the cache
# @param config_store [#for_pwd] source of the 'MaxFilesInCache' setting
# @return [Boolean] true only when there is more than one file and the
#   count exceeds 'MaxFilesInCache'
def requires_file_removal?(file_count, config_store)
  # The configuration is only consulted when there is more than one file.
  return false unless file_count > 1

  file_count > config_store.for_pwd.for_all_cops['MaxFilesInCache']
end
Public Instance Methods
# File lib/rubocop/result_cache.rb, line 100
# Returns the debug flag captured from options[:debug] at construction.
#
# @return [Object] the stored value, unchanged (nil when it was never set)
def debug?
  @debug
end
# File lib/rubocop/result_cache.rb, line 108
# Reads the cache file as UTF-8 and hands its contents to the cached-data
# object for parsing. Prints the cache path first when debugging.
#
# @return whatever @cached_data.from_json returns
def load
  puts "Loading cache from #{@path}" if debug?

  json = File.read(@path, encoding: Encoding::UTF_8)
  @cached_data.from_json(json)
end
# File lib/rubocop/result_cache.rb, line 113
# Serializes +offenses+ and stores them at the cache path.
#
# Returns early without writing when the cache directory cannot be created
# (a warning is printed) or when symlink protection is triggered.
#
# @param offenses passed to @cached_data.to_json
def save(offenses)
  cache_dir = File.dirname(@path)

  begin
    FileUtils.mkdir_p(cache_dir)
  rescue Errno::EACCES, Errno::EROFS => e
    warn "Couldn't create cache directory. Continuing without cache.\n #{e.message}"
    return
  end

  temp_path = "#{@path}_#{rand(1_000_000_000)}"

  # RuboCop must be in control of where its cached data is stored. A
  # symbolic link anywhere in the cache directory tree can be an
  # indication that a symlink attack is being waged.
  return if symlink_protection_triggered?(cache_dir)

  File.open(temp_path, 'w', encoding: Encoding::UTF_8) do |io|
    io.write(@cached_data.to_json(offenses))
  end

  # Writing to a uniquely named temporary file first means that several
  # RuboCop processes saving data for the same inspected file at the same
  # time only compete over who gets to move their file into place. The
  # contents are the same, so no corruption of data should occur.
  FileUtils.mv(temp_path, @path)
end
# File lib/rubocop/result_cache.rb, line 104
# Tells whether a cache entry exists on disk for this instance's path.
#
# @return [Boolean] true when something exists at @path
def valid?
  File.exist?(@path)
end
Private Instance Methods
# File lib/rubocop/result_cache.rb, line 146
# Walks from +path+ up through its parent directories and reports whether
# any component on the way is a symbolic link. A warning naming the first
# symlink found is printed.
#
# @param path [String] path to start from
# @return [Boolean] true as soon as a symlink is found, false otherwise
def any_symlink?(path)
  current = path

  loop do
    parent = File.dirname(current)
    # Stop once dirname no longer changes the path (the top was reached).
    return false if current == parent

    if File.symlink?(current)
      warn "Warning: #{current} is a symlink, which is not allowed."
      return true
    end

    current = parent
  end
end
We combine team and options into a single “context” checksum to avoid making file names that are too long for some filesystems to handle. This context is for anything that’s not (1) the RuboCop executable checksum or (2) the inspected file checksum.
# File lib/rubocop/result_cache.rb, line 229
# Builds one SHA1 hex digest from the team's external dependency checksum
# and the digest of the relevant options.
#
# @param team [#external_dependency_checksum]
# @param options [Hash] invocation options
# @return [String] SHA1 hex digest of the two parts joined together
def context_checksum(team, options)
  dependency_part = team.external_dependency_checksum
  options_part = relevant_options_digest(options)

  Digest::SHA1.hexdigest([dependency_part, options_part].join)
end
# File lib/rubocop/result_cache.rb, line 189
# Computes a CRC32 checksum string for the file at +path+.
#
# @param path [String] file to fingerprint
# @return [String] decimal CRC32 of the file's bytes, or of its base name
#   when the path ends with one of DL_EXTENSIONS
def digest(path)
  # Shared libraries often contain timestamps of when they were compiled
  # and other non-stable data, so only their name is used.
  shared_library = path.end_with?(*DL_EXTENSIONS)

  # For everything else the raw bytes are read; mtime is not reliable.
  payload = shared_library ? File.basename(path) : File.binread(path)

  Zlib.crc32(payload).to_s
end
# File lib/rubocop/result_cache.rb, line 157
# Computes a SHA1 hex digest covering the file's path, its mode, the
# signature of the configuration that applies to it, and its contents.
#
# @param file [String] path of the inspected file
# @param config_store [#for_file] source of the per-file configuration
# @return [String] SHA1 hex digest, or '_' when the file does not exist
def file_checksum(file, config_store)
  Digest::SHA1.new
              .update("#{file}#{File.stat(file).mode}#{config_store.for_file(file).signature}")
              .file(file)
              .hexdigest
rescue Errno::ENOENT
  # Spurious files that come and go should not cause a crash, at least not
  # here.
  '_'
end
Return a hash of the options given at invocation, minus the ones that have no effect on which offenses and disabled line ranges are found, and thus don’t affect caching.
# File lib/rubocop/result_cache.rb, line 220
# Turns the options that are not listed in NON_CHANGING into a string that
# contains only letters and underscores.
#
# @param options [Hash] invocation options
# @return [String] the remaining options' string form with every run of
#   non-letter characters replaced by a single '_'
def relevant_options_digest(options)
  relevant = options.reject { |name, _value| NON_CHANGING.include?(name) }
  relevant.to_s.gsub(/[^a-z]+/i, '_')
end
The checksum of the RuboCop program running the inspection.
# File lib/rubocop/result_cache.rb, line 174
# Returns the checksum stored in ResultCache.source_checksum, computing and
# storing it first when it is not set yet.
#
# The checksum is a SHA1 hex digest fed with the per-file digest of every
# existing file from rubocop_extra_features (in sorted order), followed by
# the RuboCop and RuboCop::AST version strings.
#
# @return [String] SHA1 hex digest
def rubocop_checksum
  ResultCache.source_checksum ||=
    begin
      sha = Digest::SHA1.new

      existing_files = rubocop_extra_features.select { |path| File.file?(path) }
      existing_files.sort.each { |path| sha << digest(path) }

      sha << RuboCop::Version::STRING << RuboCop::AST::Version::STRING
      sha.hexdigest
    end
end
# File lib/rubocop/result_cache.rb, line 200
# Lists the files whose contents feed into the RuboCop checksum.
#
# These are all the files we have `require`d plus everything in the exe
# directory, minus ResultCache.rubocop_required_features. A change to any
# of them could affect the cop output, so they are included in the cache
# hash.
#
# @return [Array<String>] paths of the extra source files
def rubocop_extra_features
  lib_root = File.join(File.dirname(__FILE__), '..')

  # Make sure to use an absolute path to prevent errors on Windows
  # when traversing the relative paths with symlinks.
  exe_root = File.absolute_path(File.join(lib_root, '..', 'exe'))

  loaded_and_exe = $LOADED_FEATURES + Find.find(exe_root).to_a
  loaded_and_exe - ResultCache.rubocop_required_features # Rely on gem versions
end
# File lib/rubocop/result_cache.rb, line 142
# Tells whether writing under +path+ must be refused because of a symlink.
#
# @param path [String] directory the cache file would be written to
# @return [Boolean] false when symlinks are allowed in the cache location,
#   otherwise the result of any_symlink?(path)
def symlink_protection_triggered?(path)
  # When symlinks are allowed the path is not inspected at all.
  return false if @allow_symlinks_in_cache_location

  any_symlink?(path)
end