# File lib/rdbms_sampler/table_sample.rb, line 151 def primary_keys quoted_schema = @connection.quote @schema quoted_table = @connection.quote @table sql = <<SQL SELECT column_name FROM information_schema.key_column_usage WHERE constraint_name = 'PRIMARY' AND table_schema = #{quoted_schema} AND table_name = #{quoted_table} SQL @connection.execute(sql).map do |row| row.first end end
class RdbmsSampler::TableSample
Attributes
pending_dependencies[R]
Public Class Methods
new(connection, schema_name, table_name, size = 1000)
click to toggle source
# File lib/rdbms_sampler/table_sample.rb, line 11 def initialize(connection, schema_name, table_name, size = 1000) @schema = schema_name @table = table_name @connection = connection @size = size @pending_dependencies = Set.new @sample = Set.new @sampled = false @sampled_ids = Set.new end
Public Instance Methods
add(row)
click to toggle source
Add a row to the table sample. Returns number of new dependencies introduced.
# File lib/rdbms_sampler/table_sample.rb, line 57 def add(row) return 0 unless @sample.add? row @sampled_ids.add row['id'] if row['id'] dependencies_for(row).collect { |dep| 1 if @pending_dependencies.add?(dep) }.compact.sum end
ensure_referential_integrity(sample)
click to toggle source
@param [Sample] sample
# File lib/rdbms_sampler/table_sample.rb, line 66 def ensure_referential_integrity(sample) dependencies_in_progress = @pending_dependencies @pending_dependencies = Set.new dependencies_in_progress.map { |dependency| dependency_sample = sample.table_sample_for_dependency(dependency) dependency_sample.fulfil(dependency) }.compact.sum end
fulfil(dependency)
click to toggle source
Add the given dependency to the sample @param [Dependency] dependency
# File lib/rdbms_sampler/table_sample.rb, line 37 def fulfil(dependency) return 0 if fulfilled?(dependency) quoted_column = @connection.quote_column_name dependency.child_key quoted_value = @connection.quote dependency.value sql = "SELECT * FROM #{quoted_name} WHERE #{quoted_column} = #{quoted_value}" row = @connection.select_one(sql) raise "Could not fulfil #{dependency} using query [#{sql}]" if row.nil? add row end
fulfilled?(dependency)
click to toggle source
@param [Dependency] dependency
# File lib/rdbms_sampler/table_sample.rb, line 48 def fulfilled?(dependency) # FIXME: Only handles `id` column return false if dependency.child_key != 'id' @sampled_ids.include?(dependency.value) end
identifier()
click to toggle source
# File lib/rdbms_sampler/table_sample.rb, line 31 def identifier "#{@schema}.#{@table}" end
quoted_name()
click to toggle source
# File lib/rdbms_sampler/table_sample.rb, line 92 def quoted_name @connection.quote_table_name(@schema)+'.'+@connection.quote_table_name(@table) end
sample!()
click to toggle source
# File lib/rdbms_sampler/table_sample.rb, line 22 def sample! fetch(@size) unless @sampled @sample end
size()
click to toggle source
# File lib/rdbms_sampler/table_sample.rb, line 27 def size @sampled ? @sample.size : @size end
to_sql()
click to toggle source
# File lib/rdbms_sampler/table_sample.rb, line 75 def to_sql ret = "\n-- Sample from #{quoted_name} (#{@sample.count} rows)\n" unless @sample.empty? quoted_cols = @sample.first.keys.collect { |col| @connection.quote_column_name col } # INSERT in batches to reduce the likelihood of hitting `max_allowed_packet` @sample.each_slice(250) do |rows| values = rows.collect { |row| row.values.map { |val| @connection.quote(val) } * ',' } * "),\n (" ret << "INSERT INTO #{quoted_name} \n (#{quoted_cols * ','}) \nVALUES \n (#{values});\n" end end ret end
Protected Instance Methods
dependencies_for(row)
click to toggle source
@param [Array] row
# File lib/rdbms_sampler/table_sample.rb, line 119 def dependencies_for(row) foreign_keys.collect { |fk| dependency_for(fk, row) }.compact end
dependency_for(fk, row)
click to toggle source
@param [ForeignKey] fk @param [Array] row
# File lib/rdbms_sampler/table_sample.rb, line 112 def dependency_for(fk, row) unless (value = row[fk.key]).nil? Dependency.new(fk.schema, fk.table, fk.key, fk.referenced_schema, fk.referenced_table, fk.referenced_key, value) end end
discover_foreign_keys()
click to toggle source
# File lib/rdbms_sampler/table_sample.rb, line 127 def discover_foreign_keys quoted_schema = @connection.quote @schema quoted_table = @connection.quote @table sql = <<SQL SELECT fk.constraint_name, fk.table_schema, fk.table_name, fk.column_name, fk.referenced_table_schema, fk.referenced_table_name, fk.referenced_column_name FROM information_schema.key_column_usage fk WHERE fk.referenced_column_name IS NOT NULL AND fk.table_schema = #{quoted_schema} AND fk.table_name = #{quoted_table} SQL @connection.execute(sql).map do |row| ForeignKey.new(*row) end end
fetch(count = 1000)
click to toggle source
# File lib/rdbms_sampler/table_sample.rb, line 98 def fetch(count = 1000) sql = "SELECT * FROM #{quoted_name}" unless (pks = self.primary_keys).count == 0 order_by = @connection.quote_column_name(pks.first) sql += " ORDER BY #{order_by} DESC" end sql += " LIMIT #{count}" warn " Sampling #{count} rows from #{quoted_name}..." @connection.select_all(sql).each { |row| add(row) } @sampled = true end
foreign_keys()
click to toggle source
# File lib/rdbms_sampler/table_sample.rb, line 123 def foreign_keys @fks ||= discover_foreign_keys end
primary_keys()
click to toggle source