class RDF::Reader
The base class for RDF
parsers.
@example Loading an RDF
reader implementation
require 'rdf/ntriples'
@example Iterating over known RDF
reader classes
RDF::Reader.each { |klass| puts klass.name }
@example Obtaining an RDF
reader class
RDF::Reader.for(:ntriples) #=> RDF::NTriples::Reader
RDF::Reader.for("etc/doap.nt")
RDF::Reader.for(file_name: "etc/doap.nt")
RDF::Reader.for(file_extension: "nt")
RDF::Reader.for(content_type: "application/n-triples")
@example Instantiating an RDF
reader class
RDF::Reader.for(:ntriples).new($stdin) { |reader| ... }
@example Parsing RDF
statements from a file
RDF::Reader.open("etc/doap.nt") do |reader|
  reader.each_statement do |statement|
    puts statement.inspect
  end
end
@example Parsing RDF
statements from a string
data = StringIO.new(File.read("etc/doap.nt"))
RDF::Reader.for(:ntriples).new(data) do |reader|
  reader.each_statement do |statement|
    puts statement.inspect
  end
end
@abstract @see RDF::Format
@see RDF::Writer
Attributes
Any additional options for this reader.
@return [Hash] @since 0.3.0
Public Class Methods
Source
# File lib/rdf/reader.rb, line 53
# Enumerates the reader classes of the known RDF formats.
#
# Collects `reader` from every entry of RDF::Format, discards the entries
# that have no reader (nil) and iterates over what is left.
#
# @yield [klass]
# @yieldparam [Class] klass
# @return [Enumerator]
def self.each(&block)
  readers = RDF::Format.map { |format| format.reader }
  readers.reject { |reader| reader.nil? }.each(&block)
end
Enumerates known RDF
reader classes.
@yield [klass] @yieldparam [Class] klass @return [Enumerator]
Source
# File lib/rdf/reader.rb, line 91
# Finds a reader class based on the given criteria.
#
# If this reader class has a format of its own (`self.format`), that format's
# reader is returned; otherwise the lookup is delegated to `Format.for`.
# Hash criteria are extended with `has_reader: true` before delegation.
#
# @param [Array] arg zero or one criterion (Symbol, String or Hash)
# @yieldreturn [String] passed through to `Format.for`
# @return [Class, nil] the reader of the format found, or nil when no format is found
# @raise [ArgumentError] if more than one argument is given
def self.for(*arg, &block)
  criteria =
    case arg.length
    when 0 then nil
    when 1 then arg.first
    else
      # The message names this method (it previously said "Format.for").
      raise ArgumentError, "Reader.for accepts zero or one argument, got #{arg.length}."
    end
  criteria = criteria.merge(has_reader: true) if criteria.is_a?(Hash)
  if (format = self.format || Format.for(criteria, &block))
    format.reader
  end
end
Finds an RDF
reader class based on the given criteria.
If the reader class has a defined format, use that.
@overload for(format)
Finds an RDF reader class based on a symbolic name. @param [Symbol] format @return [Class]
@overload for(filename)
Finds an RDF reader class based on a file name. @param [String] filename @return [Class]
@overload for(options = {})
Finds an RDF reader class based on various options. @param [Hash{Symbol => Object}] options @option options [String, #to_s] :file_name (nil) @option options [Symbol, #to_sym] :file_extension (nil) @option options [String, #to_s] :content_type (nil) @return [Class] @option options [String] :sample (nil) A sample of input used for performing format detection. If we find no formats, or we find more than one, and we have a sample, we can perform format detection to find a specific format to use, in which case we pick the first one we find. @return [Class] @yieldreturn [String] another way to provide a sample; allows the sample to be retrieved lazily.
@return [Class]
Source
# File lib/rdf/reader.rb, line 108
# Retrieves the serialization format class whose reader is this class.
#
# @param [Object] klass when non-nil no lookup is performed and nil is returned
# @return [Class, nil] the first format whose `reader` equals self, or nil
def self.format(klass = nil)
  return nil unless klass.nil?

  Format.each { |candidate| return candidate if candidate.reader == self }
  nil # not found
end
Retrieves the RDF
serialization format class for this reader class.
@return [Class]
Source
# File lib/rdf/reader.rb, line 664 def self.inherited(child) @@subclasses << child super end
@private @return [void]
Source
# File lib/rdf/reader.rb, line 298
# Initializes the reader.
#
# All named keyword arguments are merged, together with any extra options,
# into @options. A String input is wrapped in a StringIO; any other input is
# stored as given.
#
# @param [IO, File, String] input the input stream to read
# @param [#to_s] base_uri (nil) falls back to `input.base_uri` when the input responds to it
# @param [Boolean] canonicalize (false)
# @param [Encoding] encoding (Encoding::UTF_8)
# @param [Boolean] intern (true)
# @param [Hash] prefixes (Hash.new)
# @param [Boolean] rdfstar (false)
# @param [Boolean] validate (false)
# @param [Hash{Symbol => Object}] options any additional options
# @option options [String] :version
# @yield [reader] `self`; a block of arity 0 is instance_eval'ed instead
# @yieldparam [RDF::Reader] reader
def initialize(input = $stdin,
               base_uri:     nil,
               canonicalize: false,
               encoding:     Encoding::UTF_8,
               intern:       true,
               prefixes:     Hash.new,
               rdfstar:      false,
               validate:     false,
               **options,
               &block)

  # An explicit base_uri wins; otherwise ask the input, if it can tell us
  base_uri     ||= input.base_uri if input.respond_to?(:base_uri)
  @options = options.merge({
    base_uri:     base_uri,
    canonicalize: canonicalize,
    encoding:     encoding,
    intern:       intern,
    prefixes:     prefixes,
    rdfstar:      rdfstar,
    validate:     validate
  })

  # The rdfstar option implies version 1.2, but can be overridden
  @options[:version] ||= "1.2" if @options[:rdfstar]

  # An unknown version is reported through log_error
  unless self.version.nil? || RDF::Format::VERSIONS.include?(self.version)
    log_error("Expected version to be one of #{RDF::Format::VERSIONS.join(', ')}, was #{self.version}")
  end

  @input = case input
    when String then StringIO.new(input)
    else input
  end

  if block_given?
    case block.arity
      when 0 then instance_eval(&block)
      else block.call(self)
    end
  end
end
Initializes the reader.
@param [IO, File, String] input
the input stream to read
@param [#to_s] base_uri
(nil)
the base URI to use when resolving relative URIs (not supported by all readers)
@param [Boolean] canonicalize (false)
whether to canonicalize parsed URIs and Literals.
@param [Encoding] encoding (Encoding::UTF_8)
the encoding of the input stream
@param [Boolean] intern (true)
whether to intern all parsed URIs
@param [Boolean] rdfstar (false)
Preliminary support for RDF 1.2.
@param [Hash] prefixes (Hash.new)
the prefix mappings to use (not supported by all readers)
@param [Hash{Symbol => Object}] options
any additional options
@param [Boolean] validate (false)
whether to validate the parsed statements and values
@option options [String] :version
Parse a specific version of RDF ("1.1", "1.2", or "1.2-basic")
@yield [reader] `self` @yieldparam [RDF::Reader] reader @yieldreturn [void] ignored
Source
# File lib/rdf/reader.rb, line 215 def self.open(filename, format: nil, **options, &block) # If we're the abstract reader, and we can figure out a concrete reader from format, use that. if self == RDF::Reader && format && reader = self.for(format) return reader.open(filename, format: format, **options, &block) end # If we are a concrete reader class or format is not nil, set accept header from our content_types. unless self == RDF::Reader headers = (options[:headers] ||= {}) headers['Accept'] ||= (self.format.accept_type + %w(*/*;q=0.1)).join(", ") end Util::File.open_file(filename, **options) do |file| format_options = options.dup format_options[:content_type] ||= file.content_type if file.respond_to?(:content_type) && !file.content_type.to_s.include?('text/plain') format_options[:file_name] ||= filename reader = if self == RDF::Reader # We are the abstract reader class, find an appropriate reader self.for(format || format_options) do # Return a sample from the input file sample = file.read(1000) file.rewind sample end else # We are a concrete reader class self end options[:encoding] ||= file.encoding if file.respond_to?(:encoding) options[:filename] ||= filename if reader reader.new(file, **options, &block) else raise FormatError, "unknown RDF format: #{format_options.inspect}#{"\nThis may be resolved with a require of the 'linkeddata' gem." unless Object.const_defined?(:LinkedData)}" end end end
Parses input from the given file name or URL.
@note A reader returned via this method may not be readable depending on the processing model of the specific reader, as the file is only open during the scope of `open`. The reader is intended to be accessed through a block.
@example Parsing RDF
statements from a file
RDF::Reader.open("etc/doap.nt") do |reader|
  reader.each_statement do |statement|
    puts statement.inspect
  end
end
@param [String, #to_s] filename @param [Symbol] format @param [Hash{Symbol => Object}] options
any additional options (see {RDF::Util::File.open_file}, {RDF::Reader#initialize} and {RDF::Format.for})
@yield [reader] @yieldparam [RDF::Reader] reader @yieldreturn [void] ignored @raise [RDF::FormatError] if no reader found for the specified format
Source
# File lib/rdf/reader.rb, line 122 def self.options [ RDF::CLI::Option.new( symbol: :base_uri, control: :url, datatype: RDF::URI, on: ["--uri URI"], description: "Base URI of input file, defaults to the filename.") {|arg| RDF::URI(arg)}, RDF::CLI::Option.new( symbol: :canonicalize, datatype: TrueClass, on: ["--canonicalize"], control: :checkbox, default: false, description: "Canonicalize URI/literal forms") {true}, RDF::CLI::Option.new( symbol: :encoding, datatype: Encoding, control: :text, on: ["--encoding ENCODING"], description: "The encoding of the input stream.") {|arg| Encoding.find arg}, RDF::CLI::Option.new( symbol: :intern, datatype: TrueClass, control: :none, on: ["--intern"], description: "Intern all parsed URIs."), RDF::CLI::Option.new( symbol: :prefixes, datatype: Hash, control: :none, multiple: true, on: ["--prefixes PREFIX:URI,PREFIX:URI"], description: "A comma-separated list of prefix:uri pairs.") do |arg| arg.split(',').inject({}) do |memo, pfxuri| pfx,uri = pfxuri.split(':', 2) memo.merge(pfx.to_sym => RDF::URI(uri)) end end, RDF::CLI::Option.new( symbol: :rdfstar, datatype: TrueClass, control: :checkbox, on: ["--rdfstar"], description: "Parse RDF-star for preliminary RDF 1.2 support."), RDF::CLI::Option.new( symbol: :validate, datatype: TrueClass, control: :checkbox, on: ["--[no-]validate"], description: "Validate on input and output."), RDF::CLI::Option.new( symbol: :verifySSL, datatype: TrueClass, default: true, control: :checkbox, on: ["--[no-]verifySSL"], description: "Verify SSL results on HTTP GET"), RDF::CLI::Option.new( symbol: :version, control: :select, datatype: RDF::Format::VERSIONS, # 1.1, 1.2, or 1.2-basic on: ["--version VERSION"], description: "RDF Version."), ] end
Options suitable for automatic Reader
provisioning. @return [Array<RDF::CLI::Option>]
Source
# File lib/rdf/reader.rb, line 260 def self.to_sym self.format.to_sym end
Returns a symbol appropriate to use with RDF::Reader.for()
@return [Symbol]
Public Instance Methods
Source
# File lib/rdf/reader.rb, line 355
# Returns the base URI held in this reader's options, wrapped by RDF::URI.
#
# @example
#   reader.base_uri #=> RDF::URI('http://example.com/')
#
# @return [RDF::URI, nil] nil when no base URI option is set
def base_uri
  return unless @options[:base_uri]

  RDF::URI(@options[:base_uri])
end
Returns the base URI
determined by this reader.
@example
reader.base_uri #=> RDF::URI('http://example.com/')
@return [RDF::URI] @since 0.3.0
Source
# File lib/rdf/reader.rb, line 644 def canonicalize? @options[:canonicalize] end
Returns `true` if parsed values should be in canonical form.
@note This is for term canonicalization, for graph/dataset canonicalization use `RDF::Normalize`.
@return [Boolean] `true` or `false` @since 0.3.0
Source
# File lib/rdf/reader.rb, line 513
# Closes the input stream; does nothing when it is already closed.
#
# @return [void]
def close
  return if @input.closed?

  @input.close
end
Closes the input stream, after which an `IOError` will be raised for further read attempts.
If the input stream is already closed, does nothing.
@return [void] @since 0.2.2 @see ruby-doc.org/core-2.2.2/IO.html#method-i-close
Source
# File lib/rdf/reader.rb, line 442
# Iterates the given block for each RDF statement.
#
# Statements come from repeated calls to `read_statement`; nil results are
# skipped. Reading stops on EOFError, after which a best-effort `rewind` is
# attempted (its failures are ignored).
#
# @yield [statement] each statement
# @yieldparam [RDF::Statement] statement
# @return [Enumerator] an enumerator over this method, with or without a block
def each_statement(&block)
  return enum_for(:each_statement) unless block_given?

  begin
    loop do
      statement = read_statement
      next if statement.nil?

      block.call(statement)
    end
  rescue EOFError
    begin
      rewind
    rescue StandardError
      nil
    end
  end
  enum_for(:each_statement)
end
Iterates the given block for each RDF
statement.
If no block was given, returns an enumerator.
Statements are yielded in the order that they are read from the input stream.
@overload each_statement
@yield [statement] each statement @yieldparam [RDF::Statement] statement @yieldreturn [void] ignored @return [void]
@overload each_statement
@return [Enumerator]
@return [void] @raise [RDF::ReaderError] on invalid data @see RDF::Enumerable#each_statement
Source
# File lib/rdf/reader.rb, line 479
# Iterates the given block for each RDF triple.
#
# Triples come from repeated calls to `read_triple` and are splatted into the
# block; nil results are skipped. Reading stops on EOFError, after which a
# best-effort `rewind` is attempted (its failures are ignored).
#
# @yield [subject, predicate, object] each triple
# @return [Enumerator] an enumerator over this method, with or without a block
def each_triple(&block)
  return enum_for(:each_triple) unless block_given?

  begin
    loop do
      terms = read_triple
      next if terms.nil?

      block.call(*terms)
    end
  rescue EOFError
    begin
      rewind
    rescue StandardError
      nil
    end
  end
  enum_for(:each_triple)
end
Iterates the given block for each RDF
triple.
If no block was given, returns an enumerator.
Triples are yielded in the order that they are read from the input stream.
@overload each_triple
@yield [subject, predicate, object] each triple @yieldparam [RDF::Resource] subject @yieldparam [RDF::URI] predicate @yieldparam [RDF::Term] object @yieldreturn [void] ignored @return [void]
@overload each_triple
@return [Enumerator]
@return [void] @see RDF::Enumerable#each_triple
Source
# File lib/rdf/reader.rb, line 617
# Returns the encoding of the input stream.
#
# A String or Symbol option is looked up with Encoding.find; an Encoding is
# returned as is. Otherwise the option is defaulted (and stored) from the
# content encoding of this reader class's format.
#
# @return [Encoding]
def encoding
  configured = @options[:encoding]
  if String === configured || Symbol === configured
    Encoding.find(configured.to_s)
  elsif Encoding === configured
    configured
  else
    @options[:encoding] ||= Encoding.find(self.class.format.content_encoding.to_s)
  end
end
Returns the encoding of the input stream.
@return [Encoding]
Source
# File lib/rdf/reader.rb, line 653 def intern? @options[:intern] end
Returns `true` if parsed URIs should be interned.
@return [Boolean] `true` or `false` @since 0.3.0
Source
# File lib/rdf/reader.rb, line 521 def lineno @input.lineno end
Current line number being processed. For formats that can associate generated {Statement} with a particular line number from input, this value reflects that line number. @return [Integer]
Source
# File lib/rdf/reader.rb, line 403
# Defines or looks up a named URI prefix for this reader.
#
# The name is normalised to a Symbol; an empty name becomes the nil key.
# With a `uri` the mapping is stored in `prefixes`; without one the current
# mapping is returned.
#
# @param [Symbol, #to_s] name
# @param [RDF::URI, #to_s] uri (nil)
# @return [RDF::URI] the stored or looked-up mapping (nil when looking up an undefined prefix)
def prefix(name, uri = nil)
  key =
    if name.to_s.empty?
      nil
    elsif name.respond_to?(:to_sym)
      name.to_sym
    else
      name.to_s.to_sym
    end
  return prefixes[key] if uri.nil?

  prefixes[key] = uri
end
Defines the given named URI
prefix for this reader.
@example Defining a URI
prefix
reader.prefix :dc, RDF::URI('http://purl.org/dc/terms/')
@example Returning a URI
prefix
reader.prefix(:dc) #=> RDF::URI('http://purl.org/dc/terms/')
@overload prefix(name, uri)
@param [Symbol, #to_s] name @param [RDF::URI, #to_s] uri
@overload prefix(name)
@param [Symbol, #to_s] name
@return [RDF::URI]
Source
Source
# File lib/rdf/reader.rb, line 382 def prefixes=(prefixes) @options[:prefixes] = prefixes end
Defines the given URI
prefixes for this reader.
@example
reader.prefixes = {
  dc: RDF::URI('http://purl.org/dc/terms/'),
}
@param [Hash{Symbol => RDF::URI
}] prefixes @return [Hash{Symbol => RDF::URI
}] @since 0.3.0
Source
# File lib/rdf/reader.rb, line 499 def rewind @input.rewind end
Rewinds the input stream to the beginning of input.
@return [void] @since 0.2.3 @see ruby-doc.org/core-2.2.2/IO.html#method-i-rewind
Source
# File lib/rdf/reader.rb, line 267 def to_sym self.class.to_sym end
Returns a symbol appropriate to use with RDF::Reader.for()
@return [Symbol]
Source
# File lib/rdf/reader.rb, line 540
# Checks validity through the inherited `valid?` and additionally requires
# that no error was recorded in `log_statistics`.
#
# An ArgumentError or RDF::ReaderError raised along the way is logged with
# the first backtrace entry appended, and the result is false.
#
# @return [Boolean]
def valid?
  begin
    inherited_result = super
    inherited_result && !log_statistics[:error]
  rescue ArgumentError, RDF::ReaderError => error
    log_error(error.message + " at #{error.backtrace.first}")
    false
  end
end
@return [Boolean]
@note this parses the full input and is valid only in the reader block.
Use `Reader.new(input, validate: true)` if you intend to capture the result.
@example Parsing RDF
statements from a file
RDF::NTriples::Reader.new("!!invalid input??") do |reader|
  reader.valid? # => false
end
@see RDF::Value#validate!
for Literal
& URI
validation relevant to
error handling.
@see Enumerable#valid?
Source
# File lib/rdf/reader.rb, line 633 def validate? @options[:validate] end
Returns `true` if parsed statements and values should be validated.
@return [Boolean] `true` or `false` @since 0.3.0
Source
# File lib/rdf/reader.rb, line 417 def version @options[:version] end
Returns the RDF
version determined by this reader.
@example
reader.version #=> "1.2"
@return [String] @since 3.3.4
Protected Instance Methods
Source
# File lib/rdf/reader.rb, line 600
# Recursively emits the statements embedded in the subject and object
# positions of `statement` (Property Graph mode).
#
# Each embedded statement is yielded before its own embedded statements; the
# subject is handled before the object.
#
# @param [RDF::Statement] statement
# @yield [statement] each embedded statement
# @return [nil]
def each_pg_statement(statement, &block)
  %i[subject object].each do |position|
    embedded = statement.public_send(position)
    next unless embedded.is_a?(Statement)

    block.call(embedded)
    each_pg_statement(embedded, &block)
  end
  nil
end
Recursively emit embedded statements in Property Graph
mode
@param [RDF::Statement] statement
Source
# File lib/rdf/reader.rb, line 592
# Reports an "expected object" parsing error for the current line through
# `log_error`, passing RDF::ReaderError as the exception class.
#
# @return [void]
# @raise [RDF::ReaderError]
def fail_object
  message = "Expected object (found: #{current_line.inspect})"
  log_error(message, lineno: lineno, exception: RDF::ReaderError)
end
Raises an “expected object” parsing error on the current line.
@return [void] @raise [RDF::ReaderError]
Source
# File lib/rdf/reader.rb, line 583
# Reports an "expected predicate" parsing error for the current line through
# `log_error`, passing RDF::ReaderError as the exception class.
#
# @return [void]
# @raise [RDF::ReaderError]
def fail_predicate
  message = "Expected predicate (found: #{current_line.inspect})"
  log_error(message, lineno: lineno, exception: RDF::ReaderError)
end
Raises an “expected predicate” parsing error on the current line.
@return [void] @raise [RDF::ReaderError]
Source
# File lib/rdf/reader.rb, line 574
# Reports an "expected subject" parsing error for the current line through
# `log_error`, passing RDF::ReaderError as the exception class.
#
# @return [void]
# @raise [RDF::ReaderError]
def fail_subject
  message = "Expected subject (found: #{current_line.inspect})"
  log_error(message, lineno: lineno, exception: RDF::ReaderError)
end
Raises an “expected subject” parsing error on the current line.
@return [void] @raise [RDF::ReaderError]
Source
# File lib/rdf/reader.rb, line 555
# Reads a statement from the input stream by wrapping the result of
# `read_triple` in a Statement.
#
# @return [RDF::Statement] a statement
# @raise [NotImplementedError] unless `read_triple` is implemented in a subclass
# @abstract
def read_statement
  terms = read_triple
  Statement.new(*terms)
end
Reads a statement from the input stream.
@return [RDF::Statement] a statement @raise [NotImplementedError] unless implemented in subclass @abstract
Source
# File lib/rdf/reader.rb, line 565
# Reads a triple from the input stream. This base implementation always
# raises; subclasses are expected to override it.
#
# @return [Array(RDF::Term)] a triple
# @raise [NotImplementedError] unless implemented in subclass
# @abstract
def read_triple
  missing = "#{self.class}#read_triple" # override in subclasses
  raise NotImplementedError, missing
end
Reads a triple from the input stream.
@return [Array(RDF::Term
)] a triple @raise [NotImplementedError] unless implemented in subclass @abstract
Private Instance Methods
Source
# File lib/rdf/reader.rb, line 705
# Tells whether the current line (@line) is nil or empty.
#
# @return [Boolean]
def blank?
  return true if @line.nil?

  @line.empty?
end
@return [Boolean]
Source
# File lib/rdf/reader.rb, line 672 def current_line @line end
@private @return [String] The most recently read line of the input
Source
# File lib/rdf/reader.rb, line 712
# Matches the current line against `pattern` and, on success, consumes the
# matched portion: @line becomes the text after the match with leading
# whitespace stripped.
#
# @param [Regexp] pattern
# @return [Object] the first capture group, or true when the pattern matched
#   without one; nil when the pattern did not match
def match(pattern)
  return unless @line =~ pattern

  captured = Regexp.last_match(1)
  @line = Regexp.last_match.post_match.lstrip
  captured || true
end
@param [Regexp] pattern @return [Object]
Source
# File lib/rdf/reader.rb, line 678
# Reads the next line into @line and returns it.
#
# A pending remainder in @line_rest (the text after a "\r" in a previous
# read) is consumed before `@input.readline` is called. The line is chomped
# and transcoded to `encoding`.
#
# @return [String]
def readline
  @line = instance_variable_defined?(:@line_rest) && @line_rest || @input.readline
  # Split at the first carriage return; the remainder is kept for the next call
  @line, @line_rest = @line.split("\r", 2)
  @line = String.new if @line.nil? # not frozen
  @line.chomp!
  begin
    @line.encode!(encoding)
  rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError, Encoding::ConverterNotFoundError
    # It is likely the persisted line was not encoded on initial write
    # (i.e. persisted via RDF <= 1.0.9 and read via RDF >= 1.0.10)
    #
    # Encoding::UndefinedConversionError is raised by MRI.
    # Encoding::InvalidByteSequenceError is raised by jruby >= 1.7.5
    # Encoding::ConverterNotFoundError is raised by jruby < 1.7.5
    @line.force_encoding(encoding)
  end
  @line
end
@return [String]