class RDF::NTriples::Reader
N-Triples parser.
@example Obtaining an NTriples
reader class
RDF::Reader.for(:ntriples) #=> RDF::NTriples::Reader RDF::Reader.for("etc/doap.nt") RDF::Reader.for(file_name: "etc/doap.nt") RDF::Reader.for(file_extension: "nt") RDF::Reader.for(content_type: "application/n-triples")
@example Parsing RDF
statements from an NTriples
file
RDF::NTriples::Reader.open("etc/doap.nt") do |reader| reader.each_statement do |statement| puts statement.inspect end end
@example Parsing RDF
statements from an NTriples
string
data = StringIO.new(File.read("etc/doap.nt")) RDF::NTriples::Reader.new(data) do |reader| reader.each_statement do |statement| puts statement.inspect end end
** RDF-star
Supports statements as resources using `<<(s p o)>>`.
@see www.w3.org/TR/rdf-testcases/#ntriples @see www.w3.org/TR/n-triples/
Constants
- BLANK_NODE_LABEL
- COMMENT
- DATATYPE_URI
- ECHAR
- END_OF_STATEMENT
- ESCAPE_CHARS
- ESCAPE_CHARS_ESCAPED
-
cache constants to optimize escaping the escape chars in self.unescape
- ESCAPE_CHARS_ESCAPED_REGEXP
- IRIREF
- IRI_RANGE
- LANGTAG
-
LANGTAG is deprecated
- LANG_DIR
- LITERAL
- LITERAL_PLAIN
- LITERAL_WITH_DATATYPE
- LITERAL_WITH_LANGUAGE
- NODEID
- OBJECT
- PN_CHARS
- PN_CHARS_BASE
- PN_CHARS_U
- PREDICATE
- QT_END
- QT_START
- RDF_VERSION
- STRING_LITERAL_QUOTE
- SUBJECT
- TT_END
- TT_START
- UCHAR
- UCHAR4
- UCHAR8
- URIREF
- U_CHARS1
-
Terminals from rdf-turtle.
@see www.w3.org/TR/n-triples/ @see www.w3.org/TR/turtle/
Unicode regular expressions.
- U_CHARS2
Public Class Methods
Source
# File lib/rdf/ntriples/reader.rb, line 154
# Builds an RDF::Literal from a serialized literal string.
#
# The input is tested against LITERAL_WITH_LANGUAGE, LITERAL_WITH_DATATYPE
# and LITERAL_PLAIN, in that order; the first pattern that matches decides
# how the literal is constructed. Capture group 1 is the still-escaped
# lexical form; capture group 4 is the language tag or the datatype.
#
# @param input the serialized literal
# @param options accepted for signature compatibility; not used here
# @return [RDF::Literal, nil] nil when none of the patterns match
def self.parse_literal(input, **options)
  case input
  when LITERAL_WITH_LANGUAGE
    lang_tag = Regexp.last_match(4)
    lexical = Regexp.last_match(1)
    # A "--" inside the tag separates the language from the direction.
    language, direction = lang_tag.split('--')
    RDF::Literal.new(unescape(lexical), language: language, direction: direction)
  when LITERAL_WITH_DATATYPE
    datatype = Regexp.last_match(4)
    lexical = Regexp.last_match(1)
    RDF::Literal.new(unescape(lexical), datatype: datatype)
  when LITERAL_PLAIN
    RDF::Literal.new(unescape(Regexp.last_match(1)))
  end
end
(see unserialize) @return [RDF::Literal]
Source
# File lib/rdf/ntriples/reader.rb, line 135
# Builds an RDF::Node from a serialized blank node.
#
# @param input the serialized node, matched against NODEID
# @param options accepted for signature compatibility; not used here
# @return [RDF::Node, nil] nil when the input does not match NODEID
def self.parse_node(input, **options)
  return unless input =~ NODEID

  # Capture group 1 of NODEID is passed to RDF::Node.new as the identifier.
  RDF::Node.new(Regexp.last_match(1))
end
(see unserialize) @return [RDF::Node]
Source
# File lib/rdf/ntriples/reader.rb, line 128
# Parses a serialized object term, trying a URI first, then a blank node,
# then a literal.
#
# @param input the serialized term
# @param options forwarded unchanged to each parser that is tried
# @return the first truthy parse result, otherwise whatever parse_literal
#   returns
def self.parse_object(input, **options)
  uri = parse_uri(input, **options)
  return uri if uri

  node = parse_node(input, **options)
  return node if node

  parse_literal(input, **options)
end
(see unserialize)
Source
# File lib/rdf/ntriples/reader.rb, line 122
# Parses a serialized predicate as a URI, always requesting an interned URI.
#
# @param input the serialized predicate
# @param options accepted for signature compatibility; not forwarded
# @return the result of parse_uri called with `intern: true`
def self.parse_predicate(input, **options)
  parse_uri(input, intern: true)
end
(see unserialize) @return [RDF::URI]
Source
# File lib/rdf/ntriples/reader.rb, line 115
# Parses a serialized subject term, trying a URI first and falling back to
# a blank node.
#
# @param input the serialized term
# @param options forwarded unchanged to both parsers
# @return the parse_uri result when truthy, otherwise the parse_node result
def self.parse_subject(input, **options)
  uri = parse_uri(input, **options)
  return uri if uri

  parse_node(input, **options)
end
(see unserialize) @return [RDF::Resource]
Source
# File lib/rdf/ntriples/reader.rb, line 145
# Builds an RDF::URI from a serialized URI reference.
#
# @param input the serialized URI, matched against URIREF
# @param intern [Boolean] when truthy the URI is obtained through
#   RDF::URI.intern, otherwise through RDF::URI.new
# @param options accepted for signature compatibility; not used here
# @return [RDF::URI, nil] nil when the input does not match URIREF
def self.parse_uri(input, intern: false, **options)
  return unless input =~ URIREF

  constructor = intern ? :intern : :new
  # Capture group 1 of URIREF is unescaped before the URI is built.
  RDF::URI.send(constructor, unescape(Regexp.last_match(1)))
end
(see unserialize) @param [Boolean] intern (false) Use Interned URI
@return [RDF::URI]
Source
# File lib/rdf/ntriples/reader.rb, line 187
# Decodes the escape sequences in a string.
#
# @param string [String] the escaped text
# @return [String] the text with UCHAR matches replaced by the code point
#   they encode, and ESCAPE_CHARS_ESCAPED_REGEXP matches replaced using
#   ESCAPE_CHARS_ESCAPED
def self.unescape(string)
  # The input is duplicated and re-tagged only when it is not already
  # UTF-8, which avoids an extra copy in the common case.
  utf8 = string
  utf8 = string.dup.force_encoding(Encoding::UTF_8) unless string.encoding == Encoding::UTF_8

  decoded = utf8.gsub(UCHAR) do
    # Whichever of the first two capture groups matched holds the hex digits.
    hex_digits = Regexp.last_match(1) || Regexp.last_match(2)
    [hex_digits.hex].pack('U*')
  end

  decoded.gsub(ESCAPE_CHARS_ESCAPED_REGEXP, ESCAPE_CHARS_ESCAPED)
end
@param [String] string @return [String] @see www.w3.org/TR/rdf-testcases/#ntrip_strings @see blog.grayproductions.net/articles/understanding_m17n @see yehudakatz.com/2010/05/17/encodings-unabridged/
Source
# File lib/rdf/ntriples/reader.rb, line 105
# Reconstructs a value from its serialized form by building a reader over
# the input and returning the result of its #read_value.
#
# @param input the serialized value; nil is passed straight through
# @param options forwarded to the reader constructor after `logger: []`,
#   so a caller-supplied :logger takes precedence over the default
# @return the value read, or nil when input is nil
def self.unserialize(input, **options)
  return nil if input.nil?

  reader = new(input, logger: [], **options)
  reader.read_value
end
Reconstructs an RDF
value from its serialized N-Triples representation.
@param [String] input @param [{Symbol => Object}] options
From {RDF::Reader#initialize}
@option options [RDF::Util::Logger] :logger ([]) @return [RDF::Term]
Public Instance Methods
Source
# File lib/rdf/ntriples/reader.rb, line 263
# Attempts to match a comment at the current input position.
#
# @return whatever `match(COMMENT)` returns
def read_comment
  match(COMMENT)
end
@return [Boolean] @see www.w3.org/TR/rdf-testcases/#ntrip_grammar (comment)
Source
# File lib/rdf/ntriples/reader.rb, line 334
# Attempts to match the end-of-statement marker at the current input
# position.
#
# @return whatever `match(END_OF_STATEMENT)` returns
def read_eos
  match(END_OF_STATEMENT)
end
@return [Boolean] @see www.w3.org/TR/rdf-testcases/#ntrip_grammar (triple)
Source
# File lib/rdf/ntriples/reader.rb, line 295
# Reads a literal from the current input position.
#
# After the quoted lexical form is matched and unescaped, an optional
# language/direction tag or a `^^` datatype marker is consumed. The literal
# is validated and canonicalized when the corresponding reader settings
# are enabled.
#
# Any ArgumentError raised along the way is reported through log_error as
# an "Invalid Literal" RDF::ReaderError.
#
# @return [RDF::Literal, nil] nil when no LITERAL_PLAIN is present
def read_literal
  literal_str = match(LITERAL_PLAIN)
  return unless literal_str

  literal_str = self.class.unescape(literal_str)
  literal =
    if (lang_dir = match(LANG_DIR))
      # A "--" inside the tag separates the language from the direction.
      language, direction = lang_dir.split('--')
      # A direction is rejected unless the :rdfstar option is set.
      raise ArgumentError if direction && !@options[:rdfstar]
      if version && version == "1.1"
        log_warn("Literal base direction used with version #{version}")
      end
      RDF::Literal.new(literal_str, language: language, direction: direction)
    elsif match(/^(\^\^)/) # FIXME
      RDF::Literal.new(literal_str, datatype: read_uriref || fail_object)
    else
      RDF::Literal.new(literal_str) # plain string literal
    end

  literal.validate! if validate?
  literal.canonicalize! if canonicalize?
  literal
rescue ArgumentError
  # Rebuild the offending token, including the tag if one was read.
  v = literal_str
  v += "@#{lang_dir}" if lang_dir
  log_error("Invalid Literal (found: \"#{v}\")", lineno: lineno, token: v, exception: RDF::ReaderError)
end
@return [RDF::Literal] @see www.w3.org/TR/rdf-testcases/#ntrip_grammar (literal)
Source
# File lib/rdf/ntriples/reader.rb, line 285
# Reads a blank node from the current input position.
#
# Nodes are memoized in @nodes by identifier, so a repeated identifier
# yields the same RDF::Node object for the life of this reader.
#
# @return [RDF::Node, nil] nil when no NODEID is present
def read_node
  node_id = match(NODEID)
  return unless node_id

  @nodes ||= {}
  @nodes[node_id] ||= RDF::Node.new(node_id)
end
@return [RDF::Node] @see www.w3.org/TR/rdf-testcases/#ntrip_grammar (nodeID)
Source
# File lib/rdf/ntriples/reader.rb, line 214
# Reads input lines until a triple can be returned.
#
# Blank lines and comments are skipped. A version directive is stored in
# @options[:version] and reading moves on to the next line. When validating,
# a triple is returned only if all three of its terms are valid; otherwise
# the loop goes on to the next line.
#
# On RDF::ReaderError the line is restored to its state before parsing and
# the error is re-raised.
#
# @return [Array] `[subject, predicate, object]`
def read_triple
  loop do
    readline.strip! # EOFError thrown on end of input
    saved_line = @line # for backtracking input in case of parse error
    begin
      # Nothing to do for blank lines and comments.
      next if blank? || read_comment

      if (declared_version = read_version)
        @options[:version] = declared_version
        next
      end

      subject = read_uriref || read_node || fail_subject
      predicate = read_uriref(intern: true) || fail_predicate
      object = read_uriref || read_node || read_literal || read_tripleTerm || fail_object

      if validate? && !read_eos
        log_error("Expected end of statement (found: #{current_line.inspect})", lineno: lineno, exception: RDF::ReaderError)
      end

      triple = [subject, predicate, object]
      # Only return valid triples if validating
      return triple if !validate? || triple.all?(&:valid?)
    rescue RDF::ReaderError => e
      @line = saved_line # this allows #read_value to work
      raise e
    end
  end
end
@return [Array] @see www.w3.org/TR/rdf-testcases/#ntrip_grammar
Source
# File lib/rdf/ntriples/reader.rb, line 245
# Reads a triple term from the current input position.
#
# Nothing is consumed unless the :rdfstar option is set and TT_START
# matches. A warning is logged when a version is set and is not "1.2".
# The object position may itself be a triple term, so this method can
# recurse.
#
# @return [RDF::Statement, nil] a statement built with `tripleTerm: true`,
#   or nil when no triple term starts here
def read_tripleTerm
  return unless @options[:rdfstar] && match(TT_START)

  log_warn("Triple term used with version #{version}") if version && version != "1.2"

  subject = read_uriref || read_node || fail_subject
  predicate = read_uriref(intern: true) || fail_predicate
  object = read_uriref || read_node || read_literal || read_tripleTerm || fail_object

  unless match(TT_END)
    log_error("Expected end of statement (found: #{current_line.inspect})", lineno: lineno, exception: RDF::ReaderError)
  end

  RDF::Statement.new(subject, predicate, object, tripleTerm: true)
end
@return [RDF::Statement]
Source
# File lib/rdf/ntriples/reader.rb, line 271
# Reads a URI reference from the current input position.
#
# The URI is obtained through RDF::URI.intern only when both the reader's
# `intern?` setting and the `intern` argument are truthy; otherwise
# RDF::URI.new is used. The URI is validated when `validate?` is enabled.
#
# Any ArgumentError raised along the way is reported through log_error as
# an "Invalid URI" RDF::ReaderError.
#
# @param intern [Boolean] request an interned URI
# @param options accepted for signature compatibility; not used here
# @return [RDF::URI, nil] nil when no URIREF is present
def read_uriref(intern: false, **options)
  uri_str = match(URIREF)
  return unless uri_str

  uri_str = self.class.unescape(uri_str)
  constructor = (intern? && intern) ? :intern : :new
  uri = RDF::URI.send(constructor, uri_str, canonicalize: canonicalize?)
  uri.validate! if validate?
  uri
rescue ArgumentError
  log_error("Invalid URI (found: \"<#{uri_str}>\")", lineno: lineno, token: "<#{uri_str}>", exception: RDF::ReaderError)
end
@param [Boolean] intern (false) Use Interned URI
@return [RDF::URI] @see www.w3.org/TR/rdf-testcases/#ntrip_grammar (uriref)
Source
# File lib/rdf/ntriples/reader.rb, line 201
# Reads a value from the input.
#
# A full statement is attempted first. If that raises RDF::ReaderError,
# a single term is read instead (URI, blank node, literal or triple term,
# in that order) and log_recover is called before the term is returned.
#
# @return the statement read, or the fallback term (possibly nil)
def read_value
  read_statement
rescue RDF::ReaderError
  term = read_uriref || read_node || read_literal || read_tripleTerm
  log_recover
  term
end
@return [RDF::Term]
Source
# File lib/rdf/ntriples/reader.rb, line 321
# Reads a version directive from the current input position.
#
# After RDF_VERSION matches, the following LITERAL_PLAIN token is taken as
# the version. A warning is logged when that token is not listed in
# RDF::Format::VERSIONS, but the token is returned either way.
#
# @return [String, nil] the version token, or nil when RDF_VERSION does not
#   match
def read_version
  return unless match(RDF_VERSION)

  ver_tok = match(LITERAL_PLAIN)
  unless RDF::Format::VERSIONS.include?(ver_tok)
    log_warn("Expected version to be one of #{RDF::Format::VERSIONS.join(', ')}, was #{ver_tok}")
  end
  ver_tok
end
@return [String]