class CBETA::XMLDocument
Constants
- PASS
Attributes
doc[R]
Public Class Methods
new(string_or_io)
click to toggle source
# File lib/cbeta/xml_document.rb, line 8
# Parses +string_or_io+ with Nokogiri::XML, strips all namespaces from the
# resulting document, and creates the CBETA::Gaiji instance used by e_g.
def initialize(string_or_io)
  @doc = Nokogiri::XML(string_or_io).tap(&:remove_namespaces!)
  @gaiji = CBETA::Gaiji.new
end
Public Instance Methods
to_text()
click to toggle source
# File lib/cbeta/xml_document.rb, line 14
# Converts the parsed document to a string by traversing it from the root.
#
# Resets the per-run state first: output format, the gaiji normalisation
# stack (starting with normalisation enabled) and the pending next-line
# buffer filled by e_t and flushed by e_lb.
def to_text
  @format = 'text'
  @gaiji_norm = [true]
  @next_line_buf = ''

  root = @doc.root
  traverse(root)
end
Private Instance Methods
e_anchor(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 23
# Renders an <anchor>: a type of "circle" becomes '◎', anything else
# (including a missing type attribute) produces an empty string.
def e_anchor(e)
  return '◎' if e['type'] == 'circle'

  ''
end
e_app(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 33
# Renders an <app> element as the concatenated output of its children.
def e_app(e)
  traverse(e)
end
e_body(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 37
# Renders a <body> element as the concatenated output of its children.
def e_body(e)
  traverse(e)
end
e_byline(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 41
# Renders a <byline>: the children's output followed by a newline.
def e_byline(e)
  "#{traverse(e)}\n"
end
e_caesura(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 45
# Renders a <caesura> as a fixed separator string; the element itself is
# not inspected.
def e_caesura(e)
  ' '
end
e_caption(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 49
# Renders a <caption>: the children's output followed by a newline.
def e_caption(e)
  "#{traverse(e)}\n"
end
e_cell(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 53
# Renders a table <cell>: the children's output followed by a newline.
def e_cell(e)
  "#{traverse(e)}\n"
end
e_cit(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 57
# Renders a <cit> element as the concatenated output of its children.
def e_cit(e)
  traverse(e)
end
e_closer(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 61
# Renders a <closer>: the children's output followed by a newline.
def e_closer(e)
  "#{traverse(e)}\n"
end
e_corr(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 65
# Renders a <corr> element as the concatenated output of its children.
def e_corr(e)
  traverse(e)
end
e_date(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 69
# Renders a <date> element as the concatenated output of its children.
def e_date(e)
  traverse(e)
end
e_dialog(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 73
# Renders a <dialog> element as the concatenated output of its children.
def e_dialog(e)
  traverse(e)
end
e_div(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 77
# Renders a <div> element as the concatenated output of its children.
def e_div(e)
  traverse(e)
end
e_docAuthor(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 81
# Renders a <docAuthor> element as the concatenated output of its children.
def e_docAuthor(e)
  traverse(e)
end
e_docNumber(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 85
# Renders a <docNumber>: the children's output followed by a newline.
def e_docNumber(e)
  "#{traverse(e)}\n"
end
e_event(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 89
# Renders an <event>: the children's output followed by a newline.
def e_event(e)
  "#{traverse(e)}\n"
end
e_figDesc(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 99
# Renders a <figDesc>: the children's output followed by a newline.
def e_figDesc(e)
  "#{traverse(e)}\n"
end
e_figure(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 93
# Renders a <figure>: the children's output, terminated by a newline only
# when that output is non-empty.
def e_figure(e)
  text = traverse(e)
  text.empty? ? text : text << "\n"
end
e_foreign(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 103
# Renders a <foreign> element. Elements whose place attribute contains
# "foot" are dropped; all others render their children.
def e_foreign(e)
  footnote = e.key?('place') && e['place'].include?('foot')
  return '' if footnote

  traverse(e)
end
e_g(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 108
# Renders a <g> (gaiji) reference by looking its id up in @gaiji.
#
# The id is the ref attribute without its leading '#'. The representation
# priority passed to @gaiji.to_s depends on the top of the @gaiji_norm
# stack: normalised forms are offered only when it is truthy.
#
# Raises RuntimeError when @gaiji does not know the id.
def e_g(e)
  cb_priority =
    if @gaiji_norm.last
      %w(uni_char norm_uni_char norm_big5_char composition)
    else
      %w(uni_char composition)
    end

  gid = e['ref'].delete_prefix('#')
  raise "在 CBETA 缺字庫中找不到此缺字碼: #{gid}" unless @gaiji.key?(gid)

  @gaiji.to_s(gid, cb_priority: cb_priority)
end
e_head(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 124
# Renders a <head>: the children's output, terminated by a newline only
# when that output is non-empty.
def e_head(e)
  text = traverse(e)
  text.empty? ? text : text << "\n"
end
e_hi(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 130
# Renders a <hi> element as the concatenated output of its children.
def e_hi(e)
  traverse(e)
end
e_item(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 134
# Renders a list <item> on its own line.
#
# Output is a newline, then padding repeated once per enclosing <list>
# beyond the first, then the children's output. When the item has an n
# attribute its value is placed in front of that whole string.
def e_item(e)
  depth = e.xpath('ancestor::list').size

  line = "\n"
  line << ' ' * (depth - 1)
  line << traverse(e)

  e.key?('n') ? e['n'] + line : line
end
e_jhead(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 146
# Renders a <jhead> element as the concatenated output of its children.
def e_jhead(e)
  traverse(e)
end
e_juan(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 150
# Renders a <juan>: the children's output followed by a newline.
def e_juan(e)
  "#{traverse(e)}\n"
end
e_l(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 154
# Renders a verse line <l>: the children's output, followed by a newline
# unless @lg_type is 'abnormal'.
#
# NOTE(review): @lg_type is not assigned by any method visible alongside
# this one, so the newline appears to be added unconditionally - confirm.
def e_l(e)
  text = traverse(e)
  text << "\n" if @lg_type != 'abnormal'
  text
end
e_lb(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 160
# Renders a line break <lb>.
#
# Breaks with type "old" produce nothing. Otherwise a newline is emitted
# when the current paragraph type (@p_type) is 'pre', and any text queued
# in @next_line_buf is flushed on a line of its own and the buffer reset.
def e_lb(e)
  return '' if e['type'] == 'old'

  out = ''
  out << "\n" if @p_type == 'pre'
  return out if @next_line_buf.empty?

  out << @next_line_buf + "\n"
  @next_line_buf = ''
  out
end
e_lem(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 171
# Renders a <lem> element as the concatenated output of its children.
def e_lem(e)
  traverse(e)
end
e_lg(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 175
# Renders an <lg> element as the concatenated output of its children.
def e_lg(e)
  traverse(e)
end
e_list(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 179
# Renders a <list>: the children's output, followed by a blank line unless
# the list sits directly inside an <item> (i.e. it is a nested list).
def e_list(e)
  text = traverse(e)
  nested = e.parent.name == 'item'
  text << "\n\n" unless nested
  text
end
e_milestone(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 185
# A <milestone> contributes nothing to the output.
def e_milestone(e)
  ''
end
e_note(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 189
# Renders a <note>. Notes whose place attribute is exactly "inline",
# "inline2" or "interlinear" are emitted wrapped in parentheses; every
# other note (including one without a place attribute) yields ''.
#
# The place value is matched against a word list rather than tested as a
# substring of "inline inline2 interlinear", so fragments such as "line",
# "in" or an empty attribute value no longer count as inline notes.
def e_note(e)
  return '' unless %w[inline inline2 interlinear].include?(e['place'])

  "(#{traverse(e)})"
end
e_p(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 199
# Renders a <p>: the children's output followed by a newline.
#
# While the children are rendered, @p_type holds this paragraph's type
# attribute (e_lb reads it to decide whether to emit a newline). The
# previous value is restored afterwards - also when rendering raises - so
# a <p> nested inside another one does not wipe out the outer paragraph's
# type for the line breaks that follow it.
def e_p(e)
  outer_type = @p_type
  @p_type = e['type']
  traverse(e) + "\n"
ensure
  @p_type = outer_type
end
e_pb(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 206
# A page break <pb> contributes nothing to the output.
def e_pb(e)
  ''
end
e_quote(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 210
# Renders a <quote> element as the concatenated output of its children.
def e_quote(e)
  traverse(e)
end
e_ref(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 214
# Renders a <ref> element as the concatenated output of its children.
def e_ref(e)
  traverse(e)
end
e_reg(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 218
# Renders a <reg>. A <reg> that has a <choice> ancestor is suppressed;
# one without renders its children.
def e_reg(e)
  inside_choice = e.at_xpath('ancestor::choice')
  inside_choice ? '' : traverse(e)
end
e_row(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 225
# Renders a table <row>: the children's output followed by a newline.
def e_row(e)
  "#{traverse(e)}\n"
end
e_seg(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 229
# Renders a <seg> element as the concatenated output of its children.
def e_seg(e)
  traverse(e)
end
e_sg(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 233
# Renders an <sg>: the children's output wrapped in parentheses.
def e_sg(e)
  "(#{traverse(e)})"
end
e_sp(e)
click to toggle source
speech
# File lib/cbeta/xml_document.rb, line 238
# Renders an <sp> (speech) element as the concatenated output of its
# children.
def e_sp(e)
  traverse(e)
end
e_space(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 242
# Renders a <space>: the padding string repeated quantity times.
#
# The quantity attribute is converted with to_i, so a missing or
# non-numeric value counts as 0. Zero and negative counts yield '' - a
# negative count would otherwise make String#* raise ArgumentError.
def e_space(e)
  count = e['quantity'].to_i
  count.positive? ? ' ' * count : ''
end
e_t(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 247
# Renders a <t> element.
#
# Elements whose place attribute contains "foot" are dropped. When the
# enclosing <tt> is not a two-line parallel layout (type "app" or
# "single-line", place "inline", or rend "normal") the children's output
# is returned unchanged. Otherwise the position among the sibling <t>
# elements decides: the first is emitted followed by a separator, the
# second is queued in @next_line_buf (flushed later by e_lb) and emits
# nothing, and any further one is emitted unchanged.
def e_t(e)
  return '' if e.has_attribute?('place') && e['place'].include?('foot')

  text = traverse(e)

  # Not a two-line parallel layout: emit as-is.
  tt = e.at_xpath('ancestor::tt')
  if tt
    single_line = %w(app single-line).include?(tt['type']) ||
                  tt['place'] == 'inline' ||
                  tt['rend'] == 'normal'
    return text if single_line
  end

  # Two-line parallel layout.
  position = e.xpath('../t').index(e)
  if position == 0
    text + ' '
  elsif position == 1
    @next_line_buf << text + ' '
    ''
  else
    text
  end
end
e_table(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 274
# Renders a <table>: the children's output followed by a newline.
def e_table(e)
  "#{traverse(e)}\n"
end
e_term(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 278
# Renders a <term>. While its children are rendered, the gaiji
# normalisation flag pushed on @gaiji_norm is false when the behaviour
# attribute is "no-norm" and true otherwise; the flag is popped afterwards.
def e_term(e)
  @gaiji_norm.push(e['behaviour'] != "no-norm")
  text = traverse(e)
  @gaiji_norm.pop
  text
end
e_text(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 289
# Renders a <text> element. While its children are rendered, the gaiji
# normalisation flag pushed on @gaiji_norm is false when the behaviour
# attribute is "no-norm" and true otherwise; the flag is popped afterwards.
def e_text(e)
  @gaiji_norm.push(e['behaviour'] != "no-norm")
  text = traverse(e)
  @gaiji_norm.pop
  text
end
e_tt(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 300
# Renders a <tt> element as the concatenated output of its children.
def e_tt(e)
  traverse(e)
end
e_unclear(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 304
# Renders an <unclear>: the children's output, or the placeholder '▆'
# when the element produces no text.
def e_unclear(e)
  text = traverse(e)
  text.empty? ? '▆' : text
end
handle_node(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 310
# Dispatches a single node to its renderer and returns the resulting text.
#
# Comments and elements whose name is listed in PASS yield ''. Text nodes
# go to handle_text. Any other node is sent to the method named
# "e_<node name>", so a node without such a method raises NoMethodError.
def handle_node(e)
  if e.comment?
    ''
  elsif e.text?
    handle_text(e)
  elsif PASS.include?(e.name)
    ''
  else
    send("e_#{e.name}", e)
  end
end
handle_text(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 317
# Renders a text node.
#
# Returns '' when the content (minus one trailing line break) is empty or
# when the node's parent is an <app> element. Otherwise all CR/LF
# characters are removed and, in 'html' format, the text is HTML-escaped.
def handle_text(e)
  raw = e.content.chomp
  return '' if raw.empty? || e.parent.name == 'app'

  # CBETA XML contains superfluous line breaks between runs of text.
  cleaned = raw.gsub(/[\n\r]/, '')
  # Escape HTML special characters (e.g. & becomes &amp;).
  cleaned = CGI.escapeHTML(cleaned) if @format == 'html'
  cleaned
end
traverse(e)
click to toggle source
# File lib/cbeta/xml_document.rb, line 332
# Renders every child of +e+ with handle_node, in document order, and
# returns the results concatenated into one new string.
def traverse(e)
  e.children.each_with_object(+'') do |child, out|
    out << handle_node(child)
  end
end