class CBETA::XMLDocument

Constants

PASS

Attributes

doc[R]

Public Class Methods

new(string_or_io) click to toggle source
# File lib/cbeta/xml_document.rb, line 8
# Parses the XML input and prepares the gaiji lookup used while converting.
#
# @param string_or_io the XML source, handed unchanged to Nokogiri::XML
def initialize(string_or_io)
  # Namespaces are stripped so elements can be addressed by bare name.
  @doc = Nokogiri::XML(string_or_io).tap(&:remove_namespaces!)
  @gaiji = CBETA::Gaiji.new
end

Public Instance Methods

to_text() click to toggle source
# File lib/cbeta/xml_document.rb, line 14
# Converts the parsed document to plain text.
#
# Resets the per-conversion state (output format, gaiji normalisation stack,
# deferred-line buffer) and walks the tree starting at the root element.
#
# @return [String] the text produced by the traversal; an empty string when
#   the document has no root element
def to_text
  @format = 'text'
  @gaiji_norm = [true]
  # Unary plus keeps the buffer mutable; e_t appends to it in place.
  @next_line_buf = +''

  root = @doc.root
  # Without a root element there is nothing to traverse.
  return '' if root.nil?

  r = traverse(root)
  # e_t may still have text parked for the "next line" when the document ends
  # without a further <lb>; emit it the way e_lb does instead of dropping it.
  unless @next_line_buf.empty?
    r << @next_line_buf + "\n"
    @next_line_buf = +''
  end
  r
end

Private Instance Methods

e_anchor(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 23
# Renders an <anchor> element: the marker '◎' when its type attribute is
# 'circle', otherwise an empty string.
def e_anchor(e)
  circle = e.has_attribute?('type') && e['type'] == 'circle'
  circle ? '◎' : ''
end
e_app(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 33
# Renders an <app> element as the concatenated output of its child nodes.
def e_app(e) = traverse(e)
e_body(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 37
# Renders a <body> element as the concatenated output of its child nodes.
def e_body(e) = traverse(e)
e_byline(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 41
# Renders a <byline> element: its children's output followed by a newline.
def e_byline(e) = "#{traverse(e)}\n"
e_caesura(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 45
# Renders a <caesura> element as a single space; the element itself is not read.
def e_caesura(e) = ' '
e_caption(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 49
# Renders a <caption> element: its children's output followed by a newline.
def e_caption(e) = "#{traverse(e)}\n"
e_cell(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 53
# Renders a <cell> element: its children's output followed by a newline.
def e_cell(e) = "#{traverse(e)}\n"
e_cit(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 57
# Renders a <cit> element as the concatenated output of its child nodes.
def e_cit(e) = traverse(e)
e_closer(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 61
# Renders a <closer> element: its children's output followed by a newline.
def e_closer(e) = "#{traverse(e)}\n"
e_corr(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 65
# Renders a <corr> element as the concatenated output of its child nodes.
def e_corr(e) = traverse(e)
e_date(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 69
# Renders a <date> element as the concatenated output of its child nodes.
def e_date(e) = traverse(e)
e_dialog(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 73
# Renders a <dialog> element as the concatenated output of its child nodes.
def e_dialog(e) = traverse(e)
e_div(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 77
# Renders a <div> element as the concatenated output of its child nodes.
def e_div(e) = traverse(e)
e_docAuthor(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 81
# Renders a <docAuthor> element as the concatenated output of its child nodes.
# The camelCase name mirrors the element name used for dispatch in handle_node.
def e_docAuthor(e) = traverse(e)
e_docNumber(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 85
# Renders a <docNumber> element: its children's output followed by a newline.
# The camelCase name mirrors the element name used for dispatch in handle_node.
def e_docNumber(e) = "#{traverse(e)}\n"
e_event(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 89
# Renders an <event> element: its children's output followed by a newline.
def e_event(e) = "#{traverse(e)}\n"
e_figDesc(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 99
# Renders a <figDesc> element: its children's output followed by a newline.
# The camelCase name mirrors the element name used for dispatch in handle_node.
def e_figDesc(e) = "#{traverse(e)}\n"
e_figure(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 93
# Renders a <figure> element: its children's output, terminated by a newline
# only when that output is non-empty.
def e_figure(e)
  out = traverse(e)
  return out if out.empty?

  out << "\n"
end
e_foreign(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 103
# Renders a <foreign> element. Yields '' (without visiting the children) when
# the element has a place attribute containing 'foot'; otherwise the
# concatenated output of its child nodes.
def e_foreign(e)
  footnote = e.key?('place') && e['place'].include?('foot')
  footnote ? '' : traverse(e)
end
e_g(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 108
# Renders a <g> (gaiji reference) element through the @gaiji lookup.
#
# The ref attribute, minus a leading '#', is the gaiji id. The representation
# priority passed to @gaiji.to_s includes the normalised forms only while the
# top of the @gaiji_norm stack is truthy.
#
# Raises RuntimeError when @gaiji has no entry for the id.
def e_g(e)
  priority =
    if @gaiji_norm.last
      %w(uni_char norm_uni_char norm_big5_char composition)
    else
      %w(uni_char composition)
    end

  gid = e['ref'].delete_prefix('#')
  raise "在 CBETA 缺字庫中找不到此缺字碼: #{gid}" unless @gaiji.key?(gid)

  @gaiji.to_s(gid, cb_priority: priority)
end
e_head(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 124
# Renders a <head> element: its children's output, terminated by a newline
# only when that output is non-empty.
def e_head(e)
  traverse(e).tap { |text| text << "\n" unless text.empty? }
end
e_hi(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 130
# Renders a <hi> element as the concatenated output of its child nodes.
def e_hi(e) = traverse(e)
e_item(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 134
# Renders an <item> element on a new line, indented by one space per level of
# list nesting beyond the first, followed by its children's output.
#
# When the element has an n attribute, that value is prepended to the whole
# result, i.e. ahead of the leading newline.
# NOTE(review): this places the label on the preceding line — confirm intended.
#
# @return [String]
def e_item(e)
  list_level = e.xpath('ancestor::list').size
  # Clamp at zero: an <item> with no <list> ancestor would otherwise ask for a
  # negative repeat count, which String#* rejects with ArgumentError.
  indent = ' ' * [list_level - 1, 0].max

  r = "\n#{indent}#{traverse(e)}"
  r = e['n'] + r if e.key?('n')
  r
end
e_jhead(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 146
# Renders a <jhead> element as the concatenated output of its child nodes.
def e_jhead(e) = traverse(e)
e_juan(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 150
# Renders a <juan> element: its children's output followed by a newline.
def e_juan(e) = "#{traverse(e)}\n"
e_l(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 154
# Renders an <l> element: its children's output, followed by a newline unless
# @lg_type is 'abnormal'.
# NOTE(review): @lg_type is not assigned anywhere in the code visible here, so
# the newline is always appended unless it is set elsewhere — confirm.
def e_l(e)
  line = traverse(e)
  return line if @lg_type == 'abnormal'

  line << "\n"
end
e_lb(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 160
# Renders an <lb> element.
#
# Returns '' for an <lb> whose type is 'old'. Otherwise the result starts with
# a newline when the current paragraph type (@p_type) is 'pre', and then
# flushes any text parked in @next_line_buf, followed by a newline, clearing
# the buffer.
def e_lb(e)
  return '' if e['type'] == 'old'

  out = @p_type == 'pre' ? +"\n" : +''
  return out if @next_line_buf.empty?

  out << @next_line_buf + "\n"
  @next_line_buf = ''
  out
end
e_lem(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 171
# Renders a <lem> element as the concatenated output of its child nodes.
def e_lem(e) = traverse(e)
e_lg(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 175
# Renders an <lg> element as the concatenated output of its child nodes.
def e_lg(e) = traverse(e)
e_list(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 179
# Renders a <list> element: its children's output, followed by a blank line
# ("\n\n") unless the list's parent element is an <item>.
def e_list(e)
  out = traverse(e)
  return out if e.parent.name == 'item'

  out << "\n\n"
end
e_milestone(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 185
# A <milestone> element contributes nothing to the output.
def e_milestone(e) = ''
e_note(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 189
# Renders a <note> element.
#
# Only notes whose place attribute is exactly 'inline', 'inline2' or
# 'interlinear' are emitted, as their children's output wrapped in
# parentheses. Every other note yields '' and its children are not visited.
#
# The place value is matched against the list of allowed values rather than
# searched for inside a joined string, so values such as '' or 'line' no
# longer count as inline.
#
# @return [String]
def e_note(e)
  return '' unless e.has_attribute?('place')
  return '' unless %w[inline inline2 interlinear].include?(e['place'])

  "(#{traverse(e)})"
end
e_p(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 199
# Renders a <p> element: its children's output followed by a newline.
#
# While the children are traversed, @p_type holds this paragraph's type
# attribute (e_lb reads it). The previous value is restored afterwards, also
# when traversal raises, so a <p> nested inside another paragraph does not
# wipe the outer paragraph's type.
#
# @return [String]
def e_p(e)
  outer_type = @p_type
  @p_type = e['type']
  traverse(e) + "\n"
ensure
  @p_type = outer_type
end
e_pb(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 206
# A <pb> element contributes nothing to the output.
def e_pb(e) = ''
e_quote(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 210
# Renders a <quote> element as the concatenated output of its child nodes.
def e_quote(e) = traverse(e)
e_ref(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 214
# Renders a <ref> element as the concatenated output of its child nodes.
def e_ref(e) = traverse(e)
e_reg(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 218
# Renders a <reg> element: '' when it sits anywhere inside a <choice>
# element (its children are then not visited), otherwise the concatenated
# output of its child nodes.
def e_reg(e)
  return '' unless e.at_xpath('ancestor::choice').nil?

  traverse(e)
end
e_row(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 225
# Renders a <row> element: its children's output followed by a newline.
def e_row(e) = "#{traverse(e)}\n"
e_seg(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 229
# Renders a <seg> element as the concatenated output of its child nodes.
def e_seg(e) = traverse(e)
e_sg(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 233
# Renders an <sg> element: its children's output wrapped in parentheses.
def e_sg(e) = "(#{traverse(e)})"
e_sp(e) click to toggle source

speech

# File lib/cbeta/xml_document.rb, line 238
# Renders an <sp> (speech) element as the concatenated output of its child
# nodes.
def e_sp(e) = traverse(e)
e_space(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 242
# Renders a <space> element as as many spaces as its quantity attribute says.
# A quantity of '0', or a missing quantity (nil.to_i is 0), yields ''.
def e_space(e) = ' ' * e['quantity'].to_i
e_t(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 247
# Renders a <t> element.
#
# Returns '' (children not visited) when the element's place attribute
# contains 'foot'. When an enclosing <tt> is typed 'app' or 'single-line',
# placed 'inline' or rendered 'normal', the traversed text is returned as is.
# Otherwise the element is handled as part of a two-line parallel layout,
# keyed on its position among the sibling <t> elements: the first is returned
# followed by a space, the second is parked in @next_line_buf (flushed by
# e_lb) and '' is returned, and any later one is returned unchanged.
def e_t(e)
  return '' if e.has_attribute?('place') && e['place'].include?('foot')

  text = traverse(e)

  # Not a two-line parallel layout: emit the text where it stands.
  tt = e.at_xpath('ancestor::tt')
  if tt && (%w(app single-line).include?(tt['type']) ||
            tt['place'] == 'inline' ||
            tt['rend'] == 'normal')
    return text
  end

  # Two-line parallel layout.
  position = e.xpath('../t').index(e)
  if position == 0
    "#{text} "
  elsif position == 1
    @next_line_buf << "#{text} "
    ''
  else
    text
  end
end
e_table(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 274
# Renders a <table> element: its children's output followed by a newline.
def e_table(e) = "#{traverse(e)}\n"
e_term(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 278
# Renders a <term> element as the concatenated output of its child nodes.
#
# For the duration of the traversal a flag is pushed onto @gaiji_norm (read by
# e_g): false when the behaviour attribute is 'no-norm', true otherwise.
def e_term(e)
  @gaiji_norm.push(e['behaviour'] != 'no-norm')
  traverse(e).tap { @gaiji_norm.pop }
end
e_text(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 289
# Renders a <text> element as the concatenated output of its child nodes.
#
# For the duration of the traversal a flag is pushed onto @gaiji_norm (read by
# e_g): false when the behaviour attribute is 'no-norm', true otherwise.
def e_text(e)
  @gaiji_norm << (e['behaviour'] != 'no-norm')
  out = traverse(e)
  @gaiji_norm.pop
  out
end
e_tt(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 300
# Renders a <tt> element as the concatenated output of its child nodes.
def e_tt(e) = traverse(e)
e_unclear(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 304
# Renders an <unclear> element: its children's output, or the placeholder
# '▆' when that output is empty.
def e_unclear(e)
  text = traverse(e)
  text.empty? ? '▆' : text
end
handle_node(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 310
# Converts a single node to its output string.
#
# Comments yield '', text nodes go through handle_text, elements whose name is
# listed in PASS yield '', and every other node is dispatched to the method
# named e_<node name>.
# NOTE(review): a node name without a matching e_* method makes send raise
# NoMethodError — confirm that every expected element has a handler.
def handle_node(e)
  if e.comment?
    ''
  elsif e.text?
    handle_text(e)
  elsif PASS.include?(e.name)
    ''
  else
    send("e_#{e.name}", e)
  end
end
handle_text(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 317
# Converts a text node to output text.
#
# Returns '' when the content is empty after chomping, or when the node's
# parent is an <app> element. Otherwise every CR and LF is removed, and the
# result is HTML-escaped when @format is 'html'.
def handle_text(e)
  s = e.content.chomp
  return '' if s.empty? || e.parent.name == 'app'

  # CBETA XML carries superfluous line breaks between runs of text.
  flat = s.delete("\n\r")

  # Escaping turns & into &amp; (and the other HTML special characters).
  @format == 'html' ? CGI.escapeHTML(flat) : flat
end
traverse(e) click to toggle source
# File lib/cbeta/xml_document.rb, line 332
# Concatenates, in document order, the output of handle_node for every child
# of the given node. A node without children yields ''.
def traverse(e)
  e.children.each_with_object(+'') { |child, out| out << handle_node(child) }
end