class EpubBook::Book

Constants

Referer
Reg
UserAgent

Attributes

body_css[RW]
cover[RW]
cover_css[RW]
creator[RW]
des_url[RW]
description_css[RW]
ext_name[RW]
folder_name[RW]
index_item_css[RW]
item_attr[RW]
limit[RW]
mail_to[RW]
page_attr[RW]
page_css[RW]
path[RW]
referer[RW]
title_css[RW]
user_agent[RW]

Public Class Methods

new(index_url,des_url=nil ) { |self| ... } click to toggle source
# File lib/epub_book/book.rb, line 30
# Sets up a book for the given index page and fills in default scraping
# settings. Yields the new instance when a block is given so the caller
# can override any of the defaults.
#
# index_url - URL of the page that lists the chapters.
# des_url   - optional value stored in @des_url (nil by default).
def initialize(index_url,des_url=nil )
  @index_url = index_url
  @des_url   = des_url

  # Request headers default to the class-level constants.
  @user_agent = UserAgent
  @referer    = Referer

  # Folder name: URL-safe Base64 of the MD5 digest of the index URL,
  # with the last two characters dropped.
  url_digest   = Digest::MD5.digest(@index_url)
  encoded_name = Base64.urlsafe_encode64(url_digest)
  @folder_name = encoded_name[0..-3]

  @creator = 'javy_liu'

  # Default CSS selectors and the attribute read from each index item.
  @title_css      = '.wrapper h1.title1'
  @index_item_css = 'ul.list3>li>a'
  @body_css       = '.articlebody'
  @item_attr      = "href"

  # Default cover file name and output format.
  @cover    = 'cover.jpg'
  @ext_name = 'epub'

  yield self if block_given?
end

Public Instance Methods

book() click to toggle source
# File lib/epub_book/book.rb, line 54
# Returns the memoized book catalog hash, creating the book directory
# if it does not exist yet.
#
# When a non-empty index.yml exists under book_path it is loaded;
# otherwise (or when the file does not contain a Hash) a fresh catalog
# `{files: []}` is returned.
def book
  return @book if @book
  Dir.mkdir(book_path) unless test(?d,book_path)

  index_file = File.join(book_path,'index.yml')
  # File.read closes the file right away (the previous File.open without
  # a block left the handle open). index.yml is written by save_book;
  # NOTE(review): YAML.load should not be pointed at untrusted files.
  loaded = test(?s,index_file) ? YAML.load(File.read(index_file)) : nil

  # A file holding only comments or a scalar does not yield a Hash;
  # fall back to an empty catalog instead of failing later on book[:files].
  @book = loaded.is_a?(Hash) ? loaded : {files: []}
  @book[:files] ||= []
  @book
end
book_path() click to toggle source
# File lib/epub_book/book.rb, line 46
# Returns the memoized directory of this book: @folder_name joined onto
# @path, or onto the current working directory when @path is not set.
def book_path
  # Dir.pwd replaces shelling out to `pwd`: no subprocess is spawned and
  # it does not depend on a POSIX shell being available.
  @book_path ||= File.join((@path || Dir.pwd), @folder_name)
end
fetch_book() click to toggle source
# File lib/epub_book/book.rb, line 167
# Downloads the content of every chapter listed in book[:files].
#
# For ext_name 'epub' each chapter is written to its own file under
# book_path (item[:content]); for ext_name 'txt' every chapter is appended
# to the single file book[:file_abs_name]. At most `limit` entries are
# visited when limit is set. A chapter whose content file already exists
# and is non-empty is skipped. A failure on one chapter is logged and the
# loop continues with the next one.
def fetch_book
  # Re-fetching the index when it is missing is left disabled here:
  #fetch_index  if !test(?s,File.join(book_path,'index.yml'))
  EpubBook.logger.info "------Fetch book----------"

  # Open the txt output for appending (only used for ext_name 'txt').
  txt_file = File.open(book[:file_abs_name], 'a') if ext_name == 'txt'

  book[:files].each_with_index do |item,index|
    break if limit && index >= limit

    content_path = File.join(book_path,item[:content])

    # Skip chapters whose file already exists with a non-zero size.
    next if test(?s,content_path)

    begin
      doc_file = Nokogiri::HTML(judge_encoding(HTTP.headers("User-Agent" => @user_agent,'Referer'=> @referer).get(item[:url]).to_s))

      EpubBook.logger.info item[:label]

      # Was compared against 'pub', which never matched: EPUB chapters
      # fell into the txt branch, hit a nil txt_file and were dropped.
      if ext_name == 'epub'
        File.open(content_path,'w') do |f|
          f.write("<h3>#{item[:label]}</h3>")
          f.write(doc_file.css(@body_css).to_s.gsub(Reg,''))
        end
      elsif txt_file
        txt_file.write("\n\n")
        txt_file.write(item[:label])
        txt_file.write("\n  ")
        txt_file.write(doc_file.css(@body_css).text)
      end
    rescue StandardError => e
      # Best effort per chapter. StandardError (not Exception) so that
      # Interrupt / SystemExit still stop the run.
      EpubBook.logger.info "Error:#{e.message},#{item.inspect}"
    end
  end
ensure
  # Always release the txt handle, even when the loop is aborted.
  txt_file.close if txt_file
end
fetch_index(url=nil) click to toggle source

得到书目索引 — fetches the book's chapter index from the index page and saves it to index.yml.

# File lib/epub_book/book.rb, line 126
# Builds the chapter index (book[:files]) and saves it via save_book.
#
# url - page to start from; defaults to @index_url.
#
# Starting at `url`, every index page is fetched and each element matching
# @index_item_css becomes an entry `{label:, url:}`. When @page_css and
# @page_attr are set, the link found there is followed to the next page
# until no link is found or a page repeats. On the first page the title,
# cover and description are read through get_des unless a title is already
# known. Finally every entry gets a content file name "<position>.html".
def fetch_index(url=nil)
  book[:files] = []

  # URI.encode was removed in Ruby 3.0; RFC2396_Parser#escape is the
  # implementation it used to delegate to.
  escaper = URI::RFC2396_Parser.new
  fetch_doc = lambda do |page|
    Nokogiri::HTML(judge_encoding(HTTP.headers("User-Agent" => @user_agent ,'Referer'=> @referer).get(escaper.escape(page)).to_s))
  end

  page_url = url || @index_url
  visited = {}

  # Pages are walked in a loop rather than by recursion: the recursive
  # version reset book[:files] on every page (dropping earlier pages) and
  # never stopped, because `attr(...).to_s` is truthy even when empty.
  while page_url && !page_url.empty? && !visited[page_url]
    visited[page_url] = true
    doc = fetch_doc.call(page_url)
    EpubBook.logger.info "------Fetch index--#{page_url}---------------"

    unless book[:title]
      des_doc = if @des_url.nil?
                  doc
                else
                  # @des_url is used as a CSS selector for the link to the
                  # description page.
                  fetch_doc.call(generate_abs_url(doc.css(@des_url).attr("href").to_s))
                end
      get_des(des_doc)
    end

    doc.css(@index_item_css).each do |item|
      href = escaper.escape(item.attr(@item_attr).to_s)
      # Skip script links, in-page anchors and elements without a target.
      next if href.empty? || href.start_with?('javascript') || href.start_with?('#')

      book[:files] << {label: item.text, url: generate_abs_url(href)}
    end

    # Follow pagination only when configured and a next link is present.
    next_href = @page_css && @page_attr ? doc.css(@page_css).attr(@page_attr).to_s : ''
    if next_href.empty? || next_href.start_with?('javascript') || next_href.start_with?('#')
      page_url = nil
    else
      page_url = generate_abs_url(next_href)
    end
  end

  book[:files].each_with_index{|item,index| item[:content] = "#{index}.html"}

  # Persist the index.
  save_book
end
generate_book(book_name=nil) { |self| ... } click to toggle source

创建书本

# File lib/epub_book/book.rb, line 69
    # Builds the book file and optionally mails it.
    #
    # book_name - accepted for compatibility; not used by this method.
    #
    # Steps: fetch the index when no non-empty index.yml exists, download
    # the chapters (fetch_book), and for ext_name 'epub' assemble the EPUB
    # with EeePub from the chapter files that exist on disk. When a block
    # is given it is yielded this instance just before the EPUB is saved.
    # When mail_to is set, the resulting file is sent as an attachment.
    def generate_book(book_name=nil)
      # Fetch the EPUB source data (index) only when it is not cached yet.
      fetch_index unless test(?s,File.join(book_path,'index.yml'))

      book[:file_abs_name] = File.join(book_path,"#{book[:title]}.#{ext_name}")

      fetch_book
      if ext_name == 'epub'
        if !@cover_css && @cover
          # No cover is scraped: draw the title onto the bundled cover
          # image with ImageMagick. The argument-list form of system does
          # not go through a shell, so a title taken from a remote page
          # cannot inject commands or break the command on quotes.
          # Backslash-escape quotes for the -draw text primitive.
          draw_title = book[:title].to_s.gsub(/[\\']/) { |ch| "\\#{ch}" }
          system('convert', File.expand_path("../../../#{@cover}",__FILE__),
                 '-font', 'tsxc.ttf', '-gravity', 'center', '-fill', 'red',
                 '-pointsize', '16', '-draw', "text 0,0 '#{draw_title}'",
                 File.join(book_path,@cover))
        end

        epub = EeePub.make

        epub.title book[:title]
        epub.creator @creator
        epub.publisher @creator
        epub.date Time.now
        epub.identifier "http://javy_liu.com/book/#{@folder_name}", :scheme => 'URL'
        epub.uid "http://javy_liu.com/book/#{@folder_name}"
        epub.cover @cover
        epub.subject book[:title]
        epub.description book[:description] if book[:description]

        book[:files] = book[:files][0...limit] if limit

        # Keep only chapters whose content file exists, so the file list
        # and the navigation stay in step.
        book[:files].select! { |item| test(?f, File.join(book_path,item[:content])) }
        _files = book[:files].map { |item| File.join(book_path,item[:content]) }

        epub.files _files.push(File.join(book_path,@cover))
        epub.nav book[:files]
        yield self if block_given?

        epub.save(book[:file_abs_name])
      end

      # Send the generated file by mail when a recipient is configured.
      if mail_to
        mailer = Mailer.new
        mailer.to = mail_to
        mailer.add_file book[:file_abs_name]
        mailer.body = "您创建的电子书[#{book[:title]}]见附件\n"
        mailer.send_mail
      end

    end
save_book() click to toggle source

Saves the book catalog (the @book hash) as YAML to index.yml inside book_path.

# File lib/epub_book/book.rb, line 61
# Serializes the catalog held in @book to YAML and writes it to
# index.yml inside book_path, replacing any previous content.
def save_book
  index_file = File.join(book_path,'index.yml' )
  File.open(index_file,'w') { |io| io.write(@book.to_yaml) }
end

Private Instance Methods

generate_abs_url(url) click to toggle source
# File lib/epub_book/book.rb, line 244
# Turns a link found on a page into an absolute URL.
#
# url - link as found in the page (String).
#
# * "http://..." / "https://..." is returned unchanged.
# * "//host/path" (protocol-relative) gets the scheme of @index_url.
# * "/path" is prefixed with link_host.
# * anything else is resolved against the directory part of @index_url
#   (everything up to and including its last "/").
def generate_abs_url(url)
  if url =~ %r{\Ahttps?://}i
    # Require the "://" so relative names such as "httpd.html" are not
    # mistaken for absolute URLs.
    url
  elsif url.start_with?("//")
    # Protocol-relative link: reuse the index page's scheme instead of
    # gluing the link onto link_host.
    scheme = @index_url[/\A[a-z][a-z0-9+.-]*:/i] || 'http:'
    "#{scheme}#{url}"
  elsif url.start_with?("/")
    "#{link_host}#{url}"
  else
    @path_name ||= @index_url[/.*\//]
    "#{@path_name}#{url}"
  end

end
get_des(doc) click to toggle source

得到书名,介绍,及封面

# File lib/epub_book/book.rb, line 224
# Reads the title, cover and description of the book from a parsed page
# and stores them in the catalog.
#
# doc - parsed document responding to #css.
#
# The title is always (re)written. The cover is downloaded with curl to
# book_path/@cover only for ext_name 'epub', when @cover_css is set and no
# cover is recorded yet. The description is stored only when
# @description_css is set and none is recorded yet.
def get_des(doc)
  book[:title] = doc.css(@title_css).text.strip

  if @cover_css && !book[:cover] && ext_name == 'epub'
    cover_src = doc.css(@cover_css).attr("src").to_s
    # Nothing to download when the selector matched no image source.
    unless cover_src.empty?
      cover_url = generate_abs_url(cover_src)
      cover_path = File.join(book_path,@cover)
      # Argument-list form: no shell is involved, so a URL taken from a
      # remote page cannot inject extra commands.
      system('curl', cover_url, '-o', cover_path)
      book[:cover] = cover_path
    end
  end

  if @description_css && !book[:description]
    book[:description] = doc.css(@description_css).text
  end
end
judge_encoding(str) click to toggle source

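Returns the page content as UTF-8: content that does not declare a utf-8 charset in a meta tag is treated as GBK and transcoded; remaining invalid bytes are scrubbed.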

# File lib/epub_book/book.rb, line 214
# Returns the page content as a valid UTF-8 string.
#
# str - raw page content (String, any encoding tag).
#
# When the content declares a utf-8 charset in a <meta> tag it is tagged
# as UTF-8; otherwise it is treated as GBK and transcoded to UTF-8 with
# invalid/undefined characters replaced. Any bytes still invalid are
# scrubbed. The argument itself is left untouched.
def judge_encoding(str)
  EpubBook.logger.info str.encoding

  # Work on a copy so frozen or shared strings are not modified.
  text = str.dup

  # Match against the raw bytes: matching a regexp against a string that
  # is invalid in its tagged encoding raises ArgumentError.
  if /<meta.*?charset\s*=[\s\"\']?utf-8/i =~ text.b
    # Tag the bytes as UTF-8 (they may arrive tagged as binary).
    text.force_encoding('utf-8')
  else
    text = text.force_encoding('gbk').encode('utf-8',invalid: :replace, undef: :replace)
  end
  text.scrub! unless text.valid_encoding?

  EpubBook.logger.info "-------encode 后 #{text.encoding}"
  text
end