class EpubBook::Book
Constants
- Referer
- Reg
- UserAgent
Attributes
body_css[RW]
cover[RW]
cover_css[RW]
creator[RW]
des_url[RW]
description_css[RW]
ext_name[RW]
folder_name[RW]
index_item_css[RW]
item_attr[RW]
limit[RW]
mail_to[RW]
page_attr[RW]
page_css[RW]
path[RW]
referer[RW]
title_css[RW]
user_agent[RW]
Public Class Methods
new(index_url,des_url=nil ) { |self| ... }
click to toggle source
# File lib/epub_book/book.rb, line 30
# Sets up a book scraper for the catalogue page at +index_url+.
#
# index_url - URL of the page that lists the chapters.
# des_url   - optional value kept in @des_url (fetch_index hands it to
#             doc.css to locate the description-page link); defaults to nil.
#
# When a block is given the new instance is yielded to it after the defaults
# below have been assigned, so the block can override any of them.
def initialize(index_url, des_url = nil)
  @index_url, @des_url = index_url, des_url
  @user_agent, @referer = UserAgent, Referer

  # Working-folder name: URL-safe Base64 of the MD5 digest of the index URL,
  # with the last two characters dropped.
  digest = Digest::MD5.digest(@index_url)
  @folder_name = Base64.urlsafe_encode64(digest)[0..-3]

  # Default metadata and output settings.
  @creator  = 'javy_liu'
  @cover    = 'cover.jpg'
  @ext_name = 'epub'

  # Default selectors / attribute used when scraping.
  @title_css      = '.wrapper h1.title1'
  @index_item_css = 'ul.list3>li>a'
  @body_css       = '.articlebody'
  @item_attr      = "href"

  yield self if block_given?
end
Public Instance Methods
book()
click to toggle source
# File lib/epub_book/book.rb, line 54
# Returns the catalogue hash for this book, memoised in @book.
#
# Creates the book directory (book_path) when it does not exist yet. When a
# non-empty index.yml is found there it is loaded; otherwise, or when the
# file parses to nil/false, a fresh catalogue <tt>{files: []}</tt> is used.
#
# Returns a Hash.
def book
  return @book if @book

  Dir.mkdir(book_path) unless File.directory?(book_path)

  index_file = File.join(book_path, 'index.yml')
  # File.read closes the file itself; the previous File.open without a block
  # left the handle open until garbage collection.
  # NOTE(review): index.yml is expected to be the file written by save_book;
  # YAML.load should not be pointed at untrusted files.
  loaded = YAML.load(File.read(index_file)) if File.size?(index_file)

  # Fall back to an empty catalogue so callers can always use book[:files].
  @book = loaded || { files: [] }
end
book_path()
click to toggle source
# File lib/epub_book/book.rb, line 46
# Returns the directory that holds this book's files: @folder_name joined
# onto @path, or onto the current working directory when @path is not set.
# The result is memoised in @book_path.
def book_path
  # Dir.pwd replaces the former `pwd` shell-out: no subprocess is spawned and
  # it does not depend on a `pwd` executable being available.
  @book_path ||= File.join(@path || Dir.pwd, @folder_name)
end
fetch_book()
click to toggle source
# File lib/epub_book/book.rb, line 167
# Downloads the content of every chapter listed in book[:files].
#
# * ext_name == 'epub': each chapter is written to its own file
#   (item[:content] inside book_path) as an <h3> heading followed by the
#   nodes matched by @body_css, with matches of Reg removed.
# * otherwise (txt output): label and text of each chapter are appended to
#   the single file book[:file_abs_name].
#
# Chapters whose content file already exists and is non-empty are skipped,
# and at most +limit+ entries are visited when limit is set. A failure on one
# chapter is logged and the loop moves on to the next one.
#
# Returns nil.
def fetch_book
  # The index is not rebuilt here (the original re-fetch was commented out).
  EpubBook.logger.info "------Fetch book----------"

  # Open the txt file to append to (txt output only).
  txt_file = File.open(book[:file_abs_name], 'a') if ext_name == 'txt'

  book[:files].each_with_index do |item, index|
    break if limit && index >= limit

    content_path = File.join(book_path, item[:content])
    # Move on when the chapter file exists and is not empty.
    next if File.size?(content_path)

    begin
      response = HTTP.headers("User-Agent" => @user_agent, 'Referer' => @referer).get(item[:url])
      doc_file = Nokogiri::HTML(judge_encoding(response.to_s))
      EpubBook.logger.info item[:label]

      # Was compared against 'pub', which never matched: epub chapters were
      # never written and every item failed on the nil txt_file instead.
      if ext_name == 'epub'
        File.open(content_path, 'w') do |f|
          f.write("<h3>#{item[:label]}</h3>")
          f.write(doc_file.css(@body_css).to_s.gsub(Reg, ''))
        end
      else
        txt_file.write("\n\n")
        txt_file.write(item[:label])
        txt_file.write("\n ")
        txt_file.write(doc_file.css(@body_css).text)
      end
    rescue StandardError => e
      # StandardError (not Exception) so Ctrl-C / exit are not swallowed.
      EpubBook.logger.info "Error:#{e.message},#{item.inspect}"
    end
  end

  nil
ensure
  # Close the txt file even when the loop is left through an exception.
  txt_file.close if txt_file
end
fetch_index(url=nil)
click to toggle source
Fetches the book's chapter index (得到书目索引).
# File lib/epub_book/book.rb, line 126
# Rebuilds the chapter list in book[:files] and saves it with save_book.
#
# url - page to start from; defaults to @index_url.
#
# For every index page the links matched by @index_item_css are collected as
# <tt>{label:, url:}</tt> entries. When both @page_css and @page_attr are set,
# the link they select is followed to the next index page until no link is
# found or a page comes up a second time. While book[:title] is still unset,
# get_des is called with the index page, or with the page linked through
# @des_url when that is set. Finally each entry gets a content file name
# ("<position>.html").
#
# Changes from the previous recursive version:
# * book[:files] is reset once per call; the recursion used to wipe the list
#   at the start of every page.
# * an empty "next page" link ends the walk; it used to be treated as a link.
# * URI.encode (removed in Ruby 3.0) is replaced by URI::DEFAULT_PARSER.escape.
# * javascript/fragment links are filtered on the raw attribute value, before
#   escaping can alter the leading character.
def fetch_index(url = nil)
  book[:files] = []
  page_url = url || @index_url
  visited = {}

  while page_url && !page_url.empty? && !visited[page_url]
    visited[page_url] = true # guards against pagination loops
    doc = fetch_index_page(page_url)
    EpubBook.logger.info "------Fetch index--#{page_url}---------------"

    # Title, description and cover are read once.
    unless book[:title]
      des_doc = if @des_url.nil?
                  doc
                else
                  des_href = doc.css(@des_url).attr("href").to_s
                  fetch_index_page(generate_abs_url(des_href))
                end
      get_des(des_doc)
    end

    doc.css(@index_item_css).each do |item|
      raw_href = item.attr(@item_attr).to_s
      next if raw_href.empty? || raw_href.start_with?('javascript') || raw_href.start_with?('#')

      href = URI::DEFAULT_PARSER.escape(raw_href)
      book[:files] << { label: item.text, url: generate_abs_url(href) }
    end

    # Pagination, when configured.
    break unless @page_css && @page_attr

    next_page = doc.css(@page_css).attr(@page_attr).to_s
    page_url = next_page.empty? ? nil : generate_abs_url(next_page)
  end

  book[:files].each_with_index { |item, index| item[:content] = "#{index}.html" }
  # Persist the catalogue.
  save_book
end

# Downloads +url+ with the configured User-Agent/Referer headers, passes the
# body through judge_encoding and parses it with Nokogiri::HTML.
def fetch_index_page(url)
  response = HTTP.headers("User-Agent" => @user_agent, 'Referer' => @referer)
                 .get(URI::DEFAULT_PARSER.escape(url))
  Nokogiri::HTML(judge_encoding(response.to_s))
end
private :fetch_index_page
generate_book(book_name=nil) { |self| ... }
click to toggle source
Generates the book file (创建书本).
# File lib/epub_book/book.rb, line 69
# Builds the book file and, when mail_to is set, mails it as an attachment.
#
# book_name - accepted for compatibility; this method does not use it.
#
# Steps:
# 1. fetch_index runs unless a non-empty index.yml exists in book_path.
# 2. book[:file_abs_name] is set to "<title>.<ext_name>" inside book_path and
#    fetch_book downloads the chapters.
# 3. For ext_name == 'epub': when no @cover_css is configured but @cover is,
#    a cover is rendered with ImageMagick's `convert`; the epub is then
#    assembled through EeePub from the chapter files that exist on disk. The
#    block, if given, is yielded self just before the epub is saved.
# 4. When mail_to is set the generated file is sent with Mailer.
def generate_book(book_name = nil)
  # Fetch the epub source data when there is no usable index yet.
  fetch_index unless File.size?(File.join(book_path, 'index.yml'))

  book[:file_abs_name] = File.join(book_path, "#{book[:title]}.#{ext_name}")
  fetch_book

  if ext_name == 'epub'
    if !@cover_css && @cover
      # Run convert with an argument list instead of a shell string: the
      # title comes from a scraped page and must not reach a shell.
      # Quotes/backslashes are backslash-escaped for the -draw text primitive
      # (NOTE(review): confirm against the ImageMagick -draw documentation).
      caption = book[:title].to_s.gsub(/[\\']/) { |ch| "\\#{ch}" }
      system('convert', File.expand_path("../../../#{@cover}", __FILE__),
             '-font', 'tsxc.ttf', '-gravity', 'center', '-fill', 'red',
             '-pointsize', '16', '-draw', "text 0,0 '#{caption}'",
             File.join(book_path, @cover))
    end

    epub = EeePub.make
    epub.title book[:title]
    epub.creator @creator
    epub.publisher @creator
    epub.date Time.now
    epub.identifier "http://javy_liu.com/book/#{@folder_name}", :scheme => 'URL'
    epub.uid "http://javy_liu.com/book/#{@folder_name}"
    epub.cover @cover
    epub.subject book[:title]
    epub.description book[:description] if book[:description]

    book[:files] = book[:files][0...limit] if limit
    # Keep only the chapters whose content file is present on disk.
    book[:files].select! { |item| File.file?(File.join(book_path, item[:content])) }
    chapter_files = book[:files].map { |item| File.join(book_path, item[:content]) }

    epub.files chapter_files.push(File.join(book_path, @cover))
    epub.nav book[:files]
    yield self if block_given?

    epub.save(book[:file_abs_name])
  end

  # Send the result by mail.
  if mail_to
    mailer = Mailer.new
    mailer.to = mail_to
    mailer.add_file book[:file_abs_name]
    mailer.body = "您创建的电子书[#{book[:title]}]见附件\n"
    mailer.send_mail
  end
end
link_host()
click to toggle source
# File lib/epub_book/book.rb, line 50
# Returns the scheme-and-host prefix of @index_url (for example
# "http://example.com"), memoised in @link_host; nil when @index_url does not
# start with http:// or https://.
#
# The host is everything up to the first "/", "?" or "#". The earlier pattern
# also required a "/" plus a word character after the host, so index URLs
# such as "http://example.com/" produced nil.
def link_host
  @link_host ||= @index_url[%r{\Ahttps?://[^/?#]+}]
end
save_book()
click to toggle source
Saves the catalogue to index.yml in the book directory.
# File lib/epub_book/book.rb, line 61
# Serialises @book to YAML and writes it to index.yml inside book_path,
# replacing any previous content of that file.
def save_book
  index_file = File.join(book_path, 'index.yml')
  File.open(index_file, 'w') { |io| io.write(@book.to_yaml) }
end
Private Instance Methods
generate_abs_url(url)
click to toggle source
# File lib/epub_book/book.rb, line 244
# Turns a link found on a page into an absolute URL.
#
# url - String link value.
#
# * "http://…" / "https://…"  -> returned unchanged
# * "//host/path"              -> prefixed with the scheme of @index_url
# * "/path"                    -> prefixed with link_host
# * anything else              -> appended to the directory part of
#                                 @index_url (everything up to its last "/")
#
# Only a real scheme prefix counts as absolute; the earlier
# start_with?("http") test also caught relative names like "http_intro.html".
def generate_abs_url(url)
  if url =~ %r{\Ahttps?://}i
    url
  elsif url.start_with?("//")
    # Protocol-relative link: reuse the scheme of the index URL.
    "#{@index_url[/\A[a-z][a-z0-9+.\-]*:/i]}#{url}"
  elsif url.start_with?("/")
    "#{link_host}#{url}"
  else
    @path_name ||= @index_url[/.*\//]
    "#{@path_name}#{url}"
  end
end
get_des(doc)
click to toggle source
Extracts the book title, description, and cover (得到书名,介绍,及封面).
# File lib/epub_book/book.rb, line 224
# Reads the book's title, cover and description from +doc+ into the catalogue.
#
# doc - parsed page; it must respond to css(selector).
#
# * book[:title] is always set to the stripped text matched by @title_css.
# * For epub output with @cover_css set and no cover recorded yet, the "src"
#   of the matched node is made absolute, downloaded with curl to @cover
#   inside book_path, and that path is stored in book[:cover]. Nothing is
#   downloaded when the node or its "src" is missing.
# * book[:description] is set from @description_css when configured and not
#   already present.
def get_des(doc)
  book[:title] = doc.css(@title_css).text.strip

  if @cover_css && !book[:cover] && ext_name == 'epub'
    cover_src = doc.css(@cover_css).attr("src").to_s
    unless cover_src.empty?
      cover_url = generate_abs_url(cover_src)
      cover_path = File.join(book_path, @cover)
      # Argument-list form: the URL comes from a scraped page and must not be
      # interpolated into a shell command line.
      system('curl', cover_url, '-o', cover_path)
      book[:cover] = cover_path
    end
  end

  if @description_css && !book[:description]
    book[:description] = doc.css(@description_css).text
  end
end
judge_encoding(str)
click to toggle source
Returns the page text as valid UTF-8, transcoding it from GBK unless the markup declares a UTF-8 charset.
# File lib/epub_book/book.rb, line 214
# Returns the text of a downloaded page in a usable encoding.
#
# str - String holding the raw page.
#
# When the markup contains a <meta … charset=utf-8> declaration the text is
# kept as it is; otherwise its bytes are interpreted as GBK and transcoded to
# UTF-8, replacing invalid or undefined characters. Any byte sequence that is
# still invalid afterwards is scrubbed. The encoding before and after is
# logged.
#
# The work is done on a copy, so the caller's string is left untouched and
# frozen strings are accepted.
#
# Returns a new String.
def judge_encoding(str)
  EpubBook.logger.info str.encoding

  str = str.dup
  # Sniff the charset on a binary view so the match cannot fail on bytes that
  # are invalid in the string's current encoding.
  declared_utf8 = str.b =~ /<meta.*?charset\s*=[\s\"\']?utf-8/i
  str.force_encoding('gbk').encode!('utf-8', invalid: :replace, undef: :replace) unless declared_utf8

  str.scrub! unless str.valid_encoding?
  EpubBook.logger.info "-------encode 后 #{str.encoding}"
  str
end