class PageRank
Constants
- DEFAULT_CONVERGENCE
- DEFAULT_DAMPING_FACTOR
- DEFAULT_MAX_ITERATIONS
- VERSION
Attributes
convergence[RW]
damping_factor[RW]
dangling_nodes[R]
divergence[R]
initialization_vector[R]
iterations[R]
matrix[R]
max_iterations[RW]
ranking[R]
size[R]
Public Class Methods
from_hash(hash, *args)
click to toggle source
# File lib/page_rank.rb 47 def from_hash(hash, *args) 48 s = hash.to_a.flatten.uniq 49 new(s.size, *args).seed(s.sort_by { |k| sortkey(k) }) << hash 50 end
from_json(path, *args)
click to toggle source
# File lib/page_rank.rb 52 def from_json(path, *args) 53 from_hash(JSON.parse(File.read(path)), *args) 54 end
new(size, options = {})
click to toggle source
# File lib/page_rank.rb 66 def initialize(size, options = {}) 67 @size, @idmap, @matrix, @iterations, @ranking, @divergence = 68 size, Hash.idmap(-1), GSL::Matrix.zeros(size), 0, [], [] 69 70 @dangling_nodes, @initialization_vector = 71 GSL::Vector::Col.alloc(size).set_all(1), 72 GSL::Vector::Col.alloc(size).set_all(Rational(1, size)) 73 74 @convergence = options.fetch(:convergence, DEFAULT_CONVERGENCE) 75 @max_iterations = options.fetch(:max_iterations, DEFAULT_MAX_ITERATIONS) 76 @damping_factor = options.fetch(:damping_factor, DEFAULT_DAMPING_FACTOR) 77 end
Private Class Methods
sortkey(k)
click to toggle source
# File lib/page_rank.rb 58 def sortkey(k) 59 Float(k) 60 rescue TypeError, ArgumentError 61 k 62 end
Public Instance Methods
<<(hash)
click to toggle source
# File lib/page_rank.rb 110 def <<(hash) 111 hash.each { |k, v| self[k] = v } 112 self 113 end
[]=(k, v)
click to toggle source
# File lib/page_rank.rb 115 def []=(k, v) 116 w = Rational(1, v.size) 117 dangling_nodes[i = id(k)] = 0 118 v.each { |l| matrix[i, id(l)] = w } 119 end
each() { |key(k), map! { |l| key(l) }| ... }
click to toggle source
# File lib/page_rank.rb 133 def each 134 return enum_for(__method__) unless block_given? 135 136 index = -1 137 138 matrix.each_row { |r| 139 k, v = index += 1, r.where.to_a 140 yield key(k), v.map! { |l| key(l) }.sort! unless v.empty? 141 } 142 143 self 144 end
inspect()
click to toggle source
# File lib/page_rank.rb 95 def inspect 96 s = '#<%s:0x%x>' % [self.class, object_id] 97 98 %w[size count convergence max_iterations damping_factor].each { |v| 99 s.insert(-2, " @#{v}=#{send(v).inspect}") 100 } 101 102 s 103 end
rank(personalization = true)
click to toggle source
# File lib/page_rank.rb 121 def rank(personalization = true) 122 iv, v = initialization_vector.dup, personalization 123 124 order(iterate(case v 125 when true then iv 126 when 0 then iv.set_zero 127 when Float then iv.set_all(v) 128 when Integer then iv.set_all(Rational(1, v)) 129 else v 130 end)) unless empty? 131 end
seed(list)
click to toggle source
# File lib/page_rank.rb 105 def seed(list) 106 list.each { |k| id(k) } 107 self 108 end
to_json()
click to toggle source
# File lib/page_rank.rb 146 def to_json 147 JSON.fast_generate(to_h) 148 end
to_table()
click to toggle source
# File lib/page_rank.rb 150 def to_table 151 matrix.to_a.map.with_index { |r, i| [key(i) || i + 1, 152 r.map.with_index { |v, j| [key(j) || j + 1, v] }] } 153 end
Private Instance Methods
iterate(pv)
click to toggle source
pv ? Langville/Meyer 2012 : Page/Brin 1998
# File lib/page_rank.rb 158 def iterate(pv) 159 iter, prev, diff = [], initialization_vector.trans, @divergence = [] 160 161 d, dn = 1 - df = damping_factor, dangling_nodes 162 d /= size.to_f unless pv 163 164 max_iterations.times { 165 curr = df * prev * matrix 166 iter << curr += pv ? (df * prev * dn + d) * pv : d 167 168 diff << [delta = (curr - prev).asum] 169 delta < convergence ? break : prev = curr 170 } 171 172 @iterations = iter.size 173 174 iter 175 end
order(iter)
click to toggle source
# File lib/page_rank.rb 177 def order(iter) 178 order, ranking, prev = [], @ranking = [], @idmap.to_a 179 180 iter.each { |t| 181 ranking << prev = @idmap.map { |k, i| [k, i, t[i]] } 182 .sort_by { |k, _, v| [-v, prev.index(prev.assoc(k))] } } 183 184 prev = prev.map { |k, i,| order << k; i } 185 186 @divergence.zip(ranking) { |diff, rank| 187 diff << prev.zip(rank.map { |r| r.delete_at(1) }).corr } 188 189 order 190 end