class PageRank

Constants

DEFAULT_CONVERGENCE
DEFAULT_DAMPING_FACTOR
DEFAULT_MAX_ITERATIONS
VERSION

Attributes

convergence[RW]
damping_factor[RW]
dangling_nodes[R]
divergence[R]
initialization_vector[R]
iterations[R]
matrix[R]
max_iterations[RW]
ranking[R]
size[R]

Public Class Methods

from_hash(hash, *args) click to toggle source
   # File lib/page_rank.rb
47 def from_hash(hash, *args)
48   s = hash.to_a.flatten.uniq
49   new(s.size, *args).seed(s.sort_by { |k| sortkey(k) }) << hash
50 end
from_json(path, *args) click to toggle source
   # File lib/page_rank.rb
52 def from_json(path, *args)
53   from_hash(JSON.parse(File.read(path)), *args)
54 end
new(size, options = {}) click to toggle source
   # File lib/page_rank.rb
66 def initialize(size, options = {})
67   @size, @idmap, @matrix, @iterations, @ranking, @divergence =
68     size, Hash.idmap(-1), GSL::Matrix.zeros(size), 0, [], []
69 
70   @dangling_nodes, @initialization_vector =
71     GSL::Vector::Col.alloc(size).set_all(1),
72     GSL::Vector::Col.alloc(size).set_all(Rational(1, size))
73 
74   @convergence    = options.fetch(:convergence,    DEFAULT_CONVERGENCE)
75   @max_iterations = options.fetch(:max_iterations, DEFAULT_MAX_ITERATIONS)
76   @damping_factor = options.fetch(:damping_factor, DEFAULT_DAMPING_FACTOR)
77 end

Private Class Methods

sortkey(k) click to toggle source
   # File lib/page_rank.rb
58 def sortkey(k)
59   Float(k)
60 rescue TypeError, ArgumentError
61   k
62 end

Public Instance Methods

<<(hash) click to toggle source
    # File lib/page_rank.rb
110 def <<(hash)
111   hash.each { |k, v| self[k] = v }
112   self
113 end
[]=(k, v) click to toggle source
    # File lib/page_rank.rb
115 def []=(k, v)
116   w = Rational(1, v.size)
117   dangling_nodes[i = id(k)] = 0
118   v.each { |l| matrix[i, id(l)] = w }
119 end
each() { |key(k), map! { |l| key(l) }| ... } click to toggle source
    # File lib/page_rank.rb
133 def each
134   return enum_for(__method__) unless block_given?
135 
136   index = -1
137 
138   matrix.each_row { |r|
139     k, v = index += 1, r.where.to_a
140     yield key(k), v.map! { |l| key(l) }.sort! unless v.empty?
141   }
142 
143   self
144 end
inspect() click to toggle source
    # File lib/page_rank.rb
 95 def inspect
 96   s = '#<%s:0x%x>' % [self.class, object_id]
 97 
 98   %w[size count convergence max_iterations damping_factor].each { |v|
 99     s.insert(-2, " @#{v}=#{send(v).inspect}")
100   }
101 
102   s
103 end
rank(personalization = true) click to toggle source
    # File lib/page_rank.rb
121 def rank(personalization = true)
122   iv, v = initialization_vector.dup, personalization
123 
124   order(iterate(case v
125     when true    then iv
126     when 0       then iv.set_zero
127     when Float   then iv.set_all(v)
128     when Integer then iv.set_all(Rational(1, v))
129     else              v
130   end)) unless empty?
131 end
seed(list) click to toggle source
    # File lib/page_rank.rb
105 def seed(list)
106   list.each { |k| id(k) }
107   self
108 end
to_json() click to toggle source
    # File lib/page_rank.rb
146 def to_json
147   JSON.fast_generate(to_h)
148 end
to_table() click to toggle source
    # File lib/page_rank.rb
150 def to_table
151   matrix.to_a.map.with_index { |r, i| [key(i) || i + 1,
152     r.map.with_index { |v, j| [key(j) || j + 1, v] }] }
153 end

Private Instance Methods

iterate(pv) click to toggle source

pv ? Langville/Meyer 2012 : Page/Brin 1998

    # File lib/page_rank.rb
158 def iterate(pv)
159   iter, prev, diff = [], initialization_vector.trans, @divergence = []
160 
161   d, dn = 1 - df = damping_factor, dangling_nodes
162   d /= size.to_f unless pv
163 
164   max_iterations.times {
165     curr = df * prev * matrix
166     iter << curr += pv ? (df * prev * dn + d) * pv : d
167 
168     diff << [delta = (curr - prev).asum]
169     delta < convergence ? break : prev = curr
170   }
171 
172   @iterations = iter.size
173 
174   iter
175 end
order(iter) click to toggle source
    # File lib/page_rank.rb
177 def order(iter)
178   order, ranking, prev = [], @ranking = [], @idmap.to_a
179 
180   iter.each { |t|
181     ranking << prev = @idmap.map { |k, i| [k, i, t[i]] }
182       .sort_by { |k, _, v| [-v, prev.index(prev.assoc(k))] } }
183 
184   prev = prev.map { |k, i,| order << k; i }
185 
186   @divergence.zip(ranking) { |diff, rank|
187     diff << prev.zip(rank.map { |r| r.delete_at(1) }).corr }
188 
189   order
190 end