class WSUInPerson::WSUInPerson

Public Instance Methods

get_campus() click to toggle source
# File lib/WSU_In_Person.rb, line 13
# Prompts on standard output for a campus and stores the selection in @campuses.
#
# Entering "1" through "6" narrows @campuses to a one-element array holding the
# campus at that position (menu entry "6.Global" selects "DDP"). "7", any other
# text, or end of input leaves all six campuses selected. The resulting list is
# printed afterwards.
#
# @return [nil] the selection is stored in @campuses rather than returned
def get_campus
  @campuses = ["Pullman", "Spokane", "Tri-Cities", "Vancouver", "Everett", "DDP"]

  puts "For which campus do you want to get?\n
  1.Pullman\n
  2.Spokane\n
  3.Tri-Cities\n
  4.Vancouver\n
  5.Everett\n
  6.Global\n
  7.All\n"

  # Read from $stdin explicitly: bare Kernel#gets reads from files named in ARGV
  # when the script is started with arguments. to_s covers the nil returned at
  # end of input, which previously raised NoMethodError on chomp.
  choice = $stdin.gets.to_s.strip

  # Menu numbers are 1-based, array positions 0-based.
  @campuses = @campuses.values_at(choice.to_i - 1) if %w[1 2 3 4 5 6].include?(choice)

  puts @campuses
end
scrape_course_pages(subject_urls, prefixes, campus, time) click to toggle source
# File lib/WSU_In_Person.rb, line 88
    # Scrapes each subject page and writes one CSV row per section row found.
    #
    # Creates an ExportCSV for the given campus and term, then for every entry in
    # subject_urls fetches "http://schedules.wsu.edu<subject_url>" and walks the
    # <tr> rows under `.class_schedule`:
    # * a row whose combined cell text starts with the subject prefix updates the
    #   current course number and title;
    # * a row whose class attribute is "section" or "section subdued" is echoed
    #   to standard output and written to the CSV together with the current
    #   course number and title.
    # Subject pages answering "404 Not Found" are skipped.
    #
    # @param subject_urls [Array<String>] paths appended to http://schedules.wsu.edu
    # @param prefixes [Array<String>] subject prefixes, index-aligned with subject_urls
    # @param campus [String] forwarded to ExportCSV#create
    # @param time [String] forwarded to ExportCSV#create
    # @return [Array<String>] subject_urls
    # @raise [OpenURI::HTTPError] for HTTP errors other than "404 Not Found"
    def scrape_course_pages(subject_urls, prefixes, campus, time)
      csv = ExportCSV::ExportCSV.new
      csv.create(campus, time)
      csv.name_column(["Prefix", "Course Number", "Course Title", "Section", "Class Number", "Credit",
                       "Days & Times", "Bldg & Room", "Dates", "Instructor"])

      subject_urls.each_with_index do |subject_url, index|
        prefix = prefixes.at(index)
        # Course number and title persist across rows: a header row sets them and
        # the section rows that follow reuse them.
        course_number = ""
        name = ""

        begin
          # URI.open instead of Kernel#open: opening URLs through Kernel#open was
          # deprecated in Ruby 2.7 and removed in Ruby 3.0.
          doc = Nokogiri::HTML(URI.open("http://schedules.wsu.edu#{subject_url}"))
        rescue OpenURI::HTTPError => e
          # A missing subject page is expected; anything else is a real failure.
          next if e.message == '404 Not Found'

          raise
        end

        doc.css('.class_schedule').css('tr').each do |tr|
          # this part need to be updated often
          row_text = tr.css('td').text.strip
          if row_text.start_with?(prefix)
            # Drop the prefix token; the next token is the course number and the
            # remainder is the title.
            tokens = row_text.split.drop(1)
            course_number = tokens.first.to_s
            name = tokens.drop(1).join(' ')
          end

          next unless ["section", "section subdued"].include?(tr.attr('class'))

          # Some rows lack the trailing location/dates/instructor cells; to_s turns
          # a missing cell into an empty field instead of raising.
          cells = tr.css('td').map { |td| td.text.strip }
          sec, classnum, credit, sched_days, sched_loc, sched_dates, instructor =
            (1..7).map { |column| cells[column].to_s }

          # For when a section part has multiple lines: a continuation row carries
          # days/times and location in the section and class-number columns.
          # NOTE(review): only leading digits 0-6 count as a section number, so a
          # section starting with 7-9 would be treated as a continuation - confirm.
          unless sec.start_with?("0", "1", "2", "3", "4", "5", "6")
            sched_days = sec
            sched_loc = classnum
            sec = ""
            classnum = ""
            credit = ""
            sched_dates = ""
            instructor = ""
          end

          puts [course_number, name, sec, classnum, credit, sched_days,
                sched_loc, sched_dates, instructor].join(" ")
          csv.write_row([prefix, course_number, name, sec, classnum, credit, sched_days,
                         sched_loc, sched_dates, instructor])
        end
      end
    end
scrape_subject_urls() click to toggle source

puts "For which campus do you want to get?\n 1.Pullman\n 2.Spokane\n 3.Tri-Cities\n 4.Vancouver\n 5.Everett\n 6.Global\n 7.All\n"

# File lib/WSU_In_Person.rb, line 62
# Collects the subject links for every campus in @campuses and scrapes them.
#
# For each campus the page "http://schedules.wsu.edu/List/<campus>/<time>" is
# fetched, the href and stripped link text of every anchor under `.prefixList`
# are gathered, and both lists are handed to scrape_course_pages.
#
# @param time [String] term code placed in the URL and forwarded to
#   scrape_course_pages; defaults to the previously hard-coded '20213'
#   (earlier values used here: '20212', '20211', '20203')
# @return [Array<String>] @campuses
def scrape_subject_urls(time: '20213')
  @campuses.each do |campus|
    # URI.open instead of Kernel#open: opening URLs through Kernel#open was
    # deprecated in Ruby 2.7 and removed in Ruby 3.0.
    doc = Nokogiri::HTML(URI.open("http://schedules.wsu.edu/List/#{campus}/#{time}"))

    subjects = doc.css('.prefixList').css('a')
    subject_urls = subjects.map { |subject| subject.attribute('href').value }
    prefixes = subjects.map { |subject| subject.text.strip }

    scrape_course_pages(subject_urls, prefixes, campus, time)
  end
end