# patriot_web_parser.rb
require_relative 'patriot_web_networker'
require 'nokogiri'

class String
  # Checks whether the String consists solely of alphabetic characters.
  #
  # The whole string is anchored with \A and \z (rather than ^ and $, which
  # match at every line boundary), so a multi-line value such as "abc\n123"
  # is not reported as alphabetic just because one of its lines is.
  #
  # @return [Boolean] true if every character is a letter; false otherwise,
  #   including for the empty string
  def alpha?
    !!match(/\A[[:alpha:]]+\z/)
  end
end

module PatriotWeb
  # Contains methods for parsing data retrieved from Patriot Web
  class Parser
    # Option values on the semester select page that start with this prefix
    # are treated as semester values.
    SEMESTER_VALUE_PREFIX = '20'.freeze
    # CSS path of the table that lists the course sections of a subject.
    COURSE_TABLE_SELECTOR = 'html body div.pagebodydiv table.datadisplaytable'.freeze
    # The first two children of the course table are junk.
    HEADER_ROWS = 2
    # Each section is represented by 6 rows in the table.
    ROWS_PER_SECTION = 6
    # Rows 1 to 3 of a section hold registration/drop dates; row 4 holds the details.
    DETAIL_ROW_INDEX = 4
    # Separator used both inside section titles and inside time/date ranges.
    RANGE_SEPARATOR = ' - '.freeze
    # A section title must provide at least: title, CRN, "SUBJ NUM", section.
    MIN_TITLE_PARTS = 4
    # Detail fields up to index 4 (the date range) are required for parsing.
    MIN_DETAIL_FIELDS = 5

    private_constant :SEMESTER_VALUE_PREFIX, :COURSE_TABLE_SELECTOR, :HEADER_ROWS,
                     :ROWS_PER_SECTION, :DETAIL_ROW_INDEX, :RANGE_SEPARATOR,
                     :MIN_TITLE_PARTS, :MIN_DETAIL_FIELDS

    def initialize
      @networker = PatriotWeb::Networker.new
    end

    # Parses all semesters available on Patriot Web
    # @return [Array<String>] the semester option values
    def parse_semesters
      response = @networker.fetch_page_containing_semester_data
      document = Nokogiri::HTML(response) # parse the document from the HTTP response
      get_semesters_from_option_values(document)
    end

    # Parses subjects belonging to a given semester id
    # @param semester_id [Integer]
    # @return [Array<String>] the subject option values (no nil entries)
    def parse_subjects(semester_id)
      response = @networker.fetch_subjects(semester_id)
      document = Nokogiri::HTML(response)
      get_subject_codes_from_option_values(document)
    end

    # Parses all courses belonging to a given subject
    # @param subject [String]
    # @return [Array<Hash>] one hash per section that could be parsed
    def parse_courses_in_subject(subject)
      response = @networker.fetch_courses_in_subject(subject)
      document = Nokogiri::HTML(response)
      get_courses(document)
    end

    private

    # Collects the "value" attribute of every given option node, dropping
    # options that have no value attribute at all.
    # @param options [Enumerable] nodes responding to #attr
    # @return [Array<String>]
    def option_values(options)
      options.map { |opt| opt.attr('value') }.compact
    end

    # Parse the values of all different options on the Patriot Web
    # semester select page
    # @param document [Nokogiri::HTML::Document]
    # @return [Array<String>] values that look like semester values
    def get_semesters_from_option_values(document)
      option_values(document.css('option')).select do |value|
        value.start_with?(SEMESTER_VALUE_PREFIX) # ensure it is a semester value
      end
    end

    # Parse all subject codes from the select element on the Patriot Web
    # subject select page
    # @param document [Nokogiri::HTML::Document]
    # @return [Array<String>] option values whose stripped form is purely
    #   alphabetic; the values themselves are returned unmodified
    def get_subject_codes_from_option_values(document)
      option_values(document.xpath('//*[@id="subj_id"]/option')).select do |value|
        value.strip.alpha?
      end
    end

    # Parse all courses from the subject search page
    # @param document [Nokogiri::HTML::Document]
    # @return [Array<Hash>] courses; sections that cannot be parsed are skipped
    def get_courses(document)
      table = document.css(COURSE_TABLE_SELECTOR).first
      return [] if table.nil? # page without a results table has no courses

      rows = table.children.drop(HEADER_ROWS)
      # A trailing group with fewer than ROWS_PER_SECTION rows is incomplete; ignore it.
      sections = rows.each_slice(ROWS_PER_SECTION).select { |group| group.length == ROWS_PER_SECTION }
      sections.map { |section_rows| parse_section(section_rows) }.compact
    end

    # Builds the course hash for one section.
    # @param section_rows [Array] the ROWS_PER_SECTION table rows of the section
    # @return [Hash, nil] nil when the title or the details cannot be parsed
    def parse_section(section_rows)
      title_data = parse_section_title(section_rows.first.text)
      return nil if title_data.nil?

      detail_rows = section_rows[DETAIL_ROW_INDEX].css('tr')
      return nil if detail_rows.empty? # if there are no details, skip this item

      details = detail_rows.last.text.split("\n").reject(&:empty?) # skip empty strings
      return nil if details.length < MIN_DETAIL_FIELDS

      title_data.merge(parse_section_details(details))
    end

    # Extracts title, CRN, subject, course number and section from a heading.
    # The title looks like: Survey of Accounting - 71117 - ACCT 203 - 001
    # The last three parts are read from the end so that a course title which
    # itself contains " - " is kept intact instead of shifting the other fields.
    # @param title [String]
    # @return [Hash, nil] nil when the heading does not have the expected shape
    def parse_section_title(title)
      parts = title.split(RANGE_SEPARATOR)
      return nil if parts.length < MIN_TITLE_PARTS

      course = parts[-2].split(' ')
      return nil unless course.length == 2

      {
        title: parts[0..-MIN_TITLE_PARTS].join(RANGE_SEPARATOR).strip,
        crn: parts[-3],
        subj: course[0],
        course_number: course[1],
        section: parts[-1].strip
      }
    end

    # Extracts the meeting information from the non-empty detail lines.
    # @param details [Array<String>] at least MIN_DETAIL_FIELDS entries
    # @return [Hash]
    def parse_section_details(details)
      times = details[1].split(RANGE_SEPARATOR)
      dates = details[4].split(RANGE_SEPARATOR)
      unscheduled = times.length == 1 # a single value means no start/end time range

      {
        start_time: unscheduled ? 'TBA' : times[0],
        end_time: unscheduled ? 'TBA' : times[1],
        days: details[2].strip,
        location: details[3].strip,
        start_date: dates[0],
        end_date: dates[1],
        type: details[5],
        instructor: details[6]
      }
    end
  end
end