Commit b4635282 authored by Zac Wood
Browse files

okay it actually works this time I promise

parent 3cb72415
...@@ -36,7 +36,7 @@ module PatriotWeb ...@@ -36,7 +36,7 @@ module PatriotWeb
def parse_courses_in_subject(subject) def parse_courses_in_subject(subject)
response = @networker.fetch_courses_in_subject(subject) response = @networker.fetch_courses_in_subject(subject)
document = Nokogiri::HTML(response) document = Nokogiri::HTML(response)
get_courses(document) get_courses(document, subject)
end end
private private
...@@ -66,34 +66,40 @@ module PatriotWeb ...@@ -66,34 +66,40 @@ module PatriotWeb
# Parse all courses from the subject search page # Parse all courses from the subject search page
# @param document [Nokogiri::HTML::Document] # @param document [Nokogiri::HTML::Document]
# @return [Array] courses # @return [Array] courses
def get_courses(document) def get_courses(document, subject)
table = document.css('html body div.pagebodydiv table.datadisplaytable') table = document.css('html body div.pagebodydiv table.datadisplaytable')
rows = table.css('tr') rows = table.css('tr')
# rows[100..110].each_with_index do |row, i|
# puts i
# puts row
# end
data_from rows
end
def data_from(rows)
i = 0
title_index = 0
result = []
(0..(rows.length/6-1)).map do |i| while i < rows.length
start = i*5 if is_title(rows[i].text) # check if the row is a title
data = {} data = {}
title = rows[start].text
# the title looks like this: Survey of Accounting - 71117 - ACCT 203 - 001 title_elements = rows[i].text.split(' - ')
# so split it by ' - ' and extract
title_elements = title.split(' - ')
next unless title_elements.length == 4
data[:title] = title_elements[0].strip data[:title] = title_elements[0].strip
data[:crn] = title_elements[1] data[:crn] = title_elements[1]
full_name = title_elements[2].split(' ') full_name = title_elements[2].split(' ')
next unless full_name.length == 2 next unless full_name.length == 2
data[:subj] = title_elements[2].split(' ')[0] data[:subj] = full_name[0]
data[:course_number] = title_elements[2].split(' ')[1] data[:course_number] = full_name[1]
data[:section] = title_elements[3].strip data[:section] = title_elements[3].strip
# rows 1 to 3 contain info about registration and drop dates. details = rows[i+2].css('td table tr td')
# for now we're gonna ignore them and skip to row 4, which contains details unless details.length > 0
details = rows[start+2].css('td table tr td') puts "#{full_name.join(' ')} is fake news"
i += 1
next unless details.length > 0 # if there are no details, skip this item next
# details = detail_rows.last.text.split("\n").compact.reject(&:empty?) # skip empty strings end
times = details[1].text.split(' - ') times = details[1].text.split(' - ')
if (times.length == 1) if (times.length == 1)
...@@ -113,73 +119,21 @@ module PatriotWeb ...@@ -113,73 +119,21 @@ module PatriotWeb
data[:type] = details[5].text data[:type] = details[5].text
data[:instructor] = details[6].text data[:instructor] = details[6].text
data
end
# puts rows[0].text
# puts rows[2].css('td table tr td')
# puts rows[5].text
# puts rows[7].css('td table tr td')
# puts rows[10].text
# puts rows[12].css('td table tr td')
# (0..(rows.length/3-1)).each do |i|
# start = i*3
# puts rows[start].search('th').first.text result << data
# section_data = rows[start+2].css('td table.datadisplaytable').search('td') i += 5 # skip to what we think is the next title
# puts section_data[1].text else
# end i += 1 # try the next row if this one was not a title
# puts rows[3] end
# puts rows[3].search('th').first.text end
# section_data = rows[5].css('td table.datadisplaytable').search('td')
# puts section_data[1].text
# end
# each section is represented by 6 rows in the table
# (0..(rows.length/6 - 1)).map do |i|
# start = i*6
# data = {}
# title = rows[start].text
# # the title looks like this: Survey of Accounting - 71117 - ACCT 203 - 001
# # so split it by ' - ' and extract
# title_elements = title.split(' - ')
# next unless title_elements.length == 4
# data[:title] = title_elements[0].strip
# data[:crn] = title_elements[1]
# full_name = title_elements[2].split(' ')
# next unless full_name.length == 2
# data[:subj] = title_elements[2].split(' ')[0]
# data[:course_number] = title_elements[2].split(' ')[1]
# data[:section] = title_elements[3].strip
# # rows 1 to 3 contain info about registration and drop dates.
# # for now we're gonna ignore them and skip to row 4, which contains details
# detail_rows = rows[start+4].css('tr')
# next unless detail_rows.length > 0 # if there are no details, skip this item
# details = detail_rows.last.text.split("\n").compact.reject(&:empty?) # skip empty strings
# times = details[1].split(' - ')
# if (times.length == 1)
# data[:start_time] = 'TBA'
# data[:end_time] = 'TBA'
# else
# data[:start_time] = times[0]
# data[:end_time] = times[1]
# end
# data[:days] = details[2].strip
# data[:location] = details[3].strip
# dates = details[4].split(' - ') result
# data[:start_date] = dates[0] end
# data[:end_date] = dates[1]
# data[:type] = details[5] # a title looks like this: Survey of Accounting - 71117 - ACCT 203 - 001
# data[:instructor] = details[6] def is_title(text)
# data elements = text.split(' - ')
# end elements.length == 4 && elements[2].split(' ').length == 2
end end
end end
end end
...@@ -19,17 +19,17 @@ puts "DDOSing Patriot Web, buckle up kids" ...@@ -19,17 +19,17 @@ puts "DDOSing Patriot Web, buckle up kids"
# parse all subjects and their courses in the semester # parse all subjects and their courses in the semester
parser.parse_subjects(semester).each do |subject| parser.parse_subjects(semester).each do |subject|
puts "Getting courses for #{subject}" puts "Getting courses for #{subject}"
threads << Thread.new { # threads << Thread.new {
total[subject] = parser.parse_courses_in_subject(subject) total[subject] = parser.parse_courses_in_subject(subject)
} # }
end end
# For testing, only get first subject # For testing, only get first subject
# subject = parser.parse_subjects(semester).first # subject = parser.parse_subjects(semester)[20]
# total[subject] = parser.parse_courses_in_subject(subject) # total[subject] = parser.parse_courses_in_subject(subject)
# wait for all the threads to finish # wait for all the threads to finish
ThreadsWait.all_waits(*threads) # ThreadsWait.all_waits(*threads)
# delete everything in the current database # delete everything in the current database
Closure.delete_all Closure.delete_all
...@@ -44,7 +44,10 @@ semester.save! ...@@ -44,7 +44,10 @@ semester.save!
total.each do |subject, sections| total.each do |subject, sections|
puts "Adding courses for #{subject}..." puts "Adding courses for #{subject}..."
sections.each do |section| sections.each do |section|
next if section.nil? || !section.key?(:subj) || !section.key?(:course_number) if section.nil? || !section.key?(:subj) || !section.key?(:course_number)
puts "#{subject} failed section: #{section.class}"
next
end
# Find or create a course and set its semester # Find or create a course and set its semester
# TODO: this breaks when you try to do more than one semester, # TODO: this breaks when you try to do more than one semester,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment