Commit b4635282 authored by Zac Wood
Browse files

okay it actually works this time I promise

parent 3cb72415
...@@ -36,7 +36,7 @@ module PatriotWeb ...@@ -36,7 +36,7 @@ module PatriotWeb
def parse_courses_in_subject(subject) def parse_courses_in_subject(subject)
response = @networker.fetch_courses_in_subject(subject) response = @networker.fetch_courses_in_subject(subject)
document = Nokogiri::HTML(response) document = Nokogiri::HTML(response)
get_courses(document) get_courses(document, subject)
end end
private private
...@@ -66,120 +66,74 @@ module PatriotWeb ...@@ -66,120 +66,74 @@ module PatriotWeb
# Parse all courses from the subject search page # Parse all courses from the subject search page
# @param document [Nokogiri::HTML::Document] # @param document [Nokogiri::HTML::Document]
# @return [Array] courses # @return [Array] courses
def get_courses(document) def get_courses(document, subject)
table = document.css('html body div.pagebodydiv table.datadisplaytable') table = document.css('html body div.pagebodydiv table.datadisplaytable')
rows = table.css('tr') rows = table.css('tr')
# rows[100..110].each_with_index do |row, i|
# puts i
# puts row
# end
data_from rows
end
def data_from(rows)
i = 0
title_index = 0
result = []
while i < rows.length
if is_title(rows[i].text) # check if the row is a title
data = {}
(0..(rows.length/6-1)).map do |i| title_elements = rows[i].text.split(' - ')
start = i*5 data[:title] = title_elements[0].strip
data = {} data[:crn] = title_elements[1]
title = rows[start].text full_name = title_elements[2].split(' ')
# the title looks this: Survey of Accounting - 71117 - ACCT 203 - 001 next unless full_name.length == 2
# so split it by ' - ' and extract data[:subj] = full_name[0]
title_elements = title.split(' - ') data[:course_number] = full_name[1]
next unless title_elements.length == 4 data[:section] = title_elements[3].strip
data[:title] = title_elements[0].strip
data[:crn] = title_elements[1]
full_name = title_elements[2].split(' ')
next unless full_name.length == 2
data[:subj] = title_elements[2].split(' ')[0]
data[:course_number] = title_elements[2].split(' ')[1]
data[:section] = title_elements[3].strip
# rows 1 to 3 contain info about registration and drop dates. details = rows[i+2].css('td table tr td')
# for now we're gonna ignore them and skip to row 4, which contains details unless details.length > 0
details = rows[start+2].css('td table tr td') puts "#{full_name.join(' ')} is fake news"
i += 1
next
end
times = details[1].text.split(' - ')
if (times.length == 1)
data[:start_time] = 'TBA'
data[:end_time] = 'TBA'
else
data[:start_time] = times[0]
data[:end_time] = times[1]
end
next unless details.length > 0 # if there are no details, skip this item data[:days] = details[2].text.strip
# details = detail_rows.last.text.split("\n").compact.reject(&:empty?) # skip empty strings data[:location] = details[3].text.strip
times = details[1].text.split(' - ') dates = details[4].text.split(' - ')
if (times.length == 1) data[:start_date] = dates[0]
data[:start_time] = 'TBA' data[:end_date] = dates[1]
data[:end_time] = 'TBA'
data[:type] = details[5].text
data[:instructor] = details[6].text
result << data
i += 5 # skip to what we think is the next title
else else
data[:start_time] = times[0] i += 1 # try the next row if this one was not a title
data[:end_time] = times[1]
end end
data[:days] = details[2].text.strip
data[:location] = details[3].text.strip
dates = details[4].text.split(' - ')
data[:start_date] = dates[0]
data[:end_date] = dates[1]
data[:type] = details[5].text
data[:instructor] = details[6].text
data
end end
# puts rows[0].text
# puts rows[2].css('td table tr td')
# puts rows[5].text
# puts rows[7].css('td table tr td')
# puts rows[10].text result
# puts rows[12].css('td table tr td') end
# (0..(rows.length/3-1)).each do |i|
# start = i*3 # a title looks this: Survey of Accounting - 71117 - ACCT 203 - 001
def is_title(text)
# puts rows[start].search('th').first.text elements = text.split(' - ')
# section_data = rows[start+2].css('td table.datadisplaytable').search('td') elements.length == 4 && elements[2].split(' ').length == 2
# puts section_data[1].text
# end
# puts rows[3]
# puts rows[3].search('th').first.text
# section_data = rows[5].css('td table.datadisplaytable').search('td')
# puts section_data[1].text
# end
# each section is represented by 6 rows in the table
# (0..(rows.length/6 - 1)).map do |i|
# start = i*6
# data = {}
# title = rows[start].text
# # the title looks this: Survey of Accounting - 71117 - ACCT 203 - 001
# # so split it by ' - ' and extract
# title_elements = title.split(' - ')
# next unless title_elements.length == 4
# data[:title] = title_elements[0].strip
# data[:crn] = title_elements[1]
# full_name = title_elements[2].split(' ')
# next unless full_name.length == 2
# data[:subj] = title_elements[2].split(' ')[0]
# data[:course_number] = title_elements[2].split(' ')[1]
# data[:section] = title_elements[3].strip
# # rows 1 to 3 contain info about registration and drop dates.
# # for now we're gonna ignore them and skip to row 4, which contains details
# detail_rows = rows[start+4].css('tr')
# next unless detail_rows.length > 0 # if there are no details, skip this item
# details = detail_rows.last.text.split("\n").compact.reject(&:empty?) # skip empty strings
# times = details[1].split(' - ')
# if (times.length == 1)
# data[:start_time] = 'TBA'
# data[:end_time] = 'TBA'
# else
# data[:start_time] = times[0]
# data[:end_time] = times[1]
# end
# data[:days] = details[2].strip
# data[:location] = details[3].strip
# dates = details[4].split(' - ')
# data[:start_date] = dates[0]
# data[:end_date] = dates[1]
# data[:type] = details[5]
# data[:instructor] = details[6]
# data
# end
end end
end end
end end
...@@ -19,17 +19,17 @@ puts "DDOSing Patriot Web, buckle up kids" ...@@ -19,17 +19,17 @@ puts "DDOSing Patriot Web, buckle up kids"
# parse all subjects and their courses in the semester # parse all subjects and their courses in the semester
parser.parse_subjects(semester).each do |subject| parser.parse_subjects(semester).each do |subject|
puts "Getting courses for #{subject}" puts "Getting courses for #{subject}"
threads << Thread.new { # threads << Thread.new {
total[subject] = parser.parse_courses_in_subject(subject) total[subject] = parser.parse_courses_in_subject(subject)
} # }
end end
# For testing, only get first subject # For testing, only get first subject
# subject = parser.parse_subjects(semester).first # subject = parser.parse_subjects(semester)[20]
# total[subject] = parser.parse_courses_in_subject(subject) # total[subject] = parser.parse_courses_in_subject(subject)
# wait for all the threads to finish # wait for all the threads to finish
ThreadsWait.all_waits(*threads) # ThreadsWait.all_waits(*threads)
# delete everything in the current database # delete everything in the current database
Closure.delete_all Closure.delete_all
...@@ -44,7 +44,10 @@ semester.save! ...@@ -44,7 +44,10 @@ semester.save!
total.each do |subject, sections| total.each do |subject, sections|
puts "Adding courses for #{subject}..." puts "Adding courses for #{subject}..."
sections.each do |section| sections.each do |section|
next if section.nil? || !section.key?(:subj) || !section.key?(:course_number) if section.nil? || !section.key?(:subj) || !section.key?(:course_number)
puts "#{subject} failed section: #{section.class}"
next
end
# Find or create a course and set its semester # Find or create a course and set its semester
# TODO: this breaks when you try to do more than one semester, # TODO: this breaks when you try to do more than one semester,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment