Commit 52c31168 authored by Zac Wood's avatar Zac Wood

Small refactor of patriot web parser, moved to seeds

parent 683bbe4d
.vscode
\ No newline at end of file
require 'httparty'
module PatriotWeb
class Networker
def fetch_page_containing_semester_data
HTTParty.get('https://patriotweb.gmu.edu/pls/prod/bwckschd.p_disp_dyn_sched')
end
def fetch_subjects(semester_id)
HTTParty.post('https://patriotweb.gmu.edu/pls/prod/bwckgens.p_proc_term_date',
body: "p_calling_proc=bwckschd.p_disp_dyn_sched&p_term=#{semester_id}&p_by_date=Y&p_from_date=&p_to_date=",
headers: {
'Content-Type' => 'application/x-www-form-urlencoded',
'charset' => 'utf-8'
})
end
def fetch_courses_in_subject(subject)
HTTParty.post('https://patriotweb.gmu.edu/pls/prod/bwckschd.p_get_crse_unsec',
body: "term_in=201870&sel_subj=dummy&sel_day=dummy&sel_schd=dummy&sel_insm=dummy&sel_camp=dummy&sel_levl=dummy&sel_sess=dummy&sel_instr=dummy&sel_ptrm=dummy&sel_attr=dummy&sel_subj=#{subject}&sel_crse=&sel_title=&sel_schd=%25&sel_from_cred=&sel_to_cred=&sel_camp=%25&sel_levl=%25&sel_ptrm=%25&sel_instr=%25&begin_hh=0&begin_mi=0&begin_ap=x&end_hh=0&end_mi=0&end_ap=x",
headers: {
'Content-Type' => 'application/x-www-form-urlencoded',
'charset' => 'utf-8'
})
end
end
end
require './patriot_web_networker.rb'
require 'nokogiri'
class String
def alpha?
!!match(/^[[:alpha:]]+$/)
end
end
module PatriotWeb
class Parser
def initialize
@networker = PatriotWeb::Networker.new
end
def parse_semesters
response = @networker.fetch_page_containing_semester_data
searcher = Nokogiri::HTML(response)
get_semesters_from_option_values(searcher)
end
def parse_subjects(semester_id)
response = @networker.fetch_subjects(semester_id)
searcher = Nokogiri::HTML(response)
get_alpha_option_values(searcher)
end
def parse_courses_in_subject(subject)
resp = @networker.fetch_courses_in_subject(subject)
searcher = Nokogiri::HTML(resp)
feed_course_info(searcher)
end
private
def get_alpha_option_values(searcher)
searcher.xpath('//*[@id="subj_id"]/option').map do |opt|
if opt.attr('value').strip.alpha?
opt.attr('value')
end
end
end
def get_semesters_from_option_values(searcher)
searcher.css('option').map do |opt|
if opt.attr('value').start_with? '20'
opt.attr('value')
end
end
end
def feed_course_info(searcher)
table = searcher.css('html body div.pagebodydiv table.datadisplaytable')
data = {}
currentobj = nil
table.css('table.datadisplaytable').first.children.each do |row|
next unless row.name == 'tr'
row.children.each do |item|
currentobj = sort_item(item, currentobj, data)
end
end
data
end
def sort_item(item, currentobj, data)
if item.name == 'th'
if item.to_html.include? '-'
titletxt = item.text
if item.text.include? ' - Honors'
titletxt = titletxt.gsub(' - Honors', ' (Honors)')
end
titledetails = titletxt.split(' - ')
if titledetails.count > 4
titledetails = ["#{titledetails[0]} #{titledetails[1]}", titledetails[2], titledetails[3], titledetails[4]]
end
titledata = titledetails[2].split(' ')
begin
data = get_details(data, titledetails, titledata)[0]
currentobj = get_details(data, titledetails, titledata)[1]
rescue StandardError => e
puts item
puts e
exit(1)
end
currentobj[:fields] = []
end
elsif item.is_a? Nokogiri::XML::Element
item.css('th').each do |field|
currentobj[:fields].push(field.text.downcase.tr(' ', '_'))
end
iter = 0
if currentobj
if currentobj[:fields]
upper = currentobj[:fields].count - 1
while iter <= upper
assign = item.css('td')[iter].text
currentobj[currentobj[:fields][iter]] = assign
iter += 1
end
end
end
end
currentobj
end
def get_details(data, titledetails, titledata)
crn = titledetails[1].strip
data[crn] = {} unless data[titledetails[1]]
crsinfo = { 'name': titledetails[0].strip }
uniquedata = { 'sect': titledetails[3].strip, 'crn': titledetails[1].strip }
general = { 'subj': titledata[0].strip, 'code': titledata[1].strip }
data[crn] = general.merge(uniquedata.merge(crsinfo))
data[crn][:code] = titledetails[2].split(' ')[1]
[data, data[crn]]
end
end
end
parser = PatriotWeb::Parser.new
semesters = parser.parse_semesters.compact
puts semesters.first
subjects = parser.parse_subjects(semesters.first)
puts parser.parse_courses_in_subject(subjects.first)
require 'httparty'
module PatriotWeb
class Networker
def fetch_page_containing_semester_data
HTTParty.get('https://patriotweb.gmu.edu/pls/prod/bwckschd.p_disp_dyn_sched')
end
def fetch_subjects(semester_id)
HTTParty.post('https://patriotweb.gmu.edu/pls/prod/bwckgens.p_proc_term_date',
body: "p_calling_proc=bwckschd.p_disp_dyn_sched&p_term=#{semester_id}&p_by_date=Y&p_from_date=&p_to_date=",
headers: {
'Content-Type' => 'application/x-www-form-urlencoded',
'charset' => 'utf-8'
})
end
def fetch_courses_in_subject(subject)
HTTParty.post('https://patriotweb.gmu.edu/pls/prod/bwckschd.p_get_crse_unsec',
body: "term_in=201870&sel_subj=dummy&sel_day=dummy&sel_schd=dummy&sel_insm=dummy&sel_camp=dummy&sel_levl=dummy&sel_sess=dummy&sel_instr=dummy&sel_ptrm=dummy&sel_attr=dummy&sel_subj=#{subject}&sel_crse=&sel_title=&sel_schd=%25&sel_from_cred=&sel_to_cred=&sel_camp=%25&sel_levl=%25&sel_ptrm=%25&sel_instr=%25&begin_hh=0&begin_mi=0&begin_ap=x&end_hh=0&end_mi=0&end_ap=x",
headers: {
'Content-Type' => 'application/x-www-form-urlencoded',
'charset' => 'utf-8'
})
end
end
end
require_relative 'patriot_web_networker'
require 'nokogiri'
class String
def alpha?
!!match(/^[[:alpha:]]+$/)
end
end
module PatriotWeb
class Parser
def initialize
@networker = PatriotWeb::Networker.new
end
def parse_semesters
response = @networker.fetch_page_containing_semester_data
searcher = Nokogiri::HTML(response)
get_semesters_from_option_values(searcher).compact
end
def parse_subjects(semester_id)
response = @networker.fetch_subjects(semester_id)
searcher = Nokogiri::HTML(response)
get_alpha_option_values(searcher)
end
def parse_courses_in_subject(subject)
resp = @networker.fetch_courses_in_subject(subject)
searcher = Nokogiri::HTML(resp)
feed_course_info(searcher)
end
private
def get_alpha_option_values(searcher)
searcher.xpath('//*[@id="subj_id"]/option').map do |opt|
if opt.attr('value').strip.alpha?
opt.attr('value')
end
end
end
def get_semesters_from_option_values(searcher)
searcher.css('option').map do |opt|
if opt.attr('value').start_with? '20'
opt.attr('value')
end
end
end
def feed_course_info(searcher)
table = searcher.css('html body div.pagebodydiv table.datadisplaytable')
data = {}
currentobj = nil
table.css('table.datadisplaytable').first.children.each do |row|
next unless row.name == 'tr'
row.children.each do |item|
currentobj = sort_item(item, currentobj, data)
end
end
data
end
def sort_item(item, currentobj, data)
if item.name == 'th'
if item.to_html.include? '-'
titletxt = item.text
if item.text.include? ' - Honors'
titletxt = titletxt.gsub(' - Honors', ' (Honors)')
end
titledetails = titletxt.split(' - ')
if titledetails.count > 4
titledetails = ["#{titledetails[0]} #{titledetails[1]}", titledetails[2], titledetails[3], titledetails[4]]
end
titledata = titledetails[2].split(' ')
begin
data = get_details(data, titledetails, titledata)[0]
currentobj = get_details(data, titledetails, titledata)[1]
rescue StandardError => e
puts item
puts e
exit(1)
end
currentobj[:fields] = []
end
elsif item.is_a? Nokogiri::XML::Element
item.css('th').each do |field|
currentobj[:fields].push(field.text.downcase.tr(' ', '_'))
end
iter = 0
if currentobj
if currentobj[:fields]
upper = currentobj[:fields].count - 1
while iter <= upper
assign = item.css('td')[iter].text
currentobj[currentobj[:fields][iter]] = assign
iter += 1
end
end
end
end
currentobj
end
def get_details(data, titledetails, titledata)
crn = titledetails[1].strip
data[crn] = {} unless data[titledetails[1]]
crsinfo = { 'name': titledetails[0].strip }
uniquedata = { 'sect': titledetails[3].strip, 'crn': titledetails[1].strip }
general = { 'subj': titledata[0].strip, 'code': titledata[1].strip }
data[crn] = general.merge(uniquedata.merge(crsinfo))
data[crn][:code] = titledetails[2].split(' ')[1]
[data, data[crn]]
end
end
end
# This file should contain all the record creation needed to seed the database with its default values.
# The data can then be loaded with the rails db:seed command (or created alongside the database with db:setup).
#
# Examples:
#
# movies = Movie.create([{ name: 'Star Wars' }, { name: 'Lord of the Rings' }])
# Character.create(name: 'Luke', movie: movies.first)
require 'rubyXL'
# require_relative 'excel_loader'
require_relative 'parse_patriot_web'
# loader = if Rails.env.test?
# ExcelLoader.new 'db/data/testdata.xlsx'
# else
# ExcelLoader.new 'db/data/allsections.xlsx'
# end
# loader.load_data
load_data
require_relative 'patriot_web_parser'
require 'thwait'
require 'httparty'
require 'nokogiri'
require 'json'
threads = []
total = []
parser = PatriotWeb::Parser.new
semester = parser.parse_semesters.first
parser.parse_subjects(semester).each do |subject|
threads << Thread.new {
total << parser.parse_courses_in_subject(subject)
}
end
# For testing, only get first subject
# subject = parser.parse_subjects(semester).first
# total << parser.parse_courses_in_subject(subject)
ThreadsWait.all_waits(*threads)
Section.delete_all
Course.delete_all
Semester.delete_all
semester = Semester.create! season: 'Fall', year: 2018
semester.save!
total.each do |subject|
subject.each_value do |section|
next unless (section.has_key? "date_range") && (section.has_key? "instructors")
course = Course.find_or_create_by(subject: section[:subj],
course_number: section[:code])
course.semester = semester
course.save!
section_name = "#{section[:subj]} #{section[:code]} #{section[:sect]} #{section[:name]}"
puts "Adding #{section_name}..."
start_time = if section.has_key? "time"
section["time"].split(' - ').first
else
"N/A"
end
end_time = if section.has_key? "time"
section["time"].split(' - ').last
else
"N/A"
end
Section.create!(name: section_name,
crn: section[:crn],
title: section[:name],
location: section["where"],
start_date: section["date_range"].split(' - ').first,
end_date: section["date_range"].split(' - ').last,
start_time: start_time,
end_time: end_time,
instructor: section["instructors"].split(' ').map { |word| word unless word.empty? }.join(' '),
course: course)
end
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment