Commit 31de7866 authored by Zac Wood's avatar Zac Wood

Merge branch 'parser' into 4-ical-gen

parents e1f43351 bcefa699
.vscode
\ No newline at end of file
......@@ -52,6 +52,8 @@ end
# Windows does not include zoneinfo files, so bundle the tzinfo-data gem
gem 'tzinfo-data', platforms: [:mingw, :mswin, :x64_mingw, :jruby]
gem 'httparty'
gem 'nokogiri'
gem 'rubyXL'
gem 'icalendar'
......@@ -61,6 +61,8 @@ GEM
ffi (1.9.23)
globalid (0.4.1)
activesupport (>= 4.2.0)
httparty (0.16.2)
multi_xml (>= 0.5.2)
i18n (1.0.0)
concurrent-ruby (~> 1.0)
icalendar (2.4.1)
......@@ -81,6 +83,7 @@ GEM
mini_portile2 (2.3.0)
minitest (5.11.3)
multi_json (1.13.1)
multi_xml (0.6.0)
nio4r (2.3.0)
nokogiri (1.8.2)
mini_portile2 (~> 2.3.0)
......@@ -182,11 +185,18 @@ PLATFORMS
DEPENDENCIES
byebug
capybara (~> 2.13)
httparty
icalendar
jbuilder (~> 2.5)
listen (>= 3.0.5, < 3.2)
nokogiri
pry
pry-doc
puma (~> 3.7)
rails (~> 5.1.6)
rubyXL
......
......@@ -11,8 +11,8 @@ class Section < ApplicationRecord
# Unsure if necessary
# validates :section_type, presence: true
validates :title, presence: true
validates :start_date, presence: true
validates :end_date, presence: true
validates :days, presence: true
# validates :start_date, presence: true
# validates :end_date, presence: true
# validates :days, presence: true
validates :course_id, presence: true
end
<h1>Schedules</h1>
<input type="text" id="search" title="search_text" placeholder="Enter CRN..."/>
<button title="search" onclick="search()">Search</button>
<!-- <h2>Search results</h2>
<table id="searchTable">
<tr>
<th>Course</th>
<th>Section Name</th>
<th>CRN</th>
<th>Professor</th>
<th>Location</th>
<th>Days</th>
<th>Times</th>
</tr>
</table>
-->
<!-- <br><br> -->
<input type="text" title="search_text" placeholder="Enter CRN..."/>
<button title="search">Search</button>
<h2>Your classes</h2>
<table id="scheduleTable">
<table>
<tr>
<th>Course</th>
<th>Section Name</th>
......
# frozen_string_literal: true
require 'thwait'
require 'httparty'
require 'nokogiri'
require 'json'
#
# USAGE:
#
# Just run it and it dynamically dumps the latest semester. The code to do this for every semester in history is commented out below, but running it will thrash your RAM and probably annoy PatriotWeb. Also note that this script could be trivially modified to map human-readable names to semester IDs, since the name is just the .text attribute of each option node.
#
# There are a few minor issues (e.g. multiple spaces in teacher names, and e-mail addresses are not scraped out), but no major ones.
#
# DISCLAIMER/WARNING:
#
# This script opens a number of connections to PatriotWeb without any throttling. I am not liable if you run this a million times and somehow knock over PatriotWeb. It's a scraper, not a DoS utility.
#
# Credit stackoverflow
class String
  # Reports whether the whole string consists solely of alphabetic characters.
  #
  # The pattern is anchored with \A and \z rather than ^ and $: the latter
  # match at every line boundary, so a multi-line value such as "abc\n123"
  # would otherwise be accepted.
  #
  # @return [Boolean] true when the string is non-empty and every character
  #   is alphabetic, false otherwise
  def alpha?
    !!match(/\A[[:alpha:]]+\z/)
  end
end
# Builds the record for one section from its split title and stores it in
# +data+ under the stripped CRN, replacing any record already stored there.
#
# @param data [Hash] accumulator keyed by CRN string
# @param titledetails [Array<String>] title parts: name, CRN, "SUBJ NUM", section
# @param titledata [Array<String>] the "SUBJ NUM" part split on whitespace
# @return [Array] two elements: +data+ itself and the record just stored
def get_details(data, titledetails, titledata)
  name, raw_crn, subject_and_number, section = titledetails
  crn = raw_crn.strip
  subject, number = titledata

  record = {
    subj: subject.strip,
    code: number.strip,
    sect: section.strip,
    crn: crn,
    name: name.strip
  }
  # The course number taken from the title part has the final say.
  record[:code] = subject_and_number.split(' ')[1]

  data[crn] = record
  [data, record]
end
# Folds one cell of a listing row into the section record being built.
#
# * A +th+ whose markup contains a dash is treated as a section title
#   ("Name - CRN - SUBJ NUM - SECT"): a new record is created through
#   get_details and becomes the current one.
# * Any other element adds the text of its nested +th+ cells to the current
#   record's :fields list, then stores the text of its nested +td+ cells
#   under those field names, position by position.
#
# Elements seen before any title (no current record yet) are ignored, and a
# field with no matching +td+ is left unset instead of raising.
#
# @param item [Nokogiri::XML::Node] a child node of a table row
# @param currentobj [Hash, nil] the record currently being filled, if any
# @param data [Hash] accumulator keyed by CRN
# @return [Hash, nil] the record that is current after processing +item+
def sort_item(item, currentobj, data)
  if item.name == 'th'
    return currentobj unless item.to_html.include?('-')

    # " - Honors" is part of the course name, not a separator.
    titledetails = item.text.gsub(' - Honors', ' (Honors)').split(' - ')
    if titledetails.count > 4
      # A dash inside the course name produced an extra part; rejoin it.
      titledetails = ["#{titledetails[0]} #{titledetails[1]}", titledetails[2], titledetails[3], titledetails[4]]
    end
    titledata = titledetails[2].split(' ')
    begin
      currentobj = get_details(data, titledetails, titledata)[1]
    rescue StandardError => e
      puts item
      puts e
      exit(1)
    end
    currentobj[:fields] = []
  elsif item.is_a?(Nokogiri::XML::Element) && currentobj && currentobj[:fields]
    fields = currentobj[:fields]
    item.css('th').each do |header|
      fields.push(header.text.downcase.tr(' ', '_'))
    end
    cells = item.css('td')
    fields.each_with_index do |field, index|
      cell = cells[index]
      currentobj[field] = cell.text if cell
    end
  end
  currentobj
end
# Walks the course-listing table of a parsed page and collects one record
# per section by feeding every cell of every +tr+ through sort_item.
#
# Returns an empty hash when the page contains no listing table, instead of
# failing on a nil table.
#
# @param searcher [Nokogiri::HTML::Document] parsed listing page
# @return [Hash] section records keyed by CRN
def feed_course_info(searcher)
  tables = searcher.css('html body div.pagebodydiv table.datadisplaytable')
  listing = tables.css('table.datadisplaytable').first
  data = {}
  return data unless listing

  currentobj = nil
  listing.children.each do |row|
    next unless row.name == 'tr'

    row.children.each do |item|
      currentobj = sort_item(item, currentobj, data)
    end
  end
  data
end
# Placeholder: CRN lookup is not implemented yet. For now it only prints a
# TODO line that echoes the arguments it was given.
#
# @param title [Object] echoed in the TODO line
# @param code [Object] echoed in the TODO line
# @param section [Object] echoed in the TODO line
# @return [nil]
def get_crn(title, code, section)
  puts format('TODO %s %s %s', title, code, section)
end
# Posts a section search for one subject to PatriotWeb and parses the
# resulting listing.
#
# @param major [String] subject code sent as the sel_subj form value
# @param term [String] term id sent as term_in; defaults to the previously
#   hard-coded '201870'
# @return [Hash] whatever feed_course_info builds from the response
def full_major(major, term = '201870')
  resp = HTTParty.post('https://patriotweb.gmu.edu/pls/prod/bwckschd.p_get_crse_unsec',
                       body: "term_in=#{term}&sel_subj=dummy&sel_day=dummy&sel_schd=dummy&sel_insm=dummy&sel_camp=dummy&sel_levl=dummy&sel_sess=dummy&sel_instr=dummy&sel_ptrm=dummy&sel_attr=dummy&sel_subj=#{major}&sel_crse=&sel_title=&sel_schd=%25&sel_from_cred=&sel_to_cred=&sel_camp=%25&sel_levl=%25&sel_ptrm=%25&sel_instr=%25&begin_hh=0&begin_mi=0&begin_ap=x&end_hh=0&end_mi=0&end_ap=x",
                       headers: {
                         'Content-Type' => 'application/x-www-form-urlencoded',
                         'charset' => 'utf-8'
                       })
  feed_course_info(Nokogiri::HTML(resp))
end
# Fetches the catalog section listing for a single course and parses it.
#
# @param subj [String] subject code sent as subj_in
# @param num [String, Integer] course number sent as crse_in
# @param term [String] term id sent as term_in; defaults to the previously
#   hard-coded '201870'
# @return [Hash] whatever feed_course_info builds from the response
def initialize_req(subj, num, term = '201870')
  base_url = "https://patriotweb.gmu.edu/pls/prod/bwckctlg.p_disp_listcrse?term_in=#{term}"
  stub = "subj_in=#{subj}&crse_in=#{num}&schd_in=%25"
  resp = HTTParty.get("#{base_url}&#{stub}")
  feed_course_info(Nokogiri::HTML(resp))
end
# Fetches the dynamic-schedule landing page and returns the +value+ of every
# +option+ element whose value starts with "20".
#
# Options without a +value+ attribute are skipped rather than raising.
#
# @return [Array<String>] matching option values, in page order
def getSemesters
  resp = HTTParty.get('https://patriotweb.gmu.edu/pls/prod/bwckschd.p_disp_dyn_sched')
  searcher = Nokogiri::HTML(resp)
  searcher.css('option')
          .map { |opt| opt.attr('value') }
          .select { |value| value.to_s.start_with?('20') }
end
# Posts the chosen term to PatriotWeb and returns the values of the subject
# drop-down (the options under the element with id "subj_id").
#
# Only values that are purely alphabetic once stripped are kept; they are
# returned exactly as they appear in the page. Options without a +value+
# attribute are skipped rather than raising.
#
# @param semester [String] term id sent as p_term
# @return [Array<String>] subject option values, in page order
def getCourses(semester)
  resp = HTTParty.post('https://patriotweb.gmu.edu/pls/prod/bwckgens.p_proc_term_date',
                       body: "p_calling_proc=bwckschd.p_disp_dyn_sched&p_term=#{semester}&p_by_date=Y&p_from_date=&p_to_date=",
                       headers: {
                         'Content-Type' => 'application/x-www-form-urlencoded',
                         'charset' => 'utf-8'
                       })
  searcher = Nokogiri::HTML(resp)
  searcher.xpath('//*[@id="subj_id"]/option')
          .map { |opt| opt.attr('value') }
          .select { |value| value.to_s.strip.alpha? }
end
# end
# total.each { |subject|
# puts subject.first
# subject[1].each { |section|
# puts section
# }
# }
# Scrapes every subject of the first semester returned by getSemesters (one
# thread per subject) and rebuilds the Semester, Course and Section tables
# from the result.
#
# Scraping finishes before anything is deleted. Thread#value re-raises an
# exception raised inside a worker, so a failed scrape aborts here and leaves
# the existing records alone. The rebuild itself runs inside one transaction,
# so an error while saving rolls the deletes back as well.
#
# NOTE(review): the stored semester is still hard-coded to Fall 2018, which
# matches the default term full_major requests — confirm before pointing this
# at another term.
#
# @return [Hash] scraped sections keyed by subject, then by CRN
def load_data
  # below will get you literally all semesters which is wildly overkill
  # getSemesters.each do |semester|
  semester_id = getSemesters.first
  workers = getCourses(semester_id).map do |subject|
    [subject, Thread.new { full_major(subject) }]
  end
  total = workers.each_with_object({}) do |(subject, worker), results|
    results[subject] = worker.value
  end

  Semester.transaction do
    # Dependents first: sections reference courses, courses reference semesters.
    Section.delete_all
    Course.delete_all
    Semester.delete_all
    semester = Semester.create!(season: 'Fall', year: '2018')

    total.each_value do |sections|
      sections.each_value do |section|
        course = Course.find_or_create_by(subject: section[:subj],
                                          course_number: section[:code])
        course.semester = semester
        course.save!
        section_name = "#{section[:subj]} #{section[:code]} #{section[:sect]}"
        Section.create!(name: section_name,
                        crn: section[:crn],
                        title: section[:name],
                        course: course)
        puts "#{section[:subj]} #{section[:code]} #{section[:sect]} #{section[:name]}"
      end
    end
  end
  total
end
require 'httparty'
module PatriotWeb
  # Performs the raw HTTP requests against PatriotWeb. Every method returns
  # the HTTParty response untouched; parsing is left to the caller.
  class Networker
    # Common prefix of every PatriotWeb endpoint used here.
    BASE_URL = 'https://patriotweb.gmu.edu/pls/prod'.freeze
    # Term requested when the caller does not name one.
    DEFAULT_TERM = '201870'.freeze

    # Fetches the dynamic-schedule landing page.
    #
    # @return [Object] the HTTParty response
    def fetch_page_containing_semester_data
      HTTParty.get("#{BASE_URL}/bwckschd.p_disp_dyn_sched")
    end

    # Posts the chosen term to the term-selection endpoint.
    #
    # @param semester_id [String] term id sent as p_term
    # @return [Object] the HTTParty response
    def fetch_subjects(semester_id)
      HTTParty.post("#{BASE_URL}/bwckgens.p_proc_term_date",
                    body: "p_calling_proc=bwckschd.p_disp_dyn_sched&p_term=#{semester_id}&p_by_date=Y&p_from_date=&p_to_date=",
                    headers: form_headers)
    end

    # Posts a section search for one subject.
    #
    # @param subject [String] subject code sent as sel_subj
    # @param semester_id [String] term id sent as term_in; defaults to the
    #   previously hard-coded DEFAULT_TERM
    # @return [Object] the HTTParty response
    def fetch_courses_in_subject(subject, semester_id = DEFAULT_TERM)
      HTTParty.post("#{BASE_URL}/bwckschd.p_get_crse_unsec",
                    body: "term_in=#{semester_id}&sel_subj=dummy&sel_day=dummy&sel_schd=dummy&sel_insm=dummy&sel_camp=dummy&sel_levl=dummy&sel_sess=dummy&sel_instr=dummy&sel_ptrm=dummy&sel_attr=dummy&sel_subj=#{subject}&sel_crse=&sel_title=&sel_schd=%25&sel_from_cred=&sel_to_cred=&sel_camp=%25&sel_levl=%25&sel_ptrm=%25&sel_instr=%25&begin_hh=0&begin_mi=0&begin_ap=x&end_hh=0&end_mi=0&end_ap=x",
                    headers: form_headers)
    end

    private

    # Headers sent with every form POST. A fresh hash is built per request so
    # nothing downstream can mutate shared state.
    def form_headers
      {
        'Content-Type' => 'application/x-www-form-urlencoded',
        'charset' => 'utf-8'
      }
    end
  end
end
require_relative 'patriot_web_networker'
require 'nokogiri'
class String
  # Reports whether the whole string consists solely of alphabetic characters.
  #
  # The pattern is anchored with \A and \z rather than ^ and $: the latter
  # match at every line boundary, so a multi-line value such as "abc\n123"
  # would otherwise be accepted.
  #
  # @return [Boolean] true when the string is non-empty and every character
  #   is alphabetic, false otherwise
  def alpha?
    !!match(/\A[[:alpha:]]+\z/)
  end
end
module PatriotWeb
  # Turns PatriotWeb pages, fetched through PatriotWeb::Networker, into plain
  # Ruby data: semester ids, subject codes, and per-section records.
  class Parser
    def initialize
      @networker = PatriotWeb::Networker.new
    end

    # @return [Array<String>] values of every +option+ on the semester page
    #   whose value starts with "20"
    def parse_semesters
      response = @networker.fetch_page_containing_semester_data
      get_semesters_from_option_values(Nokogiri::HTML(response))
    end

    # @param semester_id [String] term id passed to the networker
    # @return [Array<String>] values of the subject drop-down that are purely
    #   alphabetic once stripped; contains no nil entries
    def parse_subjects(semester_id)
      response = @networker.fetch_subjects(semester_id)
      get_alpha_option_values(Nokogiri::HTML(response))
    end

    # @param subject [String] subject code passed to the networker
    # @return [Hash] section records keyed by CRN; empty when the page holds
    #   no listing table
    def parse_courses_in_subject(subject)
      response = @networker.fetch_courses_in_subject(subject)
      feed_course_info(Nokogiri::HTML(response))
    end

    private

    # Subject option values that are purely alphabetic once stripped,
    # returned exactly as they appear in the page.
    def get_alpha_option_values(searcher)
      option_values(searcher.xpath('//*[@id="subj_id"]/option')).select do |value|
        value.strip.alpha?
      end
    end

    # Option values that start with "20".
    def get_semesters_from_option_values(searcher)
      option_values(searcher.css('option')).select do |value|
        value.start_with?('20')
      end
    end

    # The +value+ attribute of each option; options without one are dropped.
    def option_values(options)
      options.map { |opt| opt.attr('value') }.compact
    end

    # Walks the listing table row by row and collects one record per section.
    def feed_course_info(searcher)
      tables = searcher.css('html body div.pagebodydiv table.datadisplaytable')
      listing = tables.css('table.datadisplaytable').first
      data = {}
      return data unless listing

      currentobj = nil
      listing.children.each do |row|
        next unless row.name == 'tr'

        row.children.each do |item|
          currentobj = sort_item(item, currentobj, data)
        end
      end
      data
    end

    # Folds one cell of a listing row into the record being built and returns
    # the record that is current afterwards. Elements seen before any title
    # (no current record yet) are ignored.
    def sort_item(item, currentobj, data)
      if item.name == 'th'
        item.to_html.include?('-') ? start_section(item, data) : currentobj
      elsif item.is_a?(Nokogiri::XML::Element) && currentobj
        collect_fields(item, currentobj)
      else
        currentobj
      end
    end

    # Creates the record for a title cell ("Name - CRN - SUBJ NUM - SECT").
    # A title that cannot be parsed is printed and the error re-raised, so the
    # caller decides what to do instead of the whole process exiting.
    def start_section(item, data)
      # " - Honors" is part of the course name, not a separator.
      titledetails = item.text.gsub(' - Honors', ' (Honors)').split(' - ')
      if titledetails.count > 4
        # A dash inside the course name produced an extra part; rejoin it.
        titledetails = ["#{titledetails[0]} #{titledetails[1]}", *titledetails[2..4]]
      end
      titledata = titledetails[2].split(' ')
      section = get_details(data, titledetails, titledata)[1]
      section[:fields] = []
      section
    rescue StandardError => e
      puts item
      puts e
      raise
    end

    # Adds the nested +th+ texts to the record's :fields list, then stores the
    # nested +td+ texts under those names, position by position. A field with
    # no matching cell is left unset.
    def collect_fields(item, currentobj)
      fields = (currentobj[:fields] ||= [])
      item.css('th').each do |header|
        fields.push(header.text.downcase.tr(' ', '_'))
      end
      cells = item.css('td')
      fields.each_with_index do |field, index|
        cell = cells[index]
        currentobj[field] = cell.text if cell
      end
      currentobj
    end

    # Builds the record for one section and stores it in +data+ under the
    # stripped CRN, replacing any record already stored there.
    # Returns +data+ together with the new record.
    def get_details(data, titledetails, titledata)
      crn = titledetails[1].strip
      record = {
        subj: titledata[0].strip,
        code: titledata[1].strip,
        sect: titledetails[3].strip,
        crn: crn,
        name: titledetails[0].strip
      }
      # The course number taken from the title part has the final say.
      record[:code] = titledetails[2].split(' ')[1]
      data[crn] = record
      [data, record]
    end
  end
end
require 'test_helper'
# Integration coverage for the home page route.
class HomeControllerTest < ActionDispatch::IntegrationTest
  # Earlier variant, kept for reference; it resolved the route with url_for.
  # test 'should get index' do
  #   get url_for controller: 'home', action: 'index'
  #   assert_response :success
  # end

  # Requesting the home index URL must be answered with a success status.
  test('should get index') do
    get(home_index_url)
    assert_response(:success)
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment