Commit bcefa699 authored by Zac Wood's avatar Zac Wood

Prepare for merge

parent 56a9d97c
source 'https://rubygems.org'
gem 'httparty'
gem 'nokogiri'
GEM
remote: https://rubygems.org/
specs:
httparty (0.16.2)
multi_xml (>= 0.5.2)
mini_portile2 (2.3.0)
multi_xml (0.6.0)
nokogiri (1.8.2)
mini_portile2 (~> 2.3.0)
PLATFORMS
ruby
DEPENDENCIES
httparty
nokogiri
BUNDLED WITH
1.16.1
# PatriotWeb Parser
Known working as of 2:46PM April 12th.
### Usage
Just: `gem install bundler`, `bundle install` and `ruby parse_patriot_web.rb`
Requires nokogiri which may require actual packages as it's a native extension.
## Holding Pattern
Presently for this script we are in a holding pattern since they plan on changing the PatriotWeb backend to CAS April 15th. Should they choose to change the endpoints we hit to authenticated, we're pretty screwed, so there's no reason to flesh out a full service before then.
## Future Plans
In the future, this should write to a persistent region like JSON or MySQL to be the backend of an API for Schedules and any other services.
### Enhancements to Make Soon:
- Strip multiple (regex?) spaces from teacher names
- Rubocop complaint
- Make it write to an actual persistent database storage engine
# frozen_string_literal: true
require 'thwait'
require 'httparty'
require 'nokogiri'
require 'json'
#
# USAGE:
#
# Just run it and it dynamically dumps the latest semester. There's a bit to do it for all of the ones in history commented out below but it'll thrash your RAM and probably piss off PatriotWeb. Also note this script could be trivially modified to correlate human readable names to semester IDs since they're just the .text attribute of the option node.
#
# There's a few minor issues like multiple spaces in teacher names and we could be scraping out email addresses but no major ones.
#
# DISCLAIMER/WARNING:
#
# This opens a number of connections pretty transparently from a script to PatriotWeb. I am not liable if you run this a million times and somehow kill over PatriotWeb. It's a scraper, not a DoS utility.
#
# Credit stackoverflow
class String
def alpha?
!!match(/^[[:alpha:]]+$/)
end
end
def get_details(data, titledetails, titledata)
crn = titledetails[1].strip
data[crn] = {} unless data[titledetails[1]]
crsinfo = { 'name': titledetails[0].strip }
uniquedata = { 'sect': titledetails[3].strip, 'crn': titledetails[1].strip }
general = { 'subj': titledata[0].strip, 'code': titledata[1].strip }
data[crn] = general.merge(uniquedata.merge(crsinfo))
data[crn][:code] = titledetails[2].split(' ')[1]
[data, data[crn]]
end
def sort_item(item, currentobj, data)
if item.name == 'th'
if item.to_html.include? '-'
titletxt = item.text
if item.text.include? ' - Honors'
titletxt = titletxt.gsub(' - Honors', ' (Honors)')
end
titledetails = titletxt.split(' - ')
if titledetails.count > 4
titledetails = ["#{titledetails[0]} #{titledetails[1]}", titledetails[2], titledetails[3], titledetails[4]]
end
titledata = titledetails[2].split(' ')
begin
data = get_details(data, titledetails, titledata)[0]
currentobj = get_details(data, titledetails, titledata)[1]
rescue StandardError => e
puts item
puts e
exit(1)
end
currentobj[:fields] = []
end
elsif item.is_a? Nokogiri::XML::Element
item.css('th').each do |field|
currentobj[:fields].push(field.text.downcase.tr(' ', '_'))
end
iter = 0
if currentobj
if currentobj[:fields]
upper = currentobj[:fields].count - 1
while iter <= upper
assign = item.css('td')[iter].text
currentobj[currentobj[:fields][iter]] = assign
iter += 1
end
end
end
end
currentobj
end
def feed_course_info(searcher)
table = searcher.css('html body div.pagebodydiv table.datadisplaytable')
data = {}
currentobj = nil
table.css('table.datadisplaytable').first.children.each do |row|
next unless row.name == 'tr'
row.children.each do |item|
currentobj = sort_item(item, currentobj, data)
end
end
data
end
def get_crn(title, code, section)
puts "TODO #{title} #{code} #{section}"
end
def full_major(major)
resp = HTTParty.post('https://patriotweb.gmu.edu/pls/prod/bwckschd.p_get_crse_unsec',
body: "term_in=201870&sel_subj=dummy&sel_day=dummy&sel_schd=dummy&sel_insm=dummy&sel_camp=dummy&sel_levl=dummy&sel_sess=dummy&sel_instr=dummy&sel_ptrm=dummy&sel_attr=dummy&sel_subj=#{major}&sel_crse=&sel_title=&sel_schd=%25&sel_from_cred=&sel_to_cred=&sel_camp=%25&sel_levl=%25&sel_ptrm=%25&sel_instr=%25&begin_hh=0&begin_mi=0&begin_ap=x&end_hh=0&end_mi=0&end_ap=x",
headers: {
'Content-Type' => 'application/x-www-form-urlencoded',
'charset' => 'utf-8'
})
searcher = Nokogiri::HTML(resp)
data = feed_course_info(searcher)
end
def initialize_req(subj, num)
base_url = 'https://patriotweb.gmu.edu/pls/prod/bwckctlg.p_disp_listcrse?term_in=201870'
stub = "subj_in=#{subj}&crse_in=#{num}&schd_in=%25"
resp = HTTParty.get("#{base_url}&#{stub}")
searcher = Nokogiri::HTML(resp)
data = feed_course_info(searcher)
end
def getSemesters
semesters = []
resp = HTTParty.get('https://patriotweb.gmu.edu/pls/prod/bwckschd.p_disp_dyn_sched')
searcher = Nokogiri::HTML(resp)
searcher.css('option').each do |opt|
if opt.attr('value').start_with? '20'
semesters.push(opt.attr('value'))
end
end
semesters
end
def getCourses(semester)
semesters = []
resp = HTTParty.post('https://patriotweb.gmu.edu/pls/prod/bwckgens.p_proc_term_date',
body: "p_calling_proc=bwckschd.p_disp_dyn_sched&p_term=#{semester}&p_by_date=Y&p_from_date=&p_to_date=",
headers: {
'Content-Type' => 'application/x-www-form-urlencoded',
'charset' => 'utf-8'
})
searcher = Nokogiri::HTML(resp)
# puts searcher.inspect
searcher.xpath('//*[@id="subj_id"]/option').each do |opt|
if opt.attr('value').strip.alpha?
semesters.push(opt.attr('value'))
end
end
semesters
end
# Initialize threads to be waited on array
threads = []
total = {}
# below will get you literally all semesters which is wildly overkill
# getSemesters.each do |semester|
semester = getSemesters.first
getCourses(semester).each do |course|
threads << Thread.new {
total[course] = full_major(course)
}
end
# end
ThreadsWait.all_waits(*threads)
# total.each { |subject|
# puts subject.first
# subject[1].each { |section|
# puts section
# }
# }
total.each { |subject|
subject[1].each { |crn|
section = crn[1]
puts section
# puts "#{section[:subj]} #{section[:code]} #{section[:sect]} #{section[:name]}"
}
}
require './patriot_web_networker.rb'
require 'nokogiri'
class String
def alpha?
!!match(/^[[:alpha:]]+$/)
end
end
module PatriotWeb
class Parser
def initialize
@networker = PatriotWeb::Networker.new
end
def parse_semesters
response = @networker.fetch_page_containing_semester_data
searcher = Nokogiri::HTML(response)
get_semesters_from_option_values(searcher)
end
def parse_subjects(semester_id)
response = @networker.fetch_subjects(semester_id)
searcher = Nokogiri::HTML(response)
get_alpha_option_values(searcher)
end
def parse_courses_in_subject(subject)
resp = @networker.fetch_courses_in_subject(subject)
searcher = Nokogiri::HTML(resp)
feed_course_info(searcher)
end
private
def get_alpha_option_values(searcher)
searcher.xpath('//*[@id="subj_id"]/option').map do |opt|
if opt.attr('value').strip.alpha?
opt.attr('value')
end
end
end
def get_semesters_from_option_values(searcher)
searcher.css('option').map do |opt|
if opt.attr('value').start_with? '20'
opt.attr('value')
end
end
end
def feed_course_info(searcher)
table = searcher.css('html body div.pagebodydiv table.datadisplaytable')
data = {}
currentobj = nil
table.css('table.datadisplaytable').first.children.each do |row|
next unless row.name == 'tr'
row.children.each do |item|
currentobj = sort_item(item, currentobj, data)
end
end
data
end
def sort_item(item, currentobj, data)
if item.name == 'th'
if item.to_html.include? '-'
titletxt = item.text
if item.text.include? ' - Honors'
titletxt = titletxt.gsub(' - Honors', ' (Honors)')
end
titledetails = titletxt.split(' - ')
if titledetails.count > 4
titledetails = ["#{titledetails[0]} #{titledetails[1]}", titledetails[2], titledetails[3], titledetails[4]]
end
titledata = titledetails[2].split(' ')
begin
data = get_details(data, titledetails, titledata)[0]
currentobj = get_details(data, titledetails, titledata)[1]
rescue StandardError => e
puts item
puts e
exit(1)
end
currentobj[:fields] = []
end
elsif item.is_a? Nokogiri::XML::Element
item.css('th').each do |field|
currentobj[:fields].push(field.text.downcase.tr(' ', '_'))
end
iter = 0
if currentobj
if currentobj[:fields]
upper = currentobj[:fields].count - 1
while iter <= upper
assign = item.css('td')[iter].text
currentobj[currentobj[:fields][iter]] = assign
iter += 1
end
end
end
end
currentobj
end
def get_details(data, titledetails, titledata)
crn = titledetails[1].strip
data[crn] = {} unless data[titledetails[1]]
crsinfo = { 'name': titledetails[0].strip }
uniquedata = { 'sect': titledetails[3].strip, 'crn': titledetails[1].strip }
general = { 'subj': titledata[0].strip, 'code': titledata[1].strip }
data[crn] = general.merge(uniquedata.merge(crsinfo))
data[crn][:code] = titledetails[2].split(' ')[1]
[data, data[crn]]
end
end
end
parser = PatriotWeb::Parser.new
semesters = parser.parse_semesters.compact
puts semesters.first
subjects = parser.parse_subjects(semesters.first)
puts parser.parse_courses_in_subject(subjects.first)
inherit_from:
- .rubocop_autogen.yml # Some auto-generated Rails code we shouldn't worry about
- .rubocop_relaxed.yml
AllCops:
TargetRubyVersion: 2.5.0
Style/FrozenStringLiteralComment:
Enabled: false
Style/SymbolArray:
EnforcedStyle: brackets # [:symbol1, :symbol2]
# This configuration was generated by
# `rubocop --auto-gen-config`
# on 2018-04-09 23:19:33 -0400 using RuboCop version 0.54.0.
# The point is for the user to remove these configuration records
# one by one as the offenses are removed from the code base.
# Note that changes in the inspected code, or installation of new
# versions of RuboCop, may require this file to be generated again.
# Offense count: 2
# Cop supports --auto-correct.
# Configuration parameters: EnforcedStyle.
# SupportedStyles: empty_lines, no_empty_lines
Layout/EmptyLinesAroundBlockBody:
Exclude:
- 'db/schema.rb'
# Offense count: 2
# Cop supports --auto-correct.
# Configuration parameters: EnforcedStyle, EnforcedStyleForEmptyBrackets.
# SupportedStyles: space, no_space, compact
# SupportedStylesForEmptyBrackets: space, no_space
Layout/SpaceInsideArrayLiteralBrackets:
Exclude:
- 'config/environments/production.rb'
# Offense count: 1
# Configuration parameters: CountComments, ExcludedMethods.
# Metrics/BlockLength:
# Max: 35
Metrics/MethodLength:
Exclude:
- 'db/migrate/*'
Metrics/BlockLength:
Exclude:
- 'db/schema.rb'
# Offense count: 1
# Cop supports --auto-correct.
# Configuration parameters: AutoCorrect, EnforcedStyle.
# SupportedStyles: nested, compact
Style/ClassAndModuleChildren:
Exclude:
- 'test/test_helper.rb'
# Offense count: 12
Style/Documentation:
Exclude:
- 'spec/**/*'
- 'test/**/*'
- 'app/helpers/*'
- 'app/mailers/application_mailer.rb'
- 'app/models/application_record.rb'
- 'config/application.rb'
- 'db/migrate/20180407190422_create_semesters.rb'
- 'db/migrate/20180407190502_create_courses.rb'
- 'db/migrate/20180407190750_create_sections.rb'
# Offense count: 4
# Cop supports --auto-correct.
Style/ExpandPathArguments:
Exclude:
- 'bin/bundle'
- 'bin/setup'
- 'bin/update'
- 'test/test_helper.rb'
# Offense count: 2
Style/MixinUsage:
Exclude:
- 'bin/setup'
- 'bin/update'
# Offense count: 1
# Cop supports --auto-correct.
# Configuration parameters: Strict.
Style/NumericLiterals:
MinDigits: 15
# Offense count: 1
# Cop supports --auto-correct.
# Configuration parameters: PreferredDelimiters.
Style/PercentLiteralDelimiters:
Exclude:
- 'config/spring.rb'
# Offense count: 2
# Cop supports --auto-correct.
Style/StderrPuts:
Exclude:
- 'bin/yarn'
# Offense count: 47
# Cop supports --auto-correct.
# Configuration parameters: EnforcedStyle, ConsistentQuotesInMultiline.
# SupportedStyles: single_quotes, double_quotes
Style/StringLiterals:
Exclude:
- 'Gemfile'
- 'bin/yarn'
- 'config/environments/production.rb'
- 'config/puma.rb'
- 'db/schema.rb'
- 'test/application_system_test_case.rb'
Rails/FilePath:
Exclude:
- "config/environments/development.rb" # auto generated file
# Relaxed.Ruby.Style
## Version 2.2
Style/Alias:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#stylealias
Style/AsciiComments:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#styleasciicomments
Style/BeginBlock:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#stylebeginblock
Style/BlockDelimiters:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#styleblockdelimiters
Style/CommentAnnotation:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#stylecommentannotation
Style/Documentation:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#styledocumentation
Layout/DotPosition:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#layoutdotposition
Style/DoubleNegation:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#styledoublenegation
Style/EndBlock:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#styleendblock
Style/FormatString:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#styleformatstring
Style/IfUnlessModifier:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#styleifunlessmodifier
Style/Lambda:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#stylelambda
Style/ModuleFunction:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#stylemodulefunction
Style/MultilineBlockChain:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#stylemultilineblockchain
Style/NegatedIf:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#stylenegatedif
Style/NegatedWhile:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#stylenegatedwhile
Style/ParallelAssignment:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#styleparallelassignment
Style/PercentLiteralDelimiters:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#stylepercentliteraldelimiters
Style/PerlBackrefs:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#styleperlbackrefs
Style/Semicolon:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#stylesemicolon
Style/SignalException:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#stylesignalexception
Style/SingleLineBlockParams:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#stylesinglelineblockparams
Style/SingleLineMethods:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#stylesinglelinemethods
Layout/SpaceBeforeBlockBraces:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#layoutspacebeforeblockbraces
Layout/SpaceInsideParens:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#layoutspaceinsideparens
Style/SpecialGlobalVars:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#stylespecialglobalvars
Style/StringLiterals:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#stylestringliterals
Style/TrailingCommaInArguments:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#styletrailingcommainarguments
Style/TrailingCommaInArrayLiteral:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#styletrailingcommainarrayliteral
Style/TrailingCommaInHashLiteral:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#styletrailingcommainhashliteral
Style/WhileUntilModifier:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#stylewhileuntilmodifier
Style/WordArray:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#stylewordarray
Lint/AmbiguousRegexpLiteral:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#lintambiguousregexpliteral
Lint/AssignmentInCondition:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#lintassignmentincondition
Metrics/AbcSize:
Enabled: false
Metrics/BlockNesting:
Enabled: false
Metrics/ClassLength:
Enabled: false
Metrics/ModuleLength:
Enabled: false
Metrics/CyclomaticComplexity:
Enabled: false
Metrics/LineLength:
Enabled: false
Metrics/MethodLength:
Enabled: false
Metrics/ParameterLists:
Enabled: false
Metrics/PerceivedComplexity:
Enabled: false
Style/Alias:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#stylealias
Style/AsciiComments:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#styleasciicomments
Style/BeginBlock:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#stylebeginblock
Style/BlockDelimiters:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#styleblockdelimiters
Style/CommentAnnotation:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#stylecommentannotation
Style/Documentation:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#styledocumentation
Layout/DotPosition:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#layoutdotposition
Style/DoubleNegation:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#styledoublenegation
Style/EndBlock:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#styleendblock
Style/FormatString:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#styleformatstring
Style/IfUnlessModifier:
Enabled: false
StyleGuide: http://relaxed.ruby.style/#styleifunlessmodifier