Commit dc751ea1 authored by Zac Wood's avatar Zac Wood
Browse files

monthly linting

parent eb721652
Pipeline #2825 passed with stage
in 2 minutes and 15 seconds
......@@ -52,3 +52,5 @@ gem 'icalendar'
gem 'nokogiri'
gem 'rack-cors', require: 'rack/cors'
gem 'rubyXL'
gem "rubocop", "~> 0.58.2"
......@@ -41,6 +41,7 @@ GEM
addressable (2.5.2)
public_suffix (>= 2.0.2, < 4.0)
arel (8.0.0)
ast (2.4.0)
bindex (0.5.0)
builder (3.2.3)
byebug (10.0.2)
......@@ -65,6 +66,7 @@ GEM
i18n (1.0.1)
concurrent-ruby (~> 1.0)
icalendar (2.4.1)
jaro_winkler (1.5.1)
jbuilder (2.7.0)
activesupport (>= 4.2.0)
multi_json (>= 1.2)
......@@ -86,6 +88,10 @@ GEM
nio4r (2.3.1)
nokogiri (1.8.3)
mini_portile2 (~> 2.3.0)
parallel (1.12.1)
parser (2.5.1.2)
ast (~> 2.4.0)
powerpack (0.1.2)
pry (0.11.3)
coderay (~> 1.1.0)
method_source (~> 0.9.0)
......@@ -121,10 +127,20 @@ GEM
method_source
rake (>= 0.8.7)
thor (>= 0.18.1, < 2.0)
rainbow (3.0.0)
rake (12.3.1)
rb-fsevent (0.10.3)
rb-inotify (0.9.10)
ffi (>= 0.5.0, < 2)
rubocop (0.58.2)
jaro_winkler (~> 1.5.1)
parallel (~> 1.10)
parser (>= 2.5, != 2.5.1.1)
powerpack (~> 0.1)
rainbow (>= 2.2.2, < 4.0)
ruby-progressbar (~> 1.7)
unicode-display_width (~> 1.0, >= 1.0.1)
ruby-progressbar (1.10.0)
rubyXL (3.3.29)
nokogiri (>= 1.4.4)
rubyzip (>= 1.1.6)
......@@ -153,6 +169,7 @@ GEM
turbolinks-source (5.1.0)
tzinfo (1.2.5)
thread_safe (~> 0.1)
unicode-display_width (1.4.0)
web-console (3.6.2)
actionview (>= 5.0)
activemodel (>= 5.0)
......@@ -181,6 +198,7 @@ DEPENDENCIES
puma (~> 3.7)
rack-cors
rails (~> 5.1.6)
rubocop (~> 0.58.2)
rubyXL
selenium-webdriver
spring
......@@ -191,4 +209,4 @@ DEPENDENCIES
web-console (>= 3.3.0)
BUNDLED WITH
1.16.2
1.16.3
......@@ -19,7 +19,7 @@ module Schedules
config.middleware.insert_before 0, Rack::Cors do
allow do
origins '*'
resource '*', :headers => :any, :methods => [:get, :post, :options]
resource '*', headers: :any, methods: [:get, :post, :options]
end
end
end
......
......@@ -69,7 +69,8 @@ class ExcelLoader
section_name = row.cells[2]&.value
# If there is no valid section name, just continue to the next row
unless section_name.blank? || section_name == 'Total'
return nil if section_name.blank? || section_name == 'Total'
# The time field in the spreadsheet uses the format "start_time - end_time" i.e. "12:00 PM - 1:15 PM".
# So, split the times string by the - character
times = row.cells[23]&.value
......@@ -104,5 +105,4 @@ class ExcelLoader
section
end
end
end
# frozen_string_literal: true
require 'thwait'
require 'httparty'
require 'nokogiri'
require 'json'
#
# USAGE:
#
# Just run it and it dynamically dumps the latest semester. There's a bit to do it for all of the ones in history commented out below but it'll thrash your RAM and probably piss off PatriotWeb. Also note this script could be trivially modified to correlate human readable names to semester IDs since they're just the .text attribute of the option node.
#
# There are a few minor issues, such as multiple spaces in teacher names, and we could also be scraping out email addresses, but there are no major ones.
#
# DISCLAIMER/WARNING:
#
# This opens a number of connections pretty transparently from a script to PatriotWeb. I am not liable if you run this a million times and somehow knock over PatriotWeb. It's a scraper, not a DoS utility.
#
# Credit stackoverflow
class String
  # Report whether the string consists solely of alphabetic characters.
  #
  # The pattern is anchored with \A and \z so the whole string has to match.
  # ^ and $ anchor per line, which would accept input such as "abc\n123".
  #
  # @return [Boolean] true when the string is non-empty and every character
  #   matches [[:alpha:]], false otherwise
  def alpha?
    !!match(/\A[[:alpha:]]+\z/)
  end
end
# Build the record for one section from its parsed title and store it in
# +data+ under the section's CRN (whitespace-stripped).
#
# The stored record has symbol keys :subj, :code, :sect, :crn and :name.
#
# @param data [Hash] accumulator keyed by CRN; mutated in place
# @param titledetails [Array<String>] title pieces; index 0 is the name,
#   1 the CRN, 2 the "SUBJ CODE" pair and 3 the section
# @param titledata [Array<String>] subject and course code, in that order
# @return [Array] two elements: +data+ itself and the record just stored
# @raise [NoMethodError] if any required piece is missing (nil)
def get_details(data, titledetails, titledata)
  crn = titledetails[1].strip
  record = {
    subj: titledata[0].strip,
    code: titledata[1].strip,
    sect: titledetails[3].strip,
    crn: crn,
    name: titledetails[0].strip
  }
  # The course code finally stored is always re-derived from the title piece.
  record[:code] = titledetails[2].split(' ')[1]
  data[crn] = record
  [data, record]
end
# Process one child node of a table row and return the section record that
# subsequent nodes should be attached to.
#
# A 'th' node whose HTML contains '-' is treated as a section title: it is
# split on ' - ', registered in +data+ through get_details, and becomes the
# new current record with an empty :fields list. Any other
# Nokogiri::XML::Element is treated as detail content: the text of each nested
# 'th' is appended to the current record's :fields (lower-cased, spaces turned
# into underscores), and each field is then assigned the text of the 'td' at
# the same position.
#
# @param item [Nokogiri::XML::Node] the node to process
# @param currentobj [Hash, nil] the record currently being filled, if any
# @param data [Hash] accumulator keyed by CRN; mutated via get_details
# @return [Hash, nil] the record to use as +currentobj+ for the next node
def sort_item(item, currentobj, data)
  if item.name == 'th'
    return currentobj unless item.to_html.include?('-')

    # " - Honors" would otherwise be mistaken for a title separator.
    titledetails = item.text.gsub(' - Honors', ' (Honors)').split(' - ')
    if titledetails.count > 4
      # The name itself contained ' - '; fold its two halves back together.
      titledetails = ["#{titledetails[0]} #{titledetails[1]}", *titledetails[2..4]]
    end
    titledata = titledetails[2].split(' ')
    begin
      # A single call both registers the record and yields it.
      _, currentobj = get_details(data, titledetails, titledata)
    rescue StandardError => e
      # Dump the offending node and abort the whole run.
      puts item
      puts e
      exit(1)
    end
    currentobj[:fields] = []
  elsif item.is_a?(Nokogiri::XML::Element)
    # Detail content seen before any title has nothing to attach to.
    return currentobj unless currentobj && currentobj[:fields]

    item.css('th').each do |field|
      currentobj[:fields].push(field.text.downcase.tr(' ', '_'))
    end
    cells = item.css('td')
    currentobj[:fields].each_with_index do |field_name, idx|
      currentobj[field_name] = cells[idx].text
    end
  end
  currentobj
end
# Walk the section listing table of a parsed page and collect every section
# found in it.
#
# Each child of every 'tr' child of the first matching table is passed to
# sort_item, which fills +data+ and tracks the record currently being built.
#
# @param searcher [#css] parsed page; callers in this file pass the result of
#   Nokogiri::HTML
# @return [Hash] section records keyed by CRN; empty when the page contains
#   no listing table
def feed_course_info(searcher)
  tables = searcher.css('html body div.pagebodydiv table.datadisplaytable')
  listing = tables.css('table.datadisplaytable').first
  data = {}
  # A page without a listing table simply has no sections to report.
  return data unless listing

  currentobj = nil
  listing.children.each do |row|
    next unless row.name == 'tr'

    row.children.each { |item| currentobj = sort_item(item, currentobj, data) }
  end
  data
end
# Placeholder for a CRN lookup: currently only prints a TODO line made of the
# three arguments.
#
# @param title [#to_s] course title
# @param code [#to_s] course code
# @param section [#to_s] section identifier
# @return [nil]
def get_crn(title, code, section)
  message = format('TODO %s %s %s', title, code, section)
  puts message
end
# Fetch the PatriotWeb section listing for one subject and parse it.
#
# @param major [String] subject code, sent as the real sel_subj value
# @param term [String] term identifier sent as term_in; defaults to '201870',
#   the value that used to be hard-coded here
# @return [Hash] section records keyed by CRN, as built by feed_course_info
def full_major(major, term: '201870')
  resp = HTTParty.post('https://patriotweb.gmu.edu/pls/prod/bwckschd.p_get_crse_unsec',
                       body: "term_in=#{term}&sel_subj=dummy&sel_day=dummy&sel_schd=dummy&sel_insm=dummy&sel_camp=dummy&sel_levl=dummy&sel_sess=dummy&sel_instr=dummy&sel_ptrm=dummy&sel_attr=dummy&sel_subj=#{major}&sel_crse=&sel_title=&sel_schd=%25&sel_from_cred=&sel_to_cred=&sel_camp=%25&sel_levl=%25&sel_ptrm=%25&sel_instr=%25&begin_hh=0&begin_mi=0&begin_ap=x&end_hh=0&end_mi=0&end_ap=x",
                       headers: {
                         'Content-Type' => 'application/x-www-form-urlencoded',
                         'charset' => 'utf-8'
                       })
  feed_course_info(Nokogiri::HTML(resp))
end
# Fetch the PatriotWeb catalog section listing for a single course and parse it.
#
# @param subj [String] subject code, sent as subj_in
# @param num [String, Integer] course number, sent as crse_in
# @param term [String] term identifier sent as term_in; defaults to '201870',
#   the value that used to be hard-coded here
# @return [Hash] section records keyed by CRN, as built by feed_course_info
def initialize_req(subj, num, term: '201870')
  base_url = "https://patriotweb.gmu.edu/pls/prod/bwckctlg.p_disp_listcrse?term_in=#{term}"
  stub = "subj_in=#{subj}&crse_in=#{num}&schd_in=%25"
  resp = HTTParty.get("#{base_url}&#{stub}")
  feed_course_info(Nokogiri::HTML(resp))
end
# Fetch the dynamic schedule page and list the semester identifiers it offers.
#
# Only <option> values beginning with '20' are kept, in page order.
#
# @return [Array<String>] semester identifiers
def getSemesters
  page = Nokogiri::HTML(HTTParty.get('https://patriotweb.gmu.edu/pls/prod/bwckschd.p_disp_dyn_sched'))
  page.css('option')
      .map { |option| option.attr('value') }
      .select { |value| value.start_with?('20') }
end
# Fetch the subject picker for a semester and list the subject codes it offers.
#
# Only options under the element with id "subj_id" whose stripped value is
# purely alphabetic are kept, in page order; values are returned unstripped.
#
# @param semester [String] semester identifier, sent as p_term
# @return [Array<String>] subject codes
def getCourses(semester)
  form_body = "p_calling_proc=bwckschd.p_disp_dyn_sched&p_term=#{semester}&p_by_date=Y&p_from_date=&p_to_date="
  request_headers = {
    'Content-Type' => 'application/x-www-form-urlencoded',
    'charset' => 'utf-8'
  }
  resp = HTTParty.post('https://patriotweb.gmu.edu/pls/prod/bwckgens.p_proc_term_date',
                       body: form_body,
                       headers: request_headers)
  page = Nokogiri::HTML(resp)
  page.xpath('//*[@id="subj_id"]/option')
      .map { |option| option.attr('value') }
      .select { |value| value.strip.alpha? }
end
# end
# total.each { |subject|
# puts subject.first
# subject[1].each { |section|
# puts section
# }
# }
# Scrape every subject of the first semester returned by getSemesters and
# reload the Semester, Course and Section tables from the result.
#
# One thread per subject fetches that subject's sections via full_major. Once
# all threads have finished, the three tables are emptied, a Fall 2018
# Semester is created, and a Course (found or created) plus a Section are
# saved for every scraped section. A summary line is printed per section.
#
# @return [Hash] scraped sections grouped by subject code
def load_data
  scraped = {}
  # Iterating over getSemesters instead would fetch literally every semester,
  # which is wildly overkill; only the first one is used.
  term_id = getSemesters.first
  # One fetch thread per subject; collected so they can all be waited on.
  workers = getCourses(term_id).map do |subject_code|
    Thread.new { scraped[subject_code] = full_major(subject_code) }
  end
  ThreadsWait.all_waits(*workers)

  Semester.delete_all
  Course.delete_all
  Section.delete_all

  term = Semester.create!(season: 'Fall', year: '2018')
  term.save!

  scraped.each do |_subject_code, sections|
    sections.each do |_crn, section|
      course = Course.find_or_create_by(subject: section[:subj],
                                        course_number: section[:code])
      course.semester = term
      course.save!

      section_name = "#{section[:subj]} #{section[:code]} #{section[:sect]}"
      Section.create!(name: section_name,
                      crn: section[:crn],
                      title: section[:name],
                      course: course)
      puts "#{section_name} #{section[:name]}"
    end
  end
end
......@@ -66,7 +66,7 @@ module PatriotWeb
# Parse all courses from the subject search page
# @param document [Nokogiri::HTML::Document]
# @return [Array] courses
def get_courses(document, subject)
def get_courses(document, _subject)
table = document.css('html body div.pagebodydiv table.datadisplaytable')
rows = table.css('tr')
# rows[100..110].each_with_index do |row, i|
......@@ -78,11 +78,10 @@ module PatriotWeb
def data_from(rows)
i = 0
title_index = 0
result = []
while i < rows.length
if is_title(rows[i].text) # check if the row is a title
if title?(rows[i].text) # check if the row is a title
data = {}
title_elements = rows[i].text.split(' - ')
......@@ -94,15 +93,15 @@ module PatriotWeb
data[:course_number] = full_name[1]
data[:section] = title_elements[3].strip
details = rows[i+2].css('td table tr td')
unless details.length > 0
details = rows[i + 2].css('td table tr td')
unless !details.empty?
puts "#{full_name.join(' ')} is fake news"
i += 1
next
end
times = details[1].text.split(' - ')
if (times.length == 1)
if times.length == 1
data[:start_time] = 'TBA'
data[:end_time] = 'TBA'
else
......@@ -131,7 +130,7 @@ module PatriotWeb
end
# a title looks like this: Survey of Accounting - 71117 - ACCT 203 - 001
def is_title(text)
def title?(text)
elements = text.split(' - ')
elements.length == 4 && elements[2].split(' ').length == 2
end
......
......@@ -9,5 +9,3 @@ Semester.delete_all
loader = ExcelLoader.new 'db/data/fall2018.xlsx'
loader.load_data
......@@ -7,7 +7,7 @@ require 'httparty'
require 'nokogiri'
require 'json'
threads = []
# threads = []
total = {}
parser = PatriotWeb::Parser.new
......@@ -74,7 +74,6 @@ total.each do |subject, sections|
end_time: section[:end_time],
location: section[:location],
course: course)
end
end
......
......@@ -41,5 +41,4 @@ class CoursesControllerTest < ActionDispatch::IntegrationTest
assert_equal cs_112_sections.count, sections_returned.count
end
end
......@@ -8,10 +8,8 @@ class SchedulesControllertest < ActionDispatch::IntegrationTest
# DTSTAMP and UID lines uniquely identify events, so we can't test against them.
# so remove all the lines starting with them.
# the \r characters are also annoying so just remove them too
gen = @response.body.split("\n").select {|line| !line.include?("DTSTAMP") && !line.include?("UID")}.join("\n").gsub(/\r/, "")
correct_ical = File.open("test/test.ics").read.gsub(/\r/, "")
gen = @response.body.split("\n").select { |line| !line.include?("DTSTAMP") && !line.include?("UID") }.join("\n").delete("\r")
correct_ical = File.open("test/test.ics").read.delete("\r")
assert_equal correct_ical, gen
end
end
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment