Commit b5bd4381 authored by Zac Wood's avatar Zac Wood
Browse files

Merge branch 'api-cleanup' into 2-pretty-💅

parents a5565466 7ac3f9f2
...@@ -2,118 +2,118 @@ require_relative 'patriot_web_networker' ...@@ -2,118 +2,118 @@ require_relative 'patriot_web_networker'
require 'nokogiri' require 'nokogiri'
class String class String
# Checks if a String is alphabetic (letters only, no digits)
def alpha? def alpha?
!!match(/^[[:alpha:]]+$/) !!match(/^[[:alpha:]]+$/)
end end
end end
module PatriotWeb module PatriotWeb
# Contains methods for parsing data retrieved from Patriot Web
class Parser class Parser
def initialize def initialize
@networker = PatriotWeb::Networker.new @networker = PatriotWeb::Networker.new
end end
# Parses all semesters available on Patriot Web
def parse_semesters def parse_semesters
response = @networker.fetch_page_containing_semester_data response = @networker.fetch_page_containing_semester_data
searcher = Nokogiri::HTML(response) document = Nokogiri::HTML(response) # parse the document from the HTTP response
get_semesters_from_option_values(searcher).compact get_semesters_from_option_values(document).compact
end end
# Parses subjects belonging to a given semester id
# @param semester_id [Integer]
def parse_subjects(semester_id) def parse_subjects(semester_id)
response = @networker.fetch_subjects(semester_id) response = @networker.fetch_subjects(semester_id)
searcher = Nokogiri::HTML(response) document = Nokogiri::HTML(response)
get_subject_codes_from_option_values(document)
get_alpha_option_values(searcher)
end end
# Parses all courses belonging to a given subject
# @param subject [String]
def parse_courses_in_subject(subject) def parse_courses_in_subject(subject)
resp = @networker.fetch_courses_in_subject(subject) response = @networker.fetch_courses_in_subject(subject)
searcher = Nokogiri::HTML(resp) document = Nokogiri::HTML(response)
feed_course_info(searcher) get_courses(document)
end end
private private
def get_alpha_option_values(searcher) # Parse the values of all different options on the Patriot Web
searcher.xpath('//*[@id="subj_id"]/option').map do |opt| # semester select page
if opt.attr('value').strip.alpha? # @param document [Nokogiri::HTML::Document]
opt.attr('value') def get_semesters_from_option_values(document)
document.css('option').map do |opt| # for each option value
if opt.attr('value').start_with? '20' # ensure it is a semester value
opt.attr('value') # return the value
end end
end end
end end
def get_semesters_from_option_values(searcher) # Parse all subject codes from the select element on the Patriot Web
searcher.css('option').map do |opt| # subject select page
if opt.attr('value').start_with? '20' # @param document [Nokogiri::HTML::Document]
opt.attr('value') def get_subject_codes_from_option_values(document)
document.xpath('//*[@id="subj_id"]/option').map do |opt| # for each option value under "subj_id"
if opt.attr('value').strip.alpha? # if the value is alphanumeric
opt.attr('value') # return the value
end end
end end
end end
def feed_course_info(searcher) # Parse all courses from the subject search page
table = searcher.css('html body div.pagebodydiv table.datadisplaytable') # @param document [Nokogiri::HTML::Document]
data = {} # @return [Array] courses
currentobj = nil def get_courses(document)
table.css('table.datadisplaytable').first.children.each do |row| table = document.css('html body div.pagebodydiv table.datadisplaytable').first
next unless row.name == 'tr' rows = table.children.drop 2 # first two elements are junk
row.children.each do |item|
currentobj = sort_item(item, currentobj, data) # each section is represented by 6 rows in the table
end (0..(rows.length/6 - 1)).map do |i|
end start = i*6
data data = {}
end title = rows[start].text
# the title looks like this: Survey of Accounting - 71117 - ACCT 203 - 001
# so split it by ' - ' and extract
title_elements = title.split(' - ')
data[:title] = title_elements[0].strip
data[:crn] = title_elements[1]
full_name = title_elements[2].split(' ')
next unless full_name.length == 2
data[:subj] = title_elements[2].split(' ')[0]
data[:course_number] = title_elements[2].split(' ')[1]
data[:section] = title_elements[3].strip
def sort_item(item, currentobj, data) # rows 1 to 3 contain info about registration and drop dates.
if item.name == 'th' # for now we're gonna ignore them and skip to row 4, which contains details
if item.to_html.include? '-' detail_rows = rows[start+4].css('tr')
titletxt = item.text next unless detail_rows.length > 0 # if there are no details, skip this item
if item.text.include? ' - Honors' details = detail_rows.last.text.split("\n").compact.reject(&:empty?) # skip empty strings
titletxt = titletxt.gsub(' - Honors', ' (Honors)')
end times = details[1].split(' - ')
titledetails = titletxt.split(' - ') if (times.length == 1)
if titledetails.count > 4 data[:start_time] = 'TBA'
titledetails = ["#{titledetails[0]} #{titledetails[1]}", titledetails[2], titledetails[3], titledetails[4]] data[:end_time] = 'TBA'
end else
titledata = titledetails[2].split(' ') data[:start_time] = times[0]
begin data[:end_time] = times[1]
data = get_details(data, titledetails, titledata)[0]
currentobj = get_details(data, titledetails, titledata)[1]
rescue StandardError => e
puts item
puts e
exit(1)
end
currentobj[:fields] = []
end end
elsif item.is_a? Nokogiri::XML::Element
item.css('th').each do |field|
currentobj[:fields].push(field.text.downcase.tr(' ', '_'))
end
iter = 0
if currentobj
if currentobj[:fields]
upper = currentobj[:fields].count - 1
while iter <= upper
assign = item.css('td')[iter].text
currentobj[currentobj[:fields][iter]] = assign
iter += 1
end
end
end
end
currentobj
end
def get_details(data, titledetails, titledata) data[:days] = details[2].strip
crn = titledetails[1].strip data[:location] = details[3].strip
data[crn] = {} unless data[titledetails[1]]
crsinfo = { 'name': titledetails[0].strip } dates = details[4].split(' - ')
uniquedata = { 'sect': titledetails[3].strip, 'crn': titledetails[1].strip } data[:start_date] = dates[0]
general = { 'subj': titledata[0].strip, 'code': titledata[1].strip } data[:end_date] = dates[1]
data[crn] = general.merge(uniquedata.merge(crsinfo))
data[crn][:code] = titledetails[2].split(' ')[1] data[:type] = details[5]
[data, data[crn]] data[:instructor] = details[6]
data
end
end end
end end
end end
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
# #
# It's strongly recommended that you check this file into your version control system. # It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20180505195736) do ActiveRecord::Schema.define(version: 20180619011649) do
create_table "closures", force: :cascade do |t| create_table "closures", force: :cascade do |t|
t.date "date" t.date "date"
...@@ -20,16 +20,7 @@ ActiveRecord::Schema.define(version: 20180505195736) do ...@@ -20,16 +20,7 @@ ActiveRecord::Schema.define(version: 20180505195736) do
t.index ["semester_id"], name: "index_closures_on_semester_id" t.index ["semester_id"], name: "index_closures_on_semester_id"
end end
create_table "courses", force: :cascade do |t| create_table "course_sections", force: :cascade do |t|
t.string "subject"
t.string "course_number"
t.integer "semester_id"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["semester_id"], name: "index_courses_on_semester_id"
end
create_table "sections", force: :cascade do |t|
t.string "name" t.string "name"
t.string "crn" t.string "crn"
t.string "section_type" t.string "section_type"
...@@ -48,7 +39,16 @@ ActiveRecord::Schema.define(version: 20180505195736) do ...@@ -48,7 +39,16 @@ ActiveRecord::Schema.define(version: 20180505195736) do
t.integer "course_id" t.integer "course_id"
t.datetime "created_at", null: false t.datetime "created_at", null: false
t.datetime "updated_at", null: false t.datetime "updated_at", null: false
t.index ["course_id"], name: "index_sections_on_course_id" t.index ["course_id"], name: "index_course_sections_on_course_id"
end
create_table "courses", force: :cascade do |t|
t.string "subject"
t.string "course_number"
t.integer "semester_id"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["semester_id"], name: "index_courses_on_semester_id"
end end
create_table "semesters", force: :cascade do |t| create_table "semesters", force: :cascade do |t|
......
# This file should contain all the record creation needed to seed the database with its default values. # This file should contain all the record creation needed to seed the database with its default values.
# The data can then be loaded with the rails db:seed command (or created alongside the database with db:setup). # The data can then be loaded with the rails db:seed command (or created alongside the database with db:setup).
#
# Examples:
#
# movies = Movie.create([{ name: 'Star Wars' }, { name: 'Lord of the Rings' }])
# Character.create(name: 'Luke', movie: movies.first)
require_relative 'patriot_web_parser' require_relative 'patriot_web_parser'
require 'thwait' require 'thwait'
...@@ -13,69 +8,75 @@ require 'nokogiri' ...@@ -13,69 +8,75 @@ require 'nokogiri'
require 'json' require 'json'
threads = [] threads = []
total = [] total = {}
parser = PatriotWeb::Parser.new parser = PatriotWeb::Parser.new
# get the first semester only
semester = parser.parse_semesters.first semester = parser.parse_semesters.first
puts "DDOSing Patriot Web, buckle up kids"
# parse all subjects and their courses in the semester
parser.parse_subjects(semester).each do |subject| parser.parse_subjects(semester).each do |subject|
puts "Getting courses for #{subject}"
threads << Thread.new { threads << Thread.new {
total << parser.parse_courses_in_subject(subject) total[subject] = parser.parse_courses_in_subject(subject)
} }
end end
# For testing, only get first subject # For testing, only get first subject
# subject = parser.parse_subjects(semester).first # subject = parser.parse_subjects(semester).first
# total << parser.parse_courses_in_subject(subject) # total[subject] = parser.parse_courses_in_subject(subject)
# wait for all the threads to finish
ThreadsWait.all_waits(*threads) ThreadsWait.all_waits(*threads)
# delete everything in the current database
Closure.delete_all Closure.delete_all
Section.delete_all CourseSection.delete_all
Course.delete_all Course.delete_all
Semester.delete_all Semester.delete_all
# create a semester for the next semester
semester = Semester.create! season: 'Fall', year: 2018 semester = Semester.create! season: 'Fall', year: 2018
semester.save! semester.save!
total.each do |subject| total.each do |subject, sections|
subject.each_value do |section| puts "Adding courses for #{subject}..."
next unless (section.key? "date_range") && (section.key? "instructors") && (section.key? "days") sections.each do |section|
next if section.nil? || !section.key?(:subj) || !section.key?(:course_number)
# Find or create a course and set its semester
# TODO: this breaks when you try to do more than one semester,
# since just the subject + course_number do not uniquely identify a course
# Check the semester as well
course = Course.find_or_create_by(subject: section[:subj], course = Course.find_or_create_by(subject: section[:subj],
course_number: section[:code]) course_number: section[:course_number])
course.semester = semester course.semester = semester
course.save! course.save!
section_name = "#{section[:subj]} #{section[:code]} #{section[:sect]}" section_name = "#{section[:subj]} #{section[:course_number]} #{section[:section]}"
puts "Adding #{section_name}..." puts "Adding #{section_name}..."
start_time = if section.key? "time" CourseSection.create!(name: section_name,
section["time"].split(' - ').first
else
"N/A"
end
end_time = if section.key? "time"
section["time"].split(' - ').last
else
"N/A"
end
Section.create!(name: section_name,
crn: section[:crn], crn: section[:crn],
title: section[:name], section_type: section[:type],
location: section["where"], title: section[:title],
days: section["days"], instructor: section[:instructor],
start_date: section["date_range"].split(' - ').first, start_date: section[:start_date],
end_date: section["date_range"].split(' - ').last, end_date: section[:end_date],
start_time: start_time, days: section[:days],
end_time: end_time, start_time: section[:start_time],
instructor: section["instructors"].split(' ').map { |word| word unless word.empty? }.join(' '), end_time: section[:end_time],
location: section[:location],
course: course) course: course)
end end
end end
# create closures for the days there will be no classes
# see: https://registrar.gmu.edu/calendars/fall-2018/
Closure.create! date: Date.new(2018, 9, 3), semester: semester Closure.create! date: Date.new(2018, 9, 3), semester: semester
Closure.create! date: Date.new(2018, 10, 8), semester: semester Closure.create! date: Date.new(2018, 10, 8), semester: semester
(21..25).each { |n| Closure.create! date: Date.new(2018, 11, n), semester: semester } (21..25).each { |n| Closure.create! date: Date.new(2018, 11, n), semester: semester }
......
#!/bin/sh
# Startup script: generates a Rails secret key, then starts the server.
# NOTE(review): appears to be used as the Docker container start command — confirm.

# generate a secret key for rails to use
# The assignment is kept separate from `export` so that a failure of
# `rails secret` is not masked by the exit status of `export`; abort instead
# of starting the server with an empty SECRET_KEY_BASE.
SECRET_KEY_BASE=$(rails secret) || exit 1
export SECRET_KEY_BASE

# uncomment for faster docker builds during testing
#cp db/development.sqlite3 db/production.sqlite3

# load data from patriot web into database
# rails db:migrate
# rails db:seed

# start the server
rails s
...@@ -2,7 +2,16 @@ require 'test_helper' ...@@ -2,7 +2,16 @@ require 'test_helper'
class CalendarGeneratorControllerTest < ActionDispatch::IntegrationTest class CalendarGeneratorControllerTest < ActionDispatch::IntegrationTest
test "should get generate" do test "should get generate" do
assert true # TODO: Implement test crns = [course_sections(:cs112001).crn, course_sections(:cs112002).crn]
# assert_response :success post "/api/generate", params: crns.to_json, headers: { 'CONTENT_TYPE' => 'application/json' }
# DTSTAMP and UID values are unique to each generated event, so we can't test against them.
# Remove every line that contains either of them before comparing.
# Also strip the \r characters so line endings don't affect the comparison.
gen = @response.body.split("\n").select {|line| !line.include?("DTSTAMP") && !line.include?("UID")}.join("\n").gsub(/\r/, "")
correct_ical = File.open("test/test.ics").read.gsub(/\r/, "")
assert_equal correct_ical, gen
end end
end end
require 'test_helper'
class CourseSectionsControllerTest < ActionDispatch::IntegrationTest
  # Requests the course sections index for the :cs112 fixture course and checks
  # that the request succeeds and that the parsed JSON body contains as many
  # entries as there are CourseSection rows for that course.
  test 'should get index' do
    get course_sections_url(course_id: courses(:cs112).id)
    assert_response :success

    returned = JSON.parse(@response.body)
    expected_count = CourseSection.where(course_id: courses(:cs112).id).count
    assert_equal expected_count, returned.count
  end
end
require 'test_helper' require 'test_helper'
class CoursesControllerTest < ActionDispatch::IntegrationTest class CoursesControllerTest < ActionDispatch::IntegrationTest
test 'should get index' do test '#index should return all courses' do
get url_for controller: 'courses', action: 'index' get courses_url
assert_response :success assert_response :success
courses_returned = JSON.parse @response.body
courses_count = Course.all.count
assert_equal courses_count, courses_returned.count
end end
test '#index should return filtered by subject case insensitive' do
get courses_url subject: "Cs"
assert_response :success
courses_returned = JSON.parse @response.body
courses_count = Course.where(subject: "CS").count
assert_equal courses_count, courses_returned.count
end
test '#index should return filtered by subject and course number' do
get courses_url subject: "CS", course_number: "112"
assert_response :success
courses_returned = JSON.parse @response.body
courses_count = Course.where(subject: "CS", course_number: "112").count
assert_equal courses_count, courses_returned.count
end
test '#show should return course_sections for course' do
cs_112_id = courses(:cs112).id
get course_url id: cs_112_id
assert_response :success
sections_returned = JSON.parse @response.body
cs_112_sections = CourseSection.where(course_id: cs_112_id)
assert_equal cs_112_sections.count, sections_returned.count
end
end end
require 'test_helper'
class HomeControllerTest < ActionDispatch::IntegrationTest
  # Smoke test: a GET to the home index URL yields a successful response.
  test 'should get index' do
    get(home_index_url)
    assert_response(:success)
  end
end
require 'test_helper'
class SearchControllerTest < ActionDispatch::IntegrationTest
  # A search request that supplies a crn parameter should succeed.
  test "should get index and search by crn" do
    search_url = url_for(controller: 'search', action: 'index', crn: 'MyString')
    get search_url
    assert_response :success
  end

  # A search request with no crn parameter should be answered with a
  # "missing" (404) response.
  test "should 404 without crn" do
    search_url = url_for(controller: 'search', action: 'index')
    get search_url
    assert_response :missing
  end
end
require 'test_helper'
class SectionsControllerTest < ActionDispatch::IntegrationTest
  # Smoke test: the sections index for course_id 1 responds successfully.
  test 'should get index' do
    index_url = url_for(controller: 'sections', action: 'index', course_id: 1)
    get index_url
    assert_response :success
  end
end
# Read about fixtures at http://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html # Read about fixtures at http://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html
one: may1st: # tuesday
date: 2018-05-05 date: <%= Date.new(2018, 5, 30) %>
semester: one semester: fall2018
two: may2nd: # wednesday
date: 2018-05-05 date: <%= Date.new(2018, 5, 31) %>
semester: two semester: fall2018
# Read about fixtures at http://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html # Read about fixtures at http://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html
one: cs112001:
id: 1
name: MyString name: MyString
crn: MyString crn: MyString
section_type: MyString section_type: MyString
title: MyString title: MyString
instructor: MyString instructor: MyString
start_date: 2018-04-07