Commit 894575fb authored by Zac Wood's avatar Zac Wood
Browse files

Merge branch '25-load-catalog' into 'dev-v2'

Resolve "Load course data from the GMU Catalog"

See merge request srct/schedules!24
parents 8970cc67 318dee77
Pipeline #2902 passed with stage
in 2 minutes and 15 seconds
require 'nokogiri'
require 'httparty'
require 'thwait'
require_relative 'patriot_web_parser'
# Returns the non-breaking space character (U+00A0) as a UTF-8 string.
# get_courses splits the "<subject><nbsp><number>" part of a catalog title
# on this character.
def nbsp
  160.chr(Encoding::UTF_8)
end
# Downloads the GMU catalog page for one subject and extracts its courses.
#
# @param subj [String] subject code as used in the catalog URL path
# @return [Array<Hash>] one hash per parsable `.courseblock`, with the keys
#   :subject, :title, :course_number, :credits and :description. Blocks whose
#   title cannot be parsed are left out, so the result never contains nil.
def get_courses(subj)
  response = HTTParty.get("https://catalog.gmu.edu/courses/#{subj}")
  document = Nokogiri::HTML(response.body)
  # map + compact (rather than filter_map) so this also runs on Ruby < 2.7.
  document.css('.courseblock').map { |block| parse_course_block(block) }.compact
end

# Converts a single `.courseblock` node into a course hash.
#
# The parsing assumes a title shaped like
# "<SUBJ><nbsp><NUM>: <name>. <credits text>" -- NOTE(review): inferred from
# the split logic, confirm against the live catalog markup.
#
# @param block [#css] node exposing the `.courseblocktitle` / `.courseblockdesc` children
# @return [Hash, nil] the course hash, or nil when the block has no usable title
def parse_course_block(block)
  title_node = block.css('.courseblocktitle').first
  return nil if title_node.nil?

  # Only the first ": " separates the course code from the rest; the name
  # itself may contain further colons.
  code, rest = title_node.text.split(': ', 2)
  subject, number = code.to_s.split(nbsp)
  return nil if subject.nil?

  # Split at the LAST ". " so that names containing ". " (abbreviations such
  # as "U.S. ") stay intact and the trailing sentence is read as the credits.
  name, separator, credits_full = rest.to_s.rpartition('. ')
  # No ". " at all: everything is the name and there is no credits text.
  name, credits_full = credits_full, nil if separator.empty?

  {
    subject: subject,
    title: name,
    course_number: number,
    # First whitespace-separated token of the credits text; nil when absent.
    credits: credits_full.to_s.split.first,
    description: block.css('.courseblockdesc').text
  }
end
# Migration that adds four string columns to the courses table:
# description, credits, prerequisite and restrictions.
class AddFieldsToCourse < ActiveRecord::Migration[5.1]
  # Columns are added in this order, each with type :string.
  NEW_STRING_COLUMNS = %i[description credits prerequisite restrictions].freeze

  def change
    NEW_STRING_COLUMNS.each do |column_name|
      add_column :courses, column_name, :string
    end
  end
end
# Migration that adds a string `title` column to the courses table.
class AddTitleToCourses < ActiveRecord::Migration[5.1]
  def change
    change_table :courses do |t|
      t.string :title
    end
  end
end
......@@ -76,6 +76,7 @@ module PatriotWeb
data_from rows
end
# Extract data about all course sections from the rows
def data_from(rows)
i = 0
result = []
......@@ -95,7 +96,7 @@ module PatriotWeb
details = rows[i + 2].css('td table tr td')
unless !details.empty?
puts "#{full_name.join(' ')} is fake news"
# puts "#{full_name.join(' ')} is fake news"
i += 1
next
end
......
......@@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20180910213148) do
ActiveRecord::Schema.define(version: 20180914210918) do
create_table "closures", force: :cascade do |t|
t.date "date"
......@@ -49,6 +49,11 @@ ActiveRecord::Schema.define(version: 20180910213148) do
t.integer "semester_id"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.string "description"
t.string "credits"
t.string "prerequisite"
t.string "restrictions"
t.string "title"
t.index ["semester_id"], name: "index_courses_on_semester_id"
end
......
......@@ -2,99 +2,128 @@
# The data can then be loaded with the rails db:seed command (or created alongside the database with db:setup).
require_relative 'patriot_web_parser'
require_relative 'courses_loader'
require 'thwait'
require 'httparty'
require 'nokogiri'
require 'json'
threads = []
total = {}
parser = PatriotWeb::Parser.new
# Collects catalog course data for every subject, using one thread per subject.
#
# @param subjects [Array<String>] subject codes; each one is downcased before
#   being handed to get_courses
# @return [Array<Hash>] the course hashes of all subjects, in subject order,
#   with any nil entries removed
def parse_courses(subjects)
  workers = subjects.map do |subject|
    Thread.new { get_courses(subject.downcase) }
  end

  # Thread#value joins the thread and returns the block's result (re-raising
  # anything the thread raised), so no shared array has to be mutated from
  # several threads and no separate wait step is needed.
  workers.flat_map(&:value).compact
end
# parse all subjects and their courses in the semester
parser.parse_subjects(semester).each do |subject|
puts "Getting courses for #{subject}"
threads << Thread.new {
total[subject] = parser.parse_courses_in_subject(subject)
}
# Creates one Course record per parsed course hash.
#
# @param courses [Array<Hash>] hashes read via :subject, :title,
#   :course_number, :credits and :description
# @param semester [Object] value stored as each course's semester
# @return [Array<Hash>] the `courses` argument (result of `each`)
def load_courses(courses, semester)
  copied_keys = %i[subject title course_number credits description]

  courses.each do |course|
    # Copy exactly the listed keys; a key missing from the hash becomes nil.
    attributes = copied_keys.map { |key| [key, course[key]] }.to_h
    attributes[:semester] = semester
    Course.create!(attributes)
  end
end
# For testing, only get first subject
# subject = parser.parse_subjects(semester)[0]
# total[subject] = parser.parse_courses_in_subject(subject)
# wait for all the threads to finish
ThreadsWait.all_waits(*threads)
# delete everything in the current database
Closure.delete_all
CourseSection.delete_all
Course.delete_all
Semester.delete_all
# create a semester for the next semester
semester = Semester.create! season: 'Fall', year: 2018
semester.save!
# Taking a course and a list of courses, checks if the course is already in that
# list. If it isn't, create an active record and store it in the list for that
# course. If it is, grab the pre-existing database entry.
def get_course(course, all_courses)
all_courses.each do |c|
if c[:course_number] == course[:course_number]
return c[:db_object]
def parse_sections(subjects)
parser = PatriotWeb::Parser.new
sections_in = {}
threads = subjects.map do |subject|
Thread.new do
sections_in[subject] = parser.parse_courses_in_subject(subject)
end
end
course[:db_object] = Course.create!(course)
all_courses.push(course)
course[:db_object]
end
ThreadsWait.all_waits(*threads)
total.each do |subject, sections|
puts "Adding courses for #{subject}..."
all_sections = []
all_courses = []
sections_in
end
sections.each do |section|
if section.nil? || !section.key?(:subj) || !section.key?(:course_number)
puts "#{subject} failed section: #{section.class}"
next
# Persists the parsed sections of every subject.
#
# For each valid section the matching Course and Instructor are found or
# created, and all of a subject's sections are then created with a single
# CourseSection.create! call.
#
# @param sections_in [Hash{String => Array<Hash, nil>}] parsed sections keyed
#   by subject; entries that are nil or lack :subj / :course_number are
#   reported with `puts` and skipped
# @param semester [Object] semester the found-or-created courses belong to
# @return [Hash] the `sections_in` argument (result of `each`)
def load_sections(sections_in, semester)
  sections_in.each do |subject, sections|
    rows = []

    sections.each do |section|
      if section.nil? || !section.key?(:subj) || !section.key?(:course_number)
        puts "#{subject} failed section: #{section.class}"
        next
      end

      course = Course.find_or_create_by!(subject: section[:subj],
                                         course_number: section[:course_number],
                                         semester: semester)
      instructor = Instructor.find_or_create_by!(name: section[:instructor])
      rows.push(section_attributes(section, course, instructor))
    end

    CourseSection.create!(rows)
  end
end

# Builds the attribute hash for one CourseSection from a parsed section.
#
# @param section [Hash] parsed section data
# @param course [Object] course the section belongs to
# @param instructor [Object] instructor teaching the section
# @return [Hash] attributes accepted by CourseSection.create!
def section_attributes(section, course, instructor)
  {
    name: "#{section[:subj]} #{section[:course_number]} #{section[:section]}",
    crn: section[:crn],
    section_type: section[:type],
    title: section[:title],
    start_date: section[:start_date],
    end_date: section[:end_date],
    days: section[:days],
    start_time: section[:start_time],
    end_time: section[:end_time],
    location: section[:location],
    course: course,
    instructor: instructor
  }
end
# Deletes every Closure, CourseSection, Course and Semester row, in that order.
#
# @return [Object] whatever the final `Semester.delete_all` call returns
def wipe_db
  # `map(...).last` keeps the original return value: the last delete's result.
  [Closure, CourseSection, Course, Semester].map(&:delete_all).last
end
# Creates a Closure record for each day in Fall 2018 without classes.
# see: https://registrar.gmu.edu/calendars/fall-2018/
#
# @param semester [Object] semester the closures are attached to
# @return [Range] the range 10..19 (result of the final `each`)
def load_closures(semester)
  add_closure = lambda do |month, day|
    Closure.create!(date: Date.new(2018, month, day), semester: semester)
  end

  add_closure.call(9, 3)
  add_closure.call(10, 8)
  (21..25).each { |day| add_closure.call(11, day) }
  (10..19).each { |day| add_closure.call(12, day) }
end
# Seeds the database: parses subjects, catalog courses and Patriot Web
# sections, then replaces the stored data with the freshly parsed data.
#
# All parsing happens before the existing rows are deleted, so a failure
# while parsing leaves the current database contents untouched.
def main
  parser = PatriotWeb::Parser.new

  puts "Parsing subjects..."
  # Only the first semester returned by the parser is used.
  semester = parser.parse_semesters.first
  subjects = parser.parse_subjects(semester)

  puts "Parsing courses from catalog.gmu.edu..."
  courses = parse_courses(subjects)

  puts "Parsing sections from Patriot Web..."
  sections_in = parse_sections(subjects)

  wipe_db
  # NOTE(review): season/year are hard-coded rather than derived from the
  # parsed semester -- confirm this is intended.
  db_semester = Semester.create! season: 'Fall', year: 2018

  puts "Loading courses..."
  load_courses(courses, db_semester)

  puts "Loading sections..."
  load_sections(sections_in, db_semester)

  load_closures(db_semester)
end
# create closures for the days there will be no classes
# see: https://registrar.gmu.edu/calendars/fall-2018/
Closure.create! date: Date.new(2018, 9, 3), semester: semester
Closure.create! date: Date.new(2018, 10, 8), semester: semester
(21..25).each { |n| Closure.create! date: Date.new(2018, 11, n), semester: semester }
(10..19).each { |n| Closure.create! date: Date.new(2018, 12, n), semester: semester }
main
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment