Commit 894575fb authored by Zac Wood's avatar Zac Wood
Browse files

Merge branch '25-load-catalog' into 'dev-v2'

Resolve "Load course data from the GMU Catalog"

See merge request !24
parents 8970cc67 318dee77
Pipeline #2902 passed with stage
in 2 minutes and 15 seconds
require 'nokogiri'
require 'httparty'
require 'thwait'
require_relative 'patriot_web_parser'
# Returns a one-character UTF-8 string holding U+00A0 (NO-BREAK SPACE),
# used below as the delimiter when splitting a course code into subject and
# number.
def nbsp
  160.chr(Encoding::UTF_8)
end
# Scrapes the course listing for one subject from the GMU catalog.
#
# Requests https://catalog.gmu.edu/courses/<subj> and builds one hash per
# `.courseblock` element found in the returned HTML.
#
# @param subj [String] subject code interpolated into the catalog URL
# @return [Array<Hash>] one hash per course with the keys :subject, :title,
#   :course_number, :credits and :description; blocks whose title is missing
#   or has no subject code are omitted (never returned as nil)
def get_courses(subj)
  response = HTTParty.get("https://catalog.gmu.edu/courses/#{subj}")
  document = Nokogiri::HTML(response)

  courses = document.css('.courseblock').map do |course|
    title_node = course.css('.courseblocktitle').first
    next if title_node.nil?

    fields = parse_course_title(title_node.text)
    next if fields.nil?

    fields.merge(description: course.css('.courseblockdesc').text)
  end

  # `next` leaves nil in the mapped array; drop those so callers only ever
  # receive hashes.
  courses.compact
end

# Splits the text of a course title into its fields.
#
# The parsing assumes the shape "SUBJ<nbsp>NUM: Name. N credits." -- this is
# inferred from how the text is split, not checked against the live site.
#
# @param full_title [String] text of a `.courseblocktitle` element
# @return [Hash, nil] :subject, :title, :course_number and :credits, or nil
#   when no subject code precedes the first ': '
def parse_course_title(full_title)
  # Only the first ': ' separates the code from the rest, so names that
  # themselves contain ': ' stay intact.
  code, _colon, rest = full_title.strip.partition(': ')
  subject, number = code.split(nbsp)
  return if subject.nil?

  # The credits segment is the last one, so split on the LAST '. ' -- a name
  # such as "History of the U.S. Since 1900" contains '. ' as well.
  name, separator, credits_full = rest.rpartition('. ')
  if separator.empty?
    # No credits segment at all: keep the whole remainder as the name.
    name = rest
    credits_full = nil
  end

  {
    subject: subject,
    title: name,
    course_number: number,
    credits: credits_full && credits_full.split.first
  }
end
# Migration that adds four string columns to the `courses` table:
# description, credits, prerequisite and restrictions.
class AddFieldsToCourse < ActiveRecord::Migration[5.1]
  def change
    # Columns are added one at a time, in this order, each as :string.
    %i[description credits prerequisite restrictions].each do |column_name|
      add_column(:courses, column_name, :string)
    end
  end
end
# Migration that adds a string `title` column to the `courses` table.
class AddTitleToCourses < ActiveRecord::Migration[5.1]
  def change
    add_column(:courses, :title, :string)
  end
end
...@@ -76,6 +76,7 @@ module PatriotWeb ...@@ -76,6 +76,7 @@ module PatriotWeb
data_from rows data_from rows
end end
# Extract data about all course sections from the rows
def data_from(rows) def data_from(rows)
i = 0 i = 0
result = [] result = []
...@@ -95,7 +96,7 @@ module PatriotWeb ...@@ -95,7 +96,7 @@ module PatriotWeb
details = rows[i + 2].css('td table tr td') details = rows[i + 2].css('td table tr td')
unless !details.empty? unless !details.empty?
puts "#{full_name.join(' ')} is fake news" # puts "#{full_name.join(' ')} is fake news"
i += 1 i += 1
next next
end end
......
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
# #
# It's strongly recommended that you check this file into your version control system. # It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20180910213148) do ActiveRecord::Schema.define(version: 20180914210918) do
create_table "closures", force: :cascade do |t| create_table "closures", force: :cascade do |t|
t.date "date" t.date "date"
...@@ -49,6 +49,11 @@ ActiveRecord::Schema.define(version: 20180910213148) do ...@@ -49,6 +49,11 @@ ActiveRecord::Schema.define(version: 20180910213148) do
t.integer "semester_id" t.integer "semester_id"
t.datetime "created_at", null: false t.datetime "created_at", null: false
t.datetime "updated_at", null: false t.datetime "updated_at", null: false
t.string "description"
t.string "credits"
t.string "prerequisite"
t.string "restrictions"
t.string "title"
t.index ["semester_id"], name: "index_courses_on_semester_id" t.index ["semester_id"], name: "index_courses_on_semester_id"
end end
......
...@@ -2,99 +2,128 @@ ...@@ -2,99 +2,128 @@
# The data can then be loaded with the rails db:seed command (or created alongside the database with db:setup). # The data can then be loaded with the rails db:seed command (or created alongside the database with db:setup).
require_relative 'patriot_web_parser' require_relative 'patriot_web_parser'
require_relative 'courses_loader'
require 'thwait' require 'thwait'
require 'httparty' require 'httparty'
require 'nokogiri' require 'nokogiri'
require 'json' require 'json'
threads = [] def parse_courses(subjects)
total = {} courses = []
parser = PatriotWeb::Parser.new
# get the first semester only threads = subjects.map do |subject|
semester = parser.parse_semesters.first Thread.new do
courses.push(*get_courses(subject.downcase))
end
end
ThreadsWait.all_waits(*threads)
puts "DDOSing Patriot Web, buckle up kids" courses
end
# parse all subjects and their courses in the semester def load_courses(courses, semester)
parser.parse_subjects(semester).each do |subject| courses.each do |course|
puts "Getting courses for #{subject}" Course.create!(subject: course[:subject],
threads << Thread.new { title: course[:title],
total[subject] = parser.parse_courses_in_subject(subject) course_number: course[:course_number],
} credits: course[:credits],
description: course[:description],
semester: semester)
end
end end
# For testing, only get first subject def parse_sections(subjects)
# subject = parser.parse_subjects(semester)[0] parser = PatriotWeb::Parser.new
# total[subject] = parser.parse_courses_in_subject(subject) sections_in = {}
# wait for all the threads to finish threads = subjects.map do |subject|
ThreadsWait.all_waits(*threads) Thread.new do
sections_in[subject] = parser.parse_courses_in_subject(subject)
# delete everything in the current database
Closure.delete_all
CourseSection.delete_all
Course.delete_all
Semester.delete_all
# create a semester for the next semester
semester = Semester.create! season: 'Fall', year: 2018
semester.save!
# Taking a course and a list of courses, checks if the course is already in that
# list. If it isn't, create an active record and store it in the list for that
# course. If it is, grab the pre-existing database entry.
def get_course(course, all_courses)
all_courses.each do |c|
if c[:course_number] == course[:course_number]
return c[:db_object]
end end
end end
course[:db_object] = Course.create!(course) ThreadsWait.all_waits(*threads)
all_courses.push(course)
course[:db_object]
end
total.each do |subject, sections| sections_in
puts "Adding courses for #{subject}..." end
all_sections = []
all_courses = []
sections.each do |section| def load_sections(sections_in, semester)
if section.nil? || !section.key?(:subj) || !section.key?(:course_number) sections_in.each do |subject, sections|
puts "#{subject} failed section: #{section.class}" all_sections = []
next
sections.each do |section|
if section.nil? || !section.key?(:subj) || !section.key?(:course_number)
puts "#{subject} failed section: #{section.class}"
next
end
course = Course.find_or_create_by!(subject: section[:subj],
course_number: section[:course_number],
semester: semester)
instructor = Instructor.find_or_create_by!(name: section[:instructor])
section_name = "#{section[:subj]} #{section[:course_number]} #{section[:section]}"
all_sections.push(name: section_name,
crn: section[:crn],
section_type: section[:type],
title: section[:title],
start_date: section[:start_date],
end_date: section[:end_date],
days: section[:days],
start_time: section[:start_time],
end_time: section[:end_time],
location: section[:location],
course: course,
instructor: instructor)
end end
course = get_course({ subject: section[:subj], CourseSection.create!(all_sections)
course_number: section[:course_number],
semester: semester }, all_courses)
instructor = Instructor.find_or_create_by!(name: section[:instructor])
section_name = "#{section[:subj]} #{section[:course_number]} #{section[:section]}"
all_sections.push(name: section_name,
crn: section[:crn],
section_type: section[:type],
title: section[:title],
start_date: section[:start_date],
end_date: section[:end_date],
days: section[:days],
start_time: section[:start_time],
end_time: section[:end_time],
location: section[:location],
course: course,
instructor: instructor)
end end
end
def wipe_db
Closure.delete_all
CourseSection.delete_all
Course.delete_all
Semester.delete_all
end
def load_closures(semester)
# create closures for the days there will be no classes
# see: https://registrar.gmu.edu/calendars/fall-2018/
Closure.create! date: Date.new(2018, 9, 3), semester: semester
Closure.create! date: Date.new(2018, 10, 8), semester: semester
(21..25).each { |n| Closure.create! date: Date.new(2018, 11, n), semester: semester }
(10..19).each { |n| Closure.create! date: Date.new(2018, 12, n), semester: semester }
end
def main
wipe_db
parser = PatriotWeb::Parser.new
puts "Parsing subjects..."
semester = parser.parse_semesters.first
subjects = parser.parse_subjects(semester)
puts "Parsing courses from catalog.gmu.edu..."
courses = parse_courses(subjects)
db_semester = Semester.create! season: 'Fall', year: 2018
puts "Loading courses..."
load_courses(courses, db_semester)
puts "Parsing sections from Patriot Web..."
sections_in = parse_sections(subjects)
puts "Loading sections..."
load_sections(sections_in, db_semester)
CourseSection.create!(all_sections) load_closures(db_semester)
end end
# create closures for the days there will be no classes main
# see: https://registrar.gmu.edu/calendars/fall-2018/
Closure.create! date: Date.new(2018, 9, 3), semester: semester
Closure.create! date: Date.new(2018, 10, 8), semester: semester
(21..25).each { |n| Closure.create! date: Date.new(2018, 11, n), semester: semester }
(10..19).each { |n| Closure.create! date: Date.new(2018, 12, n), semester: semester }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment