Commit ac7fa097 authored by Zac Wood
Browse files

Refactored seeds.rb. Added script to parse courses from GMU catalog

parent e3c17737
Pipeline #2900 failed with stage
in 2 minutes and 20 seconds
require 'nokogiri'
require 'httparty'
require 'thwait'
require_relative 'patriot_web_parser'
# Returns the Unicode no-break space (code point 160, U+00A0) as a
# one-character UTF-8 string.
def nbsp
  160.chr(Encoding::UTF_8)
end
# Splits the text of a course title into its parts.
#
# The text is split at the first ": " into a course code and a remainder.
# The code is split on the no-break space into subject and number. The
# remainder is split at its LAST ". " so that a name which itself contains
# ". " (for example "U.S. History") is kept whole; the first word after that
# separator is taken as the credits value.
#
# full_title - String text of a title element.
#
# Returns a Hash with :subject, :title, :course_number and :credits, or nil
# when the text has no ": " separator or no subject before it. :credits is
# nil when the remainder has no ". " separator.
def parse_course_title(full_title)
  code, separator, remainder = full_title.strip.partition(': ')
  return nil if separator.empty?

  subject, number = code.split(nbsp)
  return nil if subject.nil?

  name, dot, credits_text = remainder.rpartition('. ')
  if dot.empty?
    # No ". " at all: the whole remainder is the name and there are no credits.
    name = credits_text
    credits_text = nil
  end

  {
    subject: subject,
    title: name,
    course_number: number,
    credits: credits_text ? credits_text.split.first : nil
  }
end

# Downloads https://catalog.gmu.edu/courses/<subj> and extracts one Hash per
# '.courseblock' element found in the page.
#
# subj - String path segment appended to the catalog URL.
#
# Returns an Array of Hashes with the keys :subject, :title, :course_number,
# :credits and :description. Blocks that have no '.courseblocktitle' element
# or whose title cannot be parsed are left out, so the Array never contains
# nil entries.
def get_courses(subj)
  response = HTTParty.get("https://catalog.gmu.edu/courses/#{subj}")
  document = Nokogiri::HTML(response.body)

  parsed = document.css('.courseblock').map do |course|
    title_node = course.css('.courseblocktitle').first
    next if title_node.nil?

    fields = parse_course_title(title_node.text)
    next if fields.nil?

    fields.merge(description: course.css('.courseblockdesc').text)
  end

  # `next` inside map yields nil for the skipped blocks; drop them here.
  parsed.compact
end
# Migration that adds a string column named "title" to the courses table.
class AddTitleToCourses < ActiveRecord::Migration[5.1]
  # Applies the migration: creates the column.
  def up
    add_column :courses, :title, :string
  end

  # Rolls the migration back: drops the column again.
  def down
    remove_column :courses, :title, :string
  end
end
......@@ -76,6 +76,7 @@ module PatriotWeb
data_from rows
end
# Extract data about all course sections from the rows
def data_from(rows)
i = 0
result = []
......@@ -95,7 +96,7 @@ module PatriotWeb
details = rows[i + 2].css('td table tr td')
unless !details.empty?
puts "#{full_name.join(' ')} is fake news"
# puts "#{full_name.join(' ')} is fake news"
i += 1
next
end
......
......@@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20180914141722) do
ActiveRecord::Schema.define(version: 20180914210918) do
create_table "closures", force: :cascade do |t|
t.date "date"
......@@ -53,6 +53,7 @@ ActiveRecord::Schema.define(version: 20180914141722) do
t.string "credits"
t.string "prerequisite"
t.string "restrictions"
t.string "title"
t.index ["semester_id"], name: "index_courses_on_semester_id"
end
......
......@@ -2,99 +2,238 @@
# The data can then be loaded with the rails db:seed command (or created alongside the database with db:setup).
require_relative 'patriot_web_parser'
require_relative 'courses_loader'
require 'thwait'
require 'httparty'
require 'nokogiri'
require 'json'
threads = []
total = {}
parser = PatriotWeb::Parser.new
# Collects the catalog courses of every subject, one thread per subject.
#
# Each thread calls get_courses with the lower-cased subject. A subject whose
# fetch raises is reported on stderr and contributes no courses, so one bad
# page does not discard the results of the others.
#
# subjects - Array of subject Strings.
#
# Returns a flat Array of the course Hashes returned by get_courses, in the
# order of `subjects`, with nil entries removed.
def parse_courses(subjects)
  workers = subjects.map do |subject|
    Thread.new do
      begin
        get_courses(subject.downcase)
      rescue StandardError => e
        warn "Could not fetch courses for #{subject}: #{e.message}"
        []
      end
    end
  end

  # Thread#value joins each thread and hands back its block's result, so no
  # array is shared between threads and the output order is deterministic.
  workers.flat_map(&:value).compact
end
# parse all subjects and their courses in the semester
parser.parse_subjects(semester).each do |subject|
puts "Getting courses for #{subject}"
threads << Thread.new {
total[subject] = parser.parse_courses_in_subject(subject)
}
# Creates one Course record per entry of `courses`.
#
# courses  - Array of Hashes read through the keys :subject, :title,
#            :course_number, :credits and :description.
# semester - value stored as the :semester attribute of every record.
#
# Returns `courses`.
def load_courses(courses, semester)
  copied_fields = %i[subject title course_number credits description]

  courses.each do |course|
    attributes = copied_fields.map { |field| [field, course[field]] }.to_h
    attributes[:semester] = semester
    Course.create!(attributes)
  end
end
# For testing, only get first subject
# subject = parser.parse_subjects(semester)[0]
# total[subject] = parser.parse_courses_in_subject(subject)
# wait for all the threads to finish
ThreadsWait.all_waits(*threads)
# delete everything in the current database
Closure.delete_all
CourseSection.delete_all
Course.delete_all
Semester.delete_all
# create a semester for the next semester
semester = Semester.create! season: 'Fall', year: 2018
semester.save!
# Taking a course and a list of courses, checks if the course is already in that
# list. If it isn't, create an active record and store it in the list for that
# course. If it is, grab the pre-existing database entry.
def get_course(course, all_courses)
all_courses.each do |c|
if c[:course_number] == course[:course_number]
return c[:db_object]
def parse_sections(subjects)
parser = PatriotWeb::Parser.new
sections_in = {}
threads = subjects.map do |subject|
Thread.new do
sections_in[subject] = parser.parse_courses_in_subject(subject)
end
end
course[:db_object] = Course.create!(course)
all_courses.push(course)
course[:db_object]
end
ThreadsWait.all_waits(*threads)
total.each do |subject, sections|
puts "Adding courses for #{subject}..."
all_sections = []
all_courses = []
sections_in
end
sections.each do |section|
if section.nil? || !section.key?(:subj) || !section.key?(:course_number)
puts "#{subject} failed section: #{section.class}"
next
# Creates CourseSection records (and the Course / Instructor records they
# point at) from parsed section data.
#
# sections_in - Hash mapping a subject to an Array of section Hashes. Entries
#               that are nil or lack :subj or :course_number are reported
#               with puts and skipped.
# semester    - semester the courses are attached to. When nil, the
#               Fall 2018 Semester record is looked up instead.
#
# Returns `sections_in`.
def load_sections(sections_in, semester)
  # Honour the caller's semester; only fall back to the lookup when none is given.
  semester ||= Semester.find_by(season: 'Fall', year: 2018)

  sections_in.each do |subject, sections|
    all_sections = []

    sections.each do |section|
      if section.nil? || !section.key?(:subj) || !section.key?(:course_number)
        puts "#{subject} failed section: #{section.class}"
        next
      end

      course = Course.find_or_create_by!(subject: section[:subj],
                                         course_number: section[:course_number],
                                         semester: semester)
      instructor = Instructor.find_or_create_by!(name: section[:instructor])
      section_name = "#{section[:subj]} #{section[:course_number]} #{section[:section]}"

      all_sections.push(name: section_name,
                        crn: section[:crn],
                        section_type: section[:type],
                        title: section[:title],
                        start_date: section[:start_date],
                        end_date: section[:end_date],
                        days: section[:days],
                        start_time: section[:start_time],
                        end_time: section[:end_time],
                        location: section[:location],
                        course: course,
                        instructor: instructor)
    end

    # One create! call per subject with all of that subject's sections.
    CourseSection.create!(all_sections)
  end
end
# Empties the Closure, CourseSection, Course and Semester tables, in that
# order, using delete_all on each model.
#
# Returns whatever Semester.delete_all returns.
def wipe_db
  [Closure, CourseSection, Course].each(&:delete_all)
  Semester.delete_all
end
# Creates a Closure record for every day without classes in Fall 2018 and
# attaches each one to the Semester found by season 'Fall' and year 2018.
# see: https://registrar.gmu.edu/calendars/fall-2018/
#
# Dates created, in order: Sep 3, Oct 8, Nov 21-25, Dec 10-19 (all 2018).
def load_closures
  semester = Semester.find_by(season: 'Fall', year: 2018)
  close_on = lambda do |month, day|
    Closure.create!(date: Date.new(2018, month, day), semester: semester)
  end

  close_on.call(9, 3)
  close_on.call(10, 8)
  21.upto(25) { |day| close_on.call(11, day) }
  (10..19).each { |day| close_on.call(12, day) }
end
# Entry point of the seed script: scrapes subjects, catalog courses and
# sections, then rebuilds the database from the scraped data.
#
# All scraping happens before wipe_db is called, so a failure while
# downloading or parsing leaves the existing database contents untouched.
def main
  parser = PatriotWeb::Parser.new

  puts "Parsing subjects..."
  # Only the first semester returned by the parser is used.
  semester = parser.parse_semesters.first
  subjects = parser.parse_subjects(semester)

  puts "Parsing courses from catalog.gmu.edu..."
  courses = parse_courses(subjects)

  puts "Parsing sections from Patriot Web..."
  sections_in = parse_sections(subjects)

  wipe_db
  db_semester = Semester.create! season: 'Fall', year: 2018

  puts "Loading courses..."
  load_courses(courses, db_semester)

  puts "Loading sections..."
  load_sections(sections_in, db_semester)

  load_closures
end
# create closures for the days there will be no classes
# see: https://registrar.gmu.edu/calendars/fall-2018/
Closure.create! date: Date.new(2018, 9, 3), semester: semester
Closure.create! date: Date.new(2018, 10, 8), semester: semester
(21..25).each { |n| Closure.create! date: Date.new(2018, 11, n), semester: semester }
(10..19).each { |n| Closure.create! date: Date.new(2018, 12, n), semester: semester }
main
# threads = []
# total = {}
# courses = []
# # get the first semester only
# semester = parser.parse_semesters.first
# puts "DDOSing Patriot Web, buckle up kids"
# # parse all subjects and their courses in the semester
# parser.parse_subjects(semester).each do |subject|
# puts "Getting courses for #{subject}"
# threads << Thread.new {
# courses.push(*get_courses(subject.downcase))
# }
# threads << Thread.new {
# total[subject] = parser.parse_courses_in_subject(subject)
# }
# end
# puts courses.length
# # For testing, only get first subject
# # subject = parser.parse_subjects(semester)[0]
# # total[subject] = parser.parse_courses_in_subject(subject)
# # wait for all the threads to finish
# ThreadsWait.all_waits(*threads)
# # delete everything in the current database
# Closure.delete_all
# CourseSection.delete_all
# Course.delete_all
# Semester.delete_all
# # create a semester for the next semester
# semester = Semester.create! season: 'Fall', year: 2018
# semester.save!
# puts "Adding courses..."
# courses.each do |course|
# Course.create!(subject: course[:subject],
# title: course[:title],
# course_number: course[:course_number],
# credits: course[:credits],
# description: course[:description],
# semester: semester)
# end
# # Taking a course and a list of courses, checks if the course is already in that
# # list. If it isn't, create an active record and store it in the list for that
# # course. If it is, grab the pre-existing database entry.
# def get_course(course, all_courses)
# all_courses.each do |c|
# if c[:course_number] == course[:course_number]
# return c[:db_object]
# end
# end
# course[:db_object] = Course.create!(course)
# all_courses.push(course)
# course[:db_object]
# end
# total.each do |subject, sections|
# # puts "Adding courses for #{subject}..."
# all_sections = []
# all_courses = []
# sections.each do |section|
# if section.nil? || !section.key?(:subj) || !section.key?(:course_number)
# puts "#{subject} failed section: #{section.class}"
# next
# end
# course = get_course({ subject: section[:subj],
# course_number: section[:course_number],
# semester: semester }, all_courses)
# instructor = Instructor.find_or_create_by!(name: section[:instructor])
# section_name = "#{section[:subj]} #{section[:course_number]} #{section[:section]}"
# all_sections.push(name: section_name,
# crn: section[:crn],
# section_type: section[:type],
# title: section[:title],
# start_date: section[:start_date],
# end_date: section[:end_date],
# days: section[:days],
# start_time: section[:start_time],
# end_time: section[:end_time],
# location: section[:location],
# course: course,
# instructor: instructor)
# end
# CourseSection.create!(all_sections)
# end
# # create closures for the days there will be no classes
# # see: https://registrar.gmu.edu/calendars/fall-2018/
# Closure.create! date: Date.new(2018, 9, 3), semester: semester
# Closure.create! date: Date.new(2018, 10, 8), semester: semester
# (21..25).each { |n| Closure.create! date: Date.new(2018, 11, n), semester: semester }
# (10..19).each { |n| Closure.create! date: Date.new(2018, 12, n), semester: semester }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment