Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SRCT
schedules
Commits
ac7fa097
Commit
ac7fa097
authored
Sep 17, 2018
by
Zac Wood
Browse files
Refactored seeds.rb. Added script to parse courses from GMU catalog
parent
e3c17737
Pipeline
#2900
failed with stage
in 2 minutes and 20 seconds
Changes
5
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
schedules_api/db/courses_loader.rb
0 → 100644
View file @
ac7fa097
require
'nokogiri'
require
'httparty'
require
'thwait'
require_relative
'patriot_web_parser'
# Returns the non-breaking space character (code point 160, U+00A0) as a
# one-character UTF-8 string.
def nbsp
  160.chr(Encoding::UTF_8)
end
# Scrapes the GMU catalog page for one subject and returns its courses.
#
# Requests https://catalog.gmu.edu/courses/<subj> and reads every
# `.courseblock` element. The `.courseblocktitle` text is parsed as
# "<SUBJECT><nbsp><NUMBER>: <title>. <credits text>"; that layout is inferred
# from the splitting below -- TODO confirm against the live catalog markup.
#
# @param subj [String] subject code, used as the last URL path segment
# @return [Array<Hash>] one hash per course with the keys :subject, :title,
#   :course_number, :credits and :description. Blocks with no title element or
#   no subject code are left out, so the array never contains nil.
def get_courses(subj)
  response = HTTParty.get("https://catalog.gmu.edu/courses/#{subj}")
  # Hand Nokogiri the raw markup string rather than the response wrapper.
  document = Nokogiri::HTML(response.body)

  parsed = document.css('.courseblock').map do |course|
    title_node = course.css('.courseblocktitle').first
    next if title_node.nil?

    # Split on the first ': ' only, so a ': ' inside the title is kept.
    code, rest = title_node.text.split(': ', 2)

    # Subject and number are separated by a non-breaking space. A separate
    # local is used so the `subj` argument is not overwritten.
    subject_code, num = code.to_s.split(nbsp)
    next if subject_code.nil?

    # NOTE(review): a title that itself contains '. ' is still cut at the
    # first occurrence, exactly as before -- confirm whether that matters.
    name, credits_full = rest.to_s.split('. ')
    # `to_s` keeps a title without a credits part from raising; credits is
    # then nil.
    credits_num = credits_full.to_s.split.first

    {
      subject: subject_code,
      title: name,
      course_number: num,
      credits: credits_num,
      description: course.css('.courseblockdesc').text
    }
  end

  # `next` inside `map` yields nil for skipped blocks; drop those entries.
  parsed.compact
end
schedules_api/db/migrate/20180914210918_add_title_to_courses.rb
0 → 100644
View file @
ac7fa097
# Migration that adds a string `title` column to the `courses` table.
class AddTitleToCourses < ActiveRecord::Migration[5.1]
  # Declared in `change` so the framework can derive the rollback itself.
  def change
    change_table :courses do |t|
      t.string :title
    end
  end
end
schedules_api/db/patriot_web_parser.rb
View file @
ac7fa097
...
...
@@ -76,6 +76,7 @@ module PatriotWeb
data_from
rows
end
# Extract data about all course sections from the rows
def
data_from
(
rows
)
i
=
0
result
=
[]
...
...
@@ -95,7 +96,7 @@ module PatriotWeb
details
=
rows
[
i
+
2
].
css
(
'td table tr td'
)
unless
!
details
.
empty?
puts
"
#{
full_name
.
join
(
' '
)
}
is fake news"
#
puts "#{full_name.join(' ')} is fake news"
i
+=
1
next
end
...
...
schedules_api/db/schema.rb
View file @
ac7fa097
...
...
@@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord
::
Schema
.
define
(
version:
20180914
141722
)
do
ActiveRecord
::
Schema
.
define
(
version:
20180914
210918
)
do
create_table
"closures"
,
force: :cascade
do
|
t
|
t
.
date
"date"
...
...
@@ -53,6 +53,7 @@ ActiveRecord::Schema.define(version: 20180914141722) do
t
.
string
"credits"
t
.
string
"prerequisite"
t
.
string
"restrictions"
t
.
string
"title"
t
.
index
[
"semester_id"
],
name:
"index_courses_on_semester_id"
end
...
...
schedules_api/db/seeds.rb
View file @
ac7fa097
...
...
@@ -2,99 +2,238 @@
# The data can then be loaded with the rails db:seed command (or created alongside the database with db:setup).
require_relative
'patriot_web_parser'
require_relative
'courses_loader'
require
'thwait'
require
'httparty'
require
'nokogiri'
require
'json'
threads
=
[]
total
=
{}
parser
=
PatriotWeb
::
Parser
.
new
def
parse_courses
(
subjects
)
courses
=
[]
# get the first semester only
semester
=
parser
.
parse_semesters
.
first
threads
=
subjects
.
map
do
|
subject
|
Thread
.
new
do
courses
.
push
(
*
get_courses
(
subject
.
downcase
))
end
end
ThreadsWait
.
all_waits
(
*
threads
)
puts
"DDOSing Patriot Web, buckle up kids"
courses
end
# parse all subjects and their courses in the semester
parser
.
parse_subjects
(
semester
).
each
do
|
subject
|
puts
"Getting courses for
#{
subject
}
"
threads
<<
Thread
.
new
{
total
[
subject
]
=
parser
.
parse_courses_in_subject
(
subject
)
}
# Creates one Course record per scraped course hash.
#
# @param courses [Array<Hash>] hashes read through the keys :subject, :title,
#   :course_number, :credits and :description
# @param semester [Object] value stored as each new record's `semester`
# @return [Array<Hash>] the `courses` argument, unchanged
def load_courses(courses, semester)
  copied_keys = %i[subject title course_number credits description]

  courses.each do |course|
    # A key missing from the hash is passed on as nil.
    attributes = copied_keys.map { |key| [key, course[key]] }.to_h
    attributes[:semester] = semester
    Course.create!(attributes)
  end
end
# For testing, only get first subject
# subject = parser.parse_subjects(semester)[0]
# total[subject] = parser.parse_courses_in_subject(subject)
# wait for all the threads to finish
ThreadsWait
.
all_waits
(
*
threads
)
# delete everything in the current database
Closure
.
delete_all
CourseSection
.
delete_all
Course
.
delete_all
Semester
.
delete_all
# create a semester for the next semester
semester
=
Semester
.
create!
season:
'Fall'
,
year:
2018
semester
.
save!
# Taking a course and a list of courses, checks if the course is already in that
# list. If it isn't, create an active record and store it in the list for that
# course. If it is, grab the pre-existing database entry.
def
get_course
(
course
,
all_courses
)
all_courses
.
each
do
|
c
|
if
c
[
:course_number
]
==
course
[
:course_number
]
return
c
[
:db_object
]
def
parse_sections
(
subjects
)
parser
=
PatriotWeb
::
Parser
.
new
sections_in
=
{}
threads
=
subjects
.
map
do
|
subject
|
Thread
.
new
do
sections_in
[
subject
]
=
parser
.
parse_courses_in_subject
(
subject
)
end
end
course
[
:db_object
]
=
Course
.
create!
(
course
)
all_courses
.
push
(
course
)
course
[
:db_object
]
end
ThreadsWait
.
all_waits
(
*
threads
)
total
.
each
do
|
subject
,
sections
|
puts
"Adding courses for
#{
subject
}
..."
all_sections
=
[]
all_courses
=
[]
sections_in
end
sections
.
each
do
|
section
|
if
section
.
nil?
||
!
section
.
key?
(
:subj
)
||
!
section
.
key?
(
:course_number
)
puts
"
#{
subject
}
failed section:
#{
section
.
class
}
"
next
def
load_sections
(
sections_in
,
semester
)
semester
=
Semester
.
find_by
season:
'Fall'
,
year:
2018
sections_in
.
each
do
|
subject
,
sections
|
all_sections
=
[]
sections
.
each
do
|
section
|
if
section
.
nil?
||
!
section
.
key?
(
:subj
)
||
!
section
.
key?
(
:course_number
)
puts
"
#{
subject
}
failed section:
#{
section
.
class
}
"
next
end
course
=
Course
.
find_or_create_by!
(
subject:
section
[
:subj
],
course_number:
section
[
:course_number
],
semester:
semester
)
instructor
=
Instructor
.
find_or_create_by!
(
name:
section
[
:instructor
])
section_name
=
"
#{
section
[
:subj
]
}
#{
section
[
:course_number
]
}
#{
section
[
:section
]
}
"
all_sections
.
push
(
name:
section_name
,
crn:
section
[
:crn
],
section_type:
section
[
:type
],
title:
section
[
:title
],
start_date:
section
[
:start_date
],
end_date:
section
[
:end_date
],
days:
section
[
:days
],
start_time:
section
[
:start_time
],
end_time:
section
[
:end_time
],
location:
section
[
:location
],
course:
course
,
instructor:
instructor
)
end
course
=
get_course
({
subject:
section
[
:subj
],
course_number:
section
[
:course_number
],
semester:
semester
},
all_courses
)
instructor
=
Instructor
.
find_or_create_by!
(
name:
section
[
:instructor
])
section_name
=
"
#{
section
[
:subj
]
}
#{
section
[
:course_number
]
}
#{
section
[
:section
]
}
"
all_sections
.
push
(
name:
section_name
,
crn:
section
[
:crn
],
section_type:
section
[
:type
],
title:
section
[
:title
],
start_date:
section
[
:start_date
],
end_date:
section
[
:end_date
],
days:
section
[
:days
],
start_time:
section
[
:start_time
],
end_time:
section
[
:end_time
],
location:
section
[
:location
],
course:
course
,
instructor:
instructor
)
CourseSection
.
create!
(
all_sections
)
end
end
# Calls `delete_all` on Closure, CourseSection, Course and Semester, in that
# order, and returns what the last call (Semester) returned.
def wipe_db
  [Closure, CourseSection, Course, Semester].map(&:delete_all).last
end
# Creates the Closure records for the days with no classes in Fall 2018.
# see: https://registrar.gmu.edu/calendars/fall-2018/
#
# The semester is looked up with season 'Fall' and year 2018. Closures are
# created for Sep 3, Oct 8, Nov 21-25 and Dec 10-19 of 2018.
#
# @return [Range] the final day range, as returned by its `each`
def load_closures
  semester = Semester.find_by(season: 'Fall', year: 2018)

  # Every closure shares the same year and semester; only month/day vary.
  close_on = lambda do |month, day|
    Closure.create!(date: Date.new(2018, month, day), semester: semester)
  end

  close_on.call(9, 3)
  close_on.call(10, 8)
  21.upto(25) { |day| close_on.call(11, day) }
  (10..19).each { |day| close_on.call(12, day) }
end
def
main
wipe_db
parser
=
PatriotWeb
::
Parser
.
new
puts
"Parsing subjects..."
semester
=
parser
.
parse_semesters
.
first
subjects
=
parser
.
parse_subjects
(
semester
)
puts
"Parsing courses from catalog.gmu.edu..."
courses
=
parse_courses
(
subjects
)
db_semester
=
Semester
.
create!
season:
'Fall'
,
year:
2018
puts
"Loading courses..."
load_courses
(
courses
,
db_semester
)
puts
"Parsing sections from Patriot Web..."
sections_in
=
parse_sections
(
subjects
)
puts
"Loading sections..."
load_sections
(
sections_in
,
db_semester
)
CourseSection
.
create!
(
all_sections
)
load_closures
end
# create closures for the days there will be no classes
# see: https://registrar.gmu.edu/calendars/fall-2018/
Closure
.
create!
date:
Date
.
new
(
2018
,
9
,
3
),
semester:
semester
Closure
.
create!
date:
Date
.
new
(
2018
,
10
,
8
),
semester:
semester
(
21
..
25
).
each
{
|
n
|
Closure
.
create!
date:
Date
.
new
(
2018
,
11
,
n
),
semester:
semester
}
(
10
..
19
).
each
{
|
n
|
Closure
.
create!
date:
Date
.
new
(
2018
,
12
,
n
),
semester:
semester
}
main
# threads = []
# total = {}
# courses = []
# # get the first semester only
# semester = parser.parse_semesters.first
# puts "DDOSing Patriot Web, buckle up kids"
# # parse all subjects and their courses in the semester
# parser.parse_subjects(semester).each do |subject|
# puts "Getting courses for #{subject}"
# threads << Thread.new {
# courses.push(*get_courses(subject.downcase))
# }
# threads << Thread.new {
# total[subject] = parser.parse_courses_in_subject(subject)
# }
# end
# puts courses.length
# # For testing, only get first subject
# # subject = parser.parse_subjects(semester)[0]
# # total[subject] = parser.parse_courses_in_subject(subject)
# # wait for all the threads to finish
# ThreadsWait.all_waits(*threads)
# # delete everything in the current database
# Closure.delete_all
# CourseSection.delete_all
# Course.delete_all
# Semester.delete_all
# # create a semester for the next semester
# semester = Semester.create! season: 'Fall', year: 2018
# semester.save!
# puts "Adding courses..."
# courses.each do |course|
# Course.create!(subject: course[:subject],
# title: course[:title],
# course_number: course[:course_number],
# credits: course[:credits],
# description: course[:description],
# semester: semester)
# end
# # Taking a course and a list of courses, checks if the course is already in that
# # list. If it isn't, create an active record and store it in the list for that
# # course. If it is, grab the pre-existing database entry.
# def get_course(course, all_courses)
# all_courses.each do |c|
# if c[:course_number] == course[:course_number]
# return c[:db_object]
# end
# end
# course[:db_object] = Course.create!(course)
# all_courses.push(course)
# course[:db_object]
# end
# total.each do |subject, sections|
# # puts "Adding courses for #{subject}..."
# all_sections = []
# all_courses = []
# sections.each do |section|
# if section.nil? || !section.key?(:subj) || !section.key?(:course_number)
# puts "#{subject} failed section: #{section.class}"
# next
# end
# course = get_course({ subject: section[:subj],
# course_number: section[:course_number],
# semester: semester }, all_courses)
# instructor = Instructor.find_or_create_by!(name: section[:instructor])
# section_name = "#{section[:subj]} #{section[:course_number]} #{section[:section]}"
# all_sections.push(name: section_name,
# crn: section[:crn],
# section_type: section[:type],
# title: section[:title],
# start_date: section[:start_date],
# end_date: section[:end_date],
# days: section[:days],
# start_time: section[:start_time],
# end_time: section[:end_time],
# location: section[:location],
# course: course,
# instructor: instructor)
# end
# CourseSection.create!(all_sections)
# end
# # create closures for the days there will be no classes
# # see: https://registrar.gmu.edu/calendars/fall-2018/
# Closure.create! date: Date.new(2018, 9, 3), semester: semester
# Closure.create! date: Date.new(2018, 10, 8), semester: semester
# (21..25).each { |n| Closure.create! date: Date.new(2018, 11, n), semester: semester }
# (10..19).each { |n| Closure.create! date: Date.new(2018, 12, n), semester: semester }
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment