Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Khalid Ali
schedules
Commits
52c31168
Commit
52c31168
authored
May 09, 2018
by
Zac Wood
Browse files
Small refactor of patriot web parser, moved to seeds
parent
683bbe4d
Changes
6
Hide whitespace changes
Inline
Side-by-side
.gitignore
0 → 100644
View file @
52c31168
.vscode
\ No newline at end of file
parser/patriot_web_networker.rb
0 → 100644
View file @
52c31168
require
'httparty'
module PatriotWeb
  # Thin HTTP client for the PatriotWeb schedule pages. Each method issues one
  # request through HTTParty and returns HTTParty's result unchanged.
  class Networker
    # Term code used when the caller does not supply one; this is the value
    # that used to be hard-coded into the course query.
    DEFAULT_TERM = '201870'.freeze

    # Fetches the dynamic-schedule landing page.
    #
    # @return the HTTParty response for the GET request
    def fetch_page_containing_semester_data
      HTTParty.get('https://patriotweb.gmu.edu/pls/prod/bwckschd.p_disp_dyn_sched')
    end

    # Posts the term-selection form for one semester.
    #
    # @param semester_id [#to_s] term code interpolated into the form body
    # @return the HTTParty response for the POST request
    def fetch_subjects(semester_id)
      HTTParty.post(
        'https://patriotweb.gmu.edu/pls/prod/bwckgens.p_proc_term_date',
        body: "p_calling_proc=bwckschd.p_disp_dyn_sched&p_term=#{semester_id}&p_by_date=Y&p_from_date=&p_to_date=",
        headers: form_headers
      )
    end

    # Posts the course-search form for one subject.
    #
    # @param subject [#to_s] subject code interpolated into the form body
    # @param semester_id [#to_s] term code to query; defaults to DEFAULT_TERM so
    #   existing one-argument callers send exactly the same request as before
    # @return the HTTParty response for the POST request
    def fetch_courses_in_subject(subject, semester_id = DEFAULT_TERM)
      # The form sends `sel_subj` twice (a "dummy" placeholder and the real
      # value), so the body is kept as a raw string rather than a Hash.
      body = "term_in=#{semester_id}" \
             '&sel_subj=dummy&sel_day=dummy&sel_schd=dummy&sel_insm=dummy&sel_camp=dummy' \
             '&sel_levl=dummy&sel_sess=dummy&sel_instr=dummy&sel_ptrm=dummy&sel_attr=dummy' \
             "&sel_subj=#{subject}" \
             '&sel_crse=&sel_title=&sel_schd=%25&sel_from_cred=&sel_to_cred=&sel_camp=%25' \
             '&sel_levl=%25&sel_ptrm=%25&sel_instr=%25' \
             '&begin_hh=0&begin_mi=0&begin_ap=x&end_hh=0&end_mi=0&end_ap=x'

      HTTParty.post(
        'https://patriotweb.gmu.edu/pls/prod/bwckschd.p_get_crse_unsec',
        body: body,
        headers: form_headers
      )
    end

    private

    # Headers shared by both form posts. A fresh Hash is built per call so the
    # HTTP library is free to modify it.
    def form_headers
      { 'Content-Type' => 'application/x-www-form-urlencoded', 'charset' => 'utf-8' }
    end
  end
end
parser/patriot_web_parser.rb
0 → 100644
View file @
52c31168
require
'./patriot_web_networker.rb'
require
'nokogiri'
# Reopens the core String class to add a letters-only predicate.
class String
  # @return [Boolean] true when the string is non-empty and consists solely of
  #   characters matching [[:alpha:]]
  def alpha?
    # \A and \z anchor the whole string. The previous ^ and $ anchors match per
    # line, so a multi-line value such as "abc\n123" was reported as alphabetic.
    match?(/\A[[:alpha:]]+\z/)
  end
end
module PatriotWeb
  # Scrapes semesters, subjects and course sections out of the HTML that
  # PatriotWeb::Networker downloads.
  class Parser
    def initialize
      @networker = PatriotWeb::Networker.new
    end

    # @return [Array<String>] option values that start with "20"
    def parse_semesters
      response = @networker.fetch_page_containing_semester_data
      get_semesters_from_option_values(Nokogiri::HTML(response))
    end

    # @param semester_id [String] term code passed through to the networker
    # @return [Array<String>] purely alphabetic option values of the
    #   "subj_id" select element
    def parse_subjects(semester_id)
      response = @networker.fetch_subjects(semester_id)
      get_alpha_option_values(Nokogiri::HTML(response))
    end

    # @param subject [String] subject code passed through to the networker
    # @return [Hash{String => Hash}] one entry per section, keyed by CRN
    def parse_courses_in_subject(subject)
      response = @networker.fetch_courses_in_subject(subject)
      feed_course_info(Nokogiri::HTML(response))
    end

    private

    # Values of the subject <option>s whose stripped text is alphabetic.
    # Non-matching options are dropped instead of being returned as nil.
    def get_alpha_option_values(searcher)
      option_values(searcher.xpath('//*[@id="subj_id"]/option')).select do |value|
        value.strip.alpha?
      end
    end

    # Values of every <option> whose value starts with "20".
    # Non-matching options are dropped instead of being returned as nil.
    def get_semesters_from_option_values(searcher)
      option_values(searcher.css('option')).select { |value| value.start_with?('20') }
    end

    # Collects the "value" attribute of each option, skipping options that
    # have none (calling String methods on nil used to raise here).
    def option_values(options)
      options.map { |opt| opt.attr('value') }.compact
    end

    # Walks the rows of the first "datadisplaytable" and builds the section
    # hash. Returns an empty hash when the page contains no such table.
    def feed_course_info(searcher)
      tables = searcher.css('html body div.pagebodydiv table.datadisplaytable')
      table = tables.css('table.datadisplaytable').first
      data = {}
      return data unless table

      currentobj = nil
      table.children.each do |row|
        next unless row.name == 'tr'

        row.children.each { |item| currentobj = sort_item(item, currentobj, data) }
      end
      data
    end

    # Dispatches one cell of a result row.
    #
    # A <th> containing "-" starts a new section; any other element adds field
    # values to the section in progress. Returns the section hash that later
    # cells should keep filling (nil while no section has been started).
    def sort_item(item, currentobj, data)
      if item.name == 'th'
        item.to_html.include?('-') ? start_section(item, data) : currentobj
      elsif currentobj && item.is_a?(Nokogiri::XML::Element)
        # The currentobj check comes first: a detail cell seen before any
        # title used to crash on nil[:fields].
        fill_fields(item, currentobj)
      else
        currentobj
      end
    end

    # Parses a title cell of the form "name - crn - subj code - sect",
    # registers the section in +data+ and returns its hash with an empty
    # :fields list.
    def start_section(item, data)
      # " - Honors" is part of the course name, not a separator.
      titledetails = item.text.gsub(' - Honors', ' (Honors)').split(' - ')
      if titledetails.count > 4
        # The name itself contained " - "; fold its first two pieces back together.
        # NOTE(review): the joining whitespace is assumed to be one space — confirm.
        titledetails = ["#{titledetails[0]} #{titledetails[1]}", *titledetails[2..4]]
      end
      titledata = titledetails[2].split(' ')

      begin
        # get_details mutates +data+ in place, so one call is enough.
        currentobj = get_details(data, titledetails, titledata)[1]
      rescue StandardError => e
        puts item
        puts e
        exit(1)
      end
      currentobj[:fields] = []
      currentobj
    end

    # Appends the normalised <th> labels found in +item+ to the section's
    # :fields list, then stores the text of the <td> at the same position
    # under each label. Labels without a matching <td> are left unset
    # instead of raising.
    def fill_fields(item, currentobj)
      fields = currentobj[:fields]
      return currentobj unless fields

      item.css('th').each { |field| fields.push(field.text.downcase.tr(' ', '_')) }
      cells = item.css('td')
      fields.each_with_index do |field, index|
        cell = cells[index]
        currentobj[field] = cell.text if cell
      end
      currentobj
    end

    # Stores a fresh section hash in +data+ under the stripped CRN.
    #
    # @param data [Hash] accumulator, mutated in place
    # @param titledetails [Array<String>] [name, crn, "subj code", sect]
    # @param titledata [Array<String>] [subj, code] (the caller passes
    #   titledetails[2] split on whitespace)
    # @return [Array(Hash, Hash)] +data+ and the new section hash
    def get_details(data, titledetails, titledata)
      crn = titledetails[1].strip
      data[crn] = {
        subj: titledata[0].strip,
        code: titledata[1].strip,
        sect: titledetails[3].strip,
        crn: crn,
        name: titledetails[0].strip
      }
      [data, data[crn]]
    end
  end
end
# Manual smoke test: prints the first semester id and the parsed courses of
# the first subject in that semester. Guarded so that merely requiring this
# file does not fire network requests.
if __FILE__ == $PROGRAM_NAME
  parser = PatriotWeb::Parser.new

  semesters = parser.parse_semesters.compact
  puts semesters.first

  # compact: the subject list may contain nil placeholders for options that
  # were filtered out, and `first` must not pick one of those.
  subjects = parser.parse_subjects(semesters.first).compact
  puts parser.parse_courses_in_subject(subjects.first)
end
schedules/db/patriot_web_networker.rb
0 → 100644
View file @
52c31168
require
'httparty'
module PatriotWeb
  # Thin HTTP client for the PatriotWeb schedule pages. Each method issues one
  # request through HTTParty and returns HTTParty's result unchanged.
  class Networker
    # Term code used when the caller does not supply one; this is the value
    # that used to be hard-coded into the course query.
    DEFAULT_TERM = '201870'.freeze

    # Fetches the dynamic-schedule landing page.
    #
    # @return the HTTParty response for the GET request
    def fetch_page_containing_semester_data
      HTTParty.get('https://patriotweb.gmu.edu/pls/prod/bwckschd.p_disp_dyn_sched')
    end

    # Posts the term-selection form for one semester.
    #
    # @param semester_id [#to_s] term code interpolated into the form body
    # @return the HTTParty response for the POST request
    def fetch_subjects(semester_id)
      HTTParty.post(
        'https://patriotweb.gmu.edu/pls/prod/bwckgens.p_proc_term_date',
        body: "p_calling_proc=bwckschd.p_disp_dyn_sched&p_term=#{semester_id}&p_by_date=Y&p_from_date=&p_to_date=",
        headers: form_headers
      )
    end

    # Posts the course-search form for one subject.
    #
    # @param subject [#to_s] subject code interpolated into the form body
    # @param semester_id [#to_s] term code to query; defaults to DEFAULT_TERM so
    #   existing one-argument callers send exactly the same request as before
    # @return the HTTParty response for the POST request
    def fetch_courses_in_subject(subject, semester_id = DEFAULT_TERM)
      # The form sends `sel_subj` twice (a "dummy" placeholder and the real
      # value), so the body is kept as a raw string rather than a Hash.
      body = "term_in=#{semester_id}" \
             '&sel_subj=dummy&sel_day=dummy&sel_schd=dummy&sel_insm=dummy&sel_camp=dummy' \
             '&sel_levl=dummy&sel_sess=dummy&sel_instr=dummy&sel_ptrm=dummy&sel_attr=dummy' \
             "&sel_subj=#{subject}" \
             '&sel_crse=&sel_title=&sel_schd=%25&sel_from_cred=&sel_to_cred=&sel_camp=%25' \
             '&sel_levl=%25&sel_ptrm=%25&sel_instr=%25' \
             '&begin_hh=0&begin_mi=0&begin_ap=x&end_hh=0&end_mi=0&end_ap=x'

      HTTParty.post(
        'https://patriotweb.gmu.edu/pls/prod/bwckschd.p_get_crse_unsec',
        body: body,
        headers: form_headers
      )
    end

    private

    # Headers shared by both form posts. A fresh Hash is built per call so the
    # HTTP library is free to modify it.
    def form_headers
      { 'Content-Type' => 'application/x-www-form-urlencoded', 'charset' => 'utf-8' }
    end
  end
end
schedules/db/patriot_web_parser.rb
0 → 100644
View file @
52c31168
require_relative
'patriot_web_networker'
require
'nokogiri'
# Reopens the core String class to add a letters-only predicate.
class String
  # @return [Boolean] true when the string is non-empty and consists solely of
  #   characters matching [[:alpha:]]
  def alpha?
    # \A and \z anchor the whole string. The previous ^ and $ anchors match per
    # line, so a multi-line value such as "abc\n123" was reported as alphabetic.
    match?(/\A[[:alpha:]]+\z/)
  end
end
module PatriotWeb
  # Scrapes semesters, subjects and course sections out of the HTML that
  # PatriotWeb::Networker downloads.
  class Parser
    def initialize
      @networker = PatriotWeb::Networker.new
    end

    # @return [Array<String>] option values that start with "20"
    def parse_semesters
      response = @networker.fetch_page_containing_semester_data
      get_semesters_from_option_values(Nokogiri::HTML(response))
    end

    # @param semester_id [String] term code passed through to the networker
    # @return [Array<String>] purely alphabetic option values of the
    #   "subj_id" select element
    def parse_subjects(semester_id)
      response = @networker.fetch_subjects(semester_id)
      get_alpha_option_values(Nokogiri::HTML(response))
    end

    # @param subject [String] subject code passed through to the networker
    # @return [Hash{String => Hash}] one entry per section, keyed by CRN
    def parse_courses_in_subject(subject)
      response = @networker.fetch_courses_in_subject(subject)
      feed_course_info(Nokogiri::HTML(response))
    end

    private

    # Values of the subject <option>s whose stripped text is alphabetic.
    # Non-matching options are dropped instead of being returned as nil.
    def get_alpha_option_values(searcher)
      option_values(searcher.xpath('//*[@id="subj_id"]/option')).select do |value|
        value.strip.alpha?
      end
    end

    # Values of every <option> whose value starts with "20".
    # Non-matching options are dropped instead of being returned as nil.
    def get_semesters_from_option_values(searcher)
      option_values(searcher.css('option')).select { |value| value.start_with?('20') }
    end

    # Collects the "value" attribute of each option, skipping options that
    # have none (calling String methods on nil used to raise here).
    def option_values(options)
      options.map { |opt| opt.attr('value') }.compact
    end

    # Walks the rows of the first "datadisplaytable" and builds the section
    # hash. Returns an empty hash when the page contains no such table.
    def feed_course_info(searcher)
      tables = searcher.css('html body div.pagebodydiv table.datadisplaytable')
      table = tables.css('table.datadisplaytable').first
      data = {}
      return data unless table

      currentobj = nil
      table.children.each do |row|
        next unless row.name == 'tr'

        row.children.each { |item| currentobj = sort_item(item, currentobj, data) }
      end
      data
    end

    # Dispatches one cell of a result row.
    #
    # A <th> containing "-" starts a new section; any other element adds field
    # values to the section in progress. Returns the section hash that later
    # cells should keep filling (nil while no section has been started).
    def sort_item(item, currentobj, data)
      if item.name == 'th'
        item.to_html.include?('-') ? start_section(item, data) : currentobj
      elsif currentobj && item.is_a?(Nokogiri::XML::Element)
        # The currentobj check comes first: a detail cell seen before any
        # title used to crash on nil[:fields].
        fill_fields(item, currentobj)
      else
        currentobj
      end
    end

    # Parses a title cell of the form "name - crn - subj code - sect",
    # registers the section in +data+ and returns its hash with an empty
    # :fields list.
    def start_section(item, data)
      # " - Honors" is part of the course name, not a separator.
      titledetails = item.text.gsub(' - Honors', ' (Honors)').split(' - ')
      if titledetails.count > 4
        # The name itself contained " - "; fold its first two pieces back together.
        # NOTE(review): the joining whitespace is assumed to be one space — confirm.
        titledetails = ["#{titledetails[0]} #{titledetails[1]}", *titledetails[2..4]]
      end
      titledata = titledetails[2].split(' ')

      begin
        # get_details mutates +data+ in place, so one call is enough.
        currentobj = get_details(data, titledetails, titledata)[1]
      rescue StandardError => e
        puts item
        puts e
        exit(1)
      end
      currentobj[:fields] = []
      currentobj
    end

    # Appends the normalised <th> labels found in +item+ to the section's
    # :fields list, then stores the text of the <td> at the same position
    # under each label. Labels without a matching <td> are left unset
    # instead of raising.
    def fill_fields(item, currentobj)
      fields = currentobj[:fields]
      return currentobj unless fields

      item.css('th').each { |field| fields.push(field.text.downcase.tr(' ', '_')) }
      cells = item.css('td')
      fields.each_with_index do |field, index|
        cell = cells[index]
        currentobj[field] = cell.text if cell
      end
      currentobj
    end

    # Stores a fresh section hash in +data+ under the stripped CRN.
    #
    # @param data [Hash] accumulator, mutated in place
    # @param titledetails [Array<String>] [name, crn, "subj code", sect]
    # @param titledata [Array<String>] [subj, code] (the caller passes
    #   titledetails[2] split on whitespace)
    # @return [Array(Hash, Hash)] +data+ and the new section hash
    def get_details(data, titledetails, titledata)
      crn = titledetails[1].strip
      data[crn] = {
        subj: titledata[0].strip,
        code: titledata[1].strip,
        sect: titledetails[3].strip,
        crn: crn,
        name: titledetails[0].strip
      }
      [data, data[crn]]
    end
  end
end
schedules/db/seeds.rb
View file @
52c31168
# This file should contain all the record creation needed to seed the database with its default values.
# The data can then be loaded with the rails db:seed command (or created alongside the database with db:setup).
#
# Examples:
#
# movies = Movie.create([{ name: 'Star Wars' }, { name: 'Lord of the Rings' }])
# Character.create(name: 'Luke', movie: movies.first)
require 'rubyXL'
# require_relative 'excel_loader'
require_relative 'parse_patriot_web'

# loader = if Rails.env.test?
#            ExcelLoader.new 'db/data/testdata.xlsx'
#          else
#            ExcelLoader.new 'db/data/allsections.xlsx'
#          end
# loader.load_data
# NOTE(review): `parse_patriot_web` / `load_data` look like leftovers from before
# the parser moved into this file — confirm whether they should still run.
load_data

require_relative 'patriot_web_parser'
require 'thwait' # no longer used below; kept so nothing else that relies on it breaks
require 'httparty'
require 'nokogiri'
require 'json'

parser = PatriotWeb::Parser.new
semester_id = parser.parse_semesters.first

# Download every subject concurrently. Each thread returns its own result and
# Thread#value both joins the thread and re-raises anything it raised, so a
# failed download aborts seeding BEFORE the existing rows are deleted instead
# of silently reseeding from partial data.
threads = parser.parse_subjects(semester_id).compact.map do |subject|
  Thread.new { parser.parse_courses_in_subject(subject) }
end
total = threads.map(&:value)

# For testing, only get first subject
# subject = parser.parse_subjects(semester_id).compact.first
# total = [parser.parse_courses_in_subject(subject)]

Section.delete_all
Course.delete_all
Semester.delete_all

# create! already persists the record (and raises on failure), so no extra save! is needed.
semester = Semester.create!(season: 'Fall', year: 2018)

total.each do |subject|
  subject.each_value do |section|
    # Sections without a date range or instructors are skipped.
    next unless section.key?('date_range') && section.key?('instructors')

    course = Course.find_or_create_by(subject: section[:subj], course_number: section[:code])
    course.semester = semester
    course.save!

    # NOTE(review): single-space separators are assumed here — confirm.
    section_name = "#{section[:subj]} #{section[:code]} #{section[:sect]} #{section[:name]}"
    puts "Adding #{section_name}..."

    # "start - end" strings; a value without " - " yields the same text for both ends.
    dates = section['date_range'].split(' - ')
    times = section.key?('time') ? section['time'].split(' - ') : ['N/A']

    Section.create!(
      name: section_name,
      crn: section[:crn],
      title: section[:name],
      location: section['where'],
      start_date: dates.first,
      end_date: dates.last,
      start_time: times.first,
      end_time: times.last,
      # Collapse runs of whitespace in the instructor list to single spaces.
      instructor: section['instructors'].split(' ').join(' '),
      course: course
    )
  end
end
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment