Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Khalid Ali
schedules
Commits
b4635282
Commit
b4635282
authored
Aug 29, 2018
by
Zac Wood
Browse files
okay it actually works this time I promise
parent
3cb72415
Changes
2
Show whitespace changes
Inline
Side-by-side
schedules_api/db/patriot_web_parser.rb
View file @
b4635282
...
@@ -36,7 +36,7 @@ module PatriotWeb
...
@@ -36,7 +36,7 @@ module PatriotWeb
def
parse_courses_in_subject
(
subject
)
def
parse_courses_in_subject
(
subject
)
response
=
@networker
.
fetch_courses_in_subject
(
subject
)
response
=
@networker
.
fetch_courses_in_subject
(
subject
)
document
=
Nokogiri
::
HTML
(
response
)
document
=
Nokogiri
::
HTML
(
response
)
get_courses
(
document
)
get_courses
(
document
,
subject
)
end
end
private
private
...
@@ -66,34 +66,40 @@ module PatriotWeb
...
@@ -66,34 +66,40 @@ module PatriotWeb
# Parse all courses from the subject search page
# Parse all courses from the subject search page
# @param document [Nokogiri::HTML::Document]
# @param document [Nokogiri::HTML::Document]
# @return [Array] courses
# @return [Array] courses
def
get_courses
(
document
)
def
get_courses
(
document
,
subject
)
table
=
document
.
css
(
'html body div.pagebodydiv table.datadisplaytable'
)
table
=
document
.
css
(
'html body div.pagebodydiv table.datadisplaytable'
)
rows
=
table
.
css
(
'tr'
)
rows
=
table
.
css
(
'tr'
)
# rows[100..110].each_with_index do |row, i|
# puts i
# puts row
# end
data_from
rows
end
def
data_from
(
rows
)
i
=
0
title_index
=
0
result
=
[]
(
0
..
(
rows
.
length
/
6
-
1
)).
map
do
|
i
|
while
i
<
rows
.
length
start
=
i
*
5
if
is_title
(
rows
[
i
].
text
)
# check if the row is a title
data
=
{}
data
=
{}
title
=
rows
[
start
].
text
# the title looks this: Survey of Accounting - 71117 - ACCT 203 - 001
title_elements
=
rows
[
i
].
text
.
split
(
' - '
)
# so split it by ' - ' and extract
title_elements
=
title
.
split
(
' - '
)
next
unless
title_elements
.
length
==
4
data
[
:title
]
=
title_elements
[
0
].
strip
data
[
:title
]
=
title_elements
[
0
].
strip
data
[
:crn
]
=
title_elements
[
1
]
data
[
:crn
]
=
title_elements
[
1
]
full_name
=
title_elements
[
2
].
split
(
' '
)
full_name
=
title_elements
[
2
].
split
(
' '
)
next
unless
full_name
.
length
==
2
next
unless
full_name
.
length
==
2
data
[
:subj
]
=
title_elements
[
2
].
split
(
' '
)[
0
]
data
[
:subj
]
=
full_name
[
0
]
data
[
:course_number
]
=
title_elements
[
2
].
split
(
' '
)[
1
]
data
[
:course_number
]
=
full_name
[
1
]
data
[
:section
]
=
title_elements
[
3
].
strip
data
[
:section
]
=
title_elements
[
3
].
strip
# rows 1 to 3 contain info about registration and drop dates.
details
=
rows
[
i
+
2
].
css
(
'td table tr td'
)
# for now we're gonna ignore them and skip to row 4, which contains details
unless
details
.
length
>
0
details
=
rows
[
start
+
2
].
css
(
'td table tr td'
)
puts
"
#{
full_name
.
join
(
' '
)
}
is fake news"
i
+=
1
next
unless
details
.
length
>
0
# if there are no details, skip this item
next
# details = detail_rows.last.text.split("\n").compact.reject(&:empty?) # skip empty strings
end
times
=
details
[
1
].
text
.
split
(
' - '
)
times
=
details
[
1
].
text
.
split
(
' - '
)
if
(
times
.
length
==
1
)
if
(
times
.
length
==
1
)
...
@@ -113,73 +119,21 @@ module PatriotWeb
...
@@ -113,73 +119,21 @@ module PatriotWeb
data
[
:type
]
=
details
[
5
].
text
data
[
:type
]
=
details
[
5
].
text
data
[
:instructor
]
=
details
[
6
].
text
data
[
:instructor
]
=
details
[
6
].
text
data
end
# puts rows[0].text
# puts rows[2].css('td table tr td')
# puts rows[5].text
# puts rows[7].css('td table tr td')
# puts rows[10].text
# puts rows[12].css('td table tr td')
# (0..(rows.length/3-1)).each do |i|
# start = i*3
# puts rows[start].search('th').first.text
result
<<
data
# section_data = rows[start+2].css('td table.datadisplaytable').search('td')
i
+=
5
# skip to what we think is the next title
# puts section_data[1].text
else
# end
i
+=
1
# try the next row if this one was not a title
# puts rows[3]
end
# puts rows[3].search('th').first.text
end
# section_data = rows[5].css('td table.datadisplaytable').search('td')
# puts section_data[1].text
# end
# each section is represented by 6 rows in the table
# (0..(rows.length/6 - 1)).map do |i|
# start = i*6
# data = {}
# title = rows[start].text
# # the title looks this: Survey of Accounting - 71117 - ACCT 203 - 001
# # so split it by ' - ' and extract
# title_elements = title.split(' - ')
# next unless title_elements.length == 4
# data[:title] = title_elements[0].strip
# data[:crn] = title_elements[1]
# full_name = title_elements[2].split(' ')
# next unless full_name.length == 2
# data[:subj] = title_elements[2].split(' ')[0]
# data[:course_number] = title_elements[2].split(' ')[1]
# data[:section] = title_elements[3].strip
# # rows 1 to 3 contain info about registration and drop dates.
# # for now we're gonna ignore them and skip to row 4, which contains details
# detail_rows = rows[start+4].css('tr')
# next unless detail_rows.length > 0 # if there are no details, skip this item
# details = detail_rows.last.text.split("\n").compact.reject(&:empty?) # skip empty strings
# times = details[1].split(' - ')
# if (times.length == 1)
# data[:start_time] = 'TBA'
# data[:end_time] = 'TBA'
# else
# data[:start_time] = times[0]
# data[:end_time] = times[1]
# end
# data[:days] = details[2].strip
# data[:location] = details[3].strip
# dates = details[4].split(' - ')
result
# data[:start_date] = dates[0]
end
# data[:end_date] = dates[1]
# data[:type] = details[5]
# a title looks this: Survey of Accounting - 71117 - ACCT 203 - 001
# data[:instructor] = details[6]
def
is_title
(
text
)
# data
elements
=
text
.
split
(
' - '
)
# end
elements
.
length
==
4
&&
elements
[
2
].
split
(
' '
).
length
==
2
end
end
end
end
end
end
schedules_api/db/seeds.rb
View file @
b4635282
...
@@ -19,17 +19,17 @@ puts "DDOSing Patriot Web, buckle up kids"
...
@@ -19,17 +19,17 @@ puts "DDOSing Patriot Web, buckle up kids"
# parse all subjects and their courses in the semester
# parse all subjects and their courses in the semester
parser
.
parse_subjects
(
semester
).
each
do
|
subject
|
parser
.
parse_subjects
(
semester
).
each
do
|
subject
|
puts
"Getting courses for
#{
subject
}
"
puts
"Getting courses for
#{
subject
}
"
threads
<<
Thread
.
new
{
#
threads << Thread.new {
total
[
subject
]
=
parser
.
parse_courses_in_subject
(
subject
)
total
[
subject
]
=
parser
.
parse_courses_in_subject
(
subject
)
}
#
}
end
end
# For testing, only get first subject
# For testing, only get first subject
# subject = parser.parse_subjects(semester)
.first
# subject = parser.parse_subjects(semester)
[20]
# total[subject] = parser.parse_courses_in_subject(subject)
# total[subject] = parser.parse_courses_in_subject(subject)
# wait for all the threads to finish
# wait for all the threads to finish
ThreadsWait
.
all_waits
(
*
threads
)
#
ThreadsWait.all_waits(*threads)
# delete everything in the current database
# delete everything in the current database
Closure
.
delete_all
Closure
.
delete_all
...
@@ -44,7 +44,10 @@ semester.save!
...
@@ -44,7 +44,10 @@ semester.save!
total
.
each
do
|
subject
,
sections
|
total
.
each
do
|
subject
,
sections
|
puts
"Adding courses for
#{
subject
}
..."
puts
"Adding courses for
#{
subject
}
..."
sections
.
each
do
|
section
|
sections
.
each
do
|
section
|
next
if
section
.
nil?
||
!
section
.
key?
(
:subj
)
||
!
section
.
key?
(
:course_number
)
if
section
.
nil?
||
!
section
.
key?
(
:subj
)
||
!
section
.
key?
(
:course_number
)
puts
"
#{
subject
}
failed section:
#{
section
.
class
}
"
next
end
# Find or create a course and set its semester
# Find or create a course and set its semester
# TODO: this breaks when you try to do more than one semester,
# TODO: this breaks when you try to do more than one semester,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment