Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
L
laundry
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
SRCT
laundry
Commits
a036fe8a
Commit
a036fe8a
authored
Apr 02, 2017
by
Michael Bailey
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
All that's left is to dump entirecampus somewhere sensible db-wise
parent
c7d02445
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
58 additions
and
8 deletions
+58
-8
scrape_gmu.py
scrape_gmu.py
+58
-8
No files found.
scrape_gmu.py
View file @
a036fe8a
...
...
@@ -3,14 +3,64 @@ from bs4 import BeautifulSoup
import
requests
import
re
import
json
def getLocations():
    """Return the Speedqueen location slugs listed on the GMU housing laundry page.

    Fetches https://housing.gmu.edu/laundry and extracts every path segment
    that follows ``quantum.speedqueen.com/wa/`` in the raw page HTML.

    Returns:
        list[str]: location slugs, or [] on a non-200 response or any
        request failure (timeout, connection error, ...).
    """
    try:
        # Collect the HTML of the laundry page so we can parse it.
        r = requests.get('https://housing.gmu.edu/laundry')
    except requests.exceptions.RequestException:
        # Don't just assume the site can't time out or fail: degrade to "no locations".
        return []
    if r.status_code != 200:
        return []
    # Isolate the Speedqueen location slugs embedded in the page.
    # Raw string avoids invalid-escape-sequence warnings; pattern is otherwise
    # identical to the original.
    return re.findall(r'(?<=quantum\.speedqueen\.com\/wa\/)(.*)(?=\")', r.text)
def scrapeLaundry(location):
    """Scrape machine status for one Speedqueen laundry location.

    Args:
        location: location slug appended to http://quantum.speedqueen.com/wa/.

    Returns:
        list: one ``[name, type, status, time]`` list per machine row; the
        ``time`` field is blanked unless ``status`` is exactly "In use".
        Returns [] on any request failure; a non-200 response yields
        whatever was collected so far (initially []).
    """
    machines = []
    try:
        r = requests.get("http://quantum.speedqueen.com/wa/{0}".format(location))
        if r.status_code != 200:
            return machines
        outerparse = BeautifulSoup(r.text, 'html.parser')
        # The machine table is served inside an iframe; fetch that document too.
        innerframe = outerparse.iframe['src']
        innerreq = requests.get(innerframe)
        if innerreq.status_code != 200:
            return machines
        innerparse = BeautifulSoup(innerreq.text, 'html.parser')
        # 'row' instead of the original 'list', which shadowed the builtin.
        for row in innerparse.find_all('tr'):
            # Only rows carrying a class attribute are machine rows; this
            # replaces the original bare try/except around row['class'].
            if not row.has_attr('class'):
                continue
            # Default every field to "" so a row missing one of the four
            # cells cannot raise NameError (the original left these unbound).
            fields = {'name': '', 'type': '', 'status': '', 'time': ''}
            for cell in row.find_all('td'):
                # .get() instead of cell['class'] so a class-less <td> cannot
                # raise an uncaught KeyError as in the original.
                cls = cell.get('class')
                # Preserve the original exact-match semantics (== ['name'] etc.):
                # only a single-class cell counts.
                if cls is not None and len(cls) == 1 and cls[0] in fields:
                    fields[cls[0]] = cell.text
            obj = [fields['name'], fields['type'], fields['status'], fields['time']]
            # Remaining time is only meaningful while the machine is in use.
            if obj[2] != "In use":
                obj[3] = ""
            machines.append(obj)
    except requests.exceptions.RequestException:
        return []
    return machines
# Scrape every campus laundry location and collect the results.
# Each entry is [location_slug, machine0, machine1, ...] where each machine
# is a [name, type, status, time] list produced by scrapeLaundry.
# (Removed the original's unused `location = []` local.)
entirecampus = [[loc] + scrapeLaundry(loc) for loc in getLocations()]
print(entirecampus)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment