Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SRCT
mason-today-web
Commits
7241df82
Commit
7241df82
authored
Apr 27, 2018
by
Landon DeCoito
Browse files
cleaned up getconnectedscript and made error handling even better!
parent
0c0bc39c
Changes
1
Show whitespace changes
Inline
Side-by-side
mason-today/getconnectedscript.py
View file @
7241df82
import
requests
from
bs4
import
BeautifulSoup
# app imports
from
parscript
import
cleanup
,
convertTime
# third party imports
import
feedparser
from
bs4
import
BeautifulSoup
import
requests
# DEV REMINDER: CHANGE THE LINES IN INITIALISATION ERROR MESSAGE (LINE 138)
# TODO: ADD "getconnected" ATTRIBUTE TO LOAD_DATA DICTLIST
def
splitAndConvertTime
(
strin
):
strin
=
strin
.
replace
(
" "
,
""
)
strin
=
strin
.
split
(
"-"
)
returnlist
=
[
""
,
""
]
returnlist
=
[
""
,
""
]
returnlist
[
1
]
=
convertTime
(
strin
[
1
])
if
not
(
strin
[
0
][
-
2
:]
==
"am"
or
strin
[
0
][
-
2
:]
==
"AM"
)
and
not
(
strin
[
0
][
-
2
:]
==
"pm"
or
strin
[
0
][
-
2
:]
==
"PM"
):
if
not
(
strin
[
0
][
-
2
:]
==
"am"
or
strin
[
0
][
-
2
:]
==
"AM"
)
\
and
not
(
strin
[
0
][
-
2
:]
==
"pm"
or
strin
[
0
][
-
2
:]
==
"PM"
):
if
(
strin
[
1
][
-
2
:]
==
"am"
):
returnlist
[
0
]
=
convertTime
(
strin
[
0
]
+
"am"
)
else
:
...
...
@@ -19,23 +24,30 @@ def splitAndConvertTime(strin):
returnlist
[
0
]
=
convertTime
(
strin
[
0
])
return
returnlist
def
load_getconn_data
():
feedtext
=
requests
.
get
(
"https://getconnected.gmu.edu/events/events.rss"
).
text
feedtext
=
requests
.
get
(
"https://getconnected.gmu.edu/events/events.rss"
).
text
feedtext
=
cleanup
(
feedtext
)
# this calls the RSS feed parser from !feedparser
feed
=
feedparser
.
parse
(
feedtext
)
dictlist
=
[]
error
=
[]
for
entry
in
feed
.
entries
:
# this collects the attributes which are always there
# also creates the Soup and sets up for the rest of the parsing
try
:
uniqueid
=
entry
.
id
[
-
7
:]
# print uniqueid
title
=
entry
.
title
# print title
sumdetsoup
=
BeautifulSoup
(
entry
.
summary_detail
[
"value"
].
encode
(
"utf-8"
),
"html.parser"
)
sumdetsoup
=
BeautifulSoup
(
entry
.
summary_detail
[
"value"
]
.
encode
(
"utf-8"
),
"html.parser"
)
location
=
[
sumdetsoup
.
div
.
span
.
text
]
# print location
...
...
@@ -43,32 +55,51 @@ def load_getconn_data():
description
=
sumdetsoup
.
find_all
(
"div"
)[
1
].
text
# print description
datetime
=
sumdetsoup
.
b
.
text
# print datetime
except
Exception
as
e
:
error
.
append
(
str
(
e
))
# this handles events which start and end on the same day
if
(
datetime
.
count
(
"("
)
==
1
):
# if we've found an error, there's no point in continuing
if
(
datetime
.
count
(
"("
)
==
1
)
and
(
error
==
[]):
# parses out date info
try
:
datesplit
=
datetime
.
split
(
", "
)
weekday
=
datesplit
[
0
]
temp
=
datesplit
[
1
].
split
(
" "
)
monthday
=
temp
[
1
]
month
=
temp
[
0
]
year
=
datesplit
[
2
][:
5
]
except
Exception
as
e
:
error
.
append
(
str
(
e
))
# uses helper function to get the start and end time
try
:
parsedtimelist
=
splitAndConvertTime
(
datesplit
[
2
][
6
:
-
1
])
timestart
=
parsedtimelist
[
0
]
timestop
=
parsedtimelist
[
1
]
dictlist
.
append
({
"id"
:
uniqueid
,
"title"
:
title
,
"dayofweek"
:
weekday
,
"dayofmonth"
:
monthday
,
"month"
:
month
,
"year"
:
year
,
"timestart"
:
timestart
,
"timestop"
:
timestop
,
"location"
:
location
,
"description"
:
description
})
# this handles events which start on one day and end on another
except
Exception
as
e
:
error
.
append
(
str
(
e
))
# appends the dictlist if no errors were found
if
error
==
[]:
dictlist
.
append
({
"multiday"
:
False
,
"id"
:
uniqueid
,
"title"
:
title
,
"dayofweek"
:
weekday
,
"dayofmonth"
:
monthday
,
"month"
:
month
,
"year"
:
year
,
"timestart"
:
timestart
,
"timestop"
:
timestop
,
"location"
:
location
,
"description"
:
description
})
else
:
datesplit
=
datetime
.
split
(
" - "
)
dictlist
.
append
({
"id"
:
uniqueid
,
"error"
:
str
(
e
),
"errorlocation"
:
""
})
# this handles events which start on one day and end on another
elif
error
==
[]:
# getting the information for the start day/time
try
:
datesplit
=
datetime
.
split
(
" - "
)
tempsplits
=
datesplit
[
0
].
split
(
", "
)
weekday
=
tempsplits
[
0
]
month
=
tempsplits
[
1
].
split
(
" "
)[
0
]
...
...
@@ -76,8 +107,11 @@ def load_getconn_data():
year
=
tempsplits
[
2
].
split
(
" "
)[
0
]
timestart
=
datesplit
[
0
].
split
(
"("
)[
1
][:
-
1
]
timestart
=
convertTime
(
timestart
)
except
Exception
as
e
:
error
.
append
(
str
(
e
))
# getting the information for the end day/time
try
:
tempsplits
=
datesplit
[
1
].
split
(
", "
)
endweekday
=
tempsplits
[
0
]
endmonth
=
tempsplits
[
1
].
split
(
" "
)[
0
]
...
...
@@ -85,15 +119,30 @@ def load_getconn_data():
endyear
=
tempsplits
[
2
].
split
(
" "
)[
0
]
timestop
=
datesplit
[
1
].
split
(
"("
)[
1
][:
-
1
]
timestop
=
convertTime
(
timestop
)
except
Exception
as
e
:
error
.
append
(
str
(
e
))
# append the dictlist if no errors were found
if
error
==
[]:
dictlist
.
append
({
"multiday"
:
True
,
"id"
:
uniqueid
,
"title"
:
title
,
"dayofweek"
:
weekday
,
"dayofmonth"
:
monthday
,
"month"
:
month
,
"year"
:
year
,
"timestart"
:
timestart
,
"timestop"
:
timestop
,
"location"
:
location
,
"description"
:
description
,
"enddayofweek"
:
endweekday
,
"enddayofmonth"
:
endmonthday
,
"endmonth"
:
endmonth
,
"endyear"
:
endyear
})
else
:
dictlist
.
append
({
"id"
:
uniqueid
,
"error"
:
str
(
e
)})
else
:
dictlist
.
append
(
"error"
:
"issue in initialization of event.
\
check lines 40-56 in getconnectedscript.py"
)
dictlist
.
append
({
"id"
:
uniqueid
,
"title"
:
title
,
"dayofweek"
:
weekday
,
"dayofmonth"
:
monthday
,
"month"
:
month
,
"year"
:
year
,
"timestart"
:
timestart
,
"timestop"
:
timestop
,
"location"
:
location
,
"description"
:
description
,
"enddayofweek"
:
endweekday
,
"enddayofmonth"
:
endmonthday
,
"endmonth"
:
endmonth
,
"endyear"
:
endyear
})
return
dictlist
# dictlist.append({"id":uniqueid, "title":entry_title, "dayofweek":weekday, "dayofmonth":monthday, "month":month, "year":year, "timestart":timestart, "timestop":timestop, "location":location, "description":description})
# This was intended to figure out what objects are in each entry and what appears only sometimes
# The results are:
# Every event has:
# -------summary
# -------published_parsed
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment