Commit 45ed3dd9 authored by Landon DeCoito's avatar Landon DeCoito

GC script now parses ALL events, including multi-day events. Also renamed the...

GC script now parses ALL events, including multi-day events. Also renamed the doTheTime function to splitAndConvertTime and moved it to getconnected script
parent b40071ef
import requests
from bs4 import BeautifulSoup
from parscript import cleanup, doTheTime
from parscript import cleanup, convertTime
import feedparser
# TODO: ADD "getconnected" ATTRIBUTE TO LOAD_DATA DICTLIST
def splitAndConvertTime(strin):
    """Split a "start-end" time range and convert each end with convertTime.

    All spaces are removed, the text is split on "-", and the first two
    pieces are taken as the start and end times. When the start time has no
    am/pm suffix of its own, it borrows one from the end time before being
    converted: "am" if the end time ends in "am" (in any letter case),
    otherwise "pm".

    Args:
        strin: Time range text, e.g. something like "1:00 - 2:30 pm".

    Returns:
        A two-element list [start, end] holding whatever convertTime returns
        for each end of the range.

    Raises:
        IndexError: If strin contains no "-" separator.
    """
    parts = strin.replace(" ", "").split("-")
    start, end = parts[0], parts[1]

    # The end time is converted first, keeping the original call order.
    end_converted = convertTime(end)

    if start[-2:].lower() not in ("am", "pm"):
        # Compare case-insensitively: the start-time check already accepted
        # "AM"/"PM", but an upper-case "AM" on the end time used to fall
        # through to the "pm" branch.
        # NOTE(review): a range that crosses noon (e.g. "11 - 1 pm") still
        # inherits "pm" for the start time -- confirm whether the feed can
        # produce such ranges.
        suffix = "am" if end[-2:].lower() == "am" else "pm"
        start = start + suffix

    return [convertTime(start), end_converted]
# woah = cleanup(requests.get("https://getconnected.gmu.edu/events/events.rss").text)
# soup = BeautifulSoup(woah, "lxml")
# print soup.prettify
def load_getconn_data():
# Downloads the GetConnected events RSS feed, passes the text through
# cleanup(), parses it with feedparser, and returns a list holding one dict
# per feed entry (id, title, date parts, start/stop time, location,
# description; multi-day events also carry end-date keys).
#
# NOTE(review): this text comes from a rendered diff. Pre-change and
# post-change lines appear one after another (e.g. the two BeautifulSoup
# calls, doTheTime vs. splitAndConvertTime), indentation has been lost, and
# the "......@@" line marks lines the diff view collapsed. `description` is
# used below but is not assigned in any visible line -- presumably it is set
# in the collapsed part; confirm against the real file.
feedtext = requests.get("https://getconnected.gmu.edu/events/events.rss").text
feedtext = cleanup(feedtext)
feed = feedparser.parse(feedtext) # this calls the RSS feed parser from !feedparser
# parse the cleaned feed text with the feedparser library
feed = feedparser.parse(feedtext)
# print feed, "\n\n\n"
# ctr = 0
# accumulates one dict per event; this is the function's return value
dictlist = []
for entry in feed.entries:
# print"==================================="
# the last 7 characters of the entry id serve as the event's unique id
uniqueid = entry.id[-7:]
# print uniqueid
title = entry.title
# print title
# the entry summary is HTML; parse it so individual tags can be read
sumdetsoup = BeautifulSoup(entry.summary_detail["value"].encode("ascii", "replace"), "html.parser")
sumdetsoup = BeautifulSoup(entry.summary_detail["value"].encode("utf-8"), "html.parser")
# location text is read from the first <span> inside the first <div>,
# and stored as a one-element list
location = [sumdetsoup.div.span.text]
# print location
......@@ -39,41 +46,67 @@ def load_getconn_data():
# the date/time text is read from the first <b> tag of the summary
datetime = sumdetsoup.b.text
# print datetime
# this handles events which start and end on the same day
# (exactly one "(" in the text; presumably the single parenthesised
# "start - end" time range -- confirm against real feed data)
if (datetime.count("(") == 1):
datesplit = datetime.split(", ")
weekday = datesplit[0]
month = datesplit[1].split(" ")
monthday = month[1]
month = month[0]
temp = datesplit[1].split(" ")
monthday = temp[1]
month = temp[0]
# NOTE(review): [:5] keeps five characters, so a four-digit year would
# keep one trailing character -- confirm this is intended
year = datesplit[2][:5]
parsedtimelist = doTheTime(datesplit[2][6:-1])
# uses helper function to get the start and end time
# ([6:-1] drops the first six characters and the final character of the piece)
parsedtimelist = splitAndConvertTime(datesplit[2][6:-1])
timestart = parsedtimelist[0]
timestop = parsedtimelist[1]
# print {"id":uniqueid, "title":title, "dayofweek":weekday, "dayofmonth":monthday, "month":month, "year":year, "timestart":timestart, "timestop":timestop, "location":location, "description":description}
dictlist.append({"id":uniqueid, "title":title, "dayofweek":weekday, "dayofmonth":monthday, "month":month, "year":year, "timestart":timestart, "timestop":timestop, "location":location, "description":description})
return dictlist
#print "\n\n", sumdetsoup.prettify()
#print"==================================="
dictlist.append({"id":uniqueid, "title":title, "dayofweek":weekday, "dayofmonth":monthday, "month":month,
"year":year, "timestart":timestart, "timestop":timestop, "location":location, "description":description})
# this handles events which start on one day and end on another
else:
# the text before " - " describes the start, the text after it the end
datesplit = datetime.split(" - ")
# getting the information for the start day/time
tempsplits = datesplit[0].split(", ")
weekday = tempsplits[0]
month = tempsplits[1].split(" ")[0]
monthday = tempsplits[1].split(" ")[1]
year = tempsplits[2].split(" ")[0]
# the time is whatever follows "(", minus the final character
timestart = datesplit[0].split("(")[1][:-1]
timestart = convertTime(timestart)
# getting the information for the end day/time
tempsplits = datesplit[1].split(", ")
endweekday = tempsplits[0]
endmonth = tempsplits[1].split(" ")[0]
endmonthday = tempsplits[1].split(" ")[1]
endyear = tempsplits[2].split(" ")[0]
timestop = datesplit[1].split("(")[1][:-1]
timestop = convertTime(timestop)
#dictlist.append({"id":uniqueid, "title":entry_title, "dayofweek":weekday, "dayofmonth":monthday, "month":month, "year":year, "timestart":timestart, "timestop":timestop, "location":location, "description":description})
#This was intended to figure out what objects are in each entry and what appears only sometimes
#The results are:
####Every event has:
#-------summary
#-------published_parsed
#-------links
#-------author
#-------summary
#-------guidislink
#-------title_detail
#-------link
#-------authors
#-------title
#-------author_detail
#-------id
#-------published
####Some events have:
#-------tags
# multi-day events store the same keys as single-day events plus the
# end weekday, end day of month, end month and end year
dictlist.append({"id":uniqueid, "title":title, "dayofweek":weekday, "dayofmonth":monthday, "month":month,
"year":year, "timestart":timestart, "timestop":timestop, "location":location, "description":description,
"enddayofweek":endweekday, "enddayofmonth":endmonthday, "endmonth":endmonth, "endyear":endyear})
return dictlist
# dictlist.append({"id":uniqueid, "title":entry_title, "dayofweek":weekday, "dayofmonth":monthday, "month":month, "year":year, "timestart":timestart, "timestop":timestop, "location":location, "description":description})
# The list below was compiled to find out which attributes every feed entry carries and which appear only on some entries.
# The results are:
# Every event has:
# -------summary
# -------published_parsed
# -------links
# -------author
# -------summary
# -------guidislink
# -------title_detail
# -------link
# -------authors
# -------title
# -------author_detail
# -------id
# -------published
# Some events have:
# -------tags
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment