From f6c6a179f083aa6d1a5df1b663340686f0dc940e Mon Sep 17 00:00:00 2001 From: Landon DeCoito Date: Wed, 25 Apr 2018 20:02:35 -0400 Subject: [PATCH] Getconnectedparscript now returns a 25Live data -esque JSON --- mason-today/app.py | 10 +- mason-today/getconnectedscript.py | 170 ++++++++++-------------------- mason-today/parscript.py | 1 + 3 files changed, 64 insertions(+), 117 deletions(-) diff --git a/mason-today/app.py b/mason-today/app.py index b3bcc6f..1d11d08 100644 --- a/mason-today/app.py +++ b/mason-today/app.py @@ -1,11 +1,19 @@ from flask import Flask from flask import Response from parscript import load_data +from getconnectedscript import load_getconn_data import json + app = Flask(__name__) -@app.route("/") +@app.route("/25Live") def display_data(): resp = Response(json.dumps(load_data(), ensure_ascii=False).encode('utf-8')) resp.headers['Content-Type'] = 'application/json; charset=utf-8' return resp + +@app.route("/getconn") +def display_GC_data(): + resp = Response(json.dumps(load_getconn_data(), ensure_ascii=False).encode('utf-8')) + resp.headers['Content-Type'] = 'application/json; charset=utf-8' + return resp diff --git a/mason-today/getconnectedscript.py b/mason-today/getconnectedscript.py index 33a6268..316f6da 100644 --- a/mason-today/getconnectedscript.py +++ b/mason-today/getconnectedscript.py @@ -3,103 +3,60 @@ from bs4 import BeautifulSoup from parscript import cleanup, doTheTime import feedparser -#TODO: ADD "getconnected" ATTRIBUTE TO LOAD_DATA DICTLIST - - -#woah = cleanup(requests.get("https://getconnected.gmu.edu/events/events.rss").text) -#soup = BeautifulSoup(woah, "lxml") -#print soup.prettify -feedtext = requests.get("https://getconnected.gmu.edu/events/events.rss").text -feedtext = cleanup(feedtext) -feedtext = feedtext.replace("’", "'") - -feed = feedparser.parse(feedtext)#this calls the RSS feed parser from !feedparser - -#print feed, "\n\n\n" -#ctr = 0 -dictlist = [] - -for entry in feed.entries: - # templist = {} - #print entry.summary_detail.value - # templist["summary_detail"] = entry.summary_detail - - '''print "----------------------------------" - print "1) ", entry.published_parsed, "\n" - templist["published_parsed"] = entry.published_parsed - - print entry.links, "\n" - templist["links"] = entry.links - - print "3) ", entry.author, "\n" - templist["author"] = entry.author - - print entry.summary, "\n" - templist["summary"] = entry.summary - - print "5) ", entry.guidislink, "\n" - templist["guidislink"] = entry.guidislink - - print entry.title_detail, "\n" - templist["title_detail"] = entry.title_detail - - print "6) ", entry.link, "\n" - templist["link"] = entry.link - - print entry.authors, "\n" - templist["authors"] = entry.authors - - print "7) ", entry.title, "\n" - templist["title"] = entry.title - - print entry.author_detail, "\n" - templist["author_detail"] = entry.author_detail - - print "9) ", entry.id, "\n" - templist["id"] = entry.id - - print entry.published, "\n" - templist["published"] = entry.published - print"-----------------------------------"''' - - - - #print"===================================" - uniqueid = entry.id[-7:] - #print uniqueid - - title = entry.title - #print title - - sumdetsoup = BeautifulSoup(entry.summary_detail["value"].encode("ascii", "replace"), "html.parser") - - location = [sumdetsoup.div.span.text] - #print location - - description = sumdetsoup.find_all("div")[1].p.text - #print description - - - datetime = sumdetsoup.b.text - #print datetime - - if (datetime.count("(") == 1): - datesplit = datetime.split(", ") - weekday = datesplit[0] - month = datesplit[1].split(" ") - monthday = month[1] - month = month[0] - year = datesplit[2][:5] - parsedtimelist = doTheTime(datesplit[2][6:-1]) - timestart = parsedtimelist[0] - timestop = parsedtimelist[1] - dictlist.append({"id":uniqueid, "title":title, "dayofweek":weekday, "dayofmonth":monthday, "month":month, "year":year, "timestart":timestart, "timestop":timestop, "location":location, "description":description}) - -print dictlist - - - #print "\n\n", sumdetsoup.prettify() - #print"===================================" +# TODO: ADD "getconnected" ATTRIBUTE TO LOAD_DATA DICTLIST + + +# woah = cleanup(requests.get("https://getconnected.gmu.edu/events/events.rss").text) +# soup = BeautifulSoup(woah, "lxml") +# print soup.prettify +def load_getconn_data(): + feedtext = requests.get("https://getconnected.gmu.edu/events/events.rss").text + feedtext = cleanup(feedtext) + + + feed = feedparser.parse(feedtext) # this calls the RSS feed parser from !feedparser + + # print feed, "\n\n\n" + # ctr = 0 + dictlist = [] + + for entry in feed.entries: + # print"===================================" + uniqueid = entry.id[-7:] + # print uniqueid + + title = entry.title + # print title + + sumdetsoup = BeautifulSoup(entry.summary_detail["value"].encode("ascii", "replace"), "html.parser") + + location = [sumdetsoup.div.span.text] + # print location + + description = sumdetsoup.find_all("div")[1].text + # print description + + + datetime = sumdetsoup.b.text + # print datetime + + if (datetime.count("(") == 1): + datesplit = datetime.split(", ") + weekday = datesplit[0] + month = datesplit[1].split(" ") + monthday = month[1] + month = month[0] + year = datesplit[2][:5] + parsedtimelist = doTheTime(datesplit[2][6:-1]) + timestart = parsedtimelist[0] + timestop = parsedtimelist[1] + # print {"id":uniqueid, "title":title, "dayofweek":weekday, "dayofmonth":monthday, "month":month, "year":year, "timestart":timestart, "timestop":timestop, "location":location, "description":description} + dictlist.append({"id":uniqueid, "title":title, "dayofweek":weekday, "dayofmonth":monthday, "month":month, "year":year, "timestart":timestart, "timestop":timestop, "location":location, "description":description}) + return dictlist + + + #print "\n\n", sumdetsoup.prettify() + #print"===================================" #dictlist.append({"id":uniqueid, "title":entry_title, "dayofweek":weekday, "dayofmonth":monthday, "month":month, "year":year, "timestart":timestart, "timestop":timestop, "location":location, "description":description}) #This was intended to figure out what objects are in each entry and what appears only sometimes @@ -120,22 +77,3 @@ print dictlist #-------published ####Some events have: #-------tags - - -'''for key in feed.entries[0].keys(): - everyone.append(key) -some = [] - -for entry in feed.entries: - #print "----------------------------------" - for key in entry.keys(): - if not key in everyone: - some.append(key) - for key in everyone: - if not (key in entry.keys()): - everyone.remove(key) - some.append(key) - #print"-----------------------------------" - #ctr += 1 -print "Everyone: \n", everyone -print "Some: \n", some''' diff --git a/mason-today/parscript.py b/mason-today/parscript.py index 02b5102..7c01ca2 100644 --- a/mason-today/parscript.py +++ b/mason-today/parscript.py @@ -14,6 +14,7 @@ def cleanup(str): #this function cleans up some of the useless html leftovers to str = str.replace("Performing any medical procedures?: FALSE \n" , "") str = str.replace("Parking Needed?: FALSE \n" , "") str = str[0:len(str) - 1] + str = str.replace("’", "'") return str class eventException: #this class is just an exception for our use -- GitLab