Commit 57081238 authored by Landon DeCoito

Merge branch 'getconnectedparser' into 'master'

Getconnectedparser

See merge request !28
parents 43e64b27 22120e5f
from flask import Flask
from flask import Response
from parscript import load_data
from getconnectedscript import load_getconn_data
import json
app = Flask(__name__)
@app.route("/")
@app.route("/api/25live")
def display_data():
    """Serve the list built by load_data() as a UTF-8 encoded JSON body."""
    # ensure_ascii=False leaves non-ASCII characters unescaped in the JSON
    # text, so the text is encoded to UTF-8 bytes before being sent.
    payload = json.dumps(load_data(), ensure_ascii=False)
    response = Response(payload.encode('utf-8'))
    # Declare the charset explicitly so clients decode the body as UTF-8.
    response.headers['Content-Type'] = 'application/json; charset=utf-8'
    return response
@app.route("/api/getconnected")
def display_GC_data():
    """Serve the list built by load_getconn_data() as a UTF-8 encoded JSON body."""
    # ensure_ascii=False leaves non-ASCII characters unescaped in the JSON
    # text, so the text is encoded to UTF-8 bytes before being sent.
    payload = json.dumps(load_getconn_data(), ensure_ascii=False)
    response = Response(payload.encode('utf-8'))
    # Declare the charset explicitly so clients decode the body as UTF-8.
    response.headers['Content-Type'] = 'application/json; charset=utf-8'
    return response
import requests
from bs4 import BeautifulSoup
from parscript import cleanup, doTheTime
import feedparser
# TODO: ADD "getconnected" ATTRIBUTE TO LOAD_DATA DICTLIST
# woah = cleanup(requests.get("https://getconnected.gmu.edu/events/events.rss").text)
# soup = BeautifulSoup(woah, "lxml")
# print soup.prettify
def load_getconn_data():
    """Fetch the GetConnected events RSS feed and return it as a list of dicts.

    Each returned dict has the keys "id", "title", "dayofweek", "dayofmonth",
    "month", "year", "timestart", "timestop", "location" and "description".
    "timestart" and "timestop" are the two values doTheTime() returns for the
    parenthesised time range; "location" is a one-element list.

    Entries whose date line does not contain exactly one "(" are skipped.
    The parsing below only understands that shape (presumably
    "Weekday, Month D, YYYY (start - stop)" -- confirm against the live
    feed); without the skip such an entry would have no date fields of its
    own to report.

    Exceptions raised by requests, feedparser, BeautifulSoup or doTheTime()
    propagate to the caller.
    """
    feedtext = requests.get("https://getconnected.gmu.edu/events/events.rss").text
    feedtext = cleanup(feedtext)
    feed = feedparser.parse(feedtext)

    dictlist = []
    for entry in feed.entries:
        uniqueid = entry.id[-7:]  # the last seven characters of the entry id
        title = entry.title

        # The summary is an HTML fragment. Non-ASCII characters are replaced
        # with "?" before it is handed to the HTML parser.
        summary_html = entry.summary_detail["value"].encode("ascii", "replace")
        sumdetsoup = BeautifulSoup(summary_html, "html.parser")
        location = [sumdetsoup.div.span.text]
        description = sumdetsoup.find_all("div")[1].text
        when_text = sumdetsoup.b.text  # the bold date/time line

        # Only date lines with a single parenthesised time range can be
        # split into the fields below, so anything else is left out.
        if when_text.count("(") != 1:
            continue

        datesplit = when_text.split(", ")
        weekday = datesplit[0]
        month_parts = datesplit[1].split(" ")
        month = month_parts[0]
        monthday = month_parts[1]

        # Split the third piece at "(" instead of using fixed offsets, so the
        # year does not carry the whitespace that precedes the parenthesis.
        year_text, _, after_paren = datesplit[2].partition("(")
        year = year_text.strip()
        time_range = after_paren.partition(")")[0]

        parsedtimelist = doTheTime(time_range)
        timestart = parsedtimelist[0]
        timestop = parsedtimelist[1]

        dictlist.append({"id":uniqueid, "title":title, "dayofweek":weekday, "dayofmonth":monthday, "month":month, "year":year, "timestart":timestart, "timestop":timestop, "location":location, "description":description})
    return dictlist
#print "\n\n", sumdetsoup.prettify()
#print"==================================="
#dictlist.append({"id":uniqueid, "title":entry_title, "dayofweek":weekday, "dayofmonth":monthday, "month":month, "year":year, "timestart":timestart, "timestop":timestop, "location":location, "description":description})
#This was intended to figure out what objects are in each entry and what appears only sometimes
#The results are:
####Every event has:
#-------summary
#-------published_parsed
#-------links
#-------author
#-------summary_detail
#-------guidislink
#-------title_detail
#-------link
#-------authors
#-------title
#-------author_detail
#-------id
#-------published
####Some events have:
#-------tags
......@@ -14,6 +14,7 @@ def cleanup(str): #this function cleans up some of the useless html leftovers to
str = str.replace("Performing any medical procedures?: FALSE \n" , "")
str = str.replace("Parking Needed?: FALSE \n" , "")
str = str[0:len(str) - 1]
str = str.replace("’", "'")
return str
class eventException: #this class is just an exception for our use
......@@ -25,8 +26,26 @@ class eventException: #this class is just an exception for our use
def __str__(self):
return self.__message
def doTheTime(strin):
    """Convert a "start-stop" time range string into [start, stop].

    Spaces are removed and the string is split on "-". Each half is converted
    with convertTime(). When the start half carries no am/pm suffix it
    inherits one from the stop half: "am" if the stop half ends in "am"/"AM",
    otherwise "pm".

    Raises eventException when the stop half is missing (no "-" in the
    input) or convertTime() rejects it with ValueError. Errors from
    converting the start half propagate unchanged.
    """
    # Both spellings are recognised because convertTime() accepts either
    # lowercase or uppercase suffixes.
    am_suffixes = ("am", "AM")
    pm_suffixes = ("pm", "PM")

    pieces = strin.replace(" ", "").split("-")
    returnlist = ["", ""]

    try:
        returnlist[1] = convertTime(pieces[1])
    except (ValueError, IndexError):
        # IndexError: there was no "-" and therefore no stop time at all.
        raise eventException(str(pieces))

    start = pieces[0]
    if start[-2:] in am_suffixes or start[-2:] in pm_suffixes:
        returnlist[0] = convertTime(start)
    elif pieces[1][-2:] in am_suffixes:
        # e.g. "10-11:30am": the start borrows the stop's "am".
        returnlist[0] = convertTime(start + "am")
    else:
        returnlist[0] = convertTime(start + "pm")
    return returnlist
#convertTime accepts strings in the form of "H:MMam" / "HH:MMpm" or just "Ham" / "HHpm" (the suffix may also be "AM" / "PM")
def convertTime(stri): #this function is used for splicing the event times.
if (stri[-2:] == "pm"): #checks to see if the time presented is pm
if (stri[-2:] == "pm" or stri[-2:] == "PM"): #checks to see if the time presented is pm
if not ((stri[0] == "1") and (stri[1] == "2")): #if the time is pm, then the 12:00 hour is noon and shouldn't get 12 added to it
try: #this try block works with the exception handler to add 12 to any pm times
stri = stri.replace(stri[0:2], str(int(stri[0:2]) + 12), 1)
......@@ -44,7 +63,7 @@ def convertTime(stri): #this function is used for splicing the event times.
return (int(stri[0:2])) * 60
except:
return (int(stri[0])) * 60
elif (stri[-2:] == "am"): #checks if the time presented is am, and executes identical code from the pm block, just without adding 12
elif (stri[-2:] == "am" or stri[-2:] == "AM"): #checks if the time presented is am, and executes identical code from the pm block, just without adding 12
if ":" in stri:
try:
return (int(stri[0:2]) * 60) + int(stri[3:5])
......@@ -256,5 +275,4 @@ def load_data():
dictlist.append({"id":uniqueid, "error":error})
return dictlist
#everything in the house is fuzzy, stupid dogs were acting like pollinators, if that's how you even spell it
......@@ -2,10 +2,16 @@ arrow==0.4.2
beautifulsoup4==4.6.0
certifi==2017.11.5
chardet==3.0.4
click==6.7
feedparser==5.2.1
Flask==0.12.2
idna==2.6
itsdangerous==0.24
Jinja2==2.10
lxml==4.1.1
MarkupSafe==1.0
python-dateutil==2.6.1
requests==2.18.4
six==1.11.0
urllib3==1.22
flask
Werkzeug==0.14.1
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment