Commit 2ed1ebd3 authored by Landon DeCoito
Browse files

Merge branch '12-getconnectedfixer' into 'master'

Resolve "Fix the Get Connected Parse because you shouldn't have pushed it"

Closes #12

See merge request !29
parents b40071ef e1cd9773
# flask imports
from flask import Flask from flask import Flask
from flask import Response from flask import Response
from flask import render_template
# app imports
from parscript import load_data from parscript import load_data
from getconnectedscript import load_getconn_data from getconnectedscript import load_getconn_data
# python imports
import json import json
app = Flask(__name__) app = Flask(__name__)
@app.route("/")
def display_default():
    """Serve the landing page rendered from the welcomepage.html template."""
    return render_template('welcomepage.html')
@app.route("/api/25live")
def display_data():
    """Return the result of load_data() as a UTF-8 encoded JSON response."""
    # ensure_ascii=False keeps non-ASCII characters unescaped in the JSON
    # text; the text is then encoded to UTF-8 bytes for the response body.
    body = json.dumps(load_data(), ensure_ascii=False)
    response = Response(body.encode('utf-8'))
    response.headers['Content-Type'] = 'application/json; charset=utf-8'
    return response
@app.route("/api/getconnected")
def display_GC_data():
    """Return the result of load_getconn_data() as a UTF-8 JSON response."""
    # ensure_ascii=False keeps non-ASCII characters unescaped in the JSON
    # text; the text is then encoded to UTF-8 bytes for the response body.
    body = json.dumps(load_getconn_data(), ensure_ascii=False)
    response = Response(body.encode('utf-8'))
    response.headers['Content-Type'] = 'application/json; charset=utf-8'
    return response
This diff is collapsed.
import requests # app imports
from bs4 import BeautifulSoup from parscript import cleanup, convertTime
from parscript import cleanup, doTheTime
# third party imports
import feedparser import feedparser
from bs4 import BeautifulSoup
import requests
# TODO: ADD "getconnected" ATTRIBUTE TO LOAD_DATA DICTLIST # DEV REMINDER: CHANGE THE LINES IN INITIALISATION ERROR MESSAGE (LINE 138)
def splitAndConvertTime(strin):
    """Split a "start-end" time range and convert both ends with convertTime.

    All spaces are removed from ``strin`` and the result is split on "-".
    When the start time carries no am/pm suffix of its own it borrows the
    suffix of the end time ("am" if the end time ends in am, otherwise
    "pm"), so a range written like "9 - 11am" yields a start of "9am".

    Args:
        strin: the time range text, e.g. "7:00pm - 9:00pm".

    Returns:
        A two-element list ``[start, stop]`` holding whatever convertTime
        returns for the start and end time respectively.

    Raises:
        IndexError: if ``strin`` contains no "-" separator.
        Whatever convertTime raises for input it cannot handle.
    """
    pieces = strin.replace(" ", "").split("-")
    start = pieces[0]
    end = pieces[1]

    # The end time is converted first, matching the historical call order.
    stop_converted = convertTime(end)

    # Suffix checks are case-insensitive for BOTH ends. Previously the end
    # suffix was only compared against lowercase "am", so "9 - 11AM" wrongly
    # turned the start time into "9pm".
    if start[-2:].lower() not in ("am", "pm"):
        if end[-2:].lower() == "am":
            start = start + "am"
        else:
            start = start + "pm"

    return [convertTime(start), stop_converted]
# woah = cleanup(requests.get("https://getconnected.gmu.edu/events/events.rss").text)
# soup = BeautifulSoup(woah, "lxml")
# print soup.prettify
def load_getconn_data():
    """Fetch the GetConnected events RSS feed and parse it into a list of dicts.

    The feed text is downloaded with requests, passed through cleanup() and
    parsed with feedparser. Every feed entry contributes exactly one dict to
    the returned list:

    * single-day event (its date text contains exactly one "("):
      "multiday" False plus id, title, dayofweek, dayofmonth, month, year,
      timestart, timestop, location (a one-element list) and description;
    * multi-day event: the same keys with "multiday" True, plus
      enddayofweek, enddayofmonth, endmonth and endyear;
    * entry whose date/time text could not be parsed: "id" and "error"
      (single-day failures also carry an empty "errorlocation");
    * entry whose basic attributes could not be read: "error" only.

    Per-entry parsing problems are reported in that entry's dict instead of
    being raised; failures while downloading the feed still propagate.

    Returns:
        list of dict, one per feed entry, in feed order.
    """
    # NOTE(review): no timeout is passed, so a stalled server blocks this
    # call indefinitely -- consider requests.get(..., timeout=...).
    feedtext = requests.get(
        "https://getconnected.gmu.edu/events/events.rss").text
    feedtext = cleanup(feedtext)

    # this calls the RSS feed parser from !feedparser
    feed = feedparser.parse(feedtext)

    dictlist = []
    for entry in feed.entries:
        # messages of every exception hit while parsing this entry
        error = []

        # this collects the attributes which are always there
        # also creates the Soup and sets up for the rest of the parsing
        try:
            uniqueid = entry.id[-7:]
            title = entry.title
            sumdetsoup = BeautifulSoup(entry.summary_detail["value"]
                                       .encode("utf-8"), "html.parser")
            location = [sumdetsoup.div.span.text]
            description = sumdetsoup.find_all("div")[1].text
            date_text = sumdetsoup.b.text
        except Exception as exc:
            error.append(str(exc))

        # If initialisation failed there is no point in continuing. This must
        # be checked BEFORE date_text is touched: it may be unbound (first
        # entry) or left over from the previous entry.
        if error:
            dictlist.append({"error": "issue in initialization of event. "
                                      "check lines 40-56 in "
                                      "getconnectedscript.py"})
            continue

        if date_text.count("(") == 1:
            # this handles events which start and end on the same day
            # parses out date info
            try:
                datesplit = date_text.split(", ")
                weekday = datesplit[0]
                temp = datesplit[1].split(" ")
                monthday = temp[1]
                month = temp[0]
                # NOTE(review): a 5-character slice looks like it keeps the
                # space after a 4-digit year -- confirm against the feed.
                year = datesplit[2][:5]
            except Exception as exc:
                error.append(str(exc))

            # uses helper function to get the start and end time
            try:
                parsedtimelist = splitAndConvertTime(datesplit[2][6:-1])
                timestart = parsedtimelist[0]
                timestop = parsedtimelist[1]
            except Exception as exc:
                error.append(str(exc))

            # appends the dictlist if no errors were found
            if not error:
                dictlist.append({
                    "multiday": False, "id": uniqueid,
                    "title": title, "dayofweek": weekday,
                    "dayofmonth": monthday, "month": month,
                    "year": year, "timestart": timestart,
                    "timestop": timestop, "location": location,
                    "description": description
                })
            else:
                # Report the collected messages; the exception variable is
                # not available outside its except clause on Python 3.
                dictlist.append({"id": uniqueid,
                                 "error": "; ".join(error),
                                 "errorlocation": ""})
        else:
            # this handles events which start on one day and end on another
            # getting the information for the start day/time
            try:
                datesplit = date_text.split(" - ")
                tempsplits = datesplit[0].split(", ")
                weekday = tempsplits[0]
                month = tempsplits[1].split(" ")[0]
                monthday = tempsplits[1].split(" ")[1]
                year = tempsplits[2].split(" ")[0]
                timestart = datesplit[0].split("(")[1][:-1]
                timestart = convertTime(timestart)
            except Exception as exc:
                error.append(str(exc))

            # getting the information for the end day/time
            try:
                tempsplits = datesplit[1].split(", ")
                endweekday = tempsplits[0]
                endmonth = tempsplits[1].split(" ")[0]
                endmonthday = tempsplits[1].split(" ")[1]
                endyear = tempsplits[2].split(" ")[0]
                timestop = datesplit[1].split("(")[1][:-1]
                timestop = convertTime(timestop)
            except Exception as exc:
                error.append(str(exc))

            # append the dictlist if no errors were found
            if not error:
                dictlist.append({
                    "multiday": True, "id": uniqueid,
                    "title": title, "dayofweek": weekday,
                    "dayofmonth": monthday, "month": month,
                    "year": year, "timestart": timestart,
                    "timestop": timestop, "location": location,
                    "description": description,
                    "enddayofweek": endweekday,
                    "enddayofmonth": endmonthday,
                    "endmonth": endmonth, "endyear": endyear
                })
            else:
                dictlist.append({"id": uniqueid,
                                 "error": "; ".join(error)})
    return dictlist
# Every event has:
#print "\n\n", sumdetsoup.prettify() # -------summary
#print"===================================" # -------published_parsed
# -------links
#dictlist.append({"id":uniqueid, "title":entry_title, "dayofweek":weekday, "dayofmonth":monthday, "month":month, "year":year, "timestart":timestart, "timestop":timestop, "location":location, "description":description}) # -------author
#This was intended to figure out what objects are in each entry and what appears only sometimes # -------summary
#The results are: # -------guidislink
####Every event has: # -------title_detail
#-------summary # -------link
#-------published_parsed # -------authors
#-------links # -------title
#-------author # -------author_detail
#-------summary # -------id
#-------guidislink # -------published
#-------title_detail # Some events have:
#-------link # -------tags
#-------authors
#-------title
#-------author_detail
#-------id
#-------published
####Some events have:
#-------tags
This diff is collapsed.
<!doctype html>
<!-- Landing page for the MasonToday API; presumably the welcomepage.html
     template rendered by the "/" route (confirm against the app). -->
<html lang="en">
<head>
<meta charset="utf-8">
<title>MasonToday API</title>
</head>
<body>
<h1>
MasonToday API
</h1>
<p>
We have two APIs available, pulled from two different sources of GMU events:
</p>
<!-- The list sits outside the paragraph: a <p> may not contain a <ul>. -->
<ul>
<li>Get Connected: <a href="/api/getconnected">masontoday.gmu.io/api/getconnected</a></li>
<li>25 Live: <a href="/api/25live">masontoday.gmu.io/api/25live</a></li>
</ul>
<p>
Welcome to the API for MasonToday, courtesy of Student Run Computing and Technology! If you'd like, you can find the source code at the GMU Gitlab, <a href="https://git.gmu.edu/srct/mason-today-web">https://git.gmu.edu/srct/mason-today-web</a>
</p>
</body>
</html>
#!/bin/sh
# Start the Flask development server for the mason-today application.

# Module Flask should load the application from.
export FLASK_APP=mason-today/__init__.py
# Run Flask with debug mode switched on.
export FLASK_DEBUG=1

flask run
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment