getconnectedscript.py 2.67 KB
Newer Older
1 2
import requests
from bs4 import BeautifulSoup
Landon DeCoito's avatar
Landon DeCoito committed
3
from parscript import cleanup, doTheTime
4 5
import feedparser

6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
# TODO: ADD "getconnected" ATTRIBUTE TO LOAD_DATA DICTLIST


# woah = cleanup(requests.get("https://getconnected.gmu.edu/events/events.rss").text)
# soup = BeautifulSoup(woah, "lxml")
# print soup.prettify
def load_getconn_data():
    """Fetch the GetConnected events RSS feed and return its events as dicts.

    The feed text is downloaded, passed through ``cleanup`` and parsed with
    feedparser. Each entry's summary HTML is then scraped for the event's
    location, description and date/time text.

    Only entries whose date/time text contains exactly one "(" are
    returned; every other entry is skipped.

    Returns:
        A list of dicts, one per event, with the keys "id", "title",
        "dayofweek", "dayofmonth", "month", "year", "timestart",
        "timestop", "location" (a one-element list) and "description".

    Raises:
        requests.exceptions.RequestException: if the feed cannot be
            downloaded, including when the server stops responding.
    """
    # Without a timeout requests waits indefinitely, so a stalled server
    # would hang the caller forever.
    response = requests.get(
        "https://getconnected.gmu.edu/events/events.rss", timeout=30
    )
    # feedparser does the RSS parsing on the cleaned-up feed text.
    feed = feedparser.parse(cleanup(response.text))

    dictlist = []
    for entry in feed.entries:
        # The last seven characters of the entry id are used as the event id.
        uniqueid = entry.id[-7:]
        title = entry.title

        # Non-ASCII characters in the summary are replaced with "?" before
        # the HTML is handed to BeautifulSoup.
        sumdetsoup = BeautifulSoup(
            entry.summary_detail["value"].encode("ascii", "replace"),
            "html.parser",
        )

        # Location: the span inside the first div. Description: the second
        # div. Date/time: the first bold tag.
        location = [sumdetsoup.div.span.text]
        description = sumdetsoup.find_all("div")[1].text
        when = sumdetsoup.b.text

        # Only date/time text with exactly one parenthesised part is handled.
        if when.count("(") != 1:
            continue

        datesplit = when.split(", ")
        weekday = datesplit[0]
        monthparts = datesplit[1].split(" ")
        month = monthparts[0]
        monthday = monthparts[1]

        # The time range is sliced from index 6 and ends by dropping the
        # closing ")", so index 5 is the "(" and index 4 is the separator in
        # front of it. Strip that separator off the five-character year slice.
        year = datesplit[2][:5].strip()
        parsedtimelist = doTheTime(datesplit[2][6:-1])
        timestart = parsedtimelist[0]
        timestop = parsedtimelist[1]

        dictlist.append({
            "id": uniqueid,
            "title": title,
            "dayofweek": weekday,
            "dayofmonth": monthday,
            "month": month,
            "year": year,
            "timestart": timestart,
            "timestop": timestop,
            "location": location,
            "description": description,
        })
    return dictlist

    
    #print "\n\n", sumdetsoup.prettify()
    #print"==================================="
60

Landon DeCoito's avatar
Landon DeCoito committed
61
#dictlist.append({"id":uniqueid, "title":entry_title, "dayofweek":weekday, "dayofmonth":monthday, "month":month, "year":year, "timestart":timestart, "timestop":timestop, "location":location, "description":description})
62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
# Notes on which attributes appear on every feed entry and which appear only on some.
# The results are:
####Every event has:
#-------summary
#-------published_parsed
#-------links
#-------author
#-------summary_detail
#-------guidislink
#-------title_detail
#-------link
#-------authors
#-------title
#-------author_detail
#-------id
#-------published
####Some events have:
#-------tags