getconnectedscript.py 2.67 KB
Newer Older
1
2
import requests
from bs4 import BeautifulSoup
Landon DeCoito's avatar
Landon DeCoito committed
3
from parscript import cleanup, doTheTime
4
5
import feedparser

6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# TODO: ADD "getconnected" ATTRIBUTE TO LOAD_DATA DICTLIST


# woah = cleanup(requests.get("https://getconnected.gmu.edu/events/events.rss").text)
# soup = BeautifulSoup(woah, "lxml")
# print soup.prettify
def load_getconn_data():
    """Fetch the GetConnected events RSS feed and return its events as dicts.

    The feed text is downloaded, passed through ``cleanup`` and parsed with
    ``feedparser``. Each entry's HTML summary is then parsed with
    BeautifulSoup to pull out the location, the description and the
    date/time line.

    Only entries whose date/time line contains exactly one "(" are
    returned; every other entry is skipped.

    Returns:
        A list of dicts, one per accepted entry, with the keys "id",
        "title", "dayofweek", "dayofmonth", "month", "year", "timestart",
        "timestop", "location" (a one-element list) and "description".

    Raises:
        requests.exceptions.RequestException: if the feed cannot be
            fetched, including when the server does not answer within the
            timeout.
    """
    # Without an explicit timeout, requests waits forever on a server that
    # accepts the connection but never answers.
    response = requests.get(
        "https://getconnected.gmu.edu/events/events.rss", timeout=30
    )
    feed = feedparser.parse(cleanup(response.text))

    dictlist = []

    for entry in feed.entries:
        # The last seven characters of the entry id are used as the event id.
        uniqueid = entry.id[-7:]
        title = entry.title

        # The summary is an HTML fragment; non-ASCII characters are replaced
        # with "?" before it is handed to the HTML parser.
        sumdetsoup = BeautifulSoup(
            entry.summary_detail["value"].encode("ascii", "replace"),
            "html.parser",
        )

        location = [sumdetsoup.div.span.text]
        description = sumdetsoup.find_all("div")[1].text
        when = sumdetsoup.b.text

        # Entries without exactly one parenthesised part are not handled.
        if when.count("(") != 1:
            continue

        datesplit = when.split(", ")
        weekday = datesplit[0]
        monthparts = datesplit[1].split(" ")
        month = monthparts[0]
        monthday = monthparts[1]

        # NOTE(review): the slice offsets imply the third piece looks like
        # "YYYY (<time range>)" - confirm against the live feed. The first
        # five characters therefore include the space before "(", which is
        # stripped so the year carries no trailing whitespace.
        year = datesplit[2][:5].strip()

        # Drop the opening "(" (offset 5) and the closing ")" before handing
        # the time range to doTheTime.
        parsedtimelist = doTheTime(datesplit[2][6:-1])
        timestart = parsedtimelist[0]
        timestop = parsedtimelist[1]

        dictlist.append({
            "id": uniqueid,
            "title": title,
            "dayofweek": weekday,
            "dayofmonth": monthday,
            "month": month,
            "year": year,
            "timestart": timestart,
            "timestop": timestop,
            "location": location,
            "description": description,
        })

    return dictlist

    
    #print "\n\n", sumdetsoup.prettify()
    #print"==================================="
60

Landon DeCoito's avatar
Landon DeCoito committed
61
#dictlist.append({"id":uniqueid, "title":entry_title, "dayofweek":weekday, "dayofmonth":monthday, "month":month, "year":year, "timestart":timestart, "timestop":timestop, "location":location, "description":description})
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#This was intended to figure out which attributes every entry has and which appear only on some entries
#The results are:
####Every event has:
#-------summary
#-------published_parsed
#-------links
#-------author
#-------summary_detail
#-------guidislink
#-------title_detail
#-------link
#-------authors
#-------title
#-------author_detail
#-------id
#-------published
####Some events have:
#-------tags