getconnectedscript.py 5.75 KB
Newer Older
1
# app imports
2
from parscript import cleanup, convertTime
3
4

# third party imports
5
import feedparser
6
7
8
9
from bs4 import BeautifulSoup
import requests

# DEV REMINDER: CHANGE THE LINES IN INITIALISATION ERROR MESSAGE (LINE 138)
10

11

12
13
14
def splitAndConvertTime(strin):
    """Split a "start-end" time range and convert both ends with convertTime.

    All spaces are removed, the text is split on "-", and each half is
    passed through convertTime. When the start half does not end in am/pm
    (any letter case), it borrows the meridiem of the end half: "am" if the
    end half ends in am, otherwise "pm".

    Args:
        strin: time-range text, e.g. "9:00am - 11:00am" or "9 - 11am".

    Returns:
        A two-item list: [converted start time, converted end time].

    Raises:
        IndexError: if strin contains no "-" separator.
    """
    pieces = strin.replace(" ", "").split("-")
    start, end = pieces[0], pieces[1]

    # The end time is converted first, as before, so a malformed end time
    # is reported ahead of any problem with the start time.
    converted_end = convertTime(end)

    if start[-2:].lower() in ("am", "pm"):
        converted_start = convertTime(start)
    else:
        # Compare case-insensitively: previously an upper-case "AM" on the
        # end time fell through to the else-branch and the start time was
        # wrongly tagged "pm".
        meridiem = "am" if end[-2:].lower() == "am" else "pm"
        converted_start = convertTime(start + meridiem)

    return [converted_start, converted_end]
26

27

28
def _parse_day_and_time(text):
    """Parse one "Weekday, Month D, YYYY (time)" half of a multi-day date.

    NOTE(review): the expected layout is inferred from the splitting below;
    confirm against the live feed.

    Returns:
        A tuple (weekday, monthday, month, year, converted_time).
    """
    pieces = text.split(", ")
    month_and_day = pieces[1].split(" ")
    year = pieces[2].split(" ")[0]
    # The time sits between "(" and the final character of the text.
    clock = convertTime(text.split("(")[1][:-1])
    return pieces[0], month_and_day[1], month_and_day[0], year, clock


def _parse_single_day(when):
    """Parse the date text of an event that starts and ends on the same day.

    NOTE(review): assumes text shaped like
    "Weekday, Month D, YYYY (start - end)" -- inferred from the slicing
    below; confirm against the live feed.

    Returns:
        A dict with the keys dayofweek, dayofmonth, month, year, timestart
        and timestop.
    """
    datesplit = when.split(", ")
    month_and_day = datesplit[1].split(" ")
    # The first five characters hold the year plus the separator before
    # "("; strip so the value matches the multi-day branch (no trailing
    # space).
    year = datesplit[2][:5].strip()
    # Everything after "YYYY (" up to the closing ")" is the time range.
    timestart, timestop = splitAndConvertTime(datesplit[2][6:-1])
    return {
        "dayofweek": datesplit[0],
        "dayofmonth": month_and_day[1],
        "month": month_and_day[0],
        "year": year,
        "timestart": timestart,
        "timestop": timestop,
    }


def _parse_multi_day(when):
    """Parse the date text of an event that ends on a later day.

    The text is split on " - " into a start half and an end half, each
    parsed by _parse_day_and_time.

    Returns:
        A dict with the single-day keys plus enddayofweek, enddayofmonth,
        endmonth and endyear.
    """
    halves = when.split(" - ")
    weekday, monthday, month, year, timestart = _parse_day_and_time(halves[0])
    (endweekday, endmonthday, endmonth,
     endyear, timestop) = _parse_day_and_time(halves[1])
    return {
        "dayofweek": weekday,
        "dayofmonth": monthday,
        "month": month,
        "year": year,
        "timestart": timestart,
        "timestop": timestop,
        "enddayofweek": endweekday,
        "enddayofmonth": endmonthday,
        "endmonth": endmonth,
        "endyear": endyear,
    }


def load_getconn_data():
    """Download the GetConnected events RSS feed and parse every entry.

    The feed text is fetched with requests, passed through cleanup, and
    parsed with feedparser. Each entry's summary HTML is then read with
    BeautifulSoup to extract location, description and date text.

    Returns:
        A list with one dict per feed entry:
        - a parsed event ("multiday", "id", "title", "location",
          "description" plus the date/time keys; multi-day events also
          carry the "end..." keys), or
        - {"id", "error"} (plus "errorlocation" for single-day events) when
          the date text could not be parsed, or
        - {"error"} when the entry's basic attributes could not be read.
    """
    feedtext = requests.get(
            "https://getconnected.gmu.edu/events/events.rss").text
    feedtext = cleanup(feedtext)

    # this calls the RSS feed parser from !feedparser
    feed = feedparser.parse(feedtext)

    dictlist = []

    for entry in feed.entries:
        # Collect the attributes which are always there and build the soup
        # used for the rest of the parsing. The catch is deliberately broad:
        # missing tags surface as AttributeError/IndexError/KeyError/
        # TypeError and one bad entry must not abort the whole feed.
        try:
            uniqueid = entry.id[-7:]
            title = entry.title
            sumdetsoup = BeautifulSoup(entry.summary_detail["value"]
                                       .encode("utf-8"), "html.parser")
            location = [sumdetsoup.div.span.text]
            description = sumdetsoup.find_all("div")[1].text
            when = sumdetsoup.b.text
        except Exception as exc:
            # Report and move on; nothing below can run without these
            # values (previously this path hit unbound or stale variables).
            dictlist.append({"error": "issue in initialization of event: "
                                      + str(exc)})
            continue

        # Exactly one "(" means a single parenthesised time range, i.e. the
        # event starts and ends on the same day.
        multiday = when.count("(") != 1

        try:
            if multiday:
                fields = _parse_multi_day(when)
            else:
                fields = _parse_single_day(when)
        except Exception as exc:
            # Build the message inside the handler: the exception name is
            # unbound once the except block ends on Python 3.
            failure = {"id": uniqueid, "error": str(exc)}
            if not multiday:
                failure["errorlocation"] = ""
            dictlist.append(failure)
            continue

        event = {
            "multiday": multiday,
            "id": uniqueid,
            "title": title,
            "location": location,
            "description": description,
        }
        event.update(fields)
        dictlist.append(event)

    return dictlist
144

145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# Every event has:
# -------summary
# -------published_parsed
# -------links
# -------author
# -------summary_detail
# -------guidislink
# -------title_detail
# -------link
# -------authors
# -------title
# -------author_detail
# -------id
# -------published
# Some events have:
# -------tags