getconnectedscript.py 5.76 KB
Newer Older
1
# app imports
2
from parscript import cleanup, convertTime
3
4

# third party imports
5
import feedparser
6
7
8
9
from bs4 import BeautifulSoup
import requests

# DEV REMINDER: CHANGE THE LINES IN INITIALISATION ERROR MESSAGE (LINE 138)
10

11

12
13
14
def splitAndConvertTime(strin):
    """Split a "start - stop" time range and convert both ends.

    All spaces are removed and the text is split on "-". When the start
    time carries no am/pm marker (for example "10:00 - 11:30am"), it
    borrows the marker of the stop time before being converted.

    Args:
        strin: time range text such as "10:00am - 11:30am".

    Returns:
        A two-item list ``[start, stop]`` holding whatever ``convertTime``
        returns for each end of the range.

    Raises:
        IndexError: if ``strin`` contains no "-" separator.
        Anything ``convertTime`` raises is propagated unchanged.
    """
    meridiems = ("am", "AM", "pm", "PM")

    pieces = strin.replace(" ", "").split("-")
    start, stop = pieces[0], pieces[1]

    # The stop time is converted first so that a malformed stop time fails
    # before the start time is touched.
    converted_stop = convertTime(stop)

    if start[-2:] not in meridiems:
        # Borrow the stop time's marker. Both spellings are recognised:
        # previously only lowercase "am" matched, so a range ending in
        # "AM" had "pm" appended to its start time.
        start += "am" if stop[-2:] in ("am", "AM") else "pm"

    return [convertTime(start), converted_stop]
26

27

28
def _parse_day_and_time(text):
    """Parse one "Weekday, Month D, YYYY (time)" chunk of a multi-day event.

    Returns:
        A tuple ``(weekday, month, monthday, year, time)`` where ``time`` is
        the result of ``convertTime`` applied to the text after the first
        "(" with its final character removed.

    Raises:
        IndexError: if the chunk does not have the expected pieces.
        Anything ``convertTime`` raises is propagated unchanged.
    """
    pieces = text.split(", ")
    month_and_day = pieces[1].split(" ")
    weekday = pieces[0]
    month = month_and_day[0]
    monthday = month_and_day[1]
    year = pieces[2].split(" ")[0]
    # [:-1] drops the last character, expected to be the closing ")".
    clock = convertTime(text.split("(")[1][:-1])
    return weekday, month, monthday, year, clock


def _parse_single_day(when_text):
    """Parse the date line of an event that starts and ends on one day.

    Returns:
        A pair ``(fields, errors)``. ``fields`` maps the date/time keys of
        the event dict to their parsed values; ``errors`` lists the message
        of every exception met while parsing, in order.
    """
    fields = {}
    errors = []
    pieces = when_text.split(", ")

    # Date part: "Weekday", "Month D", "YYYY (...)".
    try:
        month_and_day = pieces[1].split(" ")
        fields["dayofweek"] = pieces[0]
        fields["dayofmonth"] = month_and_day[1]
        fields["month"] = month_and_day[0]
        fields["year"] = pieces[2].split(" ")[0]
    except Exception as exc:
        errors.append(str(exc))

    # Time part: skip the first six characters of the last piece
    # (presumably "YYYY (") and the final character, then let the helper
    # split and convert the range. Attempted even if the date part failed
    # so that the last error recorded is the same as before.
    try:
        start_stop = splitAndConvertTime(pieces[2][6:-1])
        fields["timestart"] = start_stop[0]
        fields["timestop"] = start_stop[1]
    except Exception as exc:
        # convertTime lives in another module; what it may raise is not
        # visible here, so the catch stays broad.
        errors.append(str(exc))

    return fields, errors


def _parse_multi_day(when_text):
    """Parse the date line of an event that ends on a later day.

    The line is split on " - " into a start chunk and an end chunk, each
    parsed by ``_parse_day_and_time``.

    Returns:
        A pair ``(fields, errors)`` shaped like ``_parse_single_day``'s,
        with the additional ``end*`` keys.
    """
    fields = {}
    errors = []
    halves = when_text.split(" - ")

    try:
        weekday, month, monthday, year, clock = _parse_day_and_time(halves[0])
        fields["dayofweek"] = weekday
        fields["dayofmonth"] = monthday
        fields["month"] = month
        fields["year"] = year
        fields["timestart"] = clock
    except Exception as exc:
        errors.append(str(exc))

    try:
        weekday, month, monthday, year, clock = _parse_day_and_time(halves[1])
        fields["enddayofweek"] = weekday
        fields["enddayofmonth"] = monthday
        fields["endmonth"] = month
        fields["endyear"] = year
        fields["timestop"] = clock
    except Exception as exc:
        errors.append(str(exc))

    return fields, errors


def load_getconn_data():
    """Fetch the GetConnected events RSS feed and parse it into dicts.

    The feed text is downloaded, passed through ``cleanup`` and handed to
    ``feedparser``. Each entry yields exactly one dict in the result:

    * a parsed event with the keys "multiday", "id", "title", "dayofweek",
      "dayofmonth", "month", "year", "timestart", "timestop", "location"
      and "description" (multi-day events add "enddayofweek",
      "enddayofmonth", "endmonth" and "endyear");
    * ``{"id", "error", "errorlocation"}`` (single-day) or
      ``{"id", "error"}`` (multi-day) when the date line cannot be parsed;
    * ``{"error": ...}`` when the basic attributes cannot be extracted.

    Returns:
        list of dict, one per feed entry, in feed order.

    Raises:
        Whatever ``requests.get`` raises on network failure.
    """
    # The run of spaces reproduces the message the original
    # backslash-continued literal produced, in case consumers match on it.
    init_error = ("issue in initialization of event." + " " * 28 +
                  "check lines 40-56 in getconnectedscript.py")

    feedtext = requests.get(
        "https://getconnected.gmu.edu/events/events.rss").text
    feed = feedparser.parse(cleanup(feedtext))

    dictlist = []

    for entry in feed.entries:
        # Attributes every entry is expected to have, plus the soup of the
        # summary HTML from which location, description and date are read.
        try:
            uniqueid = entry.id[-7:]
            title = entry.title
            sumdetsoup = BeautifulSoup(
                entry.summary_detail["value"].encode("utf-8"), "html.parser")
            location = [sumdetsoup.div.span.text]
            description = sumdetsoup.find_all("div")[1].text
            when_text = sumdetsoup.b.text
        except Exception:
            # Checked before the date line is touched: it may not exist
            # yet (or may belong to the previous entry) when this fails.
            dictlist.append({"error": init_error})
            continue

        # Exactly one "(" means a single "(start - stop)" range, i.e. the
        # event starts and ends on the same day.
        multiday = when_text.count("(") != 1
        if multiday:
            fields, errors = _parse_multi_day(when_text)
        else:
            fields, errors = _parse_single_day(when_text)

        if errors:
            # Report the most recent failure. The exception variable is
            # not available outside its except block on Python 3.
            failure = {"id": uniqueid, "error": errors[-1]}
            if not multiday:
                failure["errorlocation"] = ""
            dictlist.append(failure)
            continue

        event = {"multiday": multiday, "id": uniqueid, "title": title,
                 "location": location, "description": description}
        event.update(fields)
        dictlist.append(event)

    return dictlist
144

145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# Every event has:
# -------summary
# -------published_parsed
# -------links
# -------author
# -------summary_detail
# -------guidislink
# -------title_detail
# -------link
# -------authors
# -------title
# -------author_detail
# -------id
# -------published
# Some events have:
# -------tags