getconnectedscript.py 5.75 KB
Newer Older
1
# app imports
2
from parscript import cleanup, convertTime
3 4

# third party imports
5
import feedparser
6 7 8 9
from bs4 import BeautifulSoup
import requests

# DEV REMINDER: CHANGE THE LINES IN INITIALISATION ERROR MESSAGE (LINE 138)
10

11

12 13 14
def splitAndConvertTime(strin):
    """Split a "start-end" time range and convert both ends with convertTime.

    All spaces are removed from ``strin`` and the result is split on "-".
    When the start time has no am/pm suffix of its own (e.g. "9-11am") it
    borrows one: "am" if the end time ends in "am", otherwise "pm".

    Args:
        strin: text of a time range, e.g. "9:00am - 11:00am".

    Returns:
        A two-item list ``[start, end]`` holding whatever convertTime
        returns for each end of the range.

    Raises:
        IndexError: if ``strin`` contains no "-" separator.
        Any exception raised by convertTime propagates unchanged.
    """
    parts = strin.replace(" ", "").split("-")
    start, end = parts[0], parts[1]

    # The end time is converted first, as before, so that a malformed end
    # time is the error that surfaces when both ends are bad.
    converted_end = convertTime(end)

    # Suffixes are compared case-insensitively for BOTH ends. Previously
    # only the start accepted "AM"/"PM", so "9-11AM" wrongly produced a
    # "pm" start time.
    if start[-2:].lower() not in ("am", "pm"):
        start += "am" if end[-2:].lower() == "am" else "pm"

    return [convertTime(start), converted_end]
26

27

28
# Address of the GetConnected events RSS feed.
_GC_FEED_URL = "https://getconnected.gmu.edu/events/events.rss"


def _split_day_fields(text):
    """Return (weekday, month, monthday, year) taken from the start of text.

    ``text`` is split on ", ": the first piece is the weekday, the second
    holds "month day", and the first word of the third piece is the year.
    Raises IndexError when ``text`` does not have that many pieces.
    """
    pieces = text.split(", ")
    month_and_day = pieces[1].split(" ")
    year = pieces[2].split(" ")[0]
    return pieces[0], month_and_day[0], month_and_day[1], year


def load_gc_data(url=_GC_FEED_URL, timeout=30):
    """Download the events RSS feed and turn each entry into a dict.

    The feed text is fetched with requests, passed through cleanup() and
    parsed with feedparser. Each entry's summary HTML is then parsed with
    BeautifulSoup to pull out the location, description and date text.

    Args:
        url: address of the RSS feed; defaults to the GetConnected feed.
        timeout: seconds requests may wait on the server before giving up,
            so a stalled connection cannot hang the caller forever.

    Returns:
        A list with one dict per feed entry, in feed order:

        * a parsed event has the keys "multiday", "id", "title",
          "dayofweek", "dayofmonth", "month", "year", "timestart",
          "timestop", "location" and "description"; multi-day events
          (``"multiday": True``) additionally have "enddayofweek",
          "enddayofmonth", "endmonth" and "endyear";
        * an entry whose date/time text could not be parsed has "id" and
          "error" (single-day entries also carry an empty
          "errorlocation");
        * an entry whose basic attributes could not be read has only
          "error".

    Raises:
        Whatever requests.get raises on a network failure or timeout.
    """
    feedtext = cleanup(requests.get(url, timeout=timeout).text)

    # this calls the RSS feed parser from !feedparser
    feed = feedparser.parse(feedtext)

    dictlist = []

    for entry in feed.entries:
        # this collects the attributes which are always there
        # also creates the Soup and sets up for the rest of the parsing
        try:
            uniqueid = entry.id[-7:]
            title = entry.title
            sumdetsoup = BeautifulSoup(
                entry.summary_detail["value"].encode("utf-8"), "html.parser")
            location = [sumdetsoup.div.span.text]
            description = sumdetsoup.find_all("div")[1].text
            when = sumdetsoup.b.text
        except Exception as exc:
            # Parsing is best-effort per entry: record the failure and move
            # on. Bailing out here also guarantees the code below never
            # sees values left over from a previous entry.
            dictlist.append({
                "error": "issue in initialization of event: " + str(exc)})
            continue

        errors = []

        # this handles events which start and end on the same day
        if when.count("(") == 1:
            # parses out date info
            try:
                weekday, month, monthday, year = _split_day_fields(when)
            except Exception as exc:
                errors.append(str(exc))

            # uses helper function to get the start and end time
            try:
                # [6:-1] drops the first six characters and the last one of
                # the third ", "-separated piece -- presumably "YYYY (" and
                # the closing ")" around the time range.
                timestart, timestop = splitAndConvertTime(
                    when.split(", ")[2][6:-1])
            except Exception as exc:
                errors.append(str(exc))

            # appends the dictlist if no errors were found
            if not errors:
                dictlist.append({
                    "multiday": False, "id": uniqueid,
                    "title": title, "dayofweek": weekday,
                    "dayofmonth": monthday, "month": month,
                    "year": year, "timestart": timestart,
                    "timestop": timestop, "location": location,
                    "description": description
                })
            else:
                # The messages are collected in `errors` because the
                # exception variable itself is not available outside its
                # except clause on Python 3.
                dictlist.append({"id": uniqueid,
                                 "error": "; ".join(errors),
                                 "errorlocation": ""})

        # this handles events which start on one day and end on another
        else:
            halves = when.split(" - ")

            # getting the information for the start day/time
            try:
                weekday, month, monthday, year = _split_day_fields(halves[0])
                timestart = convertTime(halves[0].split("(")[1][:-1])
            except Exception as exc:
                errors.append(str(exc))

            # getting the information for the end day/time
            try:
                endweekday, endmonth, endmonthday, endyear = \
                    _split_day_fields(halves[1])
                timestop = convertTime(halves[1].split("(")[1][:-1])
            except Exception as exc:
                errors.append(str(exc))

            # append the dictlist if no errors were found
            if not errors:
                dictlist.append({
                    "multiday": True, "id": uniqueid,
                    "title": title, "dayofweek": weekday,
                    "dayofmonth": monthday, "month": month,
                    "year": year, "timestart": timestart,
                    "timestop": timestop, "location": location,
                    "description": description,
                    "enddayofweek": endweekday,
                    "enddayofmonth": endmonthday,
                    "endmonth": endmonth, "endyear": endyear
                })
            else:
                dictlist.append({"id": uniqueid,
                                 "error": "; ".join(errors)})

    return dictlist
144

145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
# Every event has:
# -------summary
# -------published_parsed
# -------links
# -------author
# -------summary_detail
# -------guidislink
# -------title_detail
# -------link
# -------authors
# -------title
# -------author_detail
# -------id
# -------published
# Some events have:
# -------tags