getconnectedscript.py 3.67 KB
Newer Older
1
2
import requests
from bs4 import BeautifulSoup
Landon DeCoito's avatar
Landon DeCoito committed
3
from parscript import cleanup, doTheTime
4
5
6
7
8
9
10
11
import feedparser

#TODO: ADD "getconnected" ATTRIBUTE TO LOAD_DATA DICTLIST


#woah = cleanup(requests.get("https://getconnected.gmu.edu/events/events.rss").text)
#soup = BeautifulSoup(woah, "lxml")
#print soup.prettify
Landon DeCoito's avatar
Landon DeCoito committed
12
13
14
# RSS feed that lists the Get Connected events.
FEED_URL = "https://getconnected.gmu.edu/events/events.rss"
# Seconds to wait for the server. Without a timeout requests may block forever
# on a stalled connection.
FEED_TIMEOUT = 30

# Download the raw feed text.
feedtext = requests.get(FEED_URL, timeout=FEED_TIMEOUT).text
# cleanup() is the project helper imported from parscript.
feedtext = cleanup(feedtext)
# Swap typographic apostrophes for plain ones (presumably so they are not
# turned into "?" by the ASCII-only encoding applied to each summary later).
feedtext = feedtext.replace("’", "'")

# Hand the cleaned text to feedparser; the events are then read from feed.entries.
feed = feedparser.parse(feedtext)
17
18
19
20
21
22

#print feed, "\n\n\n"
#ctr = 0
def _split_event_date(when):
	"""Split the date/time text of an event into its components.

	The slicing this script has always relied on implies text shaped like
	"Weekday, Month D, YYYY (time range)" (illustrative; confirm against the
	live feed).

	Returns a (weekday, month, monthday, year, timerange) tuple of strings,
	or None when the text does not have that shape.
	"""
	# maxsplit=2 keeps any later ", " inside the trailing "YYYY (...)" chunk.
	pieces = when.split(", ", 2)
	if len(pieces) != 3:
		return None
	weekday, monthandday, remainder = pieces

	monthparts = monthandday.split(" ")
	if len(monthparts) < 2:
		return None

	# Everything before "(" is the year, everything after it is the time range.
	# strip() removes the space before "(" that the old fixed-width slice
	# ([:5]) left attached to the year.
	yeartext, _, timetext = remainder.partition("(")
	timerange = timetext.rsplit(")", 1)[0]
	return weekday, monthparts[0], monthparts[1], yeartext.strip(), timerange


def _parse_entry(entry):
	"""Turn one feedparser entry into an event dict, or None to skip it.

	An entry is skipped when its summary HTML lacks the tags read below, when
	its date text does not contain exactly one "(", or when the date text
	cannot be split into weekday / month / day / year / time range.
	"""
	# Non-ASCII characters are replaced with "?" before the HTML is parsed.
	summaryhtml = entry.summary_detail["value"].encode("ascii", "replace")
	soup = BeautifulSoup(summaryhtml, "html.parser")

	# Location: <span> inside the first <div>. Description: <p> inside the
	# second <div>. Date/time: the first <b>. BeautifulSoup yields None for a
	# missing child tag, so check before touching .text.
	divs = soup.find_all("div")
	locationtag = divs[0].span if divs else None
	descriptiontag = divs[1].p if len(divs) > 1 else None
	whentag = soup.b
	if locationtag is None or descriptiontag is None or whentag is None:
		return None

	when = whentag.text
	# Only date strings with exactly one "(" are handled, as before.
	if when.count("(") != 1:
		return None

	dateparts = _split_event_date(when)
	if dateparts is None:
		return None
	weekday, month, monthday, year, timerange = dateparts

	# doTheTime() is the project helper from parscript; its first two items
	# are used as the start and stop times.
	parsedtimes = doTheTime(timerange)

	return {
		# Last seven characters of the feed's entry id.
		"id": entry.id[-7:],
		"title": entry.title,
		"dayofweek": weekday,
		"dayofmonth": monthday,
		"month": month,
		"year": year,
		"timestart": parsedtimes[0],
		"timestop": parsedtimes[1],
		# Kept as a one-item list, exactly as this script has always emitted it.
		"location": [locationtag.text],
		"description": descriptiontag.text,
	}


# One dict per event that could be parsed from the feed.
dictlist = []
for entry in feed.entries:
	event = _parse_entry(entry)
	if event is not None:
		dictlist.append(event)

# Parenthesised form prints the same text on Python 2 and also runs on Python 3.
print(dictlist)
103

Landon DeCoito's avatar
Landon DeCoito committed
104
#dictlist.append({"id":uniqueid, "title":entry_title, "dayofweek":weekday, "dayofmonth":monthday, "month":month, "year":year, "timestart":timestart, "timestop":timestop, "location":location, "description":description})
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#The disabled block below was used to work out which keys every feed entry has and which appear only on some entries.
#The results are:
####Every event has:
#-------summary_detail
#-------published_parsed
#-------links
#-------author
#-------summary
#-------guidislink
#-------title_detail
#-------link
#-------authors
#-------title
#-------author_detail
#-------id
#-------published
####Some events have:
#-------tags


# Disabled exploration snippet: it is an unused string literal, so it never runs.
# NOTE(review): `everyone` is appended to without being initialised anywhere in
# this snippet, and the inner loop removes items from `everyone` while iterating
# over it -- both need fixing before this is re-enabled. It also uses Python 2
# print statements.
'''for key in feed.entries[0].keys():
	everyone.append(key)
some = []

for entry in feed.entries:
	#print "----------------------------------"
	for key in entry.keys():
		if not key in everyone:
			some.append(key)
		for key in everyone:
			if not (key in entry.keys()):
				everyone.remove(key)
				some.append(key)
	#print"-----------------------------------"
	#ctr += 1
print "Everyone: \n", everyone
print "Some: \n", some'''