Commit bb610e31 authored by Landon DeCoito

completed 'cleanup' function for squashing The Character and slightly revised README

parent 3bb91ef1
......@@ -3,3 +3,5 @@ Mason Today Web is a shitty version of the Mason Today project. It will be the c
Please refer to the requirements.txt for information on what packages to install to properly run the program.
We make soup. A lot of it.
This is currently licensed under the "wut" license. Plznosteal
print "and we begin"
from bs4 import BeautifulSoup
import requests
class Event:
def __init__(self, entryTag): #where var entrySoup is a specific event tag
def __init__(self, entryTag): #where var entryTag is a specific event tag
self.__name = entryTag.find('title').string
self.__description = entryTag.find('content').string
self.__time = "timeplaceholder"
......@@ -11,35 +14,31 @@ class Event:
def __str__(self):
return self.__name + ": " + self.__description + "\n\n"
def cleanup(string):
ctr = 0
while ctr < len(string):
def cleanup(str):
    """Return *str* with selected HTML entities and ``<br/>`` tags replaced.

    The substitutions are applied one after another, in the order listed
    below, each one operating on the result of the previous one:

    * ``&amp;``   -> ``&``
    * ``&nbsp;``  -> a single space
    * ``&ndash;`` -> ``-``
    * ``&lt;``    -> ``<``
    * ``&gt;``    -> ``>``
    * ``<br/>``   -> a newline

    NOTE(review): because ``&amp;`` is handled first, a doubly-escaped entity
    such as ``&amp;nbsp;`` collapses all the way to a space in a single call,
    and ``&lt;br/&gt;`` ends up as a newline. This ordering is preserved
    as-is; confirm that it is what the feed handling relies on before
    reordering.

    The parameter keeps its original name (it shadows the builtin only inside
    this function) so existing callers are unaffected.
    """
    # Order matters: later replacements see the output of earlier ones.
    replacements = (
        ("&amp;", "&"),
        ("&nbsp;", " "),
        ("&ndash;", "-"),
        ("&lt;", "<"),
        ("&gt;", ">"),
        ("<br/>", "\n"),
    )
    text = str
    for old, new in replacements:
        text = text.replace(old, new)
    return text
# Driver script: fetches the 25Live events XML, runs it through cleanup(),
# parses it with BeautifulSoup and builds a list of Event objects, one per
# <entry> tag.
# NOTE(review): this span appears to interleave the removed and the added
# lines of a diff without +/- markers (the same URL is fetched into both `f`
# and `xmldoc`, and `soup` is assigned twice), and the loop bodies have lost
# their indentation -- confirm against the real file before running.
from bs4 import BeautifulSoup
import requests
f = requests.get("http://25livepub.collegenet.com/calendars/events_all.xml") #grabs the xml from 25live
#f = open("events.xml", "r") #Opens a local document. events.xml is a shortened version of the larger events doc
soup = BeautifulSoup(f.text, "lxml") #creates soup of the xml
xmldoc = requests.get("http://25livepub.collegenet.com/calendars/events_all.xml") #grabs the xml from 25live
#xmldoc = open("events.xml", "r") #Opens a local document. events.xml is a shortened version of the larger events doc
entries = soup.find_all('entry') #creates a list of all the entry tags from the xml
print type(entries[0]), "\n" #prints the first entry
print entries[0].prettify(), "\n" #prints the first entry out
print entries[0].find('content').prettify(), "\n" #prints the first content tag in the first entry
# xmldoc is rebound from the HTTP response to its cleaned-up text.
xmldoc = cleanup(xmldoc.text)
print xmldoc
soup = BeautifulSoup(xmldoc, "lxml") #creates soup of the xml
print soup.prettify(), "\n\n"
titles = []
for item in entries:
titles.append(item.find('title').string)
#this iterates through the entries and puts each event's title into a list
print titles, "\n"
entries = soup.find_all('entry') #creates a list of all the entry tags from the xml
#print type(entries[0]), "\n" #prints the first entry
#print entries[0].prettify(), "\n" #prints the first entry out
#print entries[0].find('content').prettify(), "\n" #prints the first content tag in the first entry
# Wrap every <entry> tag in an Event object.
events = []
for item in entries:
events.append(Event(item))
print events
#for item in entries:
# print item.find('content').string, "\n\n"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment