...
 
Commits (2)
# django imports
from django.core.management.base import BaseCommand, CommandError
from api import parscript
class Command(BaseCommand):
    """Management command that triggers a data refresh via ``parscript``."""

    # Shown by ``manage.py help <command>``. Only one assignment is kept:
    # an earlier duplicate was immediately overwritten and never visible.
    help = 'Updates the database from all our sources'

    def handle(self, *args, **options):
        """Run the command by delegating to ``parscript.load_data``.

        Positional and keyword options are accepted but not used.
        """
        parscript.load_data()
\ No newline at end of file
from django.db import models
from bs4 import BeautifulSoup
import requests
# python imports
from datetime import datetime
import pytz
# Create your models here.
# Currently we store the list of locations as one big string; ideally this would be a separate table
# connected to our Event class with a foreign key.
class Event(models.Model):
    """Database model for one event, keyed by ``uniqueid``."""

    uniqueid = models.IntegerField(primary_key=True)
    title = models.CharField(max_length=120)  # max 25Live title length
    locations = models.TextField(default="Location Not Provided")
    description = models.TextField(default="Description Not Provided")
    # Both timestamps default to the same fixed UTC placeholder value.
    start_time = models.DateTimeField(
        default=(datetime(1949, 10, 1, 0, 0, 0, tzinfo=pytz.UTC))
    )
    end_time = models.DateTimeField(
        default=(datetime(1949, 10, 1, 0, 0, 0, tzinfo=pytz.UTC))
    )

    def make_dict(event):
        """Return the event's fields as a plain dictionary.

        Timestamps are rendered with ``isoformat()``; ``day_of_week`` is
        ``start_time.weekday()``.
        """
        return {
            'uniqueid': event.uniqueid,
            'title': event.title,
            'locations': event.locations,
            'description': event.description,
            'start_time': event.start_time.isoformat(),
            'end_time': event.end_time.isoformat(),
            'day_of_week': event.start_time.weekday(),
        }

    def insert_event(self):
        """Save this event unless an identical row is already stored.

        Returns a status string: the event was already present, was
        updated, or was newly saved.
        """
        # One lookup instead of exists() followed by get(): saves a query
        # and cannot raise DoesNotExist if the row vanishes in between.
        stored = Event.objects.filter(uniqueid=self.uniqueid).first()
        if stored is None:
            self.save()
            return "Event is Beging Saved"
        # Rows are compared through their string rendering.
        if str(stored) == str(self):
            return "Event is Already in Database"
        self.save()
        return "Event is beign updated"

    def give_ban(self):
        """Return a fixed novelty string."""
        return u'༼ つ ◕_ ◕ ༽つ GIVE BAN ༼ つ ◕_ ◕ ༽つ'
# django imports
from django.db import models
# third party imports
from bs4 import BeautifulSoup
import requests
def __str__(self):
    """Render the event as one comma-separated line of its fields."""
    # The id is followed by a bare comma; every later field is separated
    # by a comma and a space.
    trailing_fields = [
        self.title,
        self.locations,
        self.description,
        self.start_time.isoformat(),
        self.end_time.isoformat(),
    ]
    return str(self.uniqueid) + ',' + ', '.join(trailing_fields)
# Currently we store the list of locations as one big string;
# ideally this would be a separate table
# connected to our Event class with a foreign key.
class Event(models.Model):
    """Database model for one event, keyed by ``uniqueid``."""

    uniqueid = models.IntegerField(primary_key=True)
    title = models.CharField(max_length=120)  # max 25Live title length
    locations = models.TextField(default="Location Not Provided")
    description = models.TextField(default="Description Not Provided")
    # Both timestamps default to the same fixed UTC placeholder value.
    start_time = models.DateTimeField(
        default=(datetime(1949, 10, 1, 0, 0, 0, tzinfo=pytz.UTC))
    )
    end_time = models.DateTimeField(
        default=(datetime(1949, 10, 1, 0, 0, 0, tzinfo=pytz.UTC))
    )

    def make_dict(event):
        """Return the event's fields as a plain dictionary.

        Timestamps are rendered with ``isoformat()``; ``day_of_week`` is
        ``start_time.weekday()``.
        """
        return {
            'uniqueid': event.uniqueid,
            'title': event.title,
            'locations': event.locations,
            'description': event.description,
            'start_time': event.start_time.isoformat(),
            'end_time': event.end_time.isoformat(),
            'day_of_week': event.start_time.weekday(),
        }

    def insert_event(self):
        """Save this event unless an identical row is already stored.

        Returns a status string: the event was already present, was
        updated, or was newly saved.
        """
        # One lookup instead of exists() followed by get(): saves a query
        # and cannot raise DoesNotExist if the row vanishes in between.
        stored = Event.objects.filter(uniqueid=self.uniqueid).first()
        if stored is None:
            self.save()
            return "Event is Beging Saved"
        # Rows are compared through their string rendering (see __str__).
        if str(stored) == str(self):
            return "Event is Already in Database"
        self.save()
        return "Event is beign updated"

    def give_ban(self):
        """Return a fixed novelty string."""
        return u'༼ つ ◕_ ◕ ༽つ GIVE BAN ༼ つ ◕_ ◕ ༽つ'

    def __str__(self):
        """Render the event as one comma-separated line of its fields."""
        return str(self.uniqueid) + ',' + \
            self.title + ', ' + \
            self.locations + ', ' + \
            self.description + ', ' + \
            self.start_time.isoformat() + ', ' + \
            self.end_time.isoformat()
from bs4 import BeautifulSoup
# python imports
from datetime import datetime, timedelta
import pytz
import requests
# django imports
from api.models import Event
error = []
#timezone = pytz.timezone('EST')
# third party imports
import requests
from bs4 import BeautifulSoup
# Parses the XML from Mason and mines 2 BTC. thanks zac wood
def load_data():
    """Download the events feed and store every entry as an ``Event``.

    Each ``entry`` element of the feed is parsed with ``entry_parser``,
    turned into an ``Event`` and passed through ``Event.insert_event``;
    the status string it returns is printed.
    """
    print('hello there')
    # Fetch and parse the feed exactly once (it was previously requested
    # and parsed twice, with the first result thrown away).
    response = requests.get(
        "https://25livepub.collegenet.com/calendars/events_all.xml?html=0"
    )
    soup = BeautifulSoup(response.text, "lxml")
    # creates a list of all the entry tags from the xml
    entries = soup.find_all('entry')
    # iterates through soup and parses each entry
    for entry in entries:
        data = entry_parser(entry)
        # Positional arguments follow the order entry_parser returns them in.
        my_event = Event(
            data[0],  # uniqueid
            data[1],  # title
            data[2],  # location
            data[3],  # description
            data[4],  # start_time
            data[5],  # end_time
        )
        print(my_event.insert_event())
def entry_parser(entry):
error = []
# get event ID
try:
uniqueId = int(entry.id.text[-9:])
......@@ -32,71 +47,60 @@ def entry_parser(entry):
error.append("Error with getting ID")
# get event Title
try:
try:
title = entry.title.text
except Exception:
error.append("Error with getting Title")
# get published Datetime
try:
try:
publishedDatetime = entry.published.text
except Exception:
except Exception:
error.append("Error with getting PublishedDatetime")
# get updated Datetime
try:
try:
updatedDatetime = entry.updated.text
except Exception:
except Exception:
error.append("Error with getting UpdatedDatetime")
# get content
try:
try:
content = entry.content.text
except Exception:
except Exception:
error.append("Error with getting get content")
location = str(content.splitlines()[0])
description = "\n".join(content.split("\n")[3:])
#description = "damnit"
start_datetime, end_datetime = scrape_date_time(content.splitlines()[1])
description = "\n".join(content.split("\n")[3:])
'''
print(
"--------------------------------------------------\n" + \
"id : " + uniqueId + "\n" + \
"title : " + title + "\n" + \
"location : " + location + "\n" + \
"description : " + description + "\n" + \
"start time : " + start_datetime.isoformat() + '\n' + \
"end time : " + end_datetime.isoformat() + '\n' + \
"---------------------------------------------------"
)
'''
start_datetime, end_datetime = scrape_date_time(content.splitlines()[1])
parsed_data = [uniqueId, title, location, description, start_datetime, end_datetime]
parsed_data = [
uniqueId,
title,
location,
description,
start_datetime,
end_datetime,
]
return parsed_data
# takes in a string fitting a time format and returns a tuple
# which contains the start_ and end_time as datetime objects
def scrape_date_time(content_time):
#print(content_time)
start_time = ''
end_time = ''
try:
event_time = content_time.split(', ')[3].split(' - ')
event_time = content_time.split(', ')[3].split(' - ')
start_time = event_time[0]
end_time = event_time[1]
if( ('pm' in end_time) and ('am' not in start_time)):
if('pm' in end_time and 'am' not in start_time):
start_time += 'pm'
elif('am' in end_time and 'am' not in start_time):
start_time += 'am'
......@@ -109,32 +113,41 @@ def scrape_date_time(content_time):
string_to_parse_start = content_time + "," + start_time
parsing_format = '%A, %B %d, %Y,%I:%M%p'
end_datetime = datetime.strptime(string_to_parse_end, parsing_format)
start_datetime = datetime.strptime(string_to_parse_start, parsing_format)
end_datetime_aware = end_datetime.replace(tzinfo=pytz.timezone("EST"))
start_datetime_aware = start_datetime.replace(tzinfo=pytz.timezone("EST"))
return (start_datetime_aware.astimezone(pytz.UTC), end_datetime_aware.astimezone(pytz.UTC))
end_datetime = datetime.strptime(
string_to_parse_end,
parsing_format,
)
start_datetime = datetime.strptime(
string_to_parse_start,
parsing_format,
)
end_datetime_aware = end_datetime.replace(
tzinfo=pytz.timezone("EST")
)
start_datetime_aware = start_datetime.replace(
tzinfo=pytz.timezone("EST")
)
return (
start_datetime_aware.astimezone(pytz.UTC),
end_datetime_aware.astimezone(pytz.UTC),
)
except IndexError:
return (datetime(1949, 10, 1, 0, 0, 0, tzinfo=pytz.UTC), datetime(1949, 10, 1, 0, 0, 0, tzinfo=pytz.UTC)) #TODO change so it returns default datetime
#return(datetime(1949, 10, 1, 0, 0, 0), datetime(1949, 10, 1, 0, 0, 0))
return (
datetime(1949, 10, 1, 0, 0, 0, tzinfo=pytz.UTC),
datetime(1949, 10, 1, 0, 0, 0, tzinfo=pytz.UTC),
) # TODO change so it returns default datetime
def fix_string(the_time):
    """Return ``the_time`` with minutes added and one trailing space removed.

    If the string has no ``':'``, ``":00"`` is inserted in front of the first
    ``'a'`` (or, failing that, the first ``'p'``), so ``"9am"`` becomes
    ``"9:00am"``. When neither letter is present, ``":00"`` is appended after
    the last non-space character instead of being spliced into the digits.
    A single trailing space, if any, is then dropped.
    """
    if ':' not in the_time:
        if 'a' in the_time:
            insert_at = the_time.find('a')
        else:
            insert_at = the_time.find('p')
        if insert_at == -1:
            # No am/pm marker: find() returned -1, which would otherwise
            # place ":00" before the final character ("12" -> "1:002").
            insert_at = len(the_time.rstrip(" "))
        the_time = the_time[:insert_at] + ":00" + the_time[insert_at:]
    if the_time.endswith(" "):
        the_time = the_time[:-1]
    return the_time
# everything in the house is fuzzy, stupid dogs were acting like pollinators, if that's how you even spell it
\ No newline at end of file
# everything in the house is fuzzy, stupid dogs were acting like pollinators, if that's how you even spell it