peoplefinder.py 1.91 KB
Newer Older
Akshay Karthik's avatar
Akshay Karthik committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import re
import requests
from bs4 import BeautifulSoup

# Endpoint that search requests are sent to.
BASE_URL = "http://peoplefinder.gmu.edu/index.php"

# Query-string fragment for each search mode, keyed by mode name.
MODE = dict(
    standard="mode=standard",
    advanced="mode=advanced",
)


# Matches the paragraph that marks a student entry and captures the major.
_MAJOR_PATTERN = re.compile(r"Major: (.*)")

# Seconds to wait on the server before giving up instead of blocking forever.
_REQUEST_TIMEOUT = 30


def _parse_person(person_div):
    """Build a result dict from the direct children of one ``div.person``.

    The dict always has a ``'type'`` key (``None`` when no ``<p>`` child was
    seen).  The remaining keys -- ``'name'``, ``'major'`` or ``'title'``,
    ``'address'``, ``'phone'`` and ``'fax'`` -- are only present when the
    corresponding markup is found.
    """
    result = {'type': None}
    for child in person_div.children:
        # Text nodes sit between the tags; they carry no tag name to match.
        tag_name = getattr(child, 'name', None)
        if tag_name == 'h3':
            result['name'] = child.text
            continue
        if tag_name != 'p':
            continue

        if result['type'] is None:
            # The first paragraph decides the entry type: a "Major: ..." line
            # means a student, anything else is stored as a faculty title.
            text = child.text
            major = _MAJOR_PATTERN.match(text)
            if major:
                result['type'] = "Student"
                result['major'] = major.group(1)
            else:
                result['type'] = "Faculty"
                result['title'] = text
        elif child.find('acronym') is not None:
            # A later paragraph containing an <acronym> is stored as address.
            result['address'] = child.text
        elif child.find('span') is not None:
            # A paragraph with <span> children holds the phone/fax numbers;
            # either one may be absent, in which case None is recorded.
            phone = child.find('span', {'class': 'phone'})
            fax = child.find('span', {'class': 'fax'})
            result['phone'] = phone.text if phone is not None else None
            result['fax'] = fax.text if fax is not None else None
    return result


def call_standard(search, group="all", page=1):
    """Run a search against ``BASE_URL`` and scrape one page of results.

    Args:
        search: Text sent as the ``search`` query parameter.
        group: Value sent as the ``group`` query parameter.
        page: Value sent as the ``page`` query parameter.

    Returns:
        A dict with two keys: ``'results'``, a list with one dict per
        ``div.person`` element on the page, and ``'hasNextPage'``, which is
        True when the page contains an ``li`` element with class ``next``.

    Raises:
        requests.exceptions.RequestException: if the HTTP request fails or
            does not answer within the timeout.
    """
    # Let requests build the query string so that characters such as "&",
    # "#" or "=" in the search text are percent-encoded instead of corrupting
    # the URL.  A list of pairs keeps the parameter order stable.
    query = [
        ("search", search),
        ("group", group),
        ("people", "100"),  # presumably the page size -- confirm with the site
        ("page", page),
    ]
    response = requests.get(BASE_URL, params=query, timeout=_REQUEST_TIMEOUT)

    # Name the parser explicitly: otherwise bs4 picks whichever one happens
    # to be installed, which makes results machine-dependent and emits a
    # warning.
    soup = BeautifulSoup(response.content, "html.parser")

    people = [
        _parse_person(person_div)
        for person_div in soup.find_all('div', {'class': 'person'})
    ]

    return {
        'results': people,
        'hasNextPage': bool(soup.find('li', {'class': 'next'}))
    }