# peoplefinder.py (committed by Akshay Karthik)
import re
import requests
from bs4 import BeautifulSoup

# Address of the PeopleFinder search page that every request is sent to.
BASE_URL = "http://peoplefinder.gmu.edu/index.php"

# Query-string fragment for each supported search mode, keyed by mode name.
MODE = dict(
    standard="mode=standard",
    advanced="mode=advanced",
)


# Compiled once; both are anchored at the start of a paragraph's text.
_MAJOR_RE = re.compile(r"Major: (.*)")
_EMAIL_RE = re.compile(r"Email: (.*)")


def _parse_paragraph(paragraph, result):
    """Copy whatever fields a single ``<p>`` element carries into *result*."""
    content = paragraph.text

    major_match = _MAJOR_RE.match(content)
    if major_match:
        result['major'] = major_match.group(1)

    email_match = _EMAIL_RE.match(content)
    if email_match:
        result['email'] = email_match.group(1)

    if paragraph.acronym:
        # A paragraph containing an <acronym> tag is stored whole as the
        # address.
        result['address'] = content
    elif paragraph.span:
        phone = paragraph.find('span', {'class': 'phone'})
        result['phone'] = phone.text if phone else None
        fax = paragraph.find('span', {'class': 'fax'})
        # Drops the first five characters of the fax text -- presumably a
        # "Fax: " label; confirm against the live markup.
        result['fax'] = fax.text[5:] if fax else None


def _parse_person(person_div, include_raw=False):
    """Build the result dict for one ``<div class="person">`` element."""
    children = list(person_div.children)
    result = {}
    if include_raw:
        result['raw'] = [str(child) for child in children]

    for child in children:
        if child.name == 'h3':
            result['name'] = child.text
        elif child.name == 'p':
            _parse_paragraph(child, result)
    return result


def call_standard(search, group="all", page=1, include_raw=False):
    """Run a PeopleFinder search and scrape one page of results.

    Args:
        search: Text sent as the ``search`` query parameter.
        group: Value of the ``group`` query parameter.
        page: Number of the result page to request.
        include_raw: When true, every result additionally has a ``'raw'``
            key holding the string form of each child node of its
            ``div.person`` element. Intended for debugging the scraper.

    Returns:
        A dict with two keys. ``'results'`` is a list with one dict per
        ``div.person`` element; each may contain ``'name'``, ``'major'``,
        ``'email'``, ``'address'``, ``'phone'`` and ``'fax'``, depending on
        what the markup provides (``'phone'``/``'fax'`` are ``None`` when
        the paragraph has a span but not that particular one).
        ``'hasNextPage'`` is True when the page contains an ``li`` element
        with class ``next``.
    """
    # Let requests assemble the query string so that characters such as
    # "&", "#" or "=" in the search text are percent-encoded instead of
    # corrupting the URL.
    current_page = requests.get(BASE_URL, params={
        'search': search,
        'group': group,
        'people': 100,
        'page': page,
    })

    # Name the parser explicitly: otherwise bs4 picks whichever one happens
    # to be installed, and the resulting tree can differ between machines.
    soup = BeautifulSoup(current_page.content, 'html.parser')

    people = [
        _parse_person(person_div, include_raw)
        for person_div in soup.find_all('div', {'class': 'person'})
    ]

    return {
        'results': people,
        'hasNextPage': bool(soup.find('li', {'class': 'next'}))
    }
57