Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SRCT
whats-open-web
Commits
c0bd342a
Commit
c0bd342a
authored
Feb 03, 2017
by
mdsecurity
Browse files
tidying a few thing up
adding results.json and scraper.js to the gitignore cleaning up some of the scraper code
parent
efd9df1d
Changes
5
Hide whitespace changes
Inline
Side-by-side
.gitignore
View file @
c0bd342a
...
...
@@ -38,3 +38,7 @@ testem.log
#System Files
.DS_Store
Thumbs.db
#random stuff
scraper.js
results.json
\ No newline at end of file
place-example.json
View file @
c0bd342a
{
"name"
:
"Burger King"
,
"location"
:
"The Johnson Center"
,
"date_
chang
ed"
:
"2015-08-31T16:52:09.600Z"
,
"date_
modifi
ed"
:
"2015-08-31T16:52:09.600Z"
,
"phone_number"
:
"703-121-3212"
,
"tap_n_go_link"
:
"https://www.tapingo.com/order/restaurant/starbucks-gmu/"
,
"tags"
:
[
"vegan"
,
"mexican"
],
...
...
results.json
View file @
c0bd342a
[
{
"title"
:
"Wing Zone"
,
"date_modified"
:
"2017-02-04T00:38:21.126Z"
,
"location"
:
"Fairfax Campus, The Hub"
,
"operation_hours"
:
[
{
...
...
@@ -98,6 +99,7 @@
},
{
"title"
:
"Panera Bread"
,
"date_modified"
:
"2017-02-04T00:38:21.126Z"
,
"location"
:
"Fairfax Campus, Johnson Center"
,
"operation_hours"
:
[
{
...
...
@@ -195,6 +197,7 @@
},
{
"title"
:
"Panda Express"
,
"date_modified"
:
"2017-02-04T00:38:21.126Z"
,
"location"
:
"Fairfax Campus, Merten Hall"
,
"operation_hours"
:
[
{
...
...
@@ -292,6 +295,7 @@
},
{
"title"
:
"Manhattan Pizza"
,
"date_modified"
:
"2017-02-04T00:38:21.126Z"
,
"location"
:
"Fairfax Campus, Merten Hall"
,
"operation_hours"
:
[
{
...
...
@@ -389,6 +393,7 @@
},
{
"title"
:
"Argo Tea"
,
"date_modified"
:
"2017-02-04T00:38:21.126Z"
,
"location"
:
"Fairfax Campus, Fenwick Library"
,
"operation_hours"
:
[
{
...
...
@@ -486,6 +491,7 @@
},
{
"title"
:
"Taco Bell"
,
"date_modified"
:
"2017-02-04T00:38:21.126Z"
,
"location"
:
"Fairfax Campus, SUB I"
,
"operation_hours"
:
[
{
...
...
@@ -571,6 +577,7 @@
},
{
"title"
:
"Subway"
,
"date_modified"
:
"2017-02-04T00:38:21.126Z"
,
"location"
:
"Fairfax Campus, Rogers Hall"
,
"operation_hours"
:
[
{
...
...
@@ -668,6 +675,7 @@
},
{
"title"
:
"Star Ginger"
,
"date_modified"
:
"2017-02-04T00:38:21.126Z"
,
"location"
:
"Fairfax Campus, Johnson Center"
,
"operation_hours"
:
[
{
...
...
@@ -765,6 +773,7 @@
},
{
"title"
:
"Starbucks"
,
"date_modified"
:
"2017-02-04T00:38:21.126Z"
,
"location"
:
"Fairfax Campus, Northern Neck"
,
"operation_hours"
:
[
{
...
...
@@ -820,6 +829,7 @@
},
{
"title"
:
"Starbucks"
,
"date_modified"
:
"2017-02-04T00:38:21.126Z"
,
"location"
:
"Fairfax Campus, Johnson Center"
,
"operation_hours"
:
[
{
...
...
@@ -917,6 +927,7 @@
},
{
"title"
:
"Southside"
,
"date_modified"
:
"2017-02-04T00:38:21.126Z"
,
"location"
:
"Fairfax Campus, Southside"
,
"operation_hours"
:
[
{
...
...
@@ -1014,6 +1025,7 @@
},
{
"title"
:
"Simply To Go"
,
"date_modified"
:
"2017-02-04T00:38:21.126Z"
,
"location"
:
"Fairfax Campus, SUB I"
,
"operation_hours"
:
[
{
...
...
@@ -1099,6 +1111,7 @@
},
{
"title"
:
"Second Stop Patriot Shop"
,
"date_modified"
:
"2017-02-04T00:38:21.126Z"
,
"location"
:
"Fairfax Campus, Rogers Hall"
,
"operation_hours"
:
[
{
...
...
@@ -1196,6 +1209,7 @@
},
{
"title"
:
"Red Hot & Blue"
,
"date_modified"
:
"2017-02-04T00:38:21.126Z"
,
"location"
:
"Fairfax Campus, Johnson Center"
,
"operation_hours"
:
[
{
...
...
@@ -1293,6 +1307,7 @@
},
{
"title"
:
"Rathskeller"
,
"date_modified"
:
"2017-02-04T00:38:21.126Z"
,
"location"
:
"Fairfax Campus, SUB I"
,
"operation_hours"
:
[
{
...
...
@@ -1378,6 +1393,7 @@
},
{
"title"
:
"Randall's Cafe"
,
"date_modified"
:
"2017-02-04T00:38:21.126Z"
,
"location"
:
"SciTech (Prince William Campus)"
,
"operation_hours"
:
[
{
...
...
@@ -1463,6 +1479,7 @@
},
{
"title"
:
"Peet's Coffee"
,
"date_modified"
:
"2017-02-04T00:38:21.126Z"
,
"location"
:
"Fairfax Campus, Innovation Hall"
,
"operation_hours"
:
[
{
...
...
@@ -1542,6 +1559,7 @@
},
{
"title"
:
"Peet's Coffee"
,
"date_modified"
:
"2017-02-04T00:38:21.126Z"
,
"location"
:
"Fairfax Campus, Exploratory Hall"
,
"operation_hours"
:
[
{
...
...
@@ -1627,6 +1645,7 @@
},
{
"title"
:
"One Stop Patriot Shop"
,
"date_modified"
:
"2017-02-04T00:38:21.126Z"
,
"location"
:
"Fairfax Campus, Blue Ridge"
,
"operation_hours"
:
[
{
...
...
@@ -1724,6 +1743,7 @@
},
{
"title"
:
"Jorge's"
,
"date_modified"
:
"2017-02-04T00:38:21.126Z"
,
"location"
:
"Fairfax Campus, Johnson Center"
,
"operation_hours"
:
[
{
...
...
@@ -1809,6 +1829,7 @@
},
{
"title"
:
"IndAroma"
,
"date_modified"
:
"2017-02-04T00:38:21.126Z"
,
"location"
:
"Fairfax Campus, Johnson Center"
,
"operation_hours"
:
[
{
...
...
@@ -1906,6 +1927,7 @@
},
{
"title"
:
"Ike's"
,
"date_modified"
:
"2017-02-04T00:38:21.126Z"
,
"location"
:
"Fairfax Campus, President's Park"
,
"operation_hours"
:
[
{
...
...
@@ -1961,6 +1983,7 @@
},
{
"title"
:
"The Globe"
,
"date_modified"
:
"2017-02-04T00:38:21.126Z"
,
"location"
:
"Fairfax Campus, The Global Center"
,
"operation_hours"
:
[
{
...
...
@@ -2058,6 +2081,7 @@
},
{
"title"
:
"Freshens"
,
"date_modified"
:
"2017-02-04T00:38:21.126Z"
,
"location"
:
"Fairfax Campus, The RAC"
,
"operation_hours"
:
[
{
...
...
@@ -2143,6 +2167,7 @@
},
{
"title"
:
"Freshens"
,
"date_modified"
:
"2017-02-04T00:38:21.126Z"
,
"location"
:
"Fairfax Campus, Johnson Center"
,
"operation_hours"
:
[
{
...
...
@@ -2228,6 +2253,7 @@
},
{
"title"
:
"Express"
,
"date_modified"
:
"2017-02-04T00:38:21.126Z"
,
"location"
:
"Fairfax Campus, Johnson Center"
,
"operation_hours"
:
[
{
...
...
@@ -2325,6 +2351,7 @@
},
{
"title"
:
"Einstein Bros. Bagels"
,
"date_modified"
:
"2017-02-04T00:38:21.126Z"
,
"location"
:
"Fairfax Campus, Merten Hall"
,
"operation_hours"
:
[
{
...
...
@@ -2410,6 +2437,7 @@
},
{
"title"
:
"Einstein Bros. Bagels"
,
"date_modified"
:
"2017-02-04T00:38:21.126Z"
,
"location"
:
"Fairfax Campus, Engineering Building"
,
"operation_hours"
:
[
{
...
...
@@ -2495,6 +2523,7 @@
},
{
"title"
:
"Chick Fil A"
,
"date_modified"
:
"2017-02-04T00:38:21.126Z"
,
"location"
:
"Fairfax Campus, SUB I"
,
"operation_hours"
:
[
{
...
...
@@ -2580,6 +2609,7 @@
},
{
"title"
:
"Burger King"
,
"date_modified"
:
"2017-02-04T00:38:21.126Z"
,
"location"
:
"Fairfax Campus, Johnson Center"
,
"operation_hours"
:
[
{
...
...
@@ -2677,6 +2707,7 @@
},
{
"title"
:
"Au Bon Pain"
,
"date_modified"
:
"2017-02-04T00:38:21.126Z"
,
"location"
:
"Arlington Campus"
,
"operation_hours"
:
[
{
...
...
scraper.js
View file @
c0bd342a
...
...
@@ -14,14 +14,16 @@ var x = Xray({
parse_op_hrs
:
function
(
value
)
{
var
operation_hours
=
[];
XRegExp
.
forEach
(
value
,
time
,
function
(
match
)
{
var
op_hours_obj
=
{
"
special_status
"
:
""
,
"
is_closed
"
:
false
,
"
open_all_day
"
:
false
,
"
open_time
"
:
false
,
"
close_time
"
:
false
};
if
(
match
.
closed
!==
undefined
)
{
operation_hours
.
push
({
"
special_status
"
:
""
,
"
is_closed
"
:
true
,
"
open_all_day
"
:
false
,
"
open_time
"
:
false
,
"
close_time
"
:
false
});
op_hours_obj
.
is_closed
=
true
;
operation_hours
.
push
(
op_hours_obj
);
}
else
if
(
match
.
all_day
!==
undefined
)
{
operation_hours
.
push
({
...
...
@@ -33,16 +35,12 @@ var x = Xray({
});
}
else
{
(
match
.
open_ampm
===
'
pm
'
)
?
match
.
open_hour
=
(
Number
(
match
.
open_hour
)
+
12
)
:
match
.
open_hour
=
Number
(
match
.
open_hour
);
(
match
.
close_ampm
===
'
pm
'
)
?
match
.
close_hour
=
(
Number
(
match
.
close_hour
)
+
12
)
:
match
.
close_hour
=
Number
(
match
.
close_hour
);
operation_hours
.
push
({
"
special_status
"
:
""
,
"
is_closed
"
:
false
,
"
open_all_day
"
:
false
,
"
open_time
"
:
[
match
.
open_hour
,
Number
(
match
.
open_minute
)
]
,
"
close_time
"
:
[
match
.
close_hour
,
Number
(
match
.
close_minute
)
]
"
open_time
"
:
to_24_hour_arr
(
Number
(
match
.
open_hour
)
,
Number
(
match
.
open_minute
)
,
match
.
open_ampm
)
,
"
close_time
"
:
to_24_hour_arr
(
Number
(
match
.
close_hour
)
,
Number
(
match
.
close_minute
)
,
match
.
close_ampm
)
});
}
});
...
...
@@ -63,6 +61,7 @@ x('http://dining.gmu.edu/dining-choices/hours-of-operation/', {
obj
.
title
[
i
]
=
obj
.
title
[
i
].
replace
(
obj
.
location
[
i
],
''
);
var
entry
=
{
title
:
obj
.
title
[
i
].
trim
(),
date_modified
:
new
Date
(),
location
:
obj
.
location
[
i
].
trim
(),
operation_hours
:
obj
.
operation_hours
[
i
]
};
...
...
@@ -74,3 +73,8 @@ x('http://dining.gmu.edu/dining-choices/hours-of-operation/', {
}
});
});
/**
 * Convert a 12-hour clock reading into a 24-hour `[hour, minute]` pair.
 *
 * @param {number} hour   Hour as written on a 12-hour clock (1-12).
 * @param {number} minute Minute of the hour; returned unchanged.
 * @param {string} ampm   Meridiem marker, "am" or "pm" (any letter case).
 *                        Any other value leaves `hour` untouched.
 * @returns {number[]} `[hour24, minute]` with `hour24` in 0-23 for valid input.
 */
function to_24_hour_arr(hour, minute, ampm) {
    // Normalise the marker so "PM"/"Pm" are treated the same as "pm".
    var meridiem = (typeof ampm === 'string') ? ampm.toLowerCase() : '';
    var new_hour = hour;

    if (meridiem === 'pm') {
        // 12 pm is noon (12), not 24; 1 pm - 11 pm map to 13 - 23.
        new_hour = (hour % 12) + 12;
    } else if (meridiem === 'am') {
        // 12 am is midnight (0); 1 am - 11 am are unchanged.
        new_hour = hour % 12;
    }

    return [new_hour, minute];
}
scraper.ts
View file @
c0bd342a
...
...
@@ -15,37 +15,28 @@ let x = Xray({
parse_op_hrs
:
function
(
value
)
{
let
operation_hours
=
[];
XRegExp
.
forEach
(
value
,
time
,
(
match
)
=>
{
let
op_hours_obj
=
{
"
special_status
"
:
""
,
"
is_closed
"
:
false
,
"
open_all_day
"
:
false
,
"
open_time
"
:
false
,
"
close_time
"
:
false
}
if
(
match
.
closed
!==
undefined
)
{
operation_hours
.
push
({
"
special_status
"
:
""
,
"
is_closed
"
:
true
,
"
open_all_day
"
:
false
,
"
open_time
"
:
false
,
"
close_time
"
:
false
});
op_hours_obj
.
is_closed
=
true
;
}
else
if
(
match
.
all_day
!==
undefined
)
{
operation_hours
.
push
({
"
special_status
"
:
""
,
"
is_closed
"
:
false
,
"
open_all_day
"
:
true
,
"
open_time
"
:
false
,
"
close_time
"
:
false
});
}
else
{
(
match
.
open_ampm
===
'
pm
'
)
?
match
.
open_hour
=
(
Number
(
match
.
open_hour
)
+
12
)
:
match
.
open_hour
=
Number
(
match
.
open_hour
);
(
match
.
close_ampm
===
'
pm
'
)
?
match
.
close_hour
=
(
Number
(
match
.
close_hour
)
+
12
)
:
match
.
close_hour
=
Number
(
match
.
close_hour
);
op_hours_obj
.
open_all_day
=
true
;
}
else
{
operation_hours
.
push
({
"
special_status
"
:
""
,
"
is_closed
"
:
false
,
"
open_all_day
"
:
false
,
"
open_time
"
:
[
match
.
open_hour
,
Number
(
match
.
open_minute
)],
"
close_time
"
:
[
match
.
close_hour
,
Number
(
match
.
close_minute
)]
});
op_hours_obj
.
open_time
=
to_24_hour_arr
(
Number
(
match
.
open_hour
),
Number
(
match
.
open_minute
),
match
.
open_ampm
);
op_hours_obj
.
close_time
=
to_24_hour_arr
(
Number
(
match
.
close_hour
),
Number
(
match
.
close_minute
),
match
.
close_ampm
);
}
operation_hours
.
push
(
op_hours_obj
);
});
return
operation_hours
;
}
...
...
@@ -63,15 +54,21 @@ x('http://dining.gmu.edu/dining-choices/hours-of-operation/', {
for
(
let
i
=
0
;
i
<
obj
.
title
.
length
;
i
++
)
{
// this next line fixes the problem with the .storename tag putting the title and location in
// the same string
obj
.
title
[
i
]
=
obj
.
title
[
i
].
replace
(
obj
.
location
[
i
],
''
);
obj
.
title
[
i
]
=
obj
.
title
[
i
].
replace
(
obj
.
location
[
i
],
''
);
let
entry
=
{
title
:
obj
.
title
[
i
].
trim
(),
date_modified
:
new
Date
(),
location
:
obj
.
location
[
i
].
trim
(),
operation_hours
:
obj
.
operation_hours
[
i
]
}
newObj
.
unshift
(
entry
);
}
fs
.
writeFile
(
'
results.json
'
,
JSON
.
stringify
(
newObj
,
null
,
"
\t
"
),
function
(
err
)
{
if
(
err
)
{
throw
err
;}
if
(
err
)
{
throw
err
;
}
});
})
/**
 * Convert a 12-hour clock reading into a 24-hour `[hour, minute]` pair.
 *
 * @param hour   Hour as written on a 12-hour clock (1-12).
 * @param minute Minute of the hour; returned unchanged.
 * @param ampm   Meridiem marker, "am" or "pm" (any letter case). Any other
 *               value leaves `hour` untouched.
 * @returns `[hour24, minute]` with `hour24` in 0-23 for valid input.
 */
function to_24_hour_arr(hour, minute, ampm) {
    // Normalise the marker so "PM"/"Pm" are treated the same as "pm".
    const meridiem = (typeof ampm === 'string') ? ampm.toLowerCase() : '';
    let new_hour = hour;

    if (meridiem === 'pm') {
        // 12 pm is noon (12), not 24; 1 pm - 11 pm map to 13 - 23.
        new_hour = (hour % 12) + 12;
    } else if (meridiem === 'am') {
        // 12 am is midnight (0); 1 am - 11 am are unchanged.
        new_hour = hour % 12;
    }

    return [new_hour, minute];
}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment