Commit 40be2b50 authored by mdsecurity's avatar mdsecurity
Browse files

updated the data_structure, added regex

added new values to the place-example.json
added xregexp to the scraper
parent 10c5c55c
......@@ -45,6 +45,7 @@
"ts-node": "1.2.1",
"tslint": "^4.3.0",
"typescript": "~2.0.3",
"x-ray": "^2.3.1"
"x-ray": "^2.3.1",
"xregexp": "^3.1.1"
}
}
{
"name": "Burger King",
"location": "The Johnson Center",
"date_changed": "2015-08-31T16:52:09.600Z",
"phone_number":"703-121-3212",
"tap_n_go_link":"https://www.tapingo.com/order/restaurant/starbucks-gmu/",
"tags":["vegan","mexican"],
"address": "4335 Chesapeake River Way Fairfax, Virginia",
"latitude":"38.8340368",
"longitude":"-77.3098702",
"operation_hours":[
{
"special_status":false,
"is_closed":false,
"open_all_day":false,
"open_time":[9,40],
"close_time":[14,30]
},
{
"special_status":"Closed for Construction",
"is_closed":true,
"open_all_day":false,
"open_time":false,
"close_time":false
},
{
"special_status":false,
"is_closed":false,
"open_all_day":true,
"open_time":false,
"close_time":false
},
{
"special_status":false,
"is_closed":false,
"open_all_day":false,
"open_time":[9,40],
"close_time":[14,30]
},
{
"special_status":false,
"is_closed":false,
"open_all_day":false,
"open_time":[9,40],
"close_time":[14,30]
},
{
"special_status":false,
"is_closed":false,
"open_all_day":false,
"open_time":[9,40],
"close_time":[14,30]
},
{
"special_status":false,
"is_closed":false,
"open_all_day":false,
"open_time":[9,40],
"close_time":[14,30]
}
]
}
\ No newline at end of file
This diff is collapsed.
var Xray = require('x-ray');
var fs = require('fs');
var XRegExp = require('xregexp');
var time = XRegExp('((?<closed> closed) | (?<all_day> 24(\\s?)*hours) | (?<open_hour> [01][0-9] ) : \n\
(?<open_minute> [0-9][0-9] ) (\\s?)* \n\
(?<open_ampm> [ap]m ) \n\
(\\s?)*\\W(\\s?)* \n\
(?<close_hour> [01][0-9] ) : \n\
(?<close_minute> [0-9][0-9] ) (\\s?)* \n\
(?<close_ampm> [ap]m )) ', 'ix');
var thing = {
"is_closed": false,
"open_all_day": false,
"open_time": [9, 40],
"close_time": [14, 30]
};
var x = Xray({
filters: {
trim: function (value) {
return value.trim()
return value.trim();
},
reverse: function (value) {
return typeof value === 'string' ? value.split('').reverse().join('') : value
return typeof value === 'string' ? value.split('').reverse().join('') : value;
},
slice: function (value, start, end) {
return typeof value === 'string' ? value.slice(start, end) : value
return typeof value === 'string' ? value.slice(start, end) : value;
},
parse_op_hrs: function (value) {
value = value.replace('Hours of Operation', '');
value = value.replace('Monday:', '');
value = value.replace('Tuesday:', ',');
value = value.replace('Wednesday:', ',');
value = value.replace('Thursday:', ',')
value = value.replace('Friday:', ',')
value = value.replace('Saturday:', ',')
value = value.replace('Sunday:', ',')
value = value.split(' ').join('');
value = value.split(',');
for(let i = 0; i < value.length;i++){
value[i] = value[i].replace('a',',a')
value[i] = value[i].replace('p',',p')
value[i] = value[i].replace(new RegExp(':','g'),',')
value[i] = value[i].split('-');
// value[i][0] = value[i][0].split(',');
console.log(value[i][1]);
value[i][1] = value[i][1].split(',');
}
return value;
var operation_hours = [];
XRegExp.forEach(value, time, function (match) {
if (match.closed === undefined) {
operation_hours.unshift();
}
else if (match.all_day === undefined) {
}
else {
}
});
return operation_hours;
}
}
});
......@@ -44,30 +44,21 @@ x('http://dining.gmu.edu/dining-choices/hours-of-operation/', {
title: ['.storename | trim'],
location: ['.location | trim'],
operationHours: ['.open-closed-sign | parse_op_hrs']
// operation:''
})
// this fixes the json so each element in the array has a title location and oparation hours
(function (err, obj) {
let newObj = [];
for (let i = 0; i < obj.title.length; i++) {
let entry = {
})(function (err, obj) {
var newObj = [];
for (var i = 0; i < obj.title.length; i++) {
var entry = {
title: obj.title[i],
location: obj.location[i],
operationHours: obj.operationHours[i]
}
operation_hours: obj.operation_hours[i]
};
newObj.unshift(entry);
}
fs.writeFile("results.json", JSON.stringify(newObj, null, "\t"), function (err) {
if (err) throw err;
if (err)
throw err;
});
})
});
// let to_Date_Ob = function(time){
// if(time[2] === 'pm'){
// time[0]+= 12;
......@@ -79,3 +70,28 @@ x('http://dining.gmu.edu/dining-choices/hours-of-operation/', {
// date.setUTCMilliseconds(0);
// return date;
// }
/*
value = value.replace('Hours of Operation', '');
value = value.replace('Monday:', '');
value = value.replace('Tuesday:', ',');
value = value.replace('Wednesday:', ',');
value = value.replace('Thursday:', ',')
value = value.replace('Friday:', ',')
value = value.replace('Saturday:', ',')
value = value.replace('Sunday:', ',')
value = value.split(' ').join('');
value = value.split(',');
for(let i = 0; i < value.length;i++){
value[i] = value[i].replace('a',',a')
value[i] = value[i].replace('p',',p')
value[i] = value[i].replace(new RegExp(':','g'),',')
value[i] = value[i].split('-');
// value[i][0] = value[i][0].split(',');
console.log(value[i][1]);
value[i][1] = value[i][1].split(',');
}
*/
let Xray = require('x-ray');
let fs = require('fs');
let XRegExp = require('xregexp');
// Pattern describing one day's opening-hours text. It is compiled with the
// XRegExp flags `i` (case-insensitive) and `x` (free-spacing: whitespace inside
// the pattern is ignored), which is why it can be laid out over several lines.
//
// Three alternatives, each exposing named captures:
//   closed                                   - the literal word "closed"
//   all_day                                  - "24 hours" (optional whitespace)
//   open_hour:open_minute open_ampm <sep>
//   close_hour:close_minute close_ampm       - an opening/closing time range,
//                                              where <sep> is a single non-word
//                                              character such as "-"
//
// NOTE(review): both hour groups are `[01][0-9]`, i.e. exactly two digits, so a
// reading such as "9:00am" would not match - confirm against the scraped page.
let time = XRegExp('((?<closed> closed) | (?<all_day> 24(\\s?)*hours) | (?<open_hour> [01][0-9] ) : \n\
(?<open_minute> [0-9][0-9] ) (\\s?)* \n\
(?<open_ampm> [ap]m ) \n\
(\\s?)*\\W(\\s?)* \n\
(?<close_hour> [01][0-9] ) : \n\
(?<close_minute> [0-9][0-9] ) (\\s?)* \n\
(?<close_ampm> [ap]m )) ','ix');
// Sample of a single day's record: flags for closed / open all day plus
// [hour, minute] pairs for opening and closing time. It is not referenced
// anywhere else in the visible script.
let thing = {
"is_closed":false,
"open_all_day":false,
"open_time":[9,40],
"close_time":[14,30]
}
/**
 * Converts a 12-hour clock reading into a 24-hour `[hour, minute]` pair.
 *
 * @param {string} hour   Hour digits as captured by the pattern, e.g. "09".
 * @param {string} minute Minute digits as captured by the pattern, e.g. "40".
 * @param {string} ampm   "am" or "pm" in any letter case.
 * @returns {number[]} `[hour, minute]` with the hour on a 24-hour clock.
 */
function to24Hour(hour, minute, ampm) {
    // `% 12` maps 12am to 0 and lets 12pm become 12 once the pm offset is added.
    let hour24 = parseInt(hour, 10) % 12;
    if (ampm.toLowerCase() === 'pm') {
        hour24 += 12;
    }
    return [hour24, parseInt(minute, 10)];
}

/**
 * Builds one day's operation-hours record from a match of the `time` pattern.
 *
 * @param {Object} match Match carrying the named captures `closed`, `all_day`,
 *     `open_hour`, `open_minute`, `open_ampm`, `close_hour`, `close_minute`
 *     and `close_ampm`.
 * @returns {Object} Record with `special_status`, `is_closed`, `open_all_day`,
 *     `open_time` and `close_time`; values that do not apply are `false`.
 */
function toOperationEntry(match) {
    const entry = {
        // The pattern cannot detect special statuses, so this is always false.
        special_status: false,
        is_closed: false,
        open_all_day: false,
        open_time: false,
        close_time: false
    };
    // Truthiness (rather than `=== undefined`) treats a non-participating
    // group the same whether it is reported as undefined or as ''.
    if (match.closed) {
        entry.is_closed = true;
    } else if (match.all_day) {
        entry.open_all_day = true;
    } else {
        entry.open_time = to24Hour(match.open_hour, match.open_minute, match.open_ampm);
        entry.close_time = to24Hour(match.close_hour, match.close_minute, match.close_ampm);
    }
    return entry;
}

// x-ray instance with the custom filters referenced from the selector strings.
const x = Xray({
    filters: {
        /** Strips surrounding whitespace; non-string values pass through untouched. */
        trim: function (value) {
            return typeof value === 'string' ? value.trim() : value;
        },
        /** Reverses the characters of a string; non-string values pass through untouched. */
        reverse: function (value) {
            return typeof value === 'string' ? value.split('').reverse().join('') : value;
        },
        /** Returns `value.slice(start, end)` for strings; non-string values pass through untouched. */
        slice: function (value, start, end) {
            return typeof value === 'string' ? value.slice(start, end) : value;
        },
        /**
         * Turns the scraped hours text into a list of per-day records, one for
         * every match of the `time` pattern, in the order they appear.
         *
         * @param {string} value Raw text of the hours element.
         * @returns {Object[]} Parsed records; empty when nothing matches or
         *     `value` is not a string.
         */
        parse_op_hrs: function (value) {
            const operation_hours = [];
            if (typeof value !== 'string') {
                return operation_hours;
            }
            XRegExp.forEach(value, time, (match) => {
                operation_hours.push(toOperationEntry(match));
            });
            return operation_hours;
        }
    }
});
/**
 * Regroups the column-oriented scrape result into one record per place, so
 * each element carries its own title, location and operation hours.
 *
 * The list comes out in reverse page order, matching the previous
 * `unshift`-based loop.
 *
 * @param {Object} scraped Object with parallel `title`, `location` and
 *     `operation_hours` arrays.
 * @returns {Object[]} One `{ title, location, operation_hours }` record per title.
 */
function buildPlaceList(scraped) {
    return scraped.title.map((title, i) => ({
        title: title,
        location: scraped.location[i],
        operation_hours: scraped.operation_hours[i]
    })).reverse();
}

// Scrape the hours-of-operation page and write the regrouped records to
// results.json. The selector key is `operation_hours` so that it matches the
// property read in buildPlaceList (it used to be `operationHours`, which left
// `obj.operation_hours` undefined and made the callback throw).
x('http://dining.gmu.edu/dining-choices/hours-of-operation/', {
    title: ['.storename | trim'],
    location: ['.location | trim'],
    operation_hours: ['.open-closed-sign | parse_op_hrs']
})(function (err, obj) {
    // Surface scrape failures instead of failing later on an undefined result.
    if (err) throw err;
    fs.writeFile("results.json", JSON.stringify(buildPlaceList(obj), null, "\t"), function (writeErr) {
        if (writeErr) throw writeErr;
    });
});
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment