All I wanted to do was scrape in JavaScript…

A female blue mickey mouse platy
casper.each(fishArray, function(self, currentFish){
/*
* .evaluate is not async, so it must be wrapped in a .then
* .then is STEP Async, they're executed one after the other
*/
// Queue a Casper step that sets the page's drop-downs for the current fish.
casper.then(function(){
console.log("Getting: " + currentFish);
// Change the drop down selections
// currentFish is passed as the trailing argument to casper.evaluate and arrives
// as the callback's parameter. The callback uses `$` (presumably the page's
// own jQuery — confirm) to set each select and trigger its change handler.
casper.evaluate(function(currentFish) {
$('select[name="category"]').val(currentFish).change();
// 'DAYS' is set to the fixed value '1' for every fish.
$('select[name="DAYS"]').val('1').change();
},currentFish);
});
...
fs.write(pathToFolder+soldPath+fish+outputFormat, JSON.stringify(soldJSON, null, 4), 'w')
// Queue a step that POSTs the collected fish data (allFish) to Firebase as JSON
// and echoes the response object once the request has completed.
casper.then(function(){
  console.log("Sending to Firebase...");
  // Open the url for the database
  casper.thenOpen("https://aquascraper-data.firebaseio.com/test.json?auth="+deets.deets+"&debug=true",{
    method: "post",
    data: JSON.stringify(allFish),
    headers: {
      auth : "xxxxxx",
      // CasperJS takes the content type as a request header. It has no
      // jQuery-style contentType/dataType settings, so those keys were
      // silently ignored and the body was not labelled as JSON.
      'Content-Type': 'application/json',
    },
  },function(response){
    casper.echo("POSTED TO Firebase: "+JSON.stringify(response));
  });
});
Storage view for Firebase
https://github.com/ariya/phantomjs/issues/14143
Phantom being a fat leaker
casperjs scrape.js --engine=slimerjs
// Run one CasperJS child process per fish and store each process's parsed
// JSON output in allAuctions, keyed by fish name. Each promise resolves (with
// no value) once its entry is stored, and rejects if the command fails or its
// stdout is not valid JSON.
var promises = fishArray.map(function(currentFish) {
  return new Promise(function(resolve, reject) {
    exec('casperjs turnitoffandon.js',
      {
        // The `env` option REPLACES the child's environment rather than
        // extending it, so start from process.env. Without this, PATH is
        // dropped and the shell may fail with "casperjs: not found".
        env: Object.assign({}, process.env, {
          'currentFish': currentFish
        }),
      },
      function(err, stdout, stderr) {
        if (err) {
          console.log('ERROR. err was:' + err);
          console.log('ERROR. stderr was:' + stderr);
          return reject(err);
        }
        console.log('success stdout was:' + stdout);
        try {
          allAuctions[currentFish] = JSON.parse(stdout);
        } catch (parseErr) {
          // A throw inside this callback would escape the promise and leave
          // it pending forever, so report bad output as a rejection instead.
          return reject(parseErr);
        }
        resolve();
      }
    );
  });
});
// Once every scrape has finished, POST the combined allAuctions object to
// Firebase as JSON and log the response. Any failure (a rejected scrape or a
// failed POST) is reported by the final .catch and marks the run as failed.
Promise.all(promises)
  .then(function() {
    // Serialise explicitly: concatenating an object prints "[object Object]".
    console.log("allAuctions are done." + JSON.stringify(allAuctions));
    console.log("Sending to Firebase...");
    var options = {
      method: 'post',
      body: allAuctions,
      json: true,
      url: "https://aquascraper-data.firebaseio.com/test.json?auth="+deets+"&debug=true"
    };
    // Adapt the callback API to a promise so a failed POST reaches the
    // .catch below; a `throw` inside the request callback would bypass the
    // chain and surface as an uncaught exception.
    return new Promise(function(resolve, reject) {
      request(options, function (err, res, body) {
        if (err) {
          console.error('error posting json: ', err);
          return reject(err);
        }
        var headers = res.headers;
        var statusCode = res.statusCode;
        console.log('headers: ', headers);
        console.log('statusCode: ', statusCode);
        console.log('body: ', body);
        resolve();
      });
    });
  })
  .catch(function(err) {
    // Reached when any scrape rejects or the POST fails.
    console.error('scrape run failed: ', err);
    // Signal failure to the caller without cutting off pending output.
    process.exitCode = 1;
  });
The set of buildpacks I used to (almost) run a Node-controlled CasperJS script
Error: Command failed: casperjs turnitoffandon.js
ERROR: stderr was:/bin/sh 1: casperjs: not found
pageSettings: {
// NO WAIT, DO LOAD IMAGES, for some reason, this prevents a worse memory leak
loadImages: true,
loadPlugins: false,
},
Now we’re on the Scheduler!
Script runs fine, exits and then…. crashes??
I don’t know how or why this works. Don’t ask me.
State changed from up to COMPLETE

--

--

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store