All I wanted to do is scrape in JavaScript…

A female blue mickey mouse platy
casper.each(fishArray, function(self, currentFish){
/*
* .evaluate is not async, so it must be wrapped in a .then
* .then is STEP Async, they're executed one after the other
*/
// Queue a navigation step that sets the page's drop-downs for the current fish.
casper.then(function () {
    console.log("Getting: " + currentFish);

    // Executed inside the page context by casper.evaluate: set the "category"
    // select to the value passed in, then set the "DAYS" select to '1',
    // triggering change() on each select right after its value is set.
    var applySelections = function (category) {
        var categorySelect = $('select[name="category"]');
        categorySelect.val(category).change();

        var daysSelect = $('select[name="DAYS"]');
        daysSelect.val('1').change();
    };

    casper.evaluate(applySelections, currentFish);
});
...
// Serialize soldJSON with 4-space indentation and write it (mode 'w') to the
// path built from pathToFolder + soldPath + fish + outputFormat.
var soldOutputPath = pathToFolder + soldPath + fish + outputFormat;
var soldOutputJson = JSON.stringify(soldJSON, null, 4);
fs.write(soldOutputPath, soldOutputJson, 'w');
// Queue a step that POSTs the JSON-serialized allFish object to the Firebase
// REST endpoint and echoes the response once the request step completes.
casper.then(function () {
    console.log("Sending to Firebase...");
    // Open the url for the database
    casper.thenOpen("https://aquascraper-data.firebaseio.com/test.json?auth=" + deets.deets + "&debug=true", {
        method: "post",
        data: JSON.stringify(allFish),
        headers: {
            auth: "xxxxxx",
            // CasperJS open() settings only understand method, data, headers
            // and encoding. The jQuery-style `contentType` / `dataType` keys
            // used previously were silently ignored, so the JSON content type
            // has to be declared as a real request header.
            "Content-Type": "application/json"
        }
    }, function (response) {
        casper.echo("POSTED TO Firebase: " + JSON.stringify(response));
    });
});
Storage view for Firebase
https://github.com/ariya/phantomjs/issues/14143
Phantom being a fat leaker
casperjs scrape.js --engine=slimerjs
// Launch one `casperjs turnitoffandon.js` child process per fish (all started
// at once). Each child receives its fish via the `currentFish` environment
// variable and is expected to print JSON on stdout; the parsed result is
// stored in allAuctions[currentFish]. Each promise resolves with no value on
// success and rejects with the exec error or the JSON parse error.
var promises = fishArray.map(function (currentFish) {
    return new Promise(function (resolve, reject) {
        exec('casperjs turnitoffandon.js',
            {
                // The `env` option REPLACES the child's environment rather than
                // extending it, so start from process.env; otherwise PATH (and
                // everything else) is dropped for the child.
                env: Object.assign({}, process.env, {
                    'currentFish': currentFish
                })
            },
            function (err, stdout, stderr) {
                if (err) {
                    console.log('ERROR. err was:' + err);
                    console.log('ERROR. stderr was:' + stderr);
                    return reject(err);
                }
                console.log('success stdout was:' + stdout);
                try {
                    allAuctions[currentFish] = JSON.parse(stdout);
                } catch (parseErr) {
                    // A throw inside this callback would escape the promise and
                    // crash the process while leaving the promise pending.
                    console.log('ERROR. could not parse stdout as JSON:' + parseErr);
                    return reject(parseErr);
                }
                resolve();
            }
        );
    });
});

// Once every child has finished successfully, POST the combined results to
// Firebase. Any failure (child process, JSON parse, or HTTP request) ends up
// in the final catch handler, which logs it and marks the run as failed.
Promise.all(promises)
    .then(function () {
        // Serialize explicitly: concatenating the object would print "[object Object]".
        console.log("allAuctions are done." + JSON.stringify(allAuctions));
        console.log("Sending to Firebase...");
        var options = {
            method: 'post',
            body: allAuctions,
            json: true,
            url: "https://aquascraper-data.firebaseio.com/test.json?auth=" + deets + "&debug=true"
        };
        // Adapt the callback-style request() to a promise so that a failed
        // POST rejects the chain instead of throwing from an async callback.
        return new Promise(function (resolve, reject) {
            request(options, function (err, res, body) {
                if (err) {
                    console.error('error posting json: ', err);
                    return reject(err);
                }
                var headers = res.headers;
                var statusCode = res.statusCode;
                console.log('headers: ', headers);
                console.log('statusCode: ', statusCode);
                console.log('body: ', body);
                resolve();
            });
        });
    })
    .catch(function (err) {
        // Without a rejection handler a failed scrape or POST would go
        // unreported; log it and signal failure through the exit code.
        console.error('scrape run failed: ', err);
        process.exitCode = 1;
    });
The set of buildpacks I used to (almost) run a Node controlled casper script
Error: Command failed: casperjs turnitoffandon.js
ERROR: stderr was:/bin/sh 1: casperjs: not found
pageSettings: {
// Image loading is deliberately left ON: for reasons not understood, this
// prevents a worse memory leak.
loadImages: true,
loadPlugins: false,
},
Now we’re on the Scheduler!
Script runs fine, exits, and then… crashes??
I don’t know how or why this works. Don’t ask me.
State changed from up to COMPLETE

--

--

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store
Taylor Nodell

Taylor Nodell

Developer. Musician. Naturalist. Traveler. In any order. @tayloredtotaylor