2016-12-02 3 views
0

phantomjsが配列内のURLを保存してくれません。私はphantomjsを使って、URLの配列を順番に読み込み、それぞれのページをhtmlファイルとして保存しようとしています。

これまでのところ、私のコードはある程度は動作します。各ページの読み込みと保存は始まるのですが、何ページか処理した後に突然新しいページの読み込みが止まり、同じページが何度も繰り返し保存されてしまいます。原因の一部はpage.close()を使っていないことにあると考えていますが、page.close()を入れるとコードがまったく動作しなくなります。

どなたかに助けていただければと思っています。特に、問題の原因が何なのかを説明していただけると助かります。この問題の解決策をお持ちの方がいれば、大変ありがたいです。現時点でコードが乱雑なのは承知していますが、問題の原因を突き止めてから整理するつもりです。

var fs = require('fs'); 

/* this is used to get the array of urls I'm trying to load. 
function linkfinder(){ 
var array = fs.read('C:\\Users\\jacob\\Documents\\SDD\\links.txt').toString().split('\n'); 
console.log(array[1]); 
console.log('ffff'); 
return array; 
}*/ 

// Queue of college-profile pages to download. It must be an array of string
// literals (not an object literal with bare URLs, which does not parse) so
// that it can be consumed one entry at a time with shift().
var urls = [
    "http://www.njcaa.org/member_colleges/college-profile?collegeId=1476",
    "http://www.njcaa.org/member_colleges/college-profile?collegeId=1548",
    "http://www.njcaa.org/member_colleges/college-profile?collegeId=1781",
    "http://www.njcaa.org/member_colleges/college-profile?collegeId=1506",
    "http://www.njcaa.org/member_colleges/college-profile?collegeId=1321",
    "http://www.njcaa.org/member_colleges/college-profile?collegeId=1390",
    "http://www.njcaa.org/member_colleges/college-profile?collegeId=1430",
    "http://www.njcaa.org/member_colleges/college-profile?collegeId=1707",
    "http://www.njcaa.org/member_colleges/college-profile?collegeId=1477",
    "http://www.njcaa.org/member_colleges/college-profile?collegeId=1431",
    "http://www.njcaa.org/member_colleges/college-profile?collegeId=1678",
    "http://www.njcaa.org/member_colleges/college-profile?collegeId=1409",
    "http://www.njcaa.org/member_colleges/college-profile?collegeId=1239",
    "http://www.njcaa.org/member_colleges/college-profile?collegeId=1765",
    "http://www.njcaa.org/member_colleges/college-profile?collegeId=2203",
    "http://www.njcaa.org/member_colleges/college-profile?collegeId=1889",
    "http://www.njcaa.org/member_colleges/college-profile?collegeId=2240",
    "http://www.njcaa.org/member_colleges/college-profile?collegeId=1650",
    "http://www.njcaa.org/member_colleges/college-profile?collegeId=1490",
    "http://www.njcaa.org/member_colleges/college-profile?collegeId=1514",
    "http://www.njcaa.org/member_colleges/college-profile?collegeId=1322",
    "http://www.njcaa.org/member_colleges/college-profile?collegeId=1744"
];

    // One WebPage instance is reused for every URL; pages are loaded strictly
    // one after another, so it is never closed between loads.
    var page = new WebPage();

    /**
     * Loads `url`, writes the resulting HTML to disk and then moves on to the
     * next queued URL.
     *
     * The file is written directly inside the page.open() callback. Assigning
     * page.onLoadFinished from within that callback registers the handler only
     * after the current load has already finished, so it would fire for later
     * loads instead and the saved content would no longer match `url`.
     *
     * @param {string} url - Address of the page to download.
     */
    function handle_page(url) {
        page.open(url, function (status) {
            if (status === 'success') {
                console.log("page load finished");
                page.injectJs('jquery.min.js');

                // Characters 63..66 of the URLs used by this script hold the
                // 4-digit collegeId, which becomes the output file name.
                var linked = url.substr(63, 4);
                var output = 'C:\\Users\\jacob\\Documents\\SDD\\schools\\' + linked + '.html';
                console.log(output);

                fs.write(output, page.content, 'w');
            } else {
                // Do not save anything for a page that failed to load.
                console.log("page load failed: " + url);
            }

            // Advance only after the current page has been handled, so that
            // page.content always belongs to the URL being saved.
            next_page();
        });
    }

/**
 * Takes the next URL off the front of the `urls` queue and hands it to
 * handle_page(). When the queue is empty, PhantomJS is asked to exit.
 */
function next_page() {
    if (urls.length === 0) {
        phantom.exit(0);
        // Return explicitly so that handle_page() is never called with an
        // undefined URL once the queue is exhausted.
        return;
    }

    var url = urls.shift();
    handle_page(url);
}

// Start the crawl with the first URL; handle_page()'s open callback calls next_page() again for each following one.
next_page(); 

答えて

0

これは動作しますが、正しいパスを指定する必要があります（私はLinuxで作業しており、私の環境でのパスは/root/pjsです）。

var page = require('webpage').create(), fs = require('fs'); 

// NOTE (from the original author): saving from a page.onLoadFinished handler
// does not work here - the same content ends up being written for every page.
// The handler is therefore left as a no-op; saving is done in the callback
// passed to page.open() instead.
page.onLoadFinished = function() {}
// College-profile pages to download, in order. Every URL is the same prefix
// followed by a collegeId, so the array is built from the list of ids.
var urls = [
    1476, 1548, 1781, 1506, 1321, 1390, 1430, 1707,
    1477, 1431, 1678, 1409, 1239, 1765, 2203, 1889,
    2240, 1650, 1490, 1514, 1322, 1744
].map(function (collegeId) {
    return "http://www.njcaa.org/member_colleges/college-profile?collegeId=" + collegeId;
});
// Index into `urls` of the next page to open; advanced once per finished load.
var i = 0;

/**
 * Callback for page.open(): saves the page that has just been loaded and then
 * either opens the next URL or exits PhantomJS once every URL was processed.
 *
 * Nothing is written when the load failed, so a failed page does not leave
 * behind a file with unrelated content or a nameless ".html" file.
 *
 * @param {string} status - Load result reported by page.open() ('success' or 'fail').
 */
function on_a_page(status) {
    i++;

    if (status === 'success') {
        console.log("page load finished");
        // Characters 63..66 of the URLs listed in this script hold the
        // 4-digit collegeId, which becomes the output file name.
        // You need to specify the right path ('/root/pjs' is the author's Linux path).
        var output = '/root/pjs/' + page.url.substr(63, 4) + '.html';
        console.log(output);
        fs.write(output, page.content, 'w');
    } else {
        console.log("page load failed (" + status + "): " + urls[i - 1]);
    }

    if (i < urls.length) {
        to_open();
    } else {
        phantom.exit();
    }
}
/**
 * Starts loading the URL at the current index `i`, with on_a_page() as the
 * completion callback, and then logs that index.
 */
function to_open() {
    var target = urls[i];
    page.open(target, on_a_page);
    console.log(i);
}
// Start the chain with urls[0]; on_a_page() calls to_open() again for each following URL.
to_open() 
関連する問題