2016-07-12 2 views
2

JavaScriptを含むアンカータグを持つWebページから、HtmlUnitを使用してファイルをダウンロードするにはどうすればよいですか。私は次のページから実際にファイルをダウンロードするために、リンクをクリックしようとしています:

http://www.histdata.com/download-free-forex-historical-data/?/metatrader/1-minute-bar-quotes/eurusd/2013

クリックしようとしているリンクのHTMLコードは次のとおりです:

<a id="a_file" title="Download the zip data file"  href="javascript:return true;" target="nullDisplay">HISTDATA_COM_MT_EURUSD_M1_2013.zip</a> 

ダウンロードに使用しているJavaコードは次のとおりです:

// Open the page with a JavaScript-enabled client; the AJAX controller makes
// asynchronous calls triggered by the page run synchronously.
WebClient webClient = new WebClient(BrowserVersion.FIREFOX_38);
webClient.getOptions().setJavaScriptEnabled(true);
webClient.setAjaxController(new NicelyResynchronizingAjaxController());
HtmlPage htmlPage = webClient.getPage("http://www.histdata.com/download-free-forex-historical-data/?/metatrader/1-minute-bar-quotes/eurusd/2016/7");

// Locate the download anchor by its visible text. Only assign `anchor` on a
// real match: otherwise a page without the link would leave the last anchor
// of the page selected and the wrong element would be clicked.
HtmlAnchor anchor = null;
for (HtmlAnchor candidate : htmlPage.getAnchors()) {
    if ("HISTDATA_COM_MT_EURUSD_M1_201607.zip".equals(candidate.asText())) {
        anchor = candidate;
        break;
    }
}
if (anchor == null) {
    throw new IllegalStateException("Download link not found: HISTDATA_COM_MT_EURUSD_M1_201607.zip");
}
Page p = anchor.click();

// Give background scripts started by the click up to 60 s to finish.
webClient.waitForBackgroundJavaScript(60000);

// Dump the response body; try-with-resources guarantees the stream is closed
// even if reading fails part-way through.
try (InputStream is = p.getWebResponse().getContentAsStream()) {
    int b;
    while ((b = is.read()) != -1) {
        System.out.print((char) b);
    }
}

表示されるエラーメッセージは次のとおりです。

Jul 12, 2016 1:29:57 PM com.gargoylesoftware.htmlunit.javascript.StrictErrorReporter error 
SEVERE: error: message=[invalid return] sourceName=[javascript url] line=[88] lineSource=[return true;] lineOffset=[7] 
Exception in thread "main" ======= EXCEPTION START ======== 
Exception class=[net.sourceforge.htmlunit.corejs.javascript.EvaluatorException] 
com.gargoylesoftware.htmlunit.ScriptException: invalid return (javascript url#88) 
    at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:904) 
    at net.sourceforge.htmlunit.corejs.javascript.Context.call(Context.java:628) 
    at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.call(ContextFactory.java:515) 
    at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.compile(JavaScriptEngine.java:729) 
    at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.compile(JavaScriptEngine.java:694) 
    at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:746) 
    at com.gargoylesoftware.htmlunit.html.HtmlPage.executeJavaScriptIfPossible(HtmlPage.java:902) 
    at com.gargoylesoftware.htmlunit.html.HtmlAnchor.doClickStateUpdate(HtmlAnchor.java:114) 
    at com.gargoylesoftware.htmlunit.html.HtmlAnchor.doClickStateUpdate(HtmlAnchor.java:179) 
    at com.gargoylesoftware.htmlunit.html.DomElement.click(DomElement.java:800) 
    at com.gargoylesoftware.htmlunit.html.DomElement.click(DomElement.java:747) 
    at com.gargoylesoftware.htmlunit.html.DomElement.click(DomElement.java:694) 
    at clickPage.main(clickPage.java:38) 
Caused by: net.sourceforge.htmlunit.corejs.javascript.EvaluatorException: invalid return (javascript url#88) 
    at com.gargoylesoftware.htmlunit.javascript.StrictErrorReporter.error(StrictErrorReporter.java:65) 
    at net.sourceforge.htmlunit.corejs.javascript.Parser.addError(Parser.java:188) 
    at net.sourceforge.htmlunit.corejs.javascript.Parser.addError(Parser.java:167) 
    at net.sourceforge.htmlunit.corejs.javascript.Parser.reportError(Parser.java:255) 
    at net.sourceforge.htmlunit.corejs.javascript.Parser.reportError(Parser.java:244) 
    at net.sourceforge.htmlunit.corejs.javascript.Parser.reportError(Parser.java:237) 
    at net.sourceforge.htmlunit.corejs.javascript.Parser.returnOrYield(Parser.java:1632) 
    at net.sourceforge.htmlunit.corejs.javascript.Parser.statementHelper(Parser.java:1022) 
    at net.sourceforge.htmlunit.corejs.javascript.Parser.statement(Parser.java:928) 
    at net.sourceforge.htmlunit.corejs.javascript.Parser.parse(Parser.java:572) 
    at net.sourceforge.htmlunit.corejs.javascript.Parser.parse(Parser.java:492) 
    at net.sourceforge.htmlunit.corejs.javascript.Context.compileImpl(Context.java:2660) 
    at net.sourceforge.htmlunit.corejs.javascript.Context.compileString(Context.java:1623) 
    at com.gargoylesoftware.htmlunit.javascript.HtmlUnitContextFactory$TimeoutContext.compileString(HtmlUnitContextFactory.java:172) 
    at net.sourceforge.htmlunit.corejs.javascript.Context.compileString(Context.java:1615) 
    at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$2.doRun(JavaScriptEngine.java:720) 
    at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:889) 
    ... 12 more 
Enclosed exception: 
net.sourceforge.htmlunit.corejs.javascript.EvaluatorException: invalid return (javascript url#88) 
    at com.gargoylesoftware.htmlunit.javascript.StrictErrorReporter.error(StrictErrorReporter.java:65) 
    at net.sourceforge.htmlunit.corejs.javascript.Parser.addError(Parser.java:188) 
    at net.sourceforge.htmlunit.corejs.javascript.Parser.addError(Parser.java:167) 
    at net.sourceforge.htmlunit.corejs.javascript.Parser.reportError(Parser.java:255) 
    at net.sourceforge.htmlunit.corejs.javascript.Parser.reportError(Parser.java:244) 
    at net.sourceforge.htmlunit.corejs.javascript.Parser.reportError(Parser.java:237) 
    at net.sourceforge.htmlunit.corejs.javascript.Parser.returnOrYield(Parser.java:1632) 
    at net.sourceforge.htmlunit.corejs.javascript.Parser.statementHelper(Parser.java:1022) 
    at net.sourceforge.htmlunit.corejs.javascript.Parser.statement(Parser.java:928) 
    at net.sourceforge.htmlunit.corejs.javascript.Parser.parse(Parser.java:572) 
    at net.sourceforge.htmlunit.corejs.javascript.Parser.parse(Parser.java:492) 
    at net.sourceforge.htmlunit.corejs.javascript.Context.compileImpl(Context.java:2660) 
    at net.sourceforge.htmlunit.corejs.javascript.Context.compileString(Context.java:1623) 
    at com.gargoylesoftware.htmlunit.javascript.HtmlUnitContextFactory$TimeoutContext.compileString(HtmlUnitContextFactory.java:172) 
    at net.sourceforge.htmlunit.corejs.javascript.Context.compileString(Context.java:1615) 
    at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$2.doRun(JavaScriptEngine.java:720) 
    at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:889) 
    at net.sourceforge.htmlunit.corejs.javascript.Context.call(Context.java:628) 
    at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.call(ContextFactory.java:515) 
    at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.compile(JavaScriptEngine.java:729) 
    at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.compile(JavaScriptEngine.java:694) 
    at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:746) 
    at com.gargoylesoftware.htmlunit.html.HtmlPage.executeJavaScriptIfPossible(HtmlPage.java:902) 
    at com.gargoylesoftware.htmlunit.html.HtmlAnchor.doClickStateUpdate(HtmlAnchor.java:114) 
    at com.gargoylesoftware.htmlunit.html.HtmlAnchor.doClickStateUpdate(HtmlAnchor.java:179) 
    at com.gargoylesoftware.htmlunit.html.DomElement.click(DomElement.java:800) 
    at com.gargoylesoftware.htmlunit.html.DomElement.click(DomElement.java:747) 
    at com.gargoylesoftware.htmlunit.html.DomElement.click(DomElement.java:694) 
    at clickPage.main(clickPage.java:38) 
== CALLING JAVASCRIPT == 
return true; 
======= EXCEPTION END ======== 

私のコードの何が間違っているのか、また、指定したリンクからファイルをダウンロードするにはどうすればよいのか教えてください。

答えて

0

ご報告いただきありがとうございます。このエラーはSVN上で修正されました。

latest buildまたはスナップショットを使用してください。

1
//Complete solution
//1. Open the page.
//2. List the file URLs on that page using XPath.
//3. Download every file those URLs point to.

import java.io.File; 
import java.io.FileOutputStream; 
import java.io.IOException; 
import java.io.InputStream; 
import java.io.OutputStream; 
import java.net.HttpURLConnection; 
import java.net.URL; 
import java.util.Date; 
import java.util.List; 
import java.util.Map; 

import com.gargoylesoftware.htmlunit.BrowserVersion; 
import com.gargoylesoftware.htmlunit.WebClient; 
import com.gargoylesoftware.htmlunit.html.DomAttr; 
import com.gargoylesoftware.htmlunit.html.HtmlAnchor; 
import com.gargoylesoftware.htmlunit.html.HtmlPage; 

/**
 * Small crawler: loads a page with HtmlUnit, selects file URLs with an XPath
 * expression and downloads each of them into a local directory.
 */
public class Crawler {

    /** Upper bound on manually followed redirects so a redirect cycle cannot loop forever. */
    private static final int MAX_REDIRECTS = 10;

    /**
     * Loads the configured page, evaluates the XPath that selects the download
     * {@code href} attributes and downloads every matching link.
     *
     * @param args unused
     * @throws Throwable if the page cannot be loaded or a download fails
     */
    public static void main(String[] args) throws Throwable {

        String baseUrl = "Enter base http/https url here";
        String url1 = baseUrl + "add addational url of main page";
        String xpathofdownlaodlinks = "xpath of file url or--> html/body/div/div[3]/a/@href";

        String pathToSaveFile = "d:\\local\\to\\save\\files";

        String fileExt = ".txt";

        WebClient webclient = new WebClient(BrowserVersion.CHROME);
        try {
            webclient.getOptions().setJavaScriptEnabled(true);
            HtmlPage page = webclient.getPage(url1);

            // Keep the XPath result untyped and filter with instanceof: this
            // avoids an unchecked cast and skips any non-attribute node the
            // expression might return.
            List<?> links = page.getByXPath(xpathofdownlaodlinks);

            int index = 0;
            for (Object node : links) {
                if (!(node instanceof DomAttr)) {
                    continue;
                }
                String link = baseUrl + ((DomAttr) node).getValue();

                // Build the target inside the save directory (File inserts the
                // separator) and add a counter so two downloads started within
                // the same millisecond do not overwrite each other.
                String name = new Date().getTime() + "_" + index + fileExt;
                index++;
                downlaodRawFile(link, new File(pathToSaveFile, name).getPath());
            }
        } finally {
            // Release the client even when loading the page or a download fails.
            webclient.close();
        }
    }

    /**
     * Downloads {@code link} to {@code fileName}, following HTTP redirects
     * manually (this also covers redirects that switch between http and https).
     * Missing parent directories of the target file are created.
     *
     * @param link     absolute URL of the file to download
     * @param fileName path of the local file to write
     * @throws IOException if the connection fails, a redirect has no
     *                     {@code Location} header, more than
     *                     {@link #MAX_REDIRECTS} redirects occur, or the file
     *                     cannot be written
     * @throws Throwable   never thrown by this implementation; kept in the
     *                     signature so existing callers compile unchanged
     */
    public static void downlaodRawFile(String link, String fileName) throws IOException, Throwable {
        URL url = new URL(link);
        HttpURLConnection http = (HttpURLConnection) url.openConnection();
        try {
            int redirects = 0;
            // getResponseCode() surfaces connection errors as IOException
            // instead of silently yielding an empty header map.
            while (isRedirected(http.getResponseCode())) {
                // getHeaderField is case-insensitive, unlike a lookup in the header map.
                String location = http.getHeaderField("Location");
                if (location == null) {
                    throw new IOException("Redirect without Location header from " + url);
                }
                redirects++;
                if (redirects > MAX_REDIRECTS) {
                    throw new IOException("Too many redirects while downloading " + link);
                }
                http.disconnect();
                // Resolve against the current URL so relative Location values work.
                url = new URL(url, location);
                http = (HttpURLConnection) url.openConnection();
            }

            File target = new File(fileName);
            File parent = target.getParentFile();
            if (parent != null) {
                parent.mkdirs();
            }

            // The input stream is opened first so that a failed request does
            // not leave an empty file behind; both streams are always closed.
            try (InputStream input = http.getInputStream();
                 OutputStream output = new FileOutputStream(target)) {
                byte[] buffer = new byte[4096];
                int n;
                while ((n = input.read(buffer)) != -1) {
                    output.write(buffer, 0, n);
                }
            }
        } finally {
            http.disconnect();
        }
    }

    /**
     * Tells whether an HTTP status code is a redirect that carries a
     * {@code Location} header to follow.
     *
     * @param status HTTP response status code
     * @return {@code true} for 301, 302, 303, 307 and 308
     */
    private static boolean isRedirected(int status) {
        return status == HttpURLConnection.HTTP_MOVED_PERM
                || status == HttpURLConnection.HTTP_MOVED_TEMP
                || status == HttpURLConnection.HTTP_SEE_OTHER
                || status == 307
                || status == 308;
    }

}
関連する問題