2016-03-27 3 views
-3

これを並行処理のエグゼキュータ(スレッドプールエグゼキュータ)を使って、あるいはもっと良い方法で再実装するにはどうすればよいでしょうか。基本的には、クローラに指定したURLをクロールさせ、その後、そこで見つかったURLをたどって別のWebサイトもクロールさせたいと考えています。スレッドプールエグゼキュータを使ってこれをどのように再実装できますか?

package Mainpackge; 

import java.io.IOException; 

import java.util.ArrayList; 
import java.util.List; 

import org.jsoup.Jsoup; 
import org.jsoup.nodes.Document; 
import org.jsoup.nodes.Element; 
import org.jsoup.select.Elements; 

/**
 * Starts one {@link Worker} thread per seed URL and prints the absolute
 * target of every anchor each worker collected.
 */
public class main {

    /**
     * Launches a worker thread for every seed URL, then reports the results
     * worker by worker in the order the workers were created.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // List of URLs to collect data from
        String[] urls = new String[]{
            "http://www.answers.com/",
            "http://www.britannica.com/",
            "https://ie.yahoo.com/?p=us",
            "https://en.wikipedia.org/wiki/Main_Page",
            // Host name previously contained a stray space and could never resolve.
            "http://www.worldbook.com/",
            "http://www.computerlanguage.com/",
            "http://www.howstuffworks.com/",
            "http://www.dmoz.org/Computers/Computer_Science/"
        };

        // Create and start workers
        List<Worker> workers = new ArrayList<>(urls.length);
        for (String url : urls) {
            Worker w = new Worker(url);
            workers.add(w);
            new Thread(w).start();
        }

        // Retrieve results; waitForResults() blocks on each worker in turn
        for (Worker w : workers) {
            Elements results = w.waitForResults();
            if (results != null) {
                for (Element result : results) {
                    // absUrl("href") resolves the link against the page's base URI
                    System.out.println(w.getName()+": "+result.absUrl("href"));
                }
            } else {
                System.err.println(w.getName()+" had some error!");
            }
        }
    }
}

/**
 * Fetches a single page with jsoup and hands the anchors it contains to
 * whichever thread calls {@link #waitForResults()}.
 *
 * <p>Completion is signalled even when the fetch fails, so a waiting caller
 * is always released: it receives the links on success and {@code null} on
 * failure.
 */
class Worker implements Runnable {

    // Incremented without synchronization; workers are expected to be
    // constructed from a single thread.
    private static int number = 0;

    private final String url;
    private final String name;

    private final Object lock = new Object();

    // Both fields below are guarded by lock.
    private Elements results;
    private boolean done;

    /**
     * @param url address of the page this worker will download
     */
    public Worker(String url) {
        this.url = url;
        this.name = "Worker-" + (number++);
    }

    /**
     * @return the generated name of this worker, e.g. {@code Worker-0}
     */
    public String getName() {
        return name;
    }

    /**
     * Downloads the page, selects every {@code a} element and publishes the
     * outcome. The {@code finally} block marks the worker as done on every
     * exit path (success, I/O failure or unexpected runtime exception), so
     * {@link #waitForResults()} can never block forever.
     */
    @Override
    public void run() {
        Elements links = null;
        try {
            Document doc = Jsoup.connect(this.url).get();
            links = doc.select("a");
        } catch (IOException e) {
            // You should implement a better error handling code..
            System.err.println("Error while parsing: "+this.url);
            e.printStackTrace();
        } finally {
            // Update results and wake up any waiting thread
            synchronized (lock) {
                this.results = links;
                this.done = true;
                lock.notifyAll();
            }
        }
    }

    /**
     * Blocks until {@link #run()} has finished.
     *
     * @return the anchors found on the page, or {@code null} if the fetch
     *         failed or the calling thread was interrupted while waiting
     */
    public Elements waitForResults() {
        synchronized (lock) {
            try {
                // Loop guards against spurious wake-ups
                while (!this.done) {
                    lock.wait();
                }
                return this.results;
            } catch (InterruptedException e) {
                // Restore the flag so callers further up can still observe the interrupt
                Thread.currentThread().interrupt();
                e.printStackTrace();
                return null;
            }
        }
    }
}

答えて

0

ExecutorServiceとCallableの実装を使ってスレッドを扱う完全な例です。

import java.util.ArrayList; 
import java.util.Arrays; 
import java.util.List; 
import java.util.concurrent.Callable; 
import java.util.concurrent.ExecutionException; 
import java.util.concurrent.ExecutorService; 
import java.util.concurrent.Executors; 
import java.util.concurrent.Future; 
import java.util.concurrent.TimeUnit; 

/**
 * Skeleton of a two-level crawler built on a fixed-size thread pool: every
 * seed URL is crawled by a {@link Crawler} task, and every URL such a task
 * reports is submitted to the same pool as a follow-up task.
 */
public class ThreadPoolExample {

    /** Number of threads in the pool shared by all crawl tasks. */
    private static final int POOL_SIZE = 10;

    /** Maximum time, in seconds, to wait for follow-up tasks after shutdown. */
    private static final long TERMINATION_TIMEOUT_SECONDS = 60;

    /**
     * Submits one task per seed URL, submits a follow-up task for each URL
     * those tasks return, then shuts the pool down and waits for it to drain.
     *
     * @param args unused
     * @throws InterruptedException if the thread is interrupted while waiting
     *         for a result or for the pool to terminate
     * @throws ExecutionException if a seed crawl task threw an exception
     */
    public static void main(String[] args) throws InterruptedException, ExecutionException {
        List<String> urls = Arrays.asList(
            "http://www.answers.com/",
            "http://www.britannica.com/",
            "https://ie.yahoo.com/?p=us",
            "https://en.wikipedia.org/wiki/Main_Page",
            // Host name previously contained a stray space and could never resolve.
            "http://www.worldbook.com/",
            "http://www.computerlanguage.com/",
            "http://www.howstuffworks.com/",
            "http://www.dmoz.org/Computers/Computer_Science/"
        );

        ExecutorService ex = Executors.newFixedThreadPool(POOL_SIZE);
        try {
            List<Future<Element>> results = new ArrayList<>(urls.size());
            for (String seed : urls) {
                results.add(ex.submit(new Crawler(seed)));
            }

            for (Future<Element> future : results) {
                // get() blocks until the corresponding task has completed
                for (String url : future.get().urls) {
                    // Queue a follow-up task for each URL that was found
                    ex.submit(new Crawler(url));
                }
            }
        } finally {
            // Always stop accepting work, even when a task failed, so the
            // pool's non-daemon threads can exit once the queue is drained.
            ex.shutdown();
        }

        // awaitTermination only makes sense after shutdown(); called earlier
        // it merely blocks for the whole timeout.
        if (!ex.awaitTermination(TERMINATION_TIMEOUT_SECONDS, TimeUnit.SECONDS)) {
            System.err.println("Follow-up crawl tasks still running after "
                + TERMINATION_TIMEOUT_SECONDS + " seconds");
        }
    }

    /**
     * Task that crawls a single URL. The crawling logic is a placeholder
     * that returns a fixed list.
     */
    public static class Crawler implements Callable<Element> {
        final String url;

        /**
         * @param url address this task is responsible for
         */
        public Crawler(String url) {
            this.url = url;
        }

        /**
         * @return the URLs discovered while crawling (placeholder values)
         */
        @Override
        public Element call() throws Exception {
            // Implement your crawling logic and return your elements
            return new Element(Arrays.asList("all new urls", "that you found while crwaling"));
        }
    }

    /**
     * Result of one crawl: the list of URLs that were found.
     */
    public static class Element {
        final List<String> urls;

        /**
         * @param urls URLs discovered by a crawl task
         */
        public Element(List<String> urls) {
            this.urls = urls;
        }

        @Override
        public String toString() {
            return "Elements found : " + urls.size();
        }
    }
}
+0

なぜ、ExecutorServiceをインスタンス化した直後に `awaitTermination` を呼び出すのですか?また、スレッドプールのサイズが10に設定されているのはなぜですか?伝統的に、スレッドプールのサイズは使用可能なプロセッサの数(`Runtime.getRuntime().availableProcessors()`)に設定されます。参照:http://stackoverflow.com/a/1980858/363573 – Stephan

関連する問題