2016-05-25 16 views

C#でコンソールベースのWebクローラーを作成しようとしています。Googleの検索バーを使ってキーワードを検索できるようにしたいと考えており、参考になりそうな質問(question)を見つけました。しかし、その質問ではWindowsアプリケーションを使用しているため、コンソールベースの場合とは異なるのではないかと思っています。これを簡単に実現するにはどうすればよいでしょうか?Windowsアプリケーションを使う場合と同じ考え方でよいのでしょうか? タイトル:Google検索バーを使用したコンソールベースのWebクローラー(C#)


using System; 
using System.Collections.Specialized; 
using System.IO; 
using System.Net; 
using System.Text.RegularExpressions; 

namespace Crawler 

    //Create information handling 

    /// <summary>
    /// Message sink used by the crawler to report progress at different severity levels.
    /// </summary>
    /// <remarks>
    /// Every member takes the message text and returns a string; the
    /// <c>ConsoleInformative</c> implementation in this file returns the text unchanged.
    /// </remarks>
    public interface IWidow
    {
        /// <summary>Reports a general message.</summary>
        /// <param name="input">The message text.</param>
        string Say(string input);

        /// <summary>Reports a success message.</summary>
        /// <param name="input">The message text.</param>
        string Success(string input);

        /// <summary>Reports a minor error.</summary>
        /// <param name="input">The message text.</param>
        string MinorErr(string input);

        /// <summary>Reports a fatal error.</summary>
        /// <param name="input">The message text.</param>
        string FatalErr(string input);

        /// <summary>Reports a debug message.</summary>
        /// <param name="input">The message text.</param>
        string Debug(string input);
    }

    /// <summary>
    /// Console implementation of <see cref="IWidow"/>. Each method sets the console
    /// foreground color, writes the message prefixed with the current local time
    /// (format <c>h:mm:ss tt</c>), and returns the message unchanged.
    /// </summary>
    public class ConsoleInformative : IWidow
    {
        /// <summary>Writes <paramref name="input"/> in gray.</summary>
        /// <param name="input">The message text.</param>
        /// <returns><paramref name="input"/>, unchanged.</returns>
        public string Say(string input)
        {
            return Write(ConsoleColor.Gray, input);
        }

        /// <summary>Writes <paramref name="input"/> in green.</summary>
        /// <param name="input">The message text.</param>
        /// <returns><paramref name="input"/>, unchanged.</returns>
        public string Success(string input)
        {
            return Write(ConsoleColor.Green, input);
        }

        /// <summary>Writes <paramref name="input"/> in dark yellow.</summary>
        /// <param name="input">The message text.</param>
        /// <returns><paramref name="input"/>, unchanged.</returns>
        public string MinorErr(string input)
        {
            return Write(ConsoleColor.DarkYellow, input);
        }

        /// <summary>Writes <paramref name="input"/> in red.</summary>
        /// <param name="input">The message text.</param>
        /// <returns><paramref name="input"/>, unchanged.</returns>
        public string FatalErr(string input)
        {
            return Write(ConsoleColor.Red, input);
        }

        /// <summary>Writes <paramref name="input"/> in yellow.</summary>
        /// <param name="input">The message text.</param>
        /// <returns><paramref name="input"/>, unchanged.</returns>
        public string Debug(string input)
        {
            return Write(ConsoleColor.Yellow, input);
        }

        /// <summary>Shared implementation: color the console, print a timestamped line, echo the message back.</summary>
        private static string Write(ConsoleColor color, string input)
        {
            // The foreground color is deliberately left set afterwards, as before;
            // the next call overwrites it.
            Console.ForegroundColor = color;
            Console.WriteLine($"[{DateTime.Now.ToString("h:mm:ss tt")}] {input}");
            return input;
        }
    }

    /// <summary>
    /// Console crawler: holds the HTTP download helper and the program entry point.
    /// </summary>
    class BlackWidow
    {
        /// <summary>
        /// Sends an HTTP request to <paramref name="url"/>, saves the response body to
        /// <c>log\html.txt</c> under the application base directory, and returns it.
        /// </summary>
        /// <param name="url">Address to request.</param>
        /// <returns>The response body as text.</returns>
        /// <remarks>
        /// A failure while configuring the proxy is logged to <c>log\error.txt</c> and the
        /// request is still attempted. Exceptions from the request itself or from writing
        /// the HTML log propagate to the caller.
        /// </remarks>
        private static string GetWebInfo(string url)
        {
            string logDirectory = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "log");
            string logPath = Path.Combine(logDirectory, "html.txt");
            string errPath = Path.Combine(logDirectory, "error.txt");

            // File.WriteAllText does not create missing directories.
            Directory.CreateDirectory(logDirectory);

            HttpWebRequest requests = (HttpWebRequest)HttpWebRequest.Create(url);
            requests.UserAgent = "A .NET Web Crawler";

            IWebProxy proxy = requests.Proxy;
            IWidow info = new ConsoleInformative();

            // Use cached credentials to access the proxy if there is one.
            info.Say("Checking if you're behind a proxy");
            if (proxy != null)
            {
                try
                {
                    info.Say("Proxy found attempting to login with cached credentials..");
                    string proxyUri = proxy.GetProxy(requests.RequestUri).ToString();
                    requests.UseDefaultCredentials = true;
                    requests.Proxy = new WebProxy(proxyUri, false);
                    requests.Proxy.Credentials = System.Net.CredentialCache.DefaultCredentials;
                }
                catch (Exception e)
                {
                    // Record the failure if the cached credentials fail to load.
                    // NOTE(review): the message says "exiting process" but execution continues.
                    info.FatalErr("Unable to verify cached credentials..");
                    File.WriteAllText(errPath, e.ToString());
                    info.Debug("Wrote error to file for further analysis, exiting process..");
                }
            }

            info.Success("Logged in with cached credentials, continuing process.");

            // Dispose the response, stream and reader so the connection is released.
            using (WebResponse providedResponse = requests.GetResponse())
            using (Stream stream = providedResponse.GetResponseStream())
            using (StreamReader readInformation = new StreamReader(stream))
            {
                string htmlOutput = readInformation.ReadToEnd();
                File.WriteAllText(logPath, htmlOutput);
                return htmlOutput;
            }
        }

        /// <summary>
        /// Program entry point. Logs progress messages and prepares the search query;
        /// any exception is printed and written to <c>errorlog.LOG</c> in the
        /// application base directory.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            IWidow info = new ConsoleInformative();

            try
            {
                string searchQuery = "test";
                string searchEngine = "https://google.com";
                NameValueCollection search = new NameValueCollection();
                Regex linkParser = new Regex(@"\b(?:https?://|www\.)\S+\b", RegexOptions.Compiled | RegexOptions.IgnoreCase);

                // NOTE(review): nothing here calls GetWebInfo yet, so the success
                // message below is logged without a request having been made.
                info.Say("Attempting to connect to the site..");
                info.Success($"Connected to site, writing HTML to file, and searching {searchEngine} with query {searchQuery}.");
                search.Add("q", searchQuery);
            }
            catch (Exception e)
            {
                // Catch all exceptions and write them to a file for further analysis.
                var filePath = AppDomain.CurrentDomain.BaseDirectory;

                info.FatalErr($"Exception thrown: {e}");
                // Path.Combine avoids the doubled separator: the base directory already ends with one.
                File.WriteAllText(Path.Combine(filePath, "errorlog.LOG"), e.ToString());
                info.Debug($"Wrote Exception to file located in {filePath}");
            }
        }
    }

私の実装を見てみるとよいでしょう。 https://stackoverflow.com/a/16975398/1610747 – Misterhex




using System; 
using System.Collections.Generic; 
using System.Linq; 
using System.Text; 
using System.Collections.Specialized; 
using System.IO; 
using System.Net; 
using System.Text.RegularExpressions; 

namespace Crawler 
     //Create information handling 

    /// <summary>
    /// Message sink used by the crawler to report progress at different severity levels.
    /// </summary>
    /// <remarks>
    /// Every member takes the message text and returns a string; the
    /// <c>ConsoleInformative</c> implementation in this file returns the text unchanged.
    /// </remarks>
    public interface IWidow
    {
        /// <summary>Reports a general message.</summary>
        /// <param name="input">The message text.</param>
        string Say(string input);

        /// <summary>Reports a success message.</summary>
        /// <param name="input">The message text.</param>
        string Success(string input);

        /// <summary>Reports a minor error.</summary>
        /// <param name="input">The message text.</param>
        string MinorErr(string input);

        /// <summary>Reports a fatal error.</summary>
        /// <param name="input">The message text.</param>
        string FatalErr(string input);

        /// <summary>Reports a debug message.</summary>
        /// <param name="input">The message text.</param>
        string Debug(string input);
    }

    /// <summary>
    /// Console implementation of <see cref="IWidow"/>. Each method sets the console
    /// foreground color, writes the message prefixed with the current local time
    /// (format <c>h:mm:ss tt</c>), and returns the message unchanged.
    /// </summary>
    public class ConsoleInformative : IWidow
    {
        /// <summary>Writes <paramref name="input"/> in gray.</summary>
        /// <param name="input">The message text.</param>
        /// <returns><paramref name="input"/>, unchanged.</returns>
        public string Say(string input)
        {
            return Write(ConsoleColor.Gray, input);
        }

        /// <summary>Writes <paramref name="input"/> in green.</summary>
        /// <param name="input">The message text.</param>
        /// <returns><paramref name="input"/>, unchanged.</returns>
        public string Success(string input)
        {
            return Write(ConsoleColor.Green, input);
        }

        /// <summary>Writes <paramref name="input"/> in dark yellow.</summary>
        /// <param name="input">The message text.</param>
        /// <returns><paramref name="input"/>, unchanged.</returns>
        public string MinorErr(string input)
        {
            return Write(ConsoleColor.DarkYellow, input);
        }

        /// <summary>Writes <paramref name="input"/> in red.</summary>
        /// <param name="input">The message text.</param>
        /// <returns><paramref name="input"/>, unchanged.</returns>
        public string FatalErr(string input)
        {
            // Previously used "[{0}] {0}", which printed the timestamp twice and
            // dropped the error message; the shared helper uses "[{0}] {1}".
            return Write(ConsoleColor.Red, input);
        }

        /// <summary>Writes <paramref name="input"/> in yellow.</summary>
        /// <param name="input">The message text.</param>
        /// <returns><paramref name="input"/>, unchanged.</returns>
        public string Debug(string input)
        {
            return Write(ConsoleColor.Yellow, input);
        }

        /// <summary>Shared implementation: color the console, print a timestamped line, echo the message back.</summary>
        private static string Write(ConsoleColor color, string input)
        {
            // The foreground color is deliberately left set afterwards, as before;
            // the next call overwrites it.
            Console.ForegroundColor = color;
            Console.WriteLine("[{0}] {1}", DateTime.Now.ToString("h:mm:ss tt"), input);
            return input;
        }
    }

    /// <summary>
    /// Crawler type holding the HTTP download helper.
    /// </summary>
    public class BlackWidow
    {
        /// <summary>Creates a crawler instance.</summary>
        /// <param name="url">Target address.</param>
        /// <remarks>
        /// NOTE(review): the constructor body is empty, so <paramref name="url"/> is ignored
        /// and <see cref="GetWebInfo"/> is never called from this class — confirm whether
        /// the constructor was meant to start the download.
        /// </remarks>
        public BlackWidow(string url)
        {
        }

        /// <summary>
        /// Sends an HTTP/1.0 request to <paramref name="url"/>, saves the response body to
        /// <c>html.txt</c> in the application base directory, and returns it.
        /// </summary>
        /// <param name="url">Address to request.</param>
        /// <returns>The response body as text.</returns>
        /// <remarks>
        /// A failure while configuring the proxy is logged to <c>error.txt</c> and the
        /// request is still attempted. Exceptions from the request itself or from writing
        /// the HTML log propagate to the caller.
        /// </remarks>
        private static string GetWebInfo(string url)
        {
            // Path.Combine handles the trailing separator of BaseDirectory.
            string baseDirectory = System.AppDomain.CurrentDomain.BaseDirectory;
            string logPath = Path.Combine(baseDirectory, "html.txt");
            string errPath = Path.Combine(baseDirectory, "error.txt");

            HttpWebRequest requests = (HttpWebRequest)HttpWebRequest.Create(url);
            requests.ProtocolVersion = HttpVersion.Version10;
            requests.UserAgent = "A .NET Web Crawler";

            IWebProxy proxy = requests.Proxy;
            IWidow info = new ConsoleInformative();

            // Use cached credentials to access the proxy if there is one.
            info.Say("Checking if you're behind a proxy");
            if (proxy != null)
            {
                try
                {
                    info.Say("Proxy found attempting to login with cached credentials..");
                    string proxyUri = proxy.GetProxy(requests.RequestUri).ToString();
                    requests.UseDefaultCredentials = true;
                    requests.Proxy = new WebProxy(proxyUri, false);
                    requests.Proxy.Credentials = System.Net.CredentialCache.DefaultCredentials;
                }
                catch (Exception e)
                {
                    // Record the failure if the cached credentials fail to load.
                    // NOTE(review): the message says "exiting process" but execution continues.
                    info.FatalErr("Unable to verify cached credentials..");
                    File.WriteAllText(errPath, e.ToString());
                    info.Debug("Wrote error to file for further analysis, exiting process..");
                }
            }

            info.Success("Logged in with cached credentials, continuing process.");

            // Dispose the response, stream and reader so the connection is released.
            using (WebResponse providedResponse = requests.GetResponse())
            using (Stream stream = providedResponse.GetResponseStream())
            using (StreamReader readInformation = new StreamReader(stream))
            {
                string htmlOutput = readInformation.ReadToEnd();
                File.WriteAllText(logPath, htmlOutput);
                return htmlOutput;
            }
        }
    }
    /// <summary>
    /// Console entry point for the crawler.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Creates a <see cref="BlackWidow"/> for the search engine URL and logs progress;
        /// any exception is printed and written to <c>errorlog.LOG</c> in the
        /// application base directory.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            IWidow info = new ConsoleInformative();

            try
            {
                string searchQuery = "test";
                string searchEngine = "https://google.com";
                NameValueCollection search = new NameValueCollection();
                Regex linkParser = new Regex(@"\b(?:https?://|www\.)\S+\b", RegexOptions.Compiled | RegexOptions.IgnoreCase);

                info.Say("Attempting to connect to the site..");
                BlackWidow blackWidow = new BlackWidow(searchEngine);
                info.Success(string.Format("Connected to site, writing HTML to file, and searching {0} with query {1}.", searchEngine, searchQuery));
                search.Add("q", searchQuery);
            }
            catch (Exception e)
            {
                // Catch all exceptions and write them to a file for further analysis.
                var filePath = AppDomain.CurrentDomain.BaseDirectory;

                info.FatalErr(string.Format("Exception thrown: {0}", e.ToString()));
                // Path.Combine avoids the doubled separator: the base directory already ends with one.
                File.WriteAllText(Path.Combine(filePath, "errorlog.LOG"), e.ToString());
                info.Debug(string.Format("Wrote Exception to file located in {0}", filePath));
            }
        }
    }

プロキシの背後にいない場合は、プロキシに関する部分を取り除いてかまいません。あれは主に、私の環境で必要なだけのものです – 13aal
