2016-10-09 5 views
0

これまで、DVLAのWebサイトのデータを自分のサイトに取り込むために、次のcURLスクリプトを使用してきました。コードはこのサイトで入手したものですが、出典を控えていなかったため、元のソースを示すことができません。

質問タイトル：DVLAスクレイピング（ASPx）— cURLでPOSTしてもサーバーから期待した応答が返ってこない

/**
 * Execute a request on an existing cURL handle and return the response.
 *
 * A URL without a host part (for example "/Default.aspx" or "Default.aspx") is
 * resolved against the origin (scheme, host and port) of the last URL this
 * function fetched successfully.
 *
 * @param resource|CurlHandle $ch  cURL handle (a resource before PHP 8, a CurlHandle object since).
 * @param string              $url Absolute URL, or a path relative to the previously fetched origin.
 *
 * @return string|bool The value returned by curl_exec() for this request.
 *
 * @throws InvalidArgumentException If $ch is not a cURL handle or $url is not a string.
 * @throws Exception                If cURL reports an error, or the response is an empty string
 *                                  and the HTTP status is neither 204 nor 203.
 */
function hhb_curl_exec($ch, $url)
{
    // Origin of the last effective URL, shared by every call in this process.
    static $hhb_curl_domainCache = "";

    // PHP < 8 hands out curl handles as resources, PHP >= 8 as CurlHandle objects.
    $isCurlHandle = (is_resource($ch) && get_resource_type($ch) === 'curl') || ($ch instanceof CurlHandle);
    if (!$isCurlHandle) {
        throw new InvalidArgumentException('$ch must be a curl handle!');
    }
    if (!is_string($url)) {
        throw new InvalidArgumentException('$url must be a string!');
    }

    // No host in the URL: treat it as a path on the previously visited origin.
    if (parse_url($url, PHP_URL_HOST) === null) {
        $url = $hhb_curl_domainCache . (substr($url, 0, 1) !== '/' ? '/' : '') . $url;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    $html = curl_exec($ch);
    if (curl_errno($ch)) {
        throw new Exception('Curl error (curl_errno=' . curl_errno($ch) . ') on url ' . var_export($url, true) . ': ' . curl_error($ch));
    }
    if ($html === '') {
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        // 204 is "No Content"; 203 is still tolerated because earlier versions of this helper accepted it.
        if (!in_array((int) $httpCode, array(203, 204), true)) {
            throw new Exception('Curl returned nothing for ' . var_export($url, true) . ' but HTTP_RESPONSE_CODE was ' . var_export($httpCode, true));
        }
    }

    // Remember where we ended up (the handle may have followed "Location:" redirects).
    // Scheme and port are kept so that a relative URL stays on https / on a non-default port.
    $effective = parse_url((string) curl_getinfo($ch, CURLINFO_EFFECTIVE_URL));
    if (is_array($effective) && isset($effective['host'])) {
        $origin = isset($effective['scheme']) ? $effective['scheme'] . '://' : '';
        $origin .= $effective['host'];
        if (isset($effective['port'])) {
            $origin .= ':' . $effective['port'];
        }
        $hhb_curl_domainCache = $origin;
    }

    return $html;
}
 
/**
 * Like hhb_curl_exec(), but also captures response headers, cookies and cURL's verbose log.
 *
 * The three by-reference arguments are reset on entry and filled on the way out.
 * CURLOPT_VERBOSE and CURLOPT_HEADER are switched on for the handle and left on.
 *
 * @param resource|CurlHandle $ch               cURL handle.
 * @param string              $url              URL to fetch (see hhb_curl_exec() for relative URLs).
 * @param array               $returnHeaders    Receives every non-empty header line of every response
 *                                              block cURL saw (redirects and "100 Continue" included).
 * @param array               $returnCookies    Receives name => value for each "name=value" segment of
 *                                              every Set-Cookie header. Attributes such as "path" or
 *                                              "HttpOnly" are reported as entries too, as before.
 * @param string              $verboseDebugInfo Receives cURL's verbose log, also when the request fails.
 *
 * @return string The response body without the header block(s).
 *
 * @throws InvalidArgumentException If $ch is not a cURL handle or $url is not a string.
 * @throws RuntimeException         If the temporary file for the verbose log cannot be created.
 * @throws Exception                Anything thrown by hhb_curl_exec().
 */
function hhb_curl_exec2($ch, $url, &$returnHeaders = array(), &$returnCookies = array(), &$verboseDebugInfo = "")
{
    $returnHeaders = array();
    $returnCookies = array();
    $verboseDebugInfo = "";

    // PHP < 8 hands out curl handles as resources, PHP >= 8 as CurlHandle objects.
    $isCurlHandle = (is_resource($ch) && get_resource_type($ch) === 'curl') || ($ch instanceof CurlHandle);
    if (!$isCurlHandle) {
        throw new InvalidArgumentException('$ch must be a curl handle!');
    }
    if (!is_string($url)) {
        throw new InvalidArgumentException('$url must be a string!');
    }

    // cURL writes its verbose log to a stream, so give it a temporary file to write to.
    $verbosefileh = tmpfile();
    if ($verbosefileh === false) {
        throw new RuntimeException('Could not create a temporary file for the curl verbose log');
    }
    $verbosefile = stream_get_meta_data($verbosefileh);
    $verbosefile = $verbosefile['uri'];
    curl_setopt($ch, CURLOPT_VERBOSE, 1);
    curl_setopt($ch, CURLOPT_STDERR, $verbosefileh);
    curl_setopt($ch, CURLOPT_HEADER, 1);

    // Defer any failure until the log has been collected and the temp file released.
    $failure = null;
    $html = '';
    try {
        $html = hhb_curl_exec($ch, $url);
    } catch (Exception $e) {
        $failure = $e;
    }
    $verboseDebugInfo = (string) file_get_contents($verbosefile);
    curl_setopt($ch, CURLOPT_STDERR, NULL);
    fclose($verbosefileh);
    unset($verbosefile, $verbosefileh);
    if ($failure !== null) {
        throw $failure;
    }

    // With CURLOPT_HEADER the output is "<header block(s)><body>". Let cURL say where the body
    // starts: looking for the first blank line breaks as soon as a redirect or a
    // "100 Continue" interim response adds a second header block.
    $headerSize = (int) curl_getinfo($ch, CURLINFO_HEADER_SIZE);
    if ($headerSize < 0 || $headerSize > strlen($html)) {
        $headerSize = 0;
    }
    foreach (explode("\x0d\x0a", (string) substr($html, 0, $headerSize)) as $line) {
        if ($line !== '') {
            $returnHeaders[] = $line;
        }
    }
    $htmlBody = (string) substr($html, $headerSize);
    unset($html);

    foreach ($returnHeaders as $header) {
        // e.g. "Set-Cookie: sessionToken=abc123; Expires=Wed, 09 Jun 2021 10:18:14 GMT; HttpOnly"
        if (stripos($header, "Set-Cookie:") !== 0) {
            continue;
        }
        foreach (explode(';', substr($header, strlen("Set-Cookie:"))) as $segment) {
            $segment = trim($segment);
            if ($segment === '') {
                continue;
            }
            $equalsPos = strpos($segment, '=');
            if ($equalsPos === false) {
                // Valueless attribute such as "Secure" or "HttpOnly".
                $name = $segment;
                $value = '';
            } else {
                $name = substr($segment, 0, $equalsPos);
                $value = (string) substr($segment, $equalsPos + 1);
            }
            // Spaces are not allowed in cookie names; drop them the way the original parser did.
            $returnCookies[urldecode(str_replace(' ', '', $name))] = urldecode($value);
        }
    }

    return $htmlBody;
}
 

 
############################################################## 
 
/**
 * Create a cURL handle preconfigured for browser-like scraping.
 *
 * Defaults: follow redirects, send an automatic Referer, return the transfer as a
 * string, accept any encoding cURL supports, 10s connect / 11s total timeout, and
 * an in-memory cookie engine backed by a temporary cookie file.
 *
 * @param array|null $custom_options_array CURLOPT_* => value pairs that override or extend the
 *                                         defaults. Any "empty" value means "no overrides".
 *
 * @return resource|CurlHandle The configured handle.
 *
 * @throws InvalidArgumentException If a non-empty, non-array value is passed.
 * @throws RuntimeException         If the temporary cookie file cannot be created.
 */
function hhb_curl_init($custom_options_array = array())
{
    if (empty($custom_options_array)) {
        // curl_init() itself takes a URL string or NULL; tolerate NULL (and other empty values) here too.
        $custom_options_array = array();
    }
    if (!is_array($custom_options_array)) {
        throw new InvalidArgumentException('$custom_options_array must be an array!');
    }

    // CURLOPT_BINARYTRANSFER used to be listed here; it has had no effect since PHP 5.1.3, so it is gone.
    $options_array = array(
        CURLOPT_AUTOREFERER => true,
        CURLOPT_COOKIESESSION => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_FORBID_REUSE => false,
        CURLOPT_HTTPGET => true,
        CURLOPT_RETURNTRANSFER => true,
        // WARNING: certificate verification is off, so the peer's identity is NOT checked.
        // Kept for backward compatibility; pass CURLOPT_SSL_VERIFYPEER => true to enable it.
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_CONNECTTIMEOUT => 10,
        CURLOPT_TIMEOUT => 11,
        CURLOPT_ENCODING => ""
        //CURLOPT_REFERER=>'example.org',
        //CURLOPT_USERAGENT=>'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:36.0) Gecko/20100101 Firefox/36.0'
    );

    if (!array_key_exists(CURLOPT_COOKIEFILE, $custom_options_array)) {
        // Only create a temp file when the caller did not bring a cookie file.
        // The handle is kept in a static array so the file outlives this call
        // (workaround for https://bugs.php.net/bug.php?id=66014).
        static $curl_cookiefiles_arr = array();
        $cookieFileHandle = tmpfile();
        if ($cookieFileHandle === false) {
            throw new RuntimeException('Could not create a temporary cookie file');
        }
        $curl_cookiefiles_arr[] = $cookieFileHandle;
        $cookieFileMeta = stream_get_meta_data($cookieFileHandle);
        $options_array[CURLOPT_COOKIEFILE] = $cookieFileMeta['uri'];
    }

    // array_merge() would renumber the integer CURLOPT_* keys, so overlay the overrides by hand.
    foreach ($custom_options_array as $key => $val) {
        $options_array[$key] = $val;
    }

    $curl = curl_init();
    curl_setopt_array($curl, $options_array);
    return $curl;
}
 

 

 

 

 
//------------------------------------------------// 
 

 
//The registration number (VRM) and the make must both be valid, and the make must be written
//exactly as in the DVLA's own list of makes, otherwise this won't work!

$registration_number = 'PK07LVD'; // must be a valid VRM to get the correct response
$vehicle_maker = 'BMW'; // must match the make the DVLA holds for this VRM, in the same format as the DVLA list on the site

$ch = hhb_curl_init();

$debugHeaders = array();
$debugCookies = array();
$debugRequest = '';

//first do an empty request to get a session id, the cookies and the ASP.NET hidden form fields
$html = hhb_curl_exec2($ch, 'https://www.vehicleenquiry.service.gov.uk/Default.aspx', $debugHeaders, $debugCookies, $debugRequest);

//loadHTML() has to be called on an instance (a static call is an Error on PHP 8);
//the @ hides the warnings libxml raises for real-world markup
$domd = new DOMDocument();
if (!@$domd->loadHTML($html)) {
    throw new RuntimeException('Could not parse the enquiry page as HTML');
}

//ASP.NET WebForms rejects a postback that does not echo these hidden inputs back
$hiddenFields = array();
foreach (array('__VIEWSTATE', '__VIEWSTATEGENERATOR', '__EVENTVALIDATION') as $fieldId) {
    $fieldNode = $domd->getElementById($fieldId);
    if ($fieldNode === null) {
        throw new RuntimeException('Hidden form field ' . $fieldId . ' was not found in the enquiry page');
    }
    $hiddenFields[$fieldId] = $fieldNode->getAttribute('value');
}
$__VIEWSTATE = $hiddenFields['__VIEWSTATE'];
$__VIEWSTATEGENERATOR = $hiddenFields['__VIEWSTATEGENERATOR'];
$__EVENTVALIDATION = $hiddenFields['__EVENTVALIDATION'];

//var_dump('__VIEWSTATE:',$__VIEWSTATE,'__VIEWSTATEGENERATOR:',$__VIEWSTATEGENERATOR,'__EVENTVALIDATION:',$__EVENTVALIDATION,'headers:',$debugHeaders,'cookies:',$debugCookies,'html:',$html,'request:',$debugRequest,'domd:',$domd);

//now to get the POST stuff
//NOTE(review): the two values below are single-quoted, so the literal texts
//"$registration_number" and "$vehicle_maker" are posted instead of the variables'
//contents, and both requests still go to the "www." host. Both are left exactly as
//the asker posted them because they are what this question is about.
curl_setopt_array($ch, array(
    CURLOPT_POST => true,
    CURLOPT_POSTFIELDS => http_build_query(array(
        '__LASTFOCUS' => '',
        '__EVENTTARGET' => '',
        '__VIEWSTATE' => $__VIEWSTATE,
        '__VIEWSTATEGENERATOR' => $__VIEWSTATEGENERATOR,
        '__EVENTVALIDATION' => $__EVENTVALIDATION,
        'ctl00$MainContent$txtSearchVrm' => '$registration_number',
        'ctl00$MainContent$MakeTextBox' => '$vehicle_maker',
        'ctl00$MainContent$txtV5CDocumentReferenceNumber' => '',
        'ctl00$MainContent$butSearch' => 'Search',
    ))
));

$html = hhb_curl_exec2($ch, 'https://www.vehicleenquiry.service.gov.uk/Default.aspx', $debugHeaders, $debugCookies, $debugRequest);
//var_dump('headers:',$debugHeaders,'cookies:',$debugCookies,'html:',$html,'request:',$debugRequest);

echo $html;
 

 
?>

ここ1週間ほどの間に何かが変更されたようで、$html に検索結果が返ってこなくなりました。表示されるのは、車両登録番号を入力するための照会フォームだけです。

照会フォームを送信するときの変数は次のとおりです。

Array 
 
(
 
    [__EVENTTARGET] => 
 
    [__EVENTARGUMENT] => 
 
    [__VIEWSTATE] => /wEPDwUKMTQ2ODczMjQwMA8WAh4NU2VydmljZUhlYWRlcjL8BAABAAAA/////wEAAAAAAAAADAIAAABBRG1zIFNoYXJlZCwgVmVyc2lvbj0xLjAuMC4wLCBDdWx0dXJlPW5ldXRyYWwsIFB1YmxpY0tleVRva2VuPW51bGwFAQAAACVEbXNTaGFyZWQuRGF0YUNvbnRyYWN0cy5Db21tb24uSGVhZGVyCQAAABNjb252ZXJzYXRpb25JZEZpZWxkD29yaWdpbkRhdGVGaWVsZBJhcHBsaWNhdGlvbkNERmllbGQOY2hhbm5lbENERmllbGQOY29udGFjdElkRmllbGQNZXZlbnRGbGdGaWVsZBJzZXJ2aWNlVHlwZUNERmllbGQPbGFuZ3VhZ2VDREZpZWxkDGVuZFVzZXJGaWVsZAEAAQEAAAQEBA0JAS5EbXNTaGFyZWQuRGF0YUNvbnRyYWN0cy5Db21tb24uU2VydmljZVR5cGVDb2RlAgAAACtEbXNTaGFyZWQuRGF0YUNvbnRyYWN0cy5Db21tb24uTGFuZ3VhZ2VDb2RlAgAAACZEbXNTaGFyZWQuRGF0YUNvbnRyYWN0cy5Db21tb24uRW5kVXNlcgIAAAACAAAACswmEsba79OIBgMAAAALRVZMX1NjcmVlbnMGBAAAAAhEVkxBX1dFQvPsDXUAAAAAAQX7////LkRtc1NoYXJlZC5EYXRhQ29udHJhY3RzLkNvbW1vbi5TZXJ2aWNlVHlwZUNvZGUBAAAAB3ZhbHVlX18ACAIAAAACAAAABfr///8rRG1zU2hhcmVkLkRhdGFDb250cmFjdHMuQ29tbW9uLkxhbmd1YWdlQ29kZQEAAAAHdmFsdWVfXwAIAgAAAAAAAAAKCxYCZg9kFgICAQ9kFgRmDxYCHgRUZXh0BQ9WZWhpY2xlIGVucXVpcnlkAgIPDxYCHgtOYXZpZ2F0ZVVybAUvaHR0cHM6Ly93aC5zbmFwc3VydmV5cy5jb20vcy5hc3A/az0xNDcwMjMwNjQwNTRkZBgBBSFjdGwwMCRNYWluQ29udGVudCRtdlZlaGljbGVTZWFyY2gPD2RmZAl8wP9HdAZERXThmPjkY7mMhrt6 
 
    [__VIEWSTATEGENERATOR] => CA0B0334 
 
    [__EVENTVALIDATION] => /wEdAAec98WnAVQeQUdqU6NI4oVRBOwywjxOOgpEYFN2beEgnftoCCZcWJSqSRLD/FKuxxkI0x5r4gPeKgWgSNWptTEWInv2PXI3Jzdn3U6eHDG4Qb7lltCXTdtnDbitYujbDJI0GQSIMiv32DreL6oRbYpQ8QTO8WJr3q5Y80Jf5PzdZW5VzdA= 
 
    [ctl00$MainContent$txtSearchVrm] => pk07lvd 
 
    [ctl00$MainContent$MakeTextBox] => bmw 
 
    [ctl00$MainContent$txtV5CDocumentReferenceNumber] => 
 
    [ctl00$MainContent$butSearch] => Search 
 
)

自分のサーバーに置いたフォームから、これらの変数を https://vehicleenquiry.service.gov.uk/ にPOSTすると、必要な結果が返ってきます。ただし、その場合は当然ながら、結果は政府のウェブサイト上でブラウザーに表示されます。

何が起きているのか、あるいはコードのどこかに誤りがあるのか、助言をいただきたいです。cURLのコードを実行しても $html に結果が返らず、最初の照会フォームしか返ってこない理由を各段階で調べようとして、頭を抱えています。

どんな助言でもいただければ幸いです。同様の投稿があったことは知っていますが、その問題を調査するために何が行われたのかについての詳細があまりなく、また、私はこのサイトに登録したばかりのため、その投稿に返信することもできませんでした。

// -------------------元のコードから編集する09/10/16 21:46 ------------ ----- //

このコードは別の機能の一部です。私はそれ以来、明確化のために優先される機能を削除しました。

// ---------------------------出力について---------- //

コード自体は動作しており、$html には常に車両照会サービスからの応答が出力されます。

問題は、リモートサイト（https://vehicleenquiry.service.gov.uk）がPOSTデータを処理して照会結果を返してくれないように見えることです。フォームに何も入力しなかったかのように、最初の照会フォームが返されます。


$html には、車両の詳細、課税状況、MOTの状態を含む応答が返ってくることを期待しています。

ところが実際には、cURLの応答として照会フォームが返ってきます。

+0

どのようにこの事を実行していますか? :) –

+0

関数dvlascrape($ send_array)は実際にはパラメータから何も使用しないので、パラメータは無用です。 私はこの関数を実行すると、curlから何のエラーもなく、いくつかのhtmlを受け取りますが、あまりにも正しく見えませんが、htmlを受け取ります。 var_dump(curl_error($ ch))を追加すると何が表示されますか。 return $ html;の前に –

+0

はいdvlascrape($ send_array)関数は残念です。このコードは別の操作の一部を形成していますが、これはその機能です。 –

答えて

0

1つ目の間違い：

'ctl00$MainContent$txtSearchVrm'=>'$registration_number', 
'ctl00$MainContent$MakeTextBox'=>'$vehicle_maker', 

は次のようになります。

'ctl00$MainContent$txtSearchVrm'=>$registration_number, 
'ctl00$MainContent$MakeTextBox'=>$vehicle_maker, 

2つ目の間違い：

$html=hhb_curl_exec2($ch,'https://www.vehicleenquiry.service.gov.uk/Default.aspx',$debugHeaders,$debugCookies,$debugRequest); 

は次のようになります。

$html=hhb_curl_exec2($ch,'https://vehicleenquiry.service.gov.uk/Default.aspx',$debugHeaders,$debugCookies,$debugRequest); 

全体コード:

<?php 
/**
 * Execute a request on an existing cURL handle and return the response.
 *
 * A URL without a host part is resolved against the origin (scheme, host and port)
 * of the last URL this function fetched successfully.
 *
 * @param resource|CurlHandle $ch  cURL handle (a resource before PHP 8, a CurlHandle object since).
 * @param string              $url Absolute URL, or a path relative to the previously fetched origin.
 *
 * @return string|bool The value returned by curl_exec() for this request.
 *
 * @throws InvalidArgumentException If $ch is not a cURL handle or $url is not a string.
 * @throws Exception                If cURL reports an error, or the response is an empty string
 *                                  and the HTTP status is neither 204 nor 203.
 */
function hhb_curl_exec($ch, $url)
{
    // Origin of the last effective URL, shared by every call in this process.
    static $hhb_curl_domainCache = "";

    // PHP < 8 hands out curl handles as resources, PHP >= 8 as CurlHandle objects.
    $isCurlHandle = (is_resource($ch) && get_resource_type($ch) === 'curl') || ($ch instanceof CurlHandle);
    if (!$isCurlHandle) {
        throw new InvalidArgumentException('$ch must be a curl handle!');
    }
    if (!is_string($url)) {
        throw new InvalidArgumentException('$url must be a string!');
    }

    // No host in the URL: treat it as a path on the previously visited origin.
    if (parse_url($url, PHP_URL_HOST) === null) {
        $url = $hhb_curl_domainCache . (substr($url, 0, 1) !== '/' ? '/' : '') . $url;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    $html = curl_exec($ch);
    if (curl_errno($ch)) {
        throw new Exception('Curl error (curl_errno=' . curl_errno($ch) . ') on url ' . var_export($url, true) . ': ' . curl_error($ch));
    }
    if ($html === '') {
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        // 204 is "No Content"; 203 is still tolerated because earlier versions of this helper accepted it.
        if (!in_array((int) $httpCode, array(203, 204), true)) {
            throw new Exception('Curl returned nothing for ' . var_export($url, true) . ' but HTTP_RESPONSE_CODE was ' . var_export($httpCode, true));
        }
    }

    // Remember where we ended up (the handle may have followed "Location:" redirects).
    // Scheme and port are kept so that a relative URL stays on https / on a non-default port.
    $effective = parse_url((string) curl_getinfo($ch, CURLINFO_EFFECTIVE_URL));
    if (is_array($effective) && isset($effective['host'])) {
        $origin = isset($effective['scheme']) ? $effective['scheme'] . '://' : '';
        $origin .= $effective['host'];
        if (isset($effective['port'])) {
            $origin .= ':' . $effective['port'];
        }
        $hhb_curl_domainCache = $origin;
    }

    return $html;
}

/**
 * Like hhb_curl_exec(), but also captures response headers, cookies and cURL's verbose log.
 *
 * The three by-reference arguments are reset on entry and filled on the way out.
 * CURLOPT_VERBOSE and CURLOPT_HEADER are switched on for the handle and left on.
 *
 * @param resource|CurlHandle $ch               cURL handle.
 * @param string              $url              URL to fetch (see hhb_curl_exec() for relative URLs).
 * @param array               $returnHeaders    Receives every non-empty header line of every response
 *                                              block cURL saw (redirects and "100 Continue" included).
 * @param array               $returnCookies    Receives name => value for each "name=value" segment of
 *                                              every Set-Cookie header. Attributes such as "path" or
 *                                              "HttpOnly" are reported as entries too.
 * @param string              $verboseDebugInfo Receives cURL's verbose log, also when the request fails.
 *
 * @return string The response body without the header block(s).
 *
 * @throws InvalidArgumentException If $ch is not a cURL handle or $url is not a string.
 * @throws RuntimeException         If the temporary file for the verbose log cannot be created.
 * @throws Exception                Anything thrown by hhb_curl_exec().
 */
function hhb_curl_exec2($ch, $url, &$returnHeaders = array(), &$returnCookies = array(), &$verboseDebugInfo = "")
{
    $returnHeaders = array();
    $returnCookies = array();
    $verboseDebugInfo = "";

    // PHP < 8 hands out curl handles as resources, PHP >= 8 as CurlHandle objects.
    $isCurlHandle = (is_resource($ch) && get_resource_type($ch) === 'curl') || ($ch instanceof CurlHandle);
    if (!$isCurlHandle) {
        throw new InvalidArgumentException('$ch must be a curl handle!');
    }
    if (!is_string($url)) {
        throw new InvalidArgumentException('$url must be a string!');
    }

    // cURL writes its verbose log to a stream, so give it a temporary file to write to.
    $verbosefileh = tmpfile();
    if ($verbosefileh === false) {
        throw new RuntimeException('Could not create a temporary file for the curl verbose log');
    }
    $verbosefile = stream_get_meta_data($verbosefileh);
    $verbosefile = $verbosefile['uri'];
    curl_setopt($ch, CURLOPT_VERBOSE, 1);
    curl_setopt($ch, CURLOPT_STDERR, $verbosefileh);
    curl_setopt($ch, CURLOPT_HEADER, 1);

    // Defer any failure until the log has been collected and the temp file released.
    $failure = null;
    $html = '';
    try {
        $html = hhb_curl_exec($ch, $url);
    } catch (Exception $e) {
        $failure = $e;
    }
    $verboseDebugInfo = (string) file_get_contents($verbosefile);
    curl_setopt($ch, CURLOPT_STDERR, NULL);
    fclose($verbosefileh);
    unset($verbosefile, $verbosefileh);
    if ($failure !== null) {
        throw $failure;
    }

    // With CURLOPT_HEADER the output is "<header block(s)><body>". Let cURL say where the body
    // starts: looking for the first blank line breaks as soon as a redirect or a
    // "100 Continue" interim response adds a second header block.
    $headerSize = (int) curl_getinfo($ch, CURLINFO_HEADER_SIZE);
    if ($headerSize < 0 || $headerSize > strlen($html)) {
        $headerSize = 0;
    }
    foreach (explode("\x0d\x0a", (string) substr($html, 0, $headerSize)) as $line) {
        if ($line !== '') {
            $returnHeaders[] = $line;
        }
    }
    $htmlBody = (string) substr($html, $headerSize);
    unset($html);

    foreach ($returnHeaders as $header) {
        // e.g. "Set-Cookie: sessionToken=abc123; Expires=Wed, 09 Jun 2021 10:18:14 GMT; HttpOnly"
        if (stripos($header, "Set-Cookie:") !== 0) {
            continue;
        }
        foreach (explode(';', substr($header, strlen("Set-Cookie:"))) as $segment) {
            $segment = trim($segment);
            if ($segment === '') {
                continue;
            }
            $equalsPos = strpos($segment, '=');
            if ($equalsPos === false) {
                // Valueless attribute such as "Secure" or "HttpOnly".
                $name = $segment;
                $value = '';
            } else {
                $name = substr($segment, 0, $equalsPos);
                $value = (string) substr($segment, $equalsPos + 1);
            }
            // Spaces are not allowed in cookie names; drop them.
            $returnCookies[urldecode(str_replace(' ', '', $name))] = urldecode($value);
        }
    }

    return $htmlBody;
}

/**
 * Create a cURL handle preconfigured for browser-like scraping.
 *
 * Defaults: follow redirects, send an automatic Referer, return the transfer as a
 * string, accept any encoding cURL supports, 10s connect / 11s total timeout, and
 * an in-memory cookie engine backed by a temporary cookie file.
 *
 * @param array|null $custom_options_array CURLOPT_* => value pairs that override or extend the
 *                                         defaults. Any "empty" value means "no overrides".
 *
 * @return resource|CurlHandle The configured handle.
 *
 * @throws InvalidArgumentException If a non-empty, non-array value is passed.
 * @throws RuntimeException         If the temporary cookie file cannot be created.
 */
function hhb_curl_init($custom_options_array = array())
{
    if (empty($custom_options_array)) {
        // curl_init() itself takes a URL string or NULL; tolerate NULL (and other empty values) here too.
        $custom_options_array = array();
    }
    if (!is_array($custom_options_array)) {
        throw new InvalidArgumentException('$custom_options_array must be an array!');
    }

    // CURLOPT_BINARYTRANSFER used to be listed here; it has had no effect since PHP 5.1.3, so it is gone.
    $options_array = array(
        CURLOPT_AUTOREFERER => true,
        CURLOPT_COOKIESESSION => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_FORBID_REUSE => false,
        CURLOPT_HTTPGET => true,
        CURLOPT_RETURNTRANSFER => true,
        // WARNING: certificate verification is off, so the peer's identity is NOT checked.
        // Kept for backward compatibility; pass CURLOPT_SSL_VERIFYPEER => true to enable it.
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_CONNECTTIMEOUT => 10,
        CURLOPT_TIMEOUT => 11,
        CURLOPT_ENCODING => ""
        //CURLOPT_REFERER=>'example.org',
        //CURLOPT_USERAGENT=>'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:36.0) Gecko/20100101 Firefox/36.0'
    );

    if (!array_key_exists(CURLOPT_COOKIEFILE, $custom_options_array)) {
        // Only create a temp file when the caller did not bring a cookie file.
        // The handle is kept in a static array so the file outlives this call
        // (workaround for https://bugs.php.net/bug.php?id=66014).
        static $curl_cookiefiles_arr = array();
        $cookieFileHandle = tmpfile();
        if ($cookieFileHandle === false) {
            throw new RuntimeException('Could not create a temporary cookie file');
        }
        $curl_cookiefiles_arr[] = $cookieFileHandle;
        $cookieFileMeta = stream_get_meta_data($cookieFileHandle);
        $options_array[CURLOPT_COOKIEFILE] = $cookieFileMeta['uri'];
    }

    // array_merge() would renumber the integer CURLOPT_* keys, so overlay the overrides by hand.
    foreach ($custom_options_array as $key => $val) {
        $options_array[$key] = $val;
    }

    $curl = curl_init();
    curl_setopt_array($curl, $options_array);
    return $curl;
}

//------------------------------------------------//

/**
 * Look a vehicle up on the DVLA vehicle enquiry form and return the result page.
 *
 * Two requests are made on one handle: a GET that collects the session cookies and the
 * ASP.NET hidden fields, then a POST that echoes those fields back with the search values.
 *
 * The helper functions above are declared at file level, not inside this function,
 * because a function declared inside another function is declared again on every call and
 * the second call dies with "Cannot redeclare".
 *
 * @param array $send_array Index 0 = registration number (VRM), index 2 = make, written as in
 *                          the DVLA's list of makes. A missing or empty entry falls back to
 *                          the demo vehicle ('PK07LVD' / 'BMW').
 *
 * @return string HTML of the page the enquiry service answered with.
 *
 * @throws RuntimeException If the first page cannot be parsed or lacks a hidden field.
 * @throws Exception        Anything thrown by the hhb_curl_* helpers.
 */
function dvlascrape($send_array)
{
    //Registration and make have to be valid and in the DVLA's own format, otherwise this won't work!
    $registration_number = (is_array($send_array) && isset($send_array[0]) && $send_array[0] !== '')
        ? $send_array[0]
        : 'PK07LVD'; // must be a valid VRM to get the correct response
    $vehicle_maker = (is_array($send_array) && isset($send_array[2]) && $send_array[2] !== '')
        ? $send_array[2]
        : 'BMW'; // must match the make the DVLA holds for this VRM

    // The service dropped the "www." host name; use the same address for both requests.
    $endpoint = 'https://vehicleenquiry.service.gov.uk/Default.aspx';

    $ch = hhb_curl_init();

    $debugHeaders = array();
    $debugCookies = array();
    $debugRequest = '';

    //first do an empty request to get a session id, the cookies and the ASP.NET hidden form fields
    $html = hhb_curl_exec2($ch, $endpoint, $debugHeaders, $debugCookies, $debugRequest);

    //loadHTML() has to be called on an instance (a static call is an Error on PHP 8);
    //the @ hides the warnings libxml raises for real-world markup
    $domd = new DOMDocument();
    if (!@$domd->loadHTML($html)) {
        throw new RuntimeException('Could not parse the enquiry page as HTML');
    }

    //ASP.NET WebForms rejects a postback that does not echo these hidden inputs back
    $postFields = array(
        '__LASTFOCUS' => '',
        '__EVENTTARGET' => '',
        '__EVENTARGUMENT' => '',
    );
    foreach (array('__VIEWSTATE', '__VIEWSTATEGENERATOR', '__EVENTVALIDATION') as $fieldId) {
        $fieldNode = $domd->getElementById($fieldId);
        if ($fieldNode === null) {
            throw new RuntimeException('Hidden form field ' . $fieldId . ' was not found in the enquiry page');
        }
        $postFields[$fieldId] = $fieldNode->getAttribute('value');
    }
    $postFields['ctl00$MainContent$txtSearchVrm'] = $registration_number;
    $postFields['ctl00$MainContent$MakeTextBox'] = $vehicle_maker;
    $postFields['ctl00$MainContent$txtV5CDocumentReferenceNumber'] = '';
    $postFields['ctl00$MainContent$butSearch'] = 'Search';

    //now submit the search form
    curl_setopt_array($ch, array(
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => http_build_query($postFields),
    ));

    $html = hhb_curl_exec2($ch, $endpoint, $debugHeaders, $debugCookies, $debugRequest);
    //var_dump('headers:',$debugHeaders,'cookies:',$debugCookies,'html:',$html,'request:',$debugRequest);

    return $html;
}

var_dump(dvlascrape(array('', '', '')));
?> 
+0

優れています - パベルありがとうございました....私が気づいていない最も簡単なことはいつも信じられません。 –

+0

このコードは以前は動作していましたが、数週間前にサイトのアドレスが変更されたため、wwwを削除する必要がありました。 木を見て森を見ずとはこのことですね。 改めて、ありがとうございます :D –

+0

最新のアップデートの実例がありますか?新しいURLはhttps://vehicleenquiry.service.gov.uk/ViewVehicleです – ServerSideSkittles

関連する問題