推荐:《PHP教程》
先来看下同步的代码以及请求时间。
$start_time=date(h:i:sa); for ($i=0; $i <100 ; $i++) { $urls[]=http://www.downxia.com/downinfo/2315.$i..html; GetTitle(geturl(http://www.downxia.com/downinfo/2315.$i..html)); } function geturl($url){ $ch = curl_init(); curl_setopt($ch, CURLOPT_URL, $url); curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); $output = curl_exec($ch); curl_close($ch); return $output; } function GetTitle($output){ preg_match('/<title>.*<\/title>/i',$output,$matches); var_dump($matches[0]); } $end_time=date(h:i:sa); echo '开始时间是:'.$start_time; echo '结束时间是:'.$end_time;
最下面可以看到时间花了27秒。
接下来看下PHP curl 异步并发请求http的代码以及花费时间。
$start_time=date(h:i:sa); $urls=[]; for ($i=0; $i <100 ; $i++) { $urls[]=http://www.downxia.com/downinfo/2315.$i..html; } var_dump($urls); // GetTitle('klasjdkla<title>313asds12</title>'); rolling_curl($urls,'GetTitle'); function GetTitle($output){ preg_match('/<title>.*<\/title>/i',$output,$matches); var_dump($matches[0]); } $end_time=date(h:i:sa); echo '开始时间是:'.$start_time; echo '结束时间是:'.$end_time; function rolling_curl($urls, $callback, $custom_options = null) {//多个url访问 // make sure the rolling window isn't greater than the # of urls $rolling_window = 5; $rolling_window = (sizeof($urls) < $rolling_window) ? sizeof($urls) : $rolling_window; $master = curl_multi_init(); $curl_arr = array(); // add additional curl options here $std_options = array( CURLOPT_RETURNTRANSFER => true, CURLOPT_FOLLOWLOCATION => true, CURLOPT_MAXREDirs => 5 ); $options = ($custom_options) ? ($std_options + $custom_options) : $std_options; // start the first batch of requests for ($i = 0; $i < $rolling_window; $i++) { $ch = curl_init(); $options[CURLOPT_URL] = $urls[$i]; curl_setopt_array($ch, $options); curl_multi_add_handle($master, $ch); } do { while (($execrun = curl_multi_exec($master, $running)) == CURLM_CALL_MULTI_PERFORM); if ($execrun != CURLM_OK) { break; } // a request was just completed -- find out which one while ($done = curl_multi_info_read($master)) { $info = curl_getinfo($done['handle']); if ($info['http_code'] == 200) { $output = curl_multi_getcontent($done['handle']); // request successful. process output using the callback function. $callback($output); // start a new request (it's important to do this before removing the old one) $ch = curl_init(); $options[CURLOPT_URL] = $urls[$i++]; // increment i curl_setopt_array($ch, $options); curl_multi_add_handle($master, $ch); // remove the curl handle that just completed curl_multi_remove_handle($master, $done['handle']); } else { // request Failed. add error handling. } } } while ($running); curl_multi_close($master); return true; }
才花了3秒?实际上我感觉应该是花了5秒,因为启动比同步要慢,开始的时候卡了2秒。
http请求效率,毋庸置疑是异步远远高于同步。
核心请求代码如下:(这是老外写的,有点小问题,最后的提示undefined offset)
function rolling_curl($urls, $callback, $custom_options = null) {//多个url访问 // make sure the rolling window isn't greater than the # of urls $rolling_window = 5; $rolling_window = (sizeof($urls) < $rolling_window) ? sizeof($urls) : $rolling_window; $master = curl_multi_init(); $curl_arr = array(); // add additional curl options here $std_options = array( CURLOPT_RETURNTRANSFER => true, CURLOPT_FOLLOWLOCATION => true, CURLOPT_MAXREDirs => 5 ); $options = ($custom_options) ? ($std_options + $custom_options) : $std_options; // start the first batch of requests for ($i = 0; $i < $rolling_window; $i++) { $ch = curl_init(); $options[CURLOPT_URL] = $urls[$i]; curl_setopt_array($ch, $options); curl_multi_add_handle($master, $ch); } do { while (($execrun = curl_multi_exec($master, $running)) == CURLM_CALL_MULTI_PERFORM); if ($execrun != CURLM_OK) { break; } // a request was just completed -- find out which one while ($done = curl_multi_info_read($master)) { $info = curl_getinfo($done['handle']); if ($info['http_code'] == 200) { $output = curl_multi_getcontent($done['handle']); // request successful. process output using the callback function. $callback($output); // start a new request (it's important to do this before removing the old one) $ch = curl_init(); $options[CURLOPT_URL] = $urls[$i++]; // increment i curl_setopt_array($ch, $options); curl_multi_add_handle($master, $ch); // remove the curl handle that just completed curl_multi_remove_handle($master, $done['handle']); } else { // request Failed. add error handling. } } } while ($running); curl_multi_close($master); return true; }
修改一下。只要在新增url的时候加个判断就好了。// 当$i等于$urls数组大小时不用再增加了。
function rolling_curl($urls, $callback, $custom_options = null) {//多个url访问 // make sure the rolling window isn't greater than the # of urls $rolling_window = 5; $rolling_window = (sizeof($urls) < $rolling_window) ? sizeof($urls) : $rolling_window; $master = curl_multi_init(); $curl_arr = array(); // add additional curl options here $std_options = array( CURLOPT_RETURNTRANSFER => true, CURLOPT_FOLLOWLOCATION => true, CURLOPT_MAXREDirs => 5 ); $options = ($custom_options) ? ($std_options + $custom_options) : $std_options; // start the first batch of requests for ($i = 0; $i < $rolling_window; $i++) { $ch = curl_init(); $options[CURLOPT_URL] = $urls[$i]; curl_setopt_array($ch, $options); curl_multi_add_handle($master, $ch); } do { while (($execrun = curl_multi_exec($master, $running)) == CURLM_CALL_MULTI_PERFORM); if ($execrun != CURLM_OK) { break; } // a request was just completed -- find out which one while ($done = curl_multi_info_read($master)) { $info = curl_getinfo($done['handle']); if ($info['http_code'] == 200) { $output = curl_multi_getcontent($done['handle']); // request successful. process output using the callback function. $callback($output); // start a new request (it's important to do this before removing the old one) // 当$i等于$urls数组大小时不用再增加了 if($i<sizeof($urls)){ $ch = curl_init(); $options[CURLOPT_URL] = $urls[$i++]; // increment i curl_setopt_array($ch, $options); curl_multi_add_handle($master, $ch); } // remove the curl handle that just completed curl_multi_remove_handle($master, $done['handle']); } else { // request Failed. add error handling. } } } while ($running); curl_multi_close($master); return true; }
以上,结束。记录一下,以免自己忘记。
原文地址:https://www.jb51.cc/php/1210292.html
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。