最近打算做一个 TikTok UID 爬虫，所以特意搜了一下相关的 PHP cURL 爬虫技术。
1.使用方式:http://api.98lm.com/debug.php?url=www.98lm.com
2.使用curl爬虫采集
3.使用json封装返回结果
<?php

/**
 * Perform an HTTP(S) request with cURL and return the response body.
 *
 * Redirects are followed, compressed responses are decoded transparently and
 * a fixed browser-like User-Agent is sent. The request body ($data) is only
 * attached when the method is POST.
 *
 * NOTE(review): TLS certificate verification is disabled, exactly as in the
 * original code, so HTTPS responses are not authenticated. Enable
 * CURLOPT_SSL_VERIFYPEER / CURLOPT_SSL_VERIFYHOST if the targets allow it.
 *
 * @param string       $url    Address to request.
 * @param array|string $data   Request body, used for POST only.
 * @param string       $method HTTP verb, e.g. "GET" or "POST" (case-insensitive).
 *
 * @return string|bool The response body on success; the cURL error message
 *                     (a string) when the transfer fails.
 */
function curlPost($url, $data, $method)
{
    $method = strtoupper((string) $method);

    $ch = curl_init();
    if ($ch === false) {
        return 'curl_init failed';
    }

    $httpOnly = CURLPROTO_HTTP | CURLPROTO_HTTPS;
    $options = [
        CURLOPT_URL => $url,
        CURLOPT_CUSTOMREQUEST => $method,
        // Keep response headers out of the returned body.
        CURLOPT_HEADER => false,
        // Always capture the body. Previously this was only enabled for GET,
        // so any other method echoed the response and returned true.
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_SSL_VERIFYHOST => 0,
        CURLOPT_USERAGENT => 'Mozilla/5.0 (compatible; MSIE 5.01; Windows NT 5.0)',
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_AUTOREFERER => true,
        // Sends the Accept-Encoding header and decodes the response, so no
        // hand-written Accept-Encoding header is needed.
        CURLOPT_ENCODING => 'gzip,deflate',
        // The URL may come from the client: never let it (or a redirect)
        // reach file://, gopher://, etc.
        CURLOPT_PROTOCOLS => $httpOnly,
        CURLOPT_REDIR_PROTOCOLS => $httpOnly,
        // Do not let a stalled remote server hang this request forever.
        CURLOPT_CONNECTTIMEOUT => 10,
        CURLOPT_TIMEOUT => 30,
    ];
    if ($method === 'POST') {
        $options[CURLOPT_POSTFIELDS] = $data;
    }

    // Options are applied one by one so that a single rejected option does
    // not prevent the remaining ones from being set.
    foreach ($options as $option => $value) {
        curl_setopt($ch, $option, $value);
    }

    $body = curl_exec($ch);
    $error = curl_errno($ch) ? curl_error($ch) : null;

    // Release the handle on the error path as well.
    curl_close($ch);

    return $error !== null ? $error : $body;
}

/**
 * Send a JSON response to the client and terminate the script.
 *
 * The payload is {"code": ..., "msg": ..., "data": ...}; the "data" key is
 * omitted when $data is null. If encoding fails, the name of the JSON error
 * constant is printed instead and the script still terminates.
 *
 * @param int    $code Application status code.
 * @param string $msg  Human-readable message. Converted from GB2312 to UTF-8
 *                     only when it is not already valid UTF-8.
 * @param mixed  $data Optional payload.
 */
function jsonReturn($code = 200, $msg = '', $data = null)
{
    $message = $msg ? (string) $msg : '';

    // An empty pattern with the "u" modifier matches only valid UTF-8, which
    // makes it a dependency-free encoding check. Converting an already-UTF-8
    // message from GB2312 would garble it or make iconv() fail.
    if (preg_match('//u', $message) !== 1) {
        $converted = iconv('GB2312', 'UTF-8', $message);
        $message = $converted === false ? '' : $converted;
    }

    $payload = ['code' => $code, 'msg' => $message];
    if ($data !== null) {
        $payload['data'] = $data;
    }

    $json = json_encode($payload, JSON_UNESCAPED_UNICODE);
    if ($json === false) {
        $errorNames = [
            JSON_ERROR_NONE => 'JSON_ERROR_NONE',
            JSON_ERROR_DEPTH => 'JSON_ERROR_DEPTH',
            JSON_ERROR_STATE_MISMATCH => 'JSON_ERROR_STATE_MISMATCH',
            JSON_ERROR_CTRL_CHAR => 'JSON_ERROR_CTRL_CHAR',
            JSON_ERROR_SYNTAX => 'JSON_ERROR_SYNTAX',
            JSON_ERROR_UTF8 => 'JSON_ERROR_UTF8',
            JSON_ERROR_RECURSION => 'JSON_ERROR_RECURSION',
            JSON_ERROR_INF_OR_NAN => 'JSON_ERROR_INF_OR_NAN',
            JSON_ERROR_UNSUPPORTED_TYPE => 'JSON_ERROR_UNSUPPORTED_TYPE',
            JSON_ERROR_INVALID_PROPERTY_NAME => 'JSON_ERROR_INVALID_PROPERTY_NAME',
            JSON_ERROR_UTF16 => 'JSON_ERROR_UTF16',
        ];
        $errorCode = json_last_error();
        exit($errorNames[$errorCode] ?? 'JSON_ERROR_UNKNOWN');
    }

    // header() raises a warning once output has started (for example when
    // text precedes the opening PHP tag), so only send it while still possible.
    if (!headers_sent()) {
        header('Content-Type:application/json; charset=utf-8');
    }
    exit($json);
}

// ---------------------------------------------------------------------------
// Request handling: fetch the page named by the "url" request parameter and
// return the text captured by the pattern below as JSON.
// ---------------------------------------------------------------------------

// Accept only a non-empty string; a missing parameter or an array
// (?url[]=...) is treated as "no url" instead of raising a warning.
$tk_url = isset($_REQUEST['url']) && is_string($_REQUEST['url']) ? trim($_REQUEST['url']) : '';
if ($tk_url === '') {
    jsonReturn(-1, 'No url 获取数据失败');
}

// NOTE(review): the target URL is fully client-controlled. curlPost() limits
// it to http/https, but internal hosts are still reachable (SSRF); add a host
// allow-list if this endpoint is exposed publicly.
$data = ['name' => '1234']; // ignored by curlPost() for GET requests
$method = 'GET';
$file = curlPost($tk_url, $data, $method);

// NOTE(review): if the fetched page is not UTF-8, the captured text makes
// json_encode() fail and the client receives "JSON_ERROR_UTF8".
$matched = is_string($file)
    && preg_match('/<i class="iconfont icon-daohang2"><\/i>(.*?)<\/a><i class="line">/', $file, $match) === 1;
if (!$matched) {
    // Previously a failed match was still reported as success, without data.
    jsonReturn(-1, 'No match 获取数据失败');
}

jsonReturn(1, '获取数据成功', $match[1]);