最近打算做一个 TikTok UID 爬虫,所以特意搜了一下相关的 PHP cURL 爬虫技术。
1.使用方式:http://api.98lm.com/debug.php?url=www.98lm.com
2.使用curl爬虫采集
3.使用json封装返回结果
<?php
//header('content-type:text/html;charset=utf-8');
/**
 * Send an HTTP request through cURL and return the response body.
 *
 * The body is always captured and returned, whatever the HTTP method is.
 * (Previously only "GET" enabled CURLOPT_RETURNTRANSFER, so any other method
 * echoed the body straight to the output and returned `true`.)
 *
 * @param string       $url    Address to request.
 * @param array|string $data   Request payload; attached only when $method is "POST".
 * @param string       $method HTTP verb handed to CURLOPT_CUSTOMREQUEST, e.g. "GET" or "POST".
 *
 * @return string The response body on success, or the cURL error message on failure.
 */
function curlPost($url,$data,$method){
    $ch = curl_init();
    if ($ch === false) {
        // Keep the "error text as return value" contract used for transfer errors.
        return 'curl_init failed';
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_CUSTOMREQUEST, $method);
    curl_setopt($ch, CURLOPT_HEADER, 0); // do not include response headers in the body
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); // return the body instead of printing it

    // NOTE(review): TLS peer/host verification is switched off, which allows
    // man-in-the-middle tampering. Kept as-is for compatibility with existing
    // targets; enable verification if the scraped hosts have valid certificates.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);

    // Present a browser-like user agent to the remote server.
    curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (compatible; MSIE 5.01; Windows NT 5.0)');
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_AUTOREFERER, 1);

    // Ask for compressed responses and let cURL decompress them transparently.
    curl_setopt($ch, CURLOPT_HTTPHEADER, array('Accept-Encoding: gzip, deflate'));
    curl_setopt($ch, CURLOPT_ENCODING, 'gzip,deflate');

    if ($method === 'POST') {
        curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
    }

    $response = curl_exec($ch);

    // Read the error state before closing, then close on every path so the
    // handle is not leaked when the transfer fails.
    $error = curl_errno($ch) ? curl_error($ch) : null;
    curl_close($ch);

    if ($error !== null) {
        return $error;
    }

    return $response;
}
/**
 * Send a JSON response envelope to the client and terminate the script.
 *
 * The envelope has the keys "code" and "msg", plus "data" when $data is not
 * null. If encoding fails, the script exits with the name of the JSON error
 * constant as plain text instead.
 *
 * @param int    $code Status code placed in the envelope.
 * @param string $msg  Human-readable message; a falsy value becomes ''.
 * @param mixed  $data Optional payload; omitted from the envelope when null.
 *
 * @return void This function never returns; it always calls exit().
 */
function jsonReturn($code = 200,$msg='',$data = null)
{
    $message = $msg ? (string) $msg : '';

    // Convert from GB2312 only when the text is not already valid UTF-8.
    // Running iconv unconditionally on UTF-8 input fails and turned the
    // message into `false`.
    if ($message !== '' && preg_match('//u', $message) !== 1) {
        $converted = iconv('GB2312', 'UTF-8', $message);
        // An unconvertible message is dropped rather than breaking the envelope.
        $message = ($converted === false) ? '' : $converted;
    }

    $Result = array(
        'code' => $code,
        'msg'  => $message,
    );
    if ($data !== null) {
        $Result['data'] = $data;
    }

    $json = json_encode($Result, JSON_UNESCAPED_UNICODE);
    if ($json === false) {
        // Map the error code to the constant name that is reported to the client.
        $errorNames = array(
            JSON_ERROR_NONE                  => 'JSON_ERROR_NONE',
            JSON_ERROR_DEPTH                 => 'JSON_ERROR_DEPTH',
            JSON_ERROR_STATE_MISMATCH        => 'JSON_ERROR_STATE_MISMATCH',
            JSON_ERROR_CTRL_CHAR             => 'JSON_ERROR_CTRL_CHAR',
            JSON_ERROR_SYNTAX                => 'JSON_ERROR_SYNTAX',
            JSON_ERROR_UTF8                  => 'JSON_ERROR_UTF8',
            JSON_ERROR_RECURSION             => 'JSON_ERROR_RECURSION',
            JSON_ERROR_INF_OR_NAN            => 'JSON_ERROR_INF_OR_NAN',
            JSON_ERROR_UNSUPPORTED_TYPE      => 'JSON_ERROR_UNSUPPORTED_TYPE',
            JSON_ERROR_INVALID_PROPERTY_NAME => 'JSON_ERROR_INVALID_PROPERTY_NAME',
            JSON_ERROR_UTF16                 => 'JSON_ERROR_UTF16',
        );
        $errorCode = json_last_error();
        exit(isset($errorNames[$errorCode]) ? $errorNames[$errorCode] : 'JSON_ERROR_UNKNOWN');
    }

    // Headers can only be set before any output has been sent.
    if (!headers_sent()) {
        header('Content-Type:application/json; charset=utf-8');
    }

    exit($json);
}
// Request handler: fetch the page named by the "url" request parameter,
// extract the text that follows the navigation icon, and return it as JSON.

// Read the parameter defensively: it may be missing or, with url[]=..., an array.
$tk_url = isset($_REQUEST['url']) ? $_REQUEST['url'] : '';
if (!is_string($tk_url) || trim($tk_url) === '') {
    jsonReturn(-1,'No url 获取数据失败');
}
$tk_url = trim($tk_url);

// Bare hosts such as "www.98lm.com" are accepted; default them to http.
if (!preg_match('#^[a-z][a-z0-9+.\-]*://#i', $tk_url)) {
    $tk_url = 'http://' . $tk_url;
}

// The URL comes from the client, so only allow web schemes. This keeps
// file://, gopher:// and similar schemes away from cURL.
// NOTE(review): internal/private hosts are still reachable; add an allow-list
// if this endpoint is exposed publicly.
$scheme = strtolower((string) parse_url($tk_url, PHP_URL_SCHEME));
if (!in_array($scheme, array('http', 'https'), true)) {
    jsonReturn(-1,'Invalid url 获取数据失败');
}

$data = array('name' => '1234'); // only sent by curlPost() for POST requests
$method = "GET";
$file = curlPost($tk_url, $data, $method);

// Report a failure when the marker is not in the fetched content (this also
// covers transfer errors, where curlPost() returns the error text).
if (!is_string($file)
    || preg_match('/<i class="iconfont icon-daohang2"><\/i>(.*?)<\/a><i class="line">/', $file, $match) !== 1
) {
    jsonReturn(-1,'获取数据失败');
}

jsonReturn(1,'获取数据成功',$match[1]);

老马黑科技


