爬了一个宠物网站:
使用 PHP 编写,
框架为 ThinkPHP 5(tp5),
代码如下:
<?php
namespace app\api\controller;
use fast\Http;//引入这个类
use app\common\controller\Api;
/**
 * Demo API controller that crawls the pet listing pages of www.boqii.com
 * and saves each listed pet's image into one folder per pet category.
 */
class Demo extends Api
{
    // An empty $noNeedLogin means every action requires login.
    // An empty $noNeedRight means every action requires a permission check.
    // An action that needs no login needs no permission check either.
    //
    // Actions callable without login; '*' means all of them.
    protected $noNeedLogin = ['*'];
    // Actions that skip the permission check; '*' means all of them.
    protected $noNeedRight = ['*'];

    /**
     * Crawl every pet category and download all listed pet images.
     *
     * For each category this finds the highest page number, makes sure the
     * folder "<cwd>/cw/<category name>" exists, fetches every listing page,
     * extracts the (pet name => image URL) pairs and downloads each image.
     * Progress is echoed; nothing is returned.
     *
     * NOTE(review): the @Api* annotations below appear to be carried over
     * from the framework's sample endpoint. This method reads no request
     * parameters, so they do not describe what it does. Their values are
     * left untouched because a documentation generator may read them.
     *
     * @ApiTitle (测试名称)
     * @ApiSummary (测试描述信息)
     * @ApiMethod (POST)
     * @ApiRoute (/api/demo/test/id/{id}/name/{name})
     * @ApiHeaders (name=token, type=string, required=true, description="请求的Token")
     * @ApiParams (name="id", type="integer", required=true, description="会员ID")
     * @ApiParams (name="name", type="string", required=true, description="用户名")
     * @ApiParams (name="data", type="object", sample="{'user_id':'int','user_name':'string','profile':{'email':'string','age':'integer'}}", description="扩展数据")
     * @ApiReturnParams (name="code", type="integer", required=true, sample="0")
     * @ApiReturnParams (name="msg", type="string", required=true, sample="返回成功")
     * @ApiReturnParams (name="data", type="object", sample="{'user_id':'int','user_name':'string','profile':{'email':'string','age':'integer'}}", description="扩展数据返回")
     * @ApiReturn ({
    'code':'1',
    'msg':'返回成功'
    })
     */
    public function test()
    {
        $domain = 'http://www.boqii.com';
        // URL slug => display name; the display name is also the folder name.
        $type = ['dog'=>'狗狗','cat'=>'猫咪','smallpet'=>'小宠','aquarium'=>'水族','reptile'=>'爬虫'];
        $now_path = getcwd();

        foreach ($type as $enname => $zhname) {
            $url = $domain . '/pet-all/' . $enname . '/';
            echo $zhname;

            $max_page = $this->pd_max_page($url);

            // Folder for the current pet category.
            $cwpath = $now_path . '/cw/' . $zhname;
            $this->pd_dir($cwpath);

            for ($i = 1; $i <= $max_page; $i++) {
                // Fetch the listing page, extract the images, download them.
                $yuanma = $this->get($url . '?p=' . $i);
                foreach ($this->re_img($yuanma) as $name => $vurl) {
                    // Array keys that look numeric come back as int; cast for safety.
                    $name = (string) $name;
                    $this->download($vurl, $cwpath, $name . '.png', $name);
                }
            }

            // Pause between categories to go easy on the remote site.
            sleep(4);
            echo "<br>{$zhname}结束了";
        }
    }

    /**
     * Find the highest page number linked from a listing page.
     *
     * @param string $url Listing page URL.
     * @return int Highest "?p=N" value found in the page source, at least 1.
     */
    public function pd_max_page($url)
    {
        $yuanma = (string) $this->get($url);

        // Pagination links look like "pet-all/dog/?p=2". The pattern only
        // captures one or two digits, so the result is capped at 99.
        $lastPattern = '/\?p\=([0-9]{1,2})/';
        if (!preg_match_all($lastPattern, $yuanma, $end)) {
            return 1;
        }

        return max(1, max(array_map('intval', $end[1])));
    }

    /**
     * Extract pet names and image URLs from a listing page.
     *
     * Matches markup of this shape:
     *   <a target="_blank" href="http://www.boqii.com/entry/detail/425.html"><img alt="泰迪犬" src="http://img.boqiicdn.com/Data/BK/P/imagick441541497241.jpg" /></a>
     *
     * @param string $str HTML source of a listing page.
     * @return array Map of img alt text => img src; when the same alt text
     *               occurs more than once the last one wins. Empty when
     *               nothing matches or the regex engine reports an error.
     */
    public function re_img($str)
    {
        $re = '/\<a .*?href="(.*?)"\>\<img alt="(.*?)" src="(.*?)" \/\>\<\/a\>/';
        if (!preg_match_all($re, (string) $str, $end)) {
            return [];
        }

        // Group 2 is the alt text, group 3 the image URL.
        return array_combine($end[2], $end[3]);
    }

    /**
     * Make sure a directory exists, creating it (recursively) if needed.
     *
     * Echoes a success message when the directory is created. On failure it
     * echoes an error message and terminates the script.
     *
     * @param string $path Directory path (UTF-8).
     * @return void
     */
    public function pd_dir($path)
    {
        // Check and create the same on-disk path, so a second run finds the
        // directory that the first run created.
        $fsPath = $this->fs_path($path);
        if (is_dir($fsPath)) {
            return;
        }

        // The third mkdir argument enables recursive creation. The extra
        // is_dir() covers the case where the directory appeared meanwhile.
        if (mkdir($fsPath, 0777, true) || is_dir($fsPath)) {
            echo "目录 $path 创建成功<br>";
            return;
        }

        echo "目录 $path 创建失败";
        exit();
    }

    /**
     * Fetch the source of a page with the framework HTTP helper.
     *
     * @param string $url Page URL.
     * @return mixed Whatever Http::get() returns for the URL.
     */
    public function get($url)
    {
        return (new Http())->get($url);
    }

    /**
     * Download a file over HTTP and save it to disk.
     *
     * The saved name is "$name.<ext>", where <ext> comes from the URL path
     * and falls back to the extension of $filename. Without $name, $filename
     * is used as given. Without both, a timestamp name is generated, but only
     * for gif/jpg/jpeg/png/bmp URLs. An existing file is overwritten.
     *
     * @param string $url      File URL.
     * @param string $save_dir Target directory; defaults to './cw/'.
     * @param string $filename Fallback file name (see above).
     * @param string $name     Base name of the saved file, without extension.
     * @return array ['file_name' => string, 'save_path' => string, 'error' => int]
     *               error: 0 success, 1 download failed, 2 write failed,
     *               3 extension not allowed, 5 directory could not be created.
     */
    public function download($url, $save_dir = './cw/', $filename = '', $name = '')
    {
        if (trim($save_dir) == '') {
            $save_dir = './cw/';
        }
        // Normalise to exactly one trailing slash.
        $save_dir = rtrim($save_dir, '/\\') . '/';

        // Take the extension from the URL path only, so a query string
        // cannot leak into it.
        $urlPath = parse_url($url, PHP_URL_PATH);
        $ext = is_string($urlPath) ? pathinfo($urlPath, PATHINFO_EXTENSION) : '';
        if ($ext === '') {
            $ext = pathinfo($filename, PATHINFO_EXTENSION);
        }

        if (trim($name) !== '') {
            $target = $name . ($ext === '' ? '' : '.' . $ext);
        } elseif (trim($filename) !== '') {
            $target = $filename;
        } else {
            $allowExt = ['gif', 'jpg', 'jpeg', 'png', 'bmp'];
            if (!in_array(strtolower($ext), $allowExt, true)) {
                return ['file_name' => '', 'save_path' => '', 'error' => 3];
            }
            $target = time() . '.' . $ext;
        }

        // The name can come from remote HTML: neutralise path separators and
        // characters that are illegal in file names.
        $target = str_replace(['/', '\\', ':', '*', '?', '"', '<', '>', '|', "\0"], '_', $target);

        // Create the target directory if needed.
        $fsDir = $this->fs_path($save_dir);
        if (!is_dir($fsDir) && !mkdir($fsDir, 0777, true) && !is_dir($fsDir)) {
            return ['file_name' => '', 'save_path' => '', 'error' => 5];
        }

        $ch = curl_init();
        // NOTE(review): certificate verification is disabled (any certificate
        // is trusted), as in the original; enable it if HTTPS sources matter.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 30);
        $file = curl_exec($ch);
        $status = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
        curl_close($ch);

        // Do not save transport failures, empty bodies or HTTP error pages
        // as if they were images.
        if ($file === false || $file === '' || $status >= 400) {
            return ['file_name' => '', 'save_path' => '', 'error' => 1];
        }

        // Overwrite rather than append, so a re-run cannot corrupt a file
        // that was downloaded earlier.
        $save_path = $save_dir . $target;
        if (file_put_contents($this->fs_path($save_path), $file) === false) {
            return ['file_name' => '', 'save_path' => '', 'error' => 2];
        }

        return ['file_name' => $target, 'save_path' => $save_path, 'error' => 0];
    }

    /**
     * Convert a UTF-8 path to the encoding the local filesystem API expects.
     *
     * Only Windows builds older than PHP 7.1 need a conversion; everywhere
     * else the path is returned unchanged.
     *
     * @param string $path UTF-8 path.
     * @return string Path to hand to filesystem functions.
     */
    private function fs_path($path)
    {
        $legacyWindows = strtoupper(substr(PHP_OS, 0, 3)) === 'WIN'
            && version_compare(PHP_VERSION, '7.1.0', '<');
        if (!$legacyWindows) {
            return $path;
        }

        // Assumes the system code page is GBK, as the original code did.
        $converted = iconv("UTF-8", "GBK", $path);

        return $converted === false ? $path : $converted;
    }
}
每个功能块封装成一个方法,这样后期方便调用和修改。
有什么不懂的可以咨询!