PHP 抓取网页

通过程序代码抓取网页的源代码,并对其进行解析处理。
在线编译运行 PHP 工具
PHP 菜鸟教程

从字符串中提取数字

侵删转自:码农教程

<?php
// Collect every run of consecutive digits found in the sample sentence
// and dump the resulting match array.
$cartText = 'In My Cart : 11 12 items';
$digitRuns = array();
preg_match_all('!\d+!', $cartText, $digitRuns);
print_r($digitRuns);
?>
Array
(
    [0] => Array
        (
            [0] => 11
            [1] => 12
        )
)

查找表达式示例

侵删转自:菜鸟教程

<?php
// Match each <b>...</b> element of the sample markup. The U modifier makes
// (.*) non-greedy, so the two bold elements are matched separately.
$profileHtml = "Name: <b>PHP</b> <br> Title: <b>Programming Language</b>";
$boldParts = array();
preg_match_all("/<b>(.*)<\/b>/U", $profileHtml, $boldParts);
// Index 0 holds the full-pattern matches (tags included).
$fullMatches = $boldParts[0];
print_r($fullMatches);
?>
Array
(
    [0] => <b>PHP</b>
    [1] => <b>Programming Language</b>
)
<?php
// \\2 is a back-reference: it tells PCRE to match whatever the second pair of
// parentheses in the pattern (here ([\w]+)) captured. Two backslashes are
// needed because the pattern is written inside a double-quoted string.
$markup = "<b>bold text</b><a href=howdy.html>click me</a>";
$tagPattern = "/(<([\w]+)[^>]*>)(.*?)(<\/\\2>)/";
preg_match_all($tagPattern, $markup, $elements, PREG_SET_ORDER);
foreach ($elements as $element) {
    // With PREG_SET_ORDER each entry holds the full match followed by the
    // four capture groups, in pattern order.
    list($whole, $openTag, $tagName, $innerText, $closeTag) = $element;
    echo "matched: " . $whole . "\n";
    echo "part 1: " . $openTag . "\n";
    echo "part 2: " . $tagName . "\n";
    echo "part 3: " . $innerText . "\n";
    echo "part 4: " . $closeTag . "\n\n";
}
?>
matched: <b>bold text</b>
part 1: <b>
part 2: b
part 3: bold text
part 4: </b>
matched: <a href=howdy.html>click me</a>
part 1: <a href=howdy.html>
part 2: a
part 3: click me
part 4: </a>

抓取解析网页源码

侵删转自:简书博客

<?php
// URL of the page to fetch
$url = 'http://t.qq.com';
// Read the whole response body as a single string. This yields the same
// bytes as file() followed by implode(''), but lets us detect a failed
// request: file() returns false on failure, which implode() cannot accept.
$lines_string = file_get_contents($url);
if ($lines_string === false) {
    throw new RuntimeException('Unable to fetch ' . $url);
}
// Print the page source; it could just as well be saved on your own server
echo $lines_string;

PHP抓取远程图片到本地保存

侵删转自:奇幻屋博客

/**
 * PHP将网页上的图片攫取到本地存储
 * @param $imgUrl  图片url地址
 * @param string $saveDir 本地存储路径 默认存储在当前路径
 * @param string|null $fileName 图片存储到本地的文件名
 * @return array|false 成功时返回包含 file_name 和 save_path 的数组,失败时返回 false
 */
<?php
/**
 * Fetch an image from a URL (or local path) and store it on disk.
 *
 * The source is read exactly once: the bytes that are validated as an image
 * are the same bytes that get written. An existing file with the same name
 * is overwritten rather than appended to.
 *
 * @param string      $imgUrl   Location of the image to fetch.
 * @param string      $saveDir  Target directory, created recursively when it
 *                              does not exist. A missing trailing slash is
 *                              added. Defaults to the current directory.
 * @param string|null $fileName Name of the stored file. When empty, a unique
 *                              name is generated whose extension is taken
 *                              from the detected MIME subtype.
 * @return array|false array('file_name' => ..., 'save_path' => ...) on
 *                     success; false when the URL is empty, the download
 *                     fails, the data is not a GIF/PNG/JPEG image, or the
 *                     file cannot be written.
 */
function crabImage($imgUrl, $saveDir='./', $fileName=null){
	if(empty($imgUrl)){
		return false;
	}
	// Download once and keep the raw bytes in memory.
	$imgData = file_get_contents($imgUrl);
	if($imgData === false || $imgData === ''){
		return false;
	}
	// Inspect the downloaded bytes themselves; false means "not an image".
	$imgSize = getimagesizefromstring($imgData);
	$allowedMimes = array('image/jpg', 'image/gif', 'image/png', 'image/jpeg');
	if($imgSize === false || !in_array($imgSize['mime'], $allowedMimes, true)){
		return false;
	}
	if(empty($fileName)){
		// Build a unique name; the extension is the MIME subtype (gif, png, jpeg).
		$_mime = explode('/', $imgSize['mime']);
		$fileName = uniqid(time(), true).'.'.end($_mime);
	}
	// Ensure directory and file name are joined by a separator, leaving an
	// already-terminated (or empty) directory string untouched.
	$lastChar = substr($saveDir, -1);
	if($saveDir !== '' && $lastChar !== '/' && $lastChar !== '\\'){
		$saveDir .= '/';
	}
	// The second is_dir() covers another process creating the directory
	// between our check and our mkdir() call.
	if($saveDir !== '' && !is_dir($saveDir) && !mkdir($saveDir, 0777, true) && !is_dir($saveDir)){
		return false;
	}
	$savePath = $saveDir.$fileName;
	// Write in one call; this truncates any existing file of the same name.
	if(file_put_contents($savePath, $imgData) !== strlen($imgData)){
		return false;
	}
	return array('file_name'=>$fileName,'save_path'=>$savePath);
}
// Sample run: fetch a remote GIF and dump the descriptor crabImage() returns.
$remoteImage = 'https://mmbiz.qlogo.cn/mmbiz/7WQtTI9h56hgvOH8J0Xp5v97cDNOxf94vq0NdNOhZmb2ZiaJLfwd2U8gNoEvTQXdWlRPOuibkkSebexmR2epE0pQ/0?wx_fmt=gif';
// Alternative sample sources:
//$remoteImage = 'http://www.phpernote.com/images/logo.gif';
//$remoteImage = 'http://avatar.csdn.net/5/3/6/1_u014236259.jpg';
$savedImage = crabImage($remoteImage);
var_dump( $savedImage );

通过接口获取实时货币汇率

侵删转自:点点外贸

<?php
/**
 * Convert an amount from one currency to another by scraping the Google
 * Finance converter page.
 *
 * NOTE(review): this converter endpoint appears to have been retired by
 * Google — confirm it still responds before relying on this function.
 *
 * @param string    $from_Currency Source currency code, e.g. "USD".
 * @param string    $to_Currency   Target currency code, e.g. "CNY".
 * @param int|float $amount        Amount to convert.
 * @return float|false Converted amount rounded to two decimals, or false
 *                     when the request fails or the response does not
 *                     contain a numeric result.
 */
function currency($from_Currency, $to_Currency, $amount) {
    // Build the query string with proper URL-encoding of every parameter.
    $query = http_build_query(array(
        'a' => $amount,
        'from' => $from_Currency,
        'to' => $to_Currency,
    ), '', '&');
    $url = "https://www.google.com/finance/converter?" . $query;

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    // Bound both the connect phase and the whole transfer so a dead host
    // cannot hang the caller.
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($ch, CURLOPT_TIMEOUT, 30);
    // TLS peer verification is deliberately left at its default (enabled).
    $rawdata = curl_exec($ch);
    curl_close($ch);
    if (!is_string($rawdata)) {
        return false;
    }

    // The first <span class=bld> element is expected to hold the result;
    // presumably in the form "<number> <currency code>" — verify against a
    // live response.
    $matches = array();
    if (preg_match("|<span class=bld>(.*)</span>|U", $rawdata, $matches) !== 1) {
        return false;
    }
    $result = explode(" ", $matches[1]);
    if (!is_numeric($result[0])) {
        return false;
    }
    return round((float) $result[0], 2);
}
// currency() is a plain function, not a method, so it is called directly;
// $this does not exist at file scope and using it is a fatal error.
echo currency("USD", "CNY", 1);die;