PHP 简易爬虫

/**
 * Read a page and print every distinct http:// URL found in its body.
 *
 * Two patterns are applied to the same content: a broad one that keeps
 * paths and query strings, and a narrow one that stops after the
 * letters, digits and dots following "http://". Both result sets are
 * merged (broad matches first), de-duplicated, echoed, and returned.
 *
 * @param string $url Location of the page to scan; passed straight to
 *                    file_get_contents().
 *
 * @return array Unique matches, re-indexed from 0, in order of first
 *               appearance. Empty when the content could not be read.
 */
function get_urls($url)
{
    // One read is enough: both patterns scan the same content.
    $content = file_get_contents($url);
    if ($content === false) {
        // Nothing to scan; file_get_contents() has already raised its warning.
        return array();
    }

    // NOTE(review): the broad character class also contains ' and ", so a URL
    // taken from a quoted attribute keeps its closing quote. Left as-is.
    $pattern1 = "/http:\/\/[a-zA-Z0-9\.\?\/\-\=\&\:\+\-\_\'\"]+/";
    $pattern2 = "/http:\/\/[a-zA-Z0-9\.]+/";

    preg_match_all($pattern1, $content, $matches1);
    preg_match_all($pattern2, $content, $matches2);

    // array_unique() preserves keys and therefore leaves gaps; array_values()
    // re-indexes so the result is a proper 0..n-1 list.
    $final_array = array_values(
        array_unique(array_merge($matches1[0], $matches2[0]))
    );

    // foreach replaces the truncated index loop and cannot hit a missing offset.
    foreach ($final_array as $found) {
        // Same separator bytes as the original literal (two line breaks).
        echo $found . "\n\n";
    }

    return $final_array;
}

// Demo run: scan the Baidu home page and print every link found on it.
get_urls('http://www.baidu.com');