PHP:利用 PHP 批量抓取关键词对应的百度推广广告网址,并保存到 txt 文件中
利用php抓取批量关键词百度推广广告中网址保存在txt文件中 结合服务器的定时任务可以定时查找关键词广告的竞争程度[代码片段(85行)]
结合服务器的定时任务,可以定期查看各关键词广告的竞争程度。
<?php
/**
 * Collects the display URLs of Baidu paid-search ads for a list of keywords.
 *
 * For every keyword the Baidu result page is downloaded, split into its
 * right-hand and left-hand columns, and three kinds of ad blocks are located
 * with regular expressions (left column with green background, left column
 * with white background, right column). The URL text of each ad is echoed as
 * HTML and appended to semallurl.txt, so the script can be driven by a
 * scheduled task to track how contested each keyword is.
 *
 * Snippet originally published on http://outofmemory.cn
 */

// Opened without error suppression so that a failure is reported instead of
// being hidden and followed by writes to an invalid handle.
$fp = fopen("semallurl.txt", "a+");
if ($fp === false) {
    echo "Unable to open semallurl.txt for appending<br>";
    exit(1);
}

$kws1 = "上海酒店,北京酒店,广州酒店,天津酒店,广州酒店";
$kws = explode(",", $kws1);

foreach ($kws as $kw) {
    $pageURL = "http://www.baidu.com/s?word=" . urlencode($kw);

    // fetch() yields false when the request fails; treat that as an empty
    // page so the start/end markers for the keyword are still written.
    $contents = fetch($pageURL);
    if (!is_string($contents)) {
        $contents = "";
    }

    // Strip inline JavaScript. The "s" modifier lets "." cross line breaks so
    // multi-line script blocks are removed as well; "i" covers <SCRIPT>.
    // preg_replace() returns null on a PCRE error, in which case the
    // unmodified page is kept.
    $withoutScripts = preg_replace('/<script[^>]*?>.*?<\\/script>/is', "", $contents);
    if (is_string($withoutScripts)) {
        $contents = $withoutScripts;
    }

    // The page is cut at every '<div id="content_' marker: the first piece
    // after the marker is used as the right column, the second as the left
    // column. A page that lacks the markers yields empty columns.
    $contentsbytwoside = explode('<div id="content_', $contents);
    $contents_right = '<div id="content_' . (isset($contentsbytwoside[1]) ? $contentsbytwoside[1] : "");
    $contents_left = '<div id="content_' . (isset($contentsbytwoside[2]) ? $contentsbytwoside[2] : "");

    $ads_left_white = findAdBlocks('/(<div id=\\"[0-9]*\\" class=\\"ec_pp_f ec_pp_top.*?)<a href=\\"http:\\/\\/e\\.baidu\\.com\\//', $contents_left);
    $ads_left_green = findAdBlocks('/(<table class=\\"EC_mr15 EC_ppim_top ec_pp_f.*?<\\/table>)/', $contents_left);
    $ads_right = findAdBlocks('(<div id=\\"bdfs[^>]*class=\\"EC_im EC_fr EC_PP EC_idea1017 \\">.*?<a class=\\"EC_BL EC_desc\\".*?<\\/a>)', $contents_right);

    echo "------------Keywords ads for" . $kw . "start ------------------------------------<br>";
    fwrite($fp, "----------" . $kw . " ads start------------------------- \r\n");

    echo "left ads with green background is<br>";
    recordAdUrls($fp, $ads_left_green, '/<span>.*?<\\/span>/');

    echo "<p>-------------<br>";
    echo "left ads with white background is<br>";
    recordAdUrls($fp, $ads_left_white, '/<span class=\\"ec_url\\">.*?<\\/span>/');

    echo "<p>-------------<br>";
    echo "right ads with is<br>";
    recordAdUrls($fp, $ads_right, '/(<font size=\\"-1\\" class=\\"EC_url\\">.*?<\\/font>)/');

    echo "---------------Keywords ads for" . $kw . "END ------------------------------------<br>";
    fwrite($fp, "----------" . $kw . " ads End------------------------- \r\n");
}

// Timestamp line marking the end of this run.
fwrite($fp, date("Y-m-d H:i:s") . " PHP代码自动运行!\r\n");
fclose($fp);

/**
 * Returns every full match of $pattern in $html.
 *
 * @param string $pattern PCRE pattern describing one ad block.
 * @param string $html    Markup to search.
 * @return array List of matched fragments; empty when nothing matches or the
 *               pattern fails to execute.
 */
function findAdBlocks($pattern, $html)
{
    if (!preg_match_all($pattern, $html, $found)) {
        return array();
    }

    return $found[0];
}

/**
 * Echoes and logs the URL element of each ad block.
 *
 * For every fragment the first match of $urlPattern is echoed (markup
 * included) followed by "<br>", and its tag-stripped text is appended to the
 * file followed by CRLF. A fragment without a match produces an empty entry.
 *
 * @param resource $fp         Open, writable file handle.
 * @param array    $adBlocks   HTML fragments, one per ad.
 * @param string   $urlPattern PCRE pattern whose full match is the element
 *                             holding the ad's URL.
 * @return void
 */
function recordAdUrls($fp, $adBlocks, $urlPattern)
{
    foreach ($adBlocks as $adHtml) {
        $urlHtml = preg_match($urlPattern, $adHtml, $hit) ? $hit[0] : "";
        fwrite($fp, strip_tags($urlHtml) . "\r\n");
        echo $urlHtml . "<br>";
    }
}

/**
 * Downloads a URL with cURL and returns the response body.
 *
 * @param string $Date The URL to request (the parameter name is historical).
 * @return string|false Response body, or false when the handle cannot be
 *                      created or the transfer fails.
 */
function fetch($Date)
{
    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    $timeout = 5;
    curl_setopt($ch, CURLOPT_URL, $Date);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)");
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    // Bound the whole transfer as well, so a stalled response cannot hang a
    // scheduled run indefinitely.
    curl_setopt($ch, CURLOPT_TIMEOUT, 30);

    $contents = curl_exec($ch);
    curl_close($ch);

    return $contents;
}
?>
- 上一篇:php json_encode 乱码
- 下一篇:php 计算 生肖
精彩图集
精彩文章