<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	>

<channel>
	<title>Kaptain Krayola &#187; Scraping</title>
	<atom:link href="http://www.kaptainkrayola.com/category/tools/scraping/feed/" rel="self" type="application/rss+xml" />
	<link>http://www.kaptainkrayola.com</link>
	<description>Your daily dose of internet destruction</description>
	<lastBuildDate>Fri, 05 Feb 2010 22:42:42 +0000</lastBuildDate>
	<generator>http://wordpress.org/?v=2.9.1</generator>
	<language>en</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
			<item>
		<title>PHP WordPress Blog Finder</title>
		<link>http://www.kaptainkrayola.com/php-wordpress-blog-finder/</link>
		<comments>http://www.kaptainkrayola.com/php-wordpress-blog-finder/#comments</comments>
		<pubDate>Fri, 25 May 2007 15:42:33 +0000</pubDate>
		<dc:creator>The Kaptain</dc:creator>
				<category><![CDATA[Scraping]]></category>
		<category><![CDATA[Tools]]></category>

		<guid isPermaLink="false">http://www.kaptainkrayola.com/php-wordpress-blog-finder/</guid>
		<description><![CDATA[This little script will automate the discovery of WordPress blogs for you.  It could actually be used to harvest links to anything you want from Google SERPs but for this example it&#8217;s used to find WordPress blogs.  You would just need to add different search terms to the array and it will return [...]]]></description>
			<content:encoded><![CDATA[<p>This little script will automate the discovery of WordPress blogs for you.  It could actually be used to harvest links to anything you want from Google SERPs but for this example it&#8217;s used to find WordPress blogs.  You would just need to add different search terms to the array and it will return whatever you are looking for.  It currently only works with Google but support for the other engines may be added in the near future.  You can specify as many search strings as you want and the script will randomly choose one each time it&#8217;s run.</p>
<p>The script requires that you give it a keyword to use in the search so you can find blogs that are related to the topic of your site(s). </p>
<p>The function will return an array of URLs that you can use however you see fit.</p>
<p>This script requires that you have the Snoopy Class.</p>

<div class="wp_syntax"><table><tr><td class="line_numbers"><pre>1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
</pre></td><td class="code"><pre class="php" style="font-family:monospace;"><span style="color: #b1b100;">include</span><span style="color: #009900;">&#40;</span><span style="color: #0000ff;">&quot;Snoopy.class.php&quot;</span><span style="color: #009900;">&#41;</span><span style="color: #339933;">;</span>
&nbsp;
	<span style="color: #000000; font-weight: bold;">function</span> getWordpress<span style="color: #009900;">&#40;</span><span style="color: #000088;">$keyword</span><span style="color: #009900;">&#41;</span><span style="color: #009900;">&#123;</span>
		<span style="color: #666666; font-style: italic;">//SNOOPY!</span>
		<span style="color: #000088;">$snoopy</span> <span style="color: #339933;">=</span> <span style="color: #000000; font-weight: bold;">new</span> Snoopy<span style="color: #339933;">;</span>
&nbsp;
		<span style="color: #000088;">$aGoogleSearch</span> <span style="color: #339933;">=</span> <span style="color: #990000;">array</span><span style="color: #009900;">&#40;</span><span style="color: #009900;">&#41;</span><span style="color: #339933;">;</span>
			<span style="color: #666666; font-style: italic;">// add an entry for every google query you want to run to harvest the URLs</span>
			<span style="color: #000088;">$aGoogleSearch</span><span style="color: #009900;">&#91;</span><span style="color: #009900;">&#93;</span> <span style="color: #339933;">=</span> <span style="color: #0000ff;">&quot;http://www.google.com/search?hl=en&amp;q=%22You+can+leave+a+response%2C+or+trackback+from+your+own+site%22&amp;btnG=Google+Search&amp;start=&quot;</span><span style="color: #339933;">.</span><span style="color: #990000;">rand</span><span style="color: #009900;">&#40;</span><span style="color: #cc66cc;">0</span><span style="color: #339933;">,</span><span style="color: #cc66cc;">900</span><span style="color: #009900;">&#41;</span><span style="color: #339933;">;</span>
			<span style="color: #000088;">$aGoogleSearch</span><span style="color: #009900;">&#91;</span><span style="color: #009900;">&#93;</span> <span style="color: #339933;">=</span> <span style="color: #0000ff;">&quot;http://www.google.com/search?hl=en&amp;lr=&amp;safe=off&amp;q=%22Leave+a+Reply%22+%22powered+by+wordpress%22&amp;btnG=Search&amp;start=&quot;</span><span style="color: #339933;">.</span><span style="color: #990000;">rand</span><span style="color: #009900;">&#40;</span><span style="color: #cc66cc;">0</span><span style="color: #339933;">,</span><span style="color: #cc66cc;">900</span><span style="color: #009900;">&#41;</span><span style="color: #339933;">;</span>
			<span style="color: #000088;">$aGoogleSearch</span><span style="color: #009900;">&#91;</span><span style="color: #009900;">&#93;</span> <span style="color: #339933;">=</span><span style="color: #0000ff;">&quot;http://www.google.com/search?hl=en&amp;q=%22powered+by+wordpress%22+&quot;</span><span style="color: #339933;">.</span><span style="color: #000088;">$keyword</span><span style="color: #339933;">.</span><span style="color: #0000ff;">&quot;&amp;btnG=Google+Search&amp;start=&quot;</span><span style="color: #339933;">.</span><span style="color: #990000;">rand</span><span style="color: #009900;">&#40;</span><span style="color: #cc66cc;">0</span><span style="color: #339933;">,</span><span style="color: #cc66cc;">900</span><span style="color: #009900;">&#41;</span><span style="color: #339933;">;</span>
&nbsp;
		<span style="color: #666666; font-style: italic;">//figure out which google query to run</span>
		<span style="color: #000088;">$arraylen</span> <span style="color: #339933;">=</span> <span style="color: #990000;">count</span><span style="color: #009900;">&#40;</span><span style="color: #000088;">$aGoogleSearch</span><span style="color: #009900;">&#41;</span><span style="color: #339933;">;</span>
		<span style="color: #000088;">$getPos</span> <span style="color: #339933;">=</span> <span style="color: #990000;">rand</span><span style="color: #009900;">&#40;</span><span style="color: #cc66cc;">0</span><span style="color: #339933;">,</span><span style="color: #000088;">$arraylen</span><span style="color: #009900;">&#41;</span><span style="color: #339933;">;</span>
		<span style="color: #666666; font-style: italic;">//snatch it up out of the array</span>
		<span style="color: #000088;">$targetURL</span> <span style="color: #339933;">=</span> <span style="color: #000088;">$aGoogleSearch</span><span style="color: #009900;">&#91;</span><span style="color: #000088;">$getPos</span><span style="color: #009900;">&#93;</span><span style="color: #339933;">;</span>
		<span style="color: #666666; font-style: italic;">//hit google</span>
		<span style="color: #000088;">$page</span> <span style="color: #339933;">=</span> <span style="color: #000088;">$snoopy</span><span style="color: #339933;">-&gt;</span><span style="color: #004000;">fetch</span><span style="color: #009900;">&#40;</span><span style="color: #000088;">$targetURL</span><span style="color: #009900;">&#41;</span><span style="color: #339933;">;</span>
		<span style="color: #000088;">$page</span> <span style="color: #339933;">=</span> <span style="color: #000088;">$snoopy</span><span style="color: #339933;">-&gt;</span><span style="color: #004000;">results</span><span style="color: #339933;">;</span>
&nbsp;
		<span style="color: #666666; font-style: italic;">//create an array to hold our links</span>
		<span style="color: #000088;">$foundLinks</span> <span style="color: #339933;">=</span> <span style="color: #990000;">array</span><span style="color: #009900;">&#40;</span><span style="color: #009900;">&#41;</span><span style="color: #339933;">;</span>
&nbsp;
		<span style="color: #666666; font-style: italic;">//parse out the links we want</span>
		<span style="color: #666666; font-style: italic;">//&lt;a href=&quot;http://wordpress.org/&quot; class=l </span>
		<span style="color: #990000;">preg_match_all</span><span style="color: #009900;">&#40;</span><span style="color: #0000ff;">&quot;/&lt;a\s+href=(.*?)\s+class=l+(.*?)&gt;(.*?)&lt;\/a&gt;/&quot;</span><span style="color: #339933;">,</span><span style="color: #000088;">$page</span><span style="color: #339933;">,</span><span style="color: #000088;">$match</span><span style="color: #009900;">&#41;</span><span style="color: #339933;">;</span>
&nbsp;
		<span style="color: #b1b100;">for</span> <span style="color: #009900;">&#40;</span><span style="color: #000088;">$i</span> <span style="color: #339933;">=</span> <span style="color: #cc66cc;">1</span><span style="color: #339933;">;</span> <span style="color: #000088;">$i</span> <span style="color: #339933;">&lt;=</span> <span style="color: #cc66cc;">9</span><span style="color: #339933;">;</span> <span style="color: #000088;">$i</span><span style="color: #339933;">++</span><span style="color: #009900;">&#41;</span><span style="color: #009900;">&#123;</span>
			<span style="color: #000088;">$foundLinks</span><span style="color: #009900;">&#91;</span><span style="color: #009900;">&#93;</span> <span style="color: #339933;">=</span> <span style="color: #990000;">str_replace</span><span style="color: #009900;">&#40;</span><span style="color: #0000ff;">'&quot;'</span><span style="color: #339933;">,</span><span style="color: #0000ff;">''</span><span style="color: #339933;">,</span><span style="color: #000088;">$match</span><span style="color: #009900;">&#91;</span><span style="color: #cc66cc;">1</span><span style="color: #009900;">&#93;</span><span style="color: #009900;">&#91;</span><span style="color: #000088;">$i</span><span style="color: #009900;">&#93;</span><span style="color: #009900;">&#41;</span><span style="color: #339933;">;</span>
		<span style="color: #009900;">&#125;</span>
&nbsp;
		<span style="color: #b1b100;">return</span> <span style="color: #000088;">$foundLinks</span><span style="color: #339933;">;</span>
	<span style="color: #009900;">&#125;</span></pre></td></tr></table></div>

<p>And the usage&#8230;</p>

<div class="wp_syntax"><table><tr><td class="line_numbers"><pre>1
2
3
4
5
</pre></td><td class="code"><pre class="php" style="font-family:monospace;"><span style="color: #000088;">$stuff</span> <span style="color: #339933;">=</span> getWordPress<span style="color: #009900;">&#40;</span><span style="color: #0000ff;">&quot;shoes&quot;</span><span style="color: #009900;">&#41;</span><span style="color: #339933;">;</span>
&nbsp;
	<span style="color: #b1b100;">for</span> <span style="color: #009900;">&#40;</span><span style="color: #000088;">$i</span> <span style="color: #339933;">=</span> <span style="color: #cc66cc;">0</span><span style="color: #339933;">;</span> <span style="color: #000088;">$i</span> <span style="color: #339933;">&lt;=</span> <span style="color: #cc66cc;">10</span><span style="color: #339933;">;</span> <span style="color: #000088;">$i</span><span style="color: #339933;">++</span><span style="color: #009900;">&#41;</span><span style="color: #009900;">&#123;</span>
		<span style="color: #b1b100;">echo</span> <span style="color: #000088;">$stuff</span><span style="color: #009900;">&#91;</span><span style="color: #000088;">$i</span><span style="color: #009900;">&#93;</span><span style="color: #339933;">.</span><span style="color: #0000ff;">&quot;&lt;br /&gt;&quot;</span><span style="color: #339933;">;</span>
	<span style="color: #009900;">&#125;</span></pre></td></tr></table></div>

<p>Coming up next: WordPress Comment Poster&#8230;</p>
]]></content:encoded>
			<wfw:commentRss>http://www.kaptainkrayola.com/php-wordpress-blog-finder/feed/</wfw:commentRss>
		<slash:comments>1</slash:comments>
		</item>
	</channel>
</rss>
