Sm0ke |
Moderator |
|
|
Joined: Nov 25, 2006 |
Posts: 141 |
Location: Finland |
|
|
|
|
|
|
Code: | #!/usr/bin/perl
use strict;
use warnings;
use LWP::Simple qw($ua get);
use HTML::Parser;
# Exactly four command-line arguments are required; anything else shows the
# help text (usage() exits the program).
usage() unless scalar(@ARGV) == 4;

# One shared HTTP client for every request, presenting a browser User-Agent.
my $ua = LWP::UserAgent->new;
$ua->agent('Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.3) Gecko/20070309 Firefox/2.0.0.4');

# Positional arguments: word-length bounds, search term, output file.
my ($min, $max, $term, $result) = @ARGV;

# File-level state shared by the subs below: harvested URLs and known words.
my (@urls, @words);

print "[+] Min word length: $min\n[+] Max word length: $max\n[+] Term: $term\n[+] Result file: $result\n";
# Linear membership test.
#
# The first argument is the value to look for; all remaining arguments form
# the list to search. Returns the 1-based position of the first element that
# is string-equal to the value, or 0 when there is no such element.
sub in_array {
    my ($needle, @haystack) = @_;
    my $position = 0;
    for my $candidate (@haystack) {
        $position++;
        return $position if $candidate eq $needle;
    }
    return 0;
}
# Strip punctuation noise from a single token.
#
# Takes one string and returns a cleaned copy:
#   1. the FIRST line terminator (CRLF, LF, CR, FF, U+2028, U+2029) is
#      removed -- later terminators are left alone;
#   2. every space, hyphen and colon is removed;
#   3. every character in the ASCII range '!' .. ')' is removed, i.e.
#      ! " # $ % & ' ( )
sub trim($){
    my ($text) = @_;

    # Pattern taken from the module Text::Chomp.
    $text =~ s/^(.*?)(?:\x0D\x0A|\x0A|\x0D|\x0C|\x{2028}|\x{2029})/$1/s;

    # Space, hyphen and colon in a single pass.
    $text =~ s/[ \-:]//g;

    # The range '!'-')' already includes '%', '(' and ')'.
    $text =~ s/([!-)]*)//g;

    return $text;
}
# Print the command-line help to STDOUT and terminate the program.
#
# Takes no arguments and never returns (it calls exit).
# The help text tells the user to quote multi-word search terms so the shell
# passes them as a single argument.
sub usage{
    print "Usage: WordlistCreator.pl <min word length> <max word length> <dork/word> <results list>\n";
    print "\tEnclose dork/word in quotes for \"multiple words\"\n";
    print "Eg: WordlistCreator.pl 6 13 astronomy astronomy.txt\n";
    print "Eg: WordlistCreator.pl 5 10 \"site:example.com recipes\" recipes.txt\n";
    exit;
}
# Issue an HTTP GET for the given URL through the file-level $ua client.
#
# Returns the response content when the request succeeds. On failure it
# prints the response status line and returns 0.
sub getreq($){
    my ($url) = @_;

    my $response = $ua->request(HTTP::Request->new(GET => $url));
    return $response->content if $response->is_success;

    print "\n[e] Connection error - " . $response->status_line . "\n";
    return 0;
}
# Harvest result URLs from Google for a search term.
#
# Arguments:
#   $dork  - the raw (not yet URL-encoded) search term
#   $start - result offset of the first page to request
#
# Requests result pages of 100 hits each through getreq(), appends every
# result link that is not already present to the file-level @urls, and moves
# on to the following page for as long as the current page contains a "Next"
# link. Returns 0 as soon as Google answers with its "We're sorry..." page,
# and 1 once no further page is found.
sub google($$){
    my ($dork, $start) = @_;

    # Percent-encode everything except unreserved characters, then turn
    # spaces into '+'. Encoding happens exactly once, before paging starts,
    # so characters such as '%', '+', '#' and '=' in the term survive intact.
    my $query = $dork;
    $query =~ s/([^A-Za-z0-9\-_.~ ])/sprintf('%%%02X', ord($1))/ge;
    $query =~ tr/ /+/;

    # Iterate over result pages instead of recursing once per page.
    while (1) {
        my $url = "http://www.google.com.au/search?q=$query&start=$start&num=100";
        print "[+] $url\n";
        my $content = getreq($url);    # 0 when the request failed

        if ($content =~ m/<h1>We\'re sorry\.\.\.<\/h1>/) {
            print "[!] Google killed you\n";
            return 0;
        }

        while ($content =~ m/<h2 class=r><a href=\"http:\/\/([a-z0-9-\.]+)\/(.*?)\"/ig) {
            # Copy the captures right away; any later match would clobber them.
            my ($host, $path) = ($1, $2);

            # Test the length so that a path of "0" is not mistaken for "no path".
            my $dad = length($path) ? "http://$host/$path" : "http://$host";

            # Compare the same string that gets stored, otherwise duplicates
            # with a path are never detected.
            next if in_array($dad, @urls);

            print "[+] $dad\n";
            push(@urls, $dad);
        }

        last unless $content =~ m/<td nowrap class=b><a href=\"\/(.*?)\"><div id=nn><\/div>Next<\/a><\/table><\/div><\/div><center>/;

        print "[+] Another page\n";
        $start += 100;
    }

    return 1;
}
# HTML::Parser text handler: pull candidate words out of text chunks.
#
# Every argument is treated as a chunk of text. Each chunk is split on
# whitespace; each token is cleaned with trim() and lower-cased. A token is
# accepted when it consists solely of [a-z0-9], its length lies between the
# file-level $min and $max (inclusive), and it is not yet in the file-level
# @words. Accepted words are added to @words and appended, one per line, to
# the file named by $result.
#
# Dies when the result file cannot be opened or written.
sub textreeeh{
    my @fresh;

    # Iterate without shifting @_, so every argument is processed.
    for my $chunk (@_) {
        next unless defined $chunk;

        # split ' ' breaks on any whitespace run; splitting on a literal
        # space only would let trim() glue newline-separated words together.
        for my $token (split(' ', $chunk)) {
            my $word = lc(trim($token));

            # \A...\z instead of ^...$ so a trailing newline is rejected.
            next unless $word =~ m/\A[a-z0-9]*\z/;
            next if length($word) < $min || length($word) > $max;
            next if in_array($word, @words);

            # No re-sort here: membership is checked by linear search, so
            # the order of @words does not matter.
            push(@words, $word);
            push(@fresh, $word);
        }
    }

    return unless @fresh;

    # Append all new words with a single open/close per call.
    open(my $out, '>>', $result) or die "[-] Couldn't open file: $result\n";
    print {$out} map { "$_\n" } @fresh;
    close($out) or die "[-] Couldn't write file: $result\n";
    return;
}
# Main flow: search, load the existing wordlist, harvest words, report.
print "[+] Searching google for $term\n";
google($term, 0);
print "[+] Finished searching, building..\n";

# Make sure the result file exists before it is read back.
open(my $touch_fh, '>>', $result) or die "[-] Couldn't open file: $result\n";
close($touch_fh);

# Load words from earlier runs. They must be chomped, otherwise "word\n"
# never equals "word" and every previously saved word is appended again.
open(my $prev_fh, '<', $result) or die "[-] Couldn't open file: $result\n";
chomp(@words = <$prev_fh>);
close($prev_fh);

for my $url (@urls) {
    print "[+] $url\n";
    my $content = getreq($url);

    # getreq() returns 0 on failure; do not feed that to the parser.
    next unless $content;

    my $p = HTML::Parser->new(text_h => [\&textreeeh, 'text']);
    $p->parse($content);
    $p->eof;    # flush any text the parser is still buffering
}

print "[+] Got this many words: " . scalar(@words) . "\n";

# -s yields a false, non-numeric value for an empty file; fall back to 0.
my $resultsize = ((-s $result) || 0) / 1024;

# Format numerically; cutting the string to four characters misreports
# sizes of 10000 Kb and above.
print "[+] Size of wordlist: " . sprintf('%.2f', $resultsize) . " Kb\n";
Quote: | Usage: WordlistCreator.pl <min word length> <max word length> <dork/word> <results list>
Enclose dork/word in quotes for "multiple words"
Eg: WordlistCreator.pl 6 13 astronomy astronomy.txt
Eg: WordlistCreator.pl 5 10 "site:example.com recipes" recipes.txt |
It works by Googling your search string and collecting every result URL it can; it then fetches each URL, parses the HTML, and extracts the words from the page text.
Quote: | Eg: WordlistCreator.pl 6 13 searchterm dump.txt |
This will Google for "searchterm" and find words that have lengths between 6 and 13, and dump to dump.txt |
|