#!/usr/local/bin/perl -w
# name: gprank.pl - Google PageRank
# version: 0.03
# release date: 2007/08/20
# original: http://blog.outer-court.com/archive/2004_06_27_index.html#108834386239051706
# synopsis: gprank.pl [URL ...]
# example1: ./gprank.pl http://www.google.com/
# example2: cat url.list | ./gprank.pl
#
# Reads URLs from the command line (or, when none are given, one per line
# from standard input), queries the toolbar endpoint for each http(s) URL
# and prints "<rank>: <url>" or "NO_INDEX: <url>".
# NOTE(review): whether the remote endpoint still answers these queries
# cannot be determined from this file -- verify before relying on it.

use 5.008;
use strict;
use warnings;
use IO::Socket;

my $input_charset = 'euc-jp';    # or shiftjis, utf8, etc...

# Modulus for the 32-bit wrap-around arithmetic below (0x100000000).
my $M = 0xFFFFFFFF + 0x00000001;

# m1($value, $sub1, $sub2, $xor)
# Computes ($value - $sub1 - $sub2) modulo $M, then XORs the result with
# ($xor modulo $M).  The subtraction is done by adding ($M - x) so the
# intermediate value never goes negative for operands already below $M.
sub m1($$$$) {
    my ($value, $sub1, $sub2, $xor) = @_;
    my $diff = ($value + ($M - $sub1) + ($M - $sub2)) % $M;
    return $diff ^ ($xor % $M);
}

# mix($x, $y, $z)
# Runs nine m1() rounds over the three words (each first reduced modulo $M)
# and returns the three mixed words as a list.
# The arguments are copied before being reduced, so the caller's variables
# are never modified through @_ aliasing.
sub mix($$$) {
    my ($x, $y, $z) = @_;
    $_ %= $M for ($x, $y, $z);

    $x = m1($x, $y, $z, $z >> 13);
    $y = m1($y, $z, $x, $x << 8);
    $z = m1($z, $x, $y, $y >> 13);

    $x = m1($x, $y, $z, $z >> 12);
    $y = m1($y, $z, $x, $x << 16);
    $z = m1($z, $x, $y, $y >> 5);

    $x = m1($x, $y, $z, $z >> 3);
    $y = m1($y, $z, $x, $x << 10);
    $z = m1($z, $x, $y, $y >> 15);

    return ($x, $y, $z);
}

# c2i($str, $offset)
# Packs up to four bytes of $str, starting at $offset, into an integer with
# the first byte in the lowest 8 bits.  Fewer than four available bytes
# contribute only what is there; an offset past the end of the string
# yields 0.
sub c2i($$) {
    my ($str, $offset) = @_;
    return 0 if $offset > length($str);

    my $value = 0;
    my $shift = 0;
    for my $byte (unpack('C4', substr($str, $offset))) {
        $value += $byte << $shift;
        $shift += 8;
    }
    return $value;
}

# checkSum($iurl)
# Folds $iurl into the three-word state 12 bytes at a time using mix() and
# returns the third word of the final state.
sub checkSum($) {
    my $iurl  = shift;
    my $len   = length($iurl);
    my @state = (0x9E3779B9, 0x9E3779B9, 0xE6359A60);

    for (my $k = 0; $k <= $len; $k += 12) {
        my $third = c2i($iurl, $k + 8);

        # In the final chunk the first byte is reserved for the length.
        $third = ($third << 8) + $len if $k > $len - 12;

        @state = mix(
            $state[0] + c2i($iurl, $k),
            $state[1] + c2i($iurl, $k + 4),
            $state[2] + $third,
        );
    }
    return $state[2];
}

# GooglePageRank($url)
# Converts $url from $input_charset to UTF-8 (unless it is already utf8),
# computes the checksum of "info:" plus the percent-decoded URL, sends an
# HTTP/1.0 GET to the toolbar host and scans the reply for a
# "Rank_<n>:<n>:<rank>" line.
# Returns the captured rank, or -1 when no such line is present.
# Dies if the TCP connection cannot be established or Encode cannot be loaded.
sub GooglePageRank($) {
    my $url = shift;

    if (defined($input_charset) && $input_charset ne 'utf8') {
        require Encode;    # loaded lazily; a load failure is reported, not hidden
        Encode::from_to($url, $input_charset, 'utf8');
    }

    # The checksum is taken over the decoded form; the query carries $url as given.
    (my $decoded_url = $url) =~ s/%([0-9a-fA-F]{2})/pack('C', hex($1))/eg;
    my $ch = checkSum('info:' . $decoded_url);

    my $target  = 'toolbarqueries.google.com';
    my $path    = sprintf('/search?client=navclient-auto&ie=utf-8&oe=utf-8&features=Rank&ch=6%u&q=info:%s', $ch, $url);
    my $version = 'gprank.pl/0.03';

    my $sock = IO::Socket::INET->new(PeerAddr => $target, PeerPort => 'http(80)')
        or die "Connect to $target, port 80, failed.\n";
    $sock->autoflush(1);

    # HTTP header lines are terminated by CRLF; Host lets name-based servers route the request.
    $sock->print(
        "GET $path HTTP/1.0\r\n"
      . "Host: $target\r\n"
      . "User-Agent: Mozilla/4.0 (compatible; $version)\r\n"
      . "\r\n"
    );
    my @result = $sock->getlines();
    $sock->close();

    for my $line (@result) {
        return $1 if $line =~ m#Rank_\d+:\d+:(\d+)#;
    }
    return -1;
}

# main()
# Processes every URL from @ARGV, or from STDIN when @ARGV is empty.
# Entries that do not start with http:// or https:// are skipped silently.
# Returns 0.
sub main {
    my @url_list = @ARGV ? @ARGV : <STDIN>;

    for my $url (@url_list) {
        # Trim the line terminator (including a stray "\r") and surrounding blanks.
        $url =~ s/\A\s+|\s+\z//g;
        next unless $url =~ m#^https?://#;

        my $rank = GooglePageRank($url);
        if ($rank >= 0) {
            printf("%d: %s\n", $rank, $url);
        }
        else {
            printf("NO_INDEX: %s\n", $url);
        }
    }
    return 0;
}

# Run the driver only when executed as a script, so the helper subs can be
# loaded from another file without performing any network I/O.  Falling off
# the end of the script yields exit status 0.
main() unless caller;

1;