Download Indexed Cache is a proof of concept script that implements the Google SOAP Search API to retrieve content indexed within the Google Cache to support the "Search Engine Reconnaissance" section of the OWASP Testing Guide version 3.
42571e3120e00887108e79161991c1e09c0a3fb72178bd4a81286effe45c918f
#!/usr/bin/perl
# Please refer to the Plain Old Documentation (POD) at the end of this Perl Script for further information
use strict;
# SOAP::Lite version 0.52 or newer is recommended by https://code.google.com/apis/soapsearch/api_faq.html#tech20
use SOAP::Lite;
use Getopt::Long;
use Data::Dumper;
my $VERSION = 0.1; # May be required to upload script to CPAN i.e. https://www.cpan.org/scripts/submitting.html

print "\n\"Download Indexed Cache\" Proof of Concept (PoC) v0.1 (Released at RUXCON 2K8)\n";
print "\n";
print "Copyright 2008 Christian Heinrich\n";
print "Licensed under the Apache License, Version 2.0\n\n";

# Command line arguments:
#   -key    Google SOAP Search API Key
#   -query  Google Search Query
#   -start  Number (counted from 1) of the first search result to retrieve
my $google_api_key;
my $query;
my $start;

my $usage
    = "Usage: dic.pl -key [key] -query [Google Search Query] "
    . "-start [Starting Google Search Result Number]\n";

# Stop on unknown or malformed options instead of carrying on with
# half-populated variables.
GetOptions(
    "key=s"   => \$google_api_key,
    "query=s" => \$query,
    "start=s" => \$start
) or die $usage;

# Both the key and the query are mandatory.  Without a query the output
# directory below would resolve to "/dic".
if (   !defined $google_api_key
    || $google_api_key eq ""
    || !defined $query
    || $query eq "" )
{
    die $usage;
}

# Process command line arguments
# -start is counted from 1 on the command line, whereas the value handed to
# do_Google_Search is one less.  Default to the first result when -start is
# omitted rather than sending -1.
$start = 1 if !defined $start;
if ( $start !~ /\A[0-9]+\z/ || $start < 1 ) {
    die "-start must be a positive integer\n" . $usage;
}
$start = $start - 1;
chomp($query);

# For demonstrations without exposing the Google SOAP Search API insert your Google SOAP Search API Key below to use dic.pl -key "demo"
# The comparison must be a string comparison ("eq"): with "==" every
# non-numeric key compares equal to "demo" because both sides evaluate to 0.
if ( $google_api_key eq "demo" ) {

    # Replace "insert_google_api_key" with your Google SOAP Search API Key
    # $google_api_key = "insert_google_api_key";
}

# Strip characters which are illegal (or are path separators) in filenames,
# i.e. \ / : * ? < > |, from the query before using it as a directory name.
my $stripped_query = $query;
$stripped_query =~ s{[\\/:*?<>|]}{}g;
if ( $stripped_query eq "" ) {
    die "The query does not contain any character usable in a directory name\n"
        . $usage;
}

# The directory which holds the output of dic
my $dir = "$stripped_query/dic";
if ( !( -e $dir ) ) {
    print("Creating ./$dir\n\n");
    if ( !( -e "./$stripped_query" ) ) {
        mkdir("./$stripped_query")
            or die "Cannot create ./$stripped_query: $!\n";
    }

    # Use the mkdir built-in rather than system("mkdir ...") so that the
    # query is never interpreted by a shell.
    mkdir("./$dir") or die "Cannot create ./$dir: $!\n";
}
else { print "Appending ./$dir\n\n"; }

my $google_search_results
    = do_Google_Search( "$google_api_key", "$query", "$start" );

# TODO Display a warning if <estimatedTotalResultsCount> and <estimateIsExact> exceeds 1000
# Keep a raw dump of the response (appended, so earlier runs are preserved).
my $data_dumper_file = "./$dir/datadumper.txt";
open( my $data_dumper_fh, '>>', $data_dumper_file )
    or die "Cannot open $data_dumper_file: $!\n";
print {$data_dumper_fh} Data::Dumper::Dumper($google_search_results);
close($data_dumper_fh) or die "Cannot close $data_dumper_file: $!\n";

# Checked after the dump has been written so that the dump still records
# what was (not) returned.
if ( !ref $google_search_results ) {
    die "The Google SOAP Search API did not return any search results\n";
}

# The URL corresponding to the Search Result .html file is listed in this .CSV file
my $url_file = "./$dir/$stripped_query.csv";
open( my $url_fh, '>>', $url_file ) or die "Cannot open $url_file: $!\n";

my $google_search_result_number = $start;

# Loop through the results.
foreach my $google_search_result (
    @{ $google_search_results->{resultElements} || [] } )
{

    # Set the results as variables
    ++$google_search_result_number;
    my $URL        = $google_search_result->{URL};
    my $cachedSize = $google_search_result->{cachedSize};
    print(    "Downloading "
            . $URL
            . " from Google Cache ["
            . $cachedSize . "] as "
            . $google_search_result_number
            . ".html\n" );

    my $google_cached_page = doGetCachedPage( "$google_api_key", "$URL" );

    my $cached_page_file = "./$dir/$google_search_result_number.html";
    open( my $cached_page_fh, '>', $cached_page_file )
        or die "Cannot open $cached_page_file: $!\n";
    print {$cached_page_fh} $google_cached_page;
    close($cached_page_fh) or die "Cannot close $cached_page_file: $!\n";

    # TODO Include the date and time the page was indexed i.e. to quote the cache page "It is a snapshot of the page as it appeared on [Date] [Time]"
    print {$url_fh} "$google_search_result_number,$URL\n";
}

close($url_fh) or die "Cannot close $url_file: $!\n";

# do_Google_Search( $key, $q, $start )
#
# Calls doGoogleSearch on the service described by GoogleSearch.wsdl and
# returns whatever that call returns.
#
#   $key    Google SOAP Search API Key
#   $q      Google Search Query
#   $start  index of the first result requested (the caller passes the
#           -start command line argument minus one)
#
# The remaining doGoogleSearch arguments are fixed below; their names follow
# the Google SOAP Search API Reference Documentation.
sub do_Google_Search {
    my ( $api_key, $search_query, $first_result ) = @_;

    # TODO Check length of Google Search Query is 2048 bytes
    # TODO Check Google Search Query is a maximum of 10 Words
    # TODO Check only one site: term is in the Google Search Query

    # Arguments are listed in the order in which doGoogleSearch receives them.
    my @request = (
        $api_key,         # key
        $search_query,    # q
        $first_result,    # start
        "10",       # maxResults - TODO Must add a test to ensure that this is between 1 to 1000
        "false",    # filter - boolean i.e. either "true" or "false"
        "",         # restricts - TODO Check Country of Restrict, TODO Check Topic of Restrict
        "false",    # safeSearch
        "",         # lr - TODO Check Language Restrict
        "UTF-8",    # ie - Input Encoding, deprecated in the Google SOAP Search API
        "UTF-8",    # oe - Output Encoding, deprecated in the Google SOAP Search API
    );

    # Create a new SOAP::Lite instance from the GoogleSearch WSDL file
    # TODO Confirm that connection with api.google.com can be established
    my $soap_client
        = SOAP::Lite->service("https://api.google.com/GoogleSearch.wsdl");

    # TODO Confirm that doGoogleSearchResponse SOAP Message is not empty due to exceeding 10K SOAP Messages with Google SOAP Search API Key
    my $search_response = $soap_client->doGoogleSearch(@request);
    return $search_response;
}
# doGetCachedPage( $key, $URL )
#
# Calls doGetCachedPage on the service described by GoogleSearch.wsdl and
# returns whatever that call returns (the caller writes it to an .html file).
#
#   $key  Google SOAP Search API Key
#   $URL  URL of the page to retrieve from the Google Cache
#
# Variable Naming Convention is as per Google SOAP Search API Reference Documentation
sub doGetCachedPage {
    my ( $key, $URL ) = @_;

    # Location of the GoogleSearch WSDL file
    my $google_wsdl  = "https://api.google.com/GoogleSearch.wsdl";
    my $google_cache = SOAP::Lite->service("$google_wsdl");

    # Use the key that was passed in; do not reach out to the file-scoped
    # $google_api_key, so that the sub honours its own argument.
    my $doGetCachedPageResponse
        = $google_cache->doGetCachedPage( $key, $URL );

    # TODO Confirm that doGetCachedPageResponse SOAP Message is not empty due to exceeding 10K SOAP Messages with Google SOAP Search API Key
    return $doGetCachedPageResponse;
}
=head1 NAME
dic.pl - "Download Indexed Cache"
=head1 VERSION
This documentation refers to dic PoC v0.1. Released at RUXCON 2K8 (AU)
=head1 USAGE
dic.pl -key [key] -query [Google Search Query] -start [Starting Google Search Result Number]
=head1 REQUIRED ARGUMENTS
-key Google SOAP Search API Key
-query Google Search Query
-start Starting Google Search Result Number
=head1 DESCRIPTION
"Download Indexed Cache" implements the Google SOAP Search API to retrieve
content indexed within the Google Cache and supports the "Search Engine
Reconnaissance" section of the recently released OWASP Testing Guide v3.
=head1 DEPENDENCIES
=head1 PREREQUISITES
SOAP::Lite v0.52 CPAN Module
Data::Dumper CPAN Module
=head1 COREQUISITES
=head1 OSNAMES
cygwin
=head1 SCRIPT CATEGORIES
Web
=head1 INCOMPATIBILITIES
=head1 BUGS AND LIMITATIONS
Please refer to the comments beginning with "TODO" in the Perl Code.
=head1 AUTHOR
Christian Heinrich
=head1 CONTACT INFORMATION
christian.heinrich@owasp.org
christian.heinrich@cmlh.id.au
cmlh@cpan.org
https://www.linkedin.com/in/ChristianHeinrich
=head1 MAILING LIST
https://lists.owasp.org/mailman/listinfo/owasp-google-hacking
https://groups.google.com/group/download-indexed-cache
=head1 SUBVERSION REPOSITORY
# TODO svn propset svn:keywords
https://code.google.com/p/dic
=head1 FURTHER INFORMATION AND UPDATES
https://del.icio.us/cmlh/dic
https://lists.owasp.org/mailman/listinfo/owasp-google-hacking
https://groups.google.com/group/download-indexed-cache
https://code.google.com/p/dic
=head1 LICENSE AND COPYRIGHT
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Copyright 2008 Christian Heinrich