#!perl -w #----------------------------------------------------------------------------- # Piranha.pl # # Part of the Data Carving Utility Library (DCUL). # http://www.sftsrc.com/DCUL/ #----------------------------------------------------------------------------- #----------------------------------------------------------------------------- # Copyright (c) 2006, SoftSource Consulting # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # * Redistributions of source code must retain the above copyright notice, # this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation # and/or other materials provided with the distribution. # * Neither the name of SoftSource Consulting nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. 
#-----------------------------------------------------------------------------

#-----------------------------------------------------------------------------
# Version history:
#
# Version 1.0, 17th July 2006:
#   Initial release: John Goalby, john.goalby@sftsrc.com
#-----------------------------------------------------------------------------

#-----------------------------------------------------------------------------
# Description:
#
# Piranha is a general purpose tool to assist the user in dissecting and
# examining an image file in numerous ways.
#
# This script allows the user to search an image file using regular
# expressions in either ASCII or hex format. You can also search for
# blocks with certain MD5 hash values.
#
# Other features include outputting summary information about each block
# and collecting information about the headers of each block. The headers
# are defined as the first n bytes of a block and quite often relate to
# file signatures. Using the header information you could determine what
# file types were present on a disk image.
#
# The output of this script is customizable, in addition to a feature
# for outputting to PDF format.
#
# Examples of use:
#
#   Examining an image file looking for certain search terms.
#   Trying to find existence of a number of files on an image file.
#   Carving out files from an image file by hand.
#
# Design:
#
# This script was designed for submission to the DFRWS data carving challenge
# (http://www.dfrws.org/2006/challenge/). As such, the challenge was to
# examine a 50MB disk image. This is obviously very small compared to disk
# images in real use. If you need this functionality on a bigger scale please
# contact us for information on our enterprise version.
#-----------------------------------------------------------------------------

#-----------------------------------------------------------------------------
# Import statements.
#-----------------------------------------------------------------------------
use strict;
use IO::File;
use Getopt::Long;
use File::Basename;
use File::Find;
use Digest::MD5 qw (md5 md5_hex md5_base64);
use Switch;
use Cwd;

# Loaded through a string eval so that a failure to load this module is not
# fatal to the script.
eval ("use Win32::Console::ANSI;");
use Term::ANSIColor;

#-----------------------------------------------------------------------------
# Local imports.
#-----------------------------------------------------------------------------
use Helpers::DCULHelper;

#-----------------------------------------------------------------------------
# Various constants.
#-----------------------------------------------------------------------------
use constant VERSION_NUMBER      => '1.0';
use constant DEFAULT_BLOCK_SIZE  => 512;
use constant TEXTSEARCH          => 'text';
use constant HEXSEARCH           => 'hex';
use constant MD5SEARCH           => 'md5';
use constant DEFHIGHLIGHT        => "\e\[1\;33m";
use constant DEFUNHIGHLIGHT      => "\e\[0m";
use constant EMPTYHASH           => { };
use constant CAPTURE_HEADER_SIZE => 10;
use constant NUM_COMMON_HEADERS  => 5;
use constant OUTPUT_TEXT         => 'text';
use constant OUTPUT_BINARY       => 'binary';
use constant OUTPUT_SUMMARY      => 'summary';
use constant OUTPUT_NONE         => 'none';
use constant HEX_PREFIX          => '0x';
use constant MD5_PREFIX          => 'md5:';

#-----------------------------------------------------------------------------
# Capture the name of this script for use later on.
#-----------------------------------------------------------------------------
my $g_thisscript = basename($0);
my $g_thisscriptbasename = basename ($g_thisscript);

#-----------------------------------------------------------------------------
# Globals
#-----------------------------------------------------------------------------
my $g_linesperblock = 0;
my %g_headers;
my %g_headersascii;
my $g_outputheadings = 0;
my %g_md5hashes;
my @g_filestoprocess;
my $g_abufformat = "";
my $g_hbufformat = "";
my $g_searchtype = TEXTSEARCH;
my $g_outformat = "";
my $g_pdffilename = "";

#-----------------------------------------------------------------------------
# Command line option globals.
#-----------------------------------------------------------------------------
my %g_options;
my %g_optionhelp;

#-----------------------------------------------------------------------------
# Invocation related globals.
#-----------------------------------------------------------------------------
my $g_starttime;
my $g_starttimetext;
my @g_scriptlines;

#-----------------------------------------------------------------------------
# Global counters.
#-----------------------------------------------------------------------------
my $g_totalbytesread = 0;

#-----------------------------------------------------------------------------
# Capture the command line parameters. Need to do this before we process them
# with GetOptions as that seems to remove them. Also get the number of params
# that were passed in.
#-----------------------------------------------------------------------------
my $g_cmdline = $g_thisscript . DCULHelper::GetCommandLineParams();
my $g_paramspassed = scalar @ARGV;

#-----------------------------------------------------------------------------
# Globals for the command line options.
#-----------------------------------------------------------------------------

# Housekeeping
my ($gopt_help, $gopt_version);

# Various input
my ($gopt_blocksize, $gopt_file, $gopt_searchstr);
my ($gopt_casesensitive, $gopt_md5file, $gopt_searchfile);
my ($gopt_firstblockonly, $gopt_searchunicode);
my ($gopt_hexprefix, $gopt_md5prefix);

# Header analysis
my ($gopt_captureheadersize, $gop_numcommonheaders);

# Carving of input file.
my ($gopt_start, $gopt_end, $gopt_length);

# Highlight
my ($gopt_highlight, $gopt_highoff);

# Output related
my ($gopt_output, $gopt_outblocknum, $gopt_outoffsetdec, $gopt_outoffsethex);
my ($gopt_outascii, $gopt_outhex, $gopt_separateblocks, $gopt_linewidth);
my ($gopt_outputheaders, $gopt_showall);

# PDF processing
my ($gopt_outputpdf, $gopt_reporttitle);
my ($gopt_subtitle, $gopt_author);

#-----------------------------------------------------------------------------
# Process the command line arguments.
#-----------------------------------------------------------------------------
InitializeOptionInfo();

# GetOptions reports each bad option on STDERR itself and returns false.
# Stop here rather than carry on with a partially understood command line.
if (!GetOptions (%g_options))
{
    print STDERR "Try '$g_thisscript --help' for more information.\n";
    exit 2;
}

#-----------------------------------------------------------------------------
# If the user just wants version or help information then output that now.
#-----------------------------------------------------------------------------

# The exits are unconditional so that they do not depend on what the print
# routines happen to return.
if ($gopt_help || ($g_paramspassed == 0))
{
    PrintUsage();
    exit;
}

if ($gopt_version)
{
    PrintVersion();
    exit;
}

#-----------------------------------------------------------------------------
# Get things setup.
#-----------------------------------------------------------------------------

# Set the default values.
SetDefaults();

# Figure out the highlight for search hits.
DetermineHighlight();

# Set the output options based on user input.
SetOutputOptions();

# Check that the command line options are consistent before going further.
CheckCommandLineOptions();

#-----------------------------------------------------------------------------
# This is where all of the work happens.
#-----------------------------------------------------------------------------

# Take care of getting things initialized.
StartUp();

# Output information about this script and environment. All of the report
# text is suppressed for binary output.
PrintScriptInformation() if ($gopt_output ne OUTPUT_BINARY);
PrintEnvironmentInformation() if ($gopt_output ne OUTPUT_BINARY);

# Read in the MD5 file of hashes if the user specified one.
ReadMD5HashFile ($gopt_md5file) if (defined $gopt_md5file);

# Process the search files if the user specified them.
ProcessSearchFiles ($gopt_searchfile) if (defined $gopt_searchfile);

# Set the search options up based on user input.
ProcessSearchString();

# Output the information about the invocation.
PrintInvocationInformation() if ($gopt_output ne OUTPUT_BINARY);

# Process the input file now.
ProcessInputFile();

# Output the header information we captured.
OutputHeaderInformation()
    if (($gopt_outputheaders) && ($gopt_output ne OUTPUT_BINARY));

# Print the summary information.
PrintSummaryInformation() if ($gopt_output ne OUTPUT_BINARY);

# Clean everything up.
ShutDown();

#-----------------------------------------------------------------------------
# Do things that need to happen at the start of the script: normalise the
# search file path, record the start time and, when PDF output was requested,
# derive the PDF file name and initialize the PDF helper.
#-----------------------------------------------------------------------------
sub StartUp
{
    # Change the slashes in search file to all be the same direction.
    $gopt_searchfile =~ tr{\\}{/} if (defined $gopt_searchfile);

    # Get the time that we started doing the actual processing.
    $g_starttime     = time();
    $g_starttimetext = DCULHelper::FormatDateTime ($g_starttime);

    # Everything below is only needed for PDF output.
    return unless ($gopt_outputpdf);

    # Start with the script filename; the extension is swapped for a
    # separator that the tr below turns into an underscore.
    $g_pdffilename = $g_thisscriptbasename;
    $g_pdffilename =~ s/\..*$/ /;

    # Add the time and date.
    $g_pdffilename .= $g_starttimetext;

    # Replace spaces, tabs and :'s with underscores.
    $g_pdffilename =~ tr/ \t\:/_/;

    # Add the extension.
    $g_pdffilename .= ".pdf";

    # Initialize the PDF helper.
    PDFHelper::StartUp ($g_pdffilename, $gopt_author,
                        "$g_thisscriptbasename Report",
                        $gopt_subtitle, $gopt_reporttitle);
    return;
}

#-----------------------------------------------------------------------------
# Do things that need to happen at the end of the script.
#-----------------------------------------------------------------------------
sub ShutDown
{
    # Cleanup the PDF helper if the user wanted PDF output.
    PDFHelper::ShutDown() if ($gopt_outputpdf);
    return;
}

#-----------------------------------------------------------------------------
# Print the summary information: elapsed time and total bytes read, plus the
# script source when PDF output was requested.
#-----------------------------------------------------------------------------
sub PrintSummaryInformation
{
    # Output a new page title for the summary.
    DCULHelper::PrintPageTitle ("Summary");

    # Output the time taken.
    my $totaltime = time() - $g_starttime;
    DCULHelper::PrintLn ("Total time taken: $totaltime second(s)");

    # Output the total bytes read from the input file.
    DCULHelper::PrintLn ("Input file total bytes read: $g_totalbytesread");

    # Output the source for the script. Don't need to do this for the
    # console output as the user could output it whenever they wanted.
    DCULHelper::OutputScriptSource (\@g_scriptlines) if ($gopt_outputpdf);
    return;
}

#-----------------------------------------------------------------------------
# Print out useful information about this script: version, MD5 of the script
# source and its file details. The script lines are kept in @g_scriptlines
# for later use.
#-----------------------------------------------------------------------------
sub PrintScriptInformation
{
    # Script information page.
    DCULHelper::PrintPageTitle ("Script Information");

    # Output the script version number.
    DCULHelper::PrintLn ("$g_thisscriptbasename version number: " .
        VERSION_NUMBER);

    # Read in this script so can create MD5 hash and output the
    # source if we wanted.
    if (open (my $scriptfh, '<', $g_thisscript))
    {
        binmode ($scriptfh);
        @g_scriptlines = <$scriptfh>;
        close ($scriptfh);

        # Calculate the script's MD5 hash and output.
        DCULHelper::PrintLn ("$g_thisscriptbasename MD5: " .
            md5_hex (join ("", @g_scriptlines)));
    }
    else
    {
        # Without the source there is nothing meaningful to hash, so the
        # MD5 line is left out rather than reporting the hash of nothing.
        warn "Cannot open file $g_thisscript: $!\n";
        @g_scriptlines = ();
    }

    # Output the last modified, created and size information for the script.
    DCULHelper::PrintLastModCreatedSizeForFile ($g_thisscript);
    return;
}

#-----------------------------------------------------------------------------
# Print out useful information about this invocation.
#-----------------------------------------------------------------------------
sub PrintInvocationInformation
{
    # Invocation information page.
    DCULHelper::PrintPageTitle ("Invocation");

    # Output the invocation information.
    DCULHelper::PrintLn ("Started: $g_starttimetext");
    DCULHelper::PrintLn ("Command line: $g_cmdline");
    DCULHelper::PrintLn ("Current working directory: " . cwd());
    DCULHelper::PrintLn ("Block size: $gopt_blocksize bytes");

    # Output the file that PDF output is being written to, if any.
    DCULHelper::PrintLn ("PDF Output: $g_pdffilename") if ($gopt_outputpdf);

    # Output the input file's MD5 hash.
    DCULHelper::PrintMD5HashofInputFile ($gopt_file);

    # Output the last modified, created and size information
    # for the input file.
    DCULHelper::PrintLastModCreatedSizeForFile ($gopt_file);
    return;
}

#-----------------------------------------------------------------------------
# Print out useful information about the environment. This information is
# readily available from the command line so is only an option for outputting
# to the PDF file.
#-----------------------------------------------------------------------------
sub PrintEnvironmentInformation
{
    # We don't want to output this information to the console as it is
    # accessible to the user any time they want it.
    DCULHelper::PrintEnvironmentInformation() if ($gopt_outputpdf);
    return;
}

#-----------------------------------------------------------------------------
# Process the input file. Read it in block by block, restrict each block to
# the requested start/end range, apply the active search (text, hex or MD5)
# and output every block that qualifies.
#-----------------------------------------------------------------------------
sub ProcessInputFile
{
    # Details information page, only needed for text output.
    DCULHelper::PrintPageTitle ("Details") if ($gopt_output eq OUTPUT_TEXT);

    # If the user specified a length, work out what that makes
    # the value of end to be.
    $gopt_end = $gopt_start + ($gopt_length - 1) if ($gopt_length != -1);

    # Open the input file for reading and set into binary mode. The explicit
    # mode stops characters in the file name being read as an open mode.
    my $fh = IO::File->new ($gopt_file, "r");
    die "Error reading $gopt_file: $!\n" if (!defined ($fh));
    binmode ($fh);

    # Holder for the block of data we read in and how much was read.
    my $buffer;
    my $bufferread;

    # Offset is the current byte offset in the input file.
    my $offset = 0;

    # Read in the next block.
    while ($bufferread = $fh->sysread ($buffer, $gopt_blocksize))
    {
        # Use this to keep track of how much was read.
        $g_totalbytesread += $bufferread;

        # Offset of the first byte after this block.
        my $blockend = $offset + $gopt_blocksize;

        # Next iteration if we will not yet reach the start.
        if ($blockend <= $gopt_start)
        {
            $offset = $blockend;
            next;
        }

        # If the user specified an end that falls within this block, only
        # take the part of the block up to and including the end.
        if (($gopt_end != -1) && ($blockend > $gopt_end))
        {
            $buffer = substr ($buffer, 0, ($gopt_end - $offset) + 1);
        }

        # The padding is the difference between the start of the block and
        # the actual start. We need this value so that we know where the
        # buffer is in relation to the input file offsets.
        my $padding = 0;

        # If the start is contained within this block.
        if (($gopt_start >= $offset) && ($gopt_start < $blockend))
        {
            $padding = $gopt_start - $offset;

            # Get the part of the buffer from the start onwards. A short
            # final block may end before the start; treat that as no data
            # instead of taking a substr outside of the string.
            $buffer = ($padding < length ($buffer))
                ? substr ($buffer, $padding)
                : "";
        }

        # Assume that we are searching the buffer we read until told otherwise.
        my $searchbuffer = $buffer;

        # If we are doing a hex search.
        if ($g_searchtype eq HEXSEARCH)
        {
            # Change the search buffer to hex strings with a space after
            # each byte. We need the space so that we can match FF to 00FF00
            # instead of 0FF000. The template follows the length of the
            # buffer, so a trimmed or short block adds no empty entries.
            $searchbuffer = join (" ", unpack ("(H2)*", $buffer)) . " ";
        }

        # If we are doing a md5 search.
        if ($g_searchtype eq MD5SEARCH)
        {
            # A hit is a block whose MD5 hash is one we were given. Pass an
            # empty matches hash as there is nothing to highlight.
            if (exists $g_md5hashes{md5_hex ($buffer)})
            {
                _OutputBlock ($buffer, $offset, $padding, EMPTYHASH());
            }
        }
        # If we are showing all entries or we have a search hit then we have
        # something to output.
        elsif ($gopt_showall || (! defined $gopt_searchstr) ||
               ($searchbuffer =~ /$gopt_searchstr/s))
        {
            # Capture the header information if it is to be output.
            CaptureHeaders ($buffer) if ($gopt_outputheaders);

            # The match positions are only used for highlighting text output.
            my %matches = ($gopt_output eq OUTPUT_TEXT)
                ? CreateMatchesHash ($searchbuffer)
                : ();

            _OutputBlock ($buffer, $offset, $padding, \%matches);
        }

        # Next block please.
        $offset = $blockend;

        # We don't need to continue if an end was specified and
        # we are now past it.
        last if (($gopt_end != -1) && ($offset > $gopt_end));
    }

    # sysread returns undef on a read error, which would otherwise look
    # exactly like reaching the end of the file.
    warn "Error reading $gopt_file: $!\n" if (!defined ($bufferread));

    # Close the file.
    close ($fh);
    return;
}

#-----------------------------------------------------------------------------
# Private helper for ProcessInputFile. Output one selected block in the
# format chosen with the output option; any other output type prints nothing.
#
#   $buffer      the bytes of the block to output
#   $offset      byte offset of the start of the block in the input file
#   $padding     bytes skipped at the front of the block (start mid-block)
#   $matchesref  hash ref of match positions to highlight in text output
#-----------------------------------------------------------------------------
sub _OutputBlock
{
    my ($buffer, $offset, $padding, $matchesref) = @_;

    if ($gopt_output eq OUTPUT_TEXT)
    {
        # Output the formatted buffer with the matches highlighted.
        OutputFormattedBuffer ($buffer, $offset, $padding, $matchesref);
    }
    elsif ($gopt_output eq OUTPUT_BINARY)
    {
        # Just print to output binary.
        print ($buffer);
    }
    elsif ($gopt_output eq OUTPUT_SUMMARY)
    {
        # Output the summary information.
        OutputSummaryInfo ($buffer, $offset);
    }
    return;
}

#-----------------------------------------------------------------------------
# Initialize the information for the command line options. %g_options maps
# each Getopt::Long specification to the variable that receives the value and
# %g_optionhelp maps each option name to its help text.
#-----------------------------------------------------------------------------
sub InitializeOptionInfo
{
    $g_options{"version|v"} = \$gopt_version;
    $g_optionhelp{"version"} = "Output version information and exit.";

    $g_options{"help|?"} = \$gopt_help;
    $g_optionhelp{"help"} = "Display this help and exit.";

    $g_options{"author|a:s"} = \$gopt_author;
    $g_optionhelp{"author"} = "The author for the PDF report.";

    $g_options{"outputpdf|pdf!"} = \$gopt_outputpdf;
    $g_optionhelp{"outputpdf"} =
        "Specify whether to output to PDF in " .
        "addition to console. Default is to output to console only " .
        "as this is more of an investigative tool. If you choose to " .
        "output binary then this option is not used.";

    $g_options{"title|t:s"} = \$gopt_reporttitle;
    $g_optionhelp{"title"} = "The title for the PDF report.";

    $g_options{"subtitle|u:s"} = \$gopt_subtitle;
    $g_optionhelp{"subtitle"} = "The subtitle for the PDF report.";

    $g_options{"file|f=s"} = \$gopt_file;
    $g_optionhelp{"file"} = "Specify the input file to process.";

    # NOTE(review): the sentence "is a single character." looks like it has
    # lost its subject (possibly a placeholder name in angle brackets). The
    # text is left untouched until the intended wording is confirmed.
    $g_options{"highlight|h:s"} = \$gopt_highlight;
    $g_optionhelp{"highlight"} =
        "Set the highlight color using ANSI " .
        "capable terminals, or Windows CMD.exe. Default is to highlight in " .
        "bold yellow. is a single character. Upper-case makes the " .
        "color bold. Colors are red, green, yellow, blue, magenta, cyan, " .
        "white. You can also specify inverse. You can specify 0 as the " .
        "highlight to turn it off.";

    $g_options{"casesensitive"} = \$gopt_casesensitive;
    $g_optionhelp{"casesensitive"} =
        "Make the search case-sensitive. Does " .
        "not apply to HEX or MD5 searches.";

    $g_options{"showall"} = \$gopt_showall;
    $g_optionhelp{"showall"} =
        "Used when you are searching. Shows all " .
        "blocks even if they don't have a search hit. If you don't " .
        "specify this option you will only see blocks that have a search " .
        "hit. Has no effect with MD5 searches.";

    $g_options{"searchstr|q:s"} = \$gopt_searchstr;
    $g_optionhelp{"searchstr"} =
        "Can contain regular expressions. Note " .
        "that on Windows the ^ (caret) symbol is an escape character. The " .
        "'& | ( ) < > ^' characters are reserved. So to anchor a search at " .
        "the start of a block you can use double carets (^^) or enclose " .
        "the parameter in double quotes. If you want to search for HEX " .
        "values, prefix the search string with " . HEX_PREFIX . ". If you want " .
        "to search for MD5 hashes of the block, prefix the search string " .
        "with " . MD5_PREFIX . ". Please note that regular expressions don't " .
        "work so well in this version as an attempt has been made to " .
        "search on byte boundaries. The '.' character will work as " .
        "expected.";

    $g_options{"blocksize|bs:i"} = \$gopt_blocksize;
    $g_optionhelp{"blocksize"} =
        "The blocksize to use for output. " .
        "Default is " . DEFAULT_BLOCK_SIZE . ". If using a disk image as the " .
        "input file you would most likely use the sector size.";

    $g_options{"linewidth|w:i"} = \$gopt_linewidth;
    $g_optionhelp{"linewidth"} = "The number of bytes per line to output.";

    $g_options{"start|s:i"} = \$gopt_start;
    $g_optionhelp{"start"} =
        "Specify the starting byte offset of the input " .
        "file. Default is to start from offset 0.";

    $g_options{"end|e:i"} = \$gopt_end;
    $g_optionhelp{"end"} =
        "Specify the ending byte offset of the input " .
        "file. Default is to end at the end of the input file.";

    $g_options{"length|l:i"} = \$gopt_length;
    $g_optionhelp{"length"} =
        "Specify the number of bytes to output from " .
        "the input file based on the start specified. Useful if you don't " .
        "want to calculate the end byte offset by hand.";

    $g_options{"showblocknum|0!"} = \$gopt_outblocknum;
    $g_optionhelp{"showblocknum"} =
        "Output the block number. " .
        "This is output by default.";

    $g_options{"showoffsetdec|1!"} = \$gopt_outoffsetdec;
    $g_optionhelp{"showoffsetdec"} =
        "Output the decimal offset " .
        "information. This is output by default.";

    $g_options{"showoffsethex|2!"} = \$gopt_outoffsethex;
    $g_optionhelp{"showoffsethex"} =
        "Output the hex offset information. " .
        "This is output by default.";

    $g_options{"showascii|3!"} = \$gopt_outascii;
    $g_optionhelp{"showascii"} =
        "Output the ASCII information. " .
        "This is output by default.";

    $g_options{"showhex|4!"} = \$gopt_outhex;
    $g_optionhelp{"showhex"} =
        "Output the hex information. " .
        "This is output by default.";

    $g_options{"separateblocks!"} = \$gopt_separateblocks;
    $g_optionhelp{"separateblocks"} =
        "Put a line between the output of " .
        "each block output. On by default.";

    $g_options{"outputheaders!"} = \$gopt_outputheaders;
    $g_optionhelp{"outputheaders"} =
        "Output the first few bytes of each of " .
        "the blocks for further file signature analysis.";

    $g_options{"md5file|md5:s"} = \$gopt_md5file;
    $g_optionhelp{"md5file"} =
        "Specify a file that contains MD5 hashes that " .
        "will be compared against each block of the input file. It is " .
        "necessary to keep the block size of the hashes the same in both " .
        "cases. You would use this to save time calculating the hashes " .
        "every invocation with the search file option. The summary output " .
        "option gives MD5 hashes for each block.";

    $g_options{"hexprefix:s"} = \$gopt_hexprefix;
    $g_optionhelp{"hexprefix"} =
        "Specify the string that specifies a hex " .
        "search. The hex prefix is configurable in case you need to search " .
        "for a string that begins with the same chars.";

    $g_options{"md5prefix:s"} = \$gopt_md5prefix;
    $g_optionhelp{"md5prefix"} =
        "Specify the string that specifies an MD5 " .
        "search. The MD5 prefix is configurable in case you need to search " .
        "for a string that begins with the same chars.";

    $g_options{"output|o:s"} = \$gopt_output;
    $g_optionhelp{"output"} =
        "Specify the output type. You can choose to " .
        "have text, binary, summary or none.";

    $g_options{"searchfile|sf:s"} = \$gopt_searchfile;
    $g_optionhelp{"searchfile"} =
        "Specify either a single file or a " .
        "directory of files (including sub-directories) that you would " .
        "like to search the input file for. This is achieved by reading " .
        "the contents of the specified files and creating an MD5 hash for " .
        "each block (based on specified block size).";

    $g_options{"firstblockonly!"} = \$gopt_firstblockonly;
    $g_optionhelp{"firstblockonly"} =
        "Specify that you only want to match " .
        "the first block of the search files. This way you get an " .
        "indication where a file may reside rather than multiple places " .
        "it may reside.";

    $g_options{"unicode!"} = \$gopt_searchunicode;
    $g_optionhelp{"unicode"} =
        "Specify that the search text is unicode. " .
        "This feature is really crude in that it purely puts nulls between " .
        "the search text specified, saving you from having to do it by " .
        "hand.";

    $g_options{"captureheadersize:i"} = \$gopt_captureheadersize;
    $g_optionhelp{"captureheadersize"} =
        "The number of bytes that are to " .
        "be examined for the block header analysis. " .
        " Default is " . CAPTURE_HEADER_SIZE . ".";

    $g_options{"numcommonheaders:i"} = \$gop_numcommonheaders;
    $g_optionhelp{"numcommonheaders"} =
        "The number of occurrences of a " .
        "specific header for it to be output in the header summary. " .
        " Default is " . NUM_COMMON_HEADERS . ".";
    return;
}

#-----------------------------------------------------------------------------
# Process each found entry in the search directory. This is the File::Find
# callback used by ProcessSearchFiles.
#-----------------------------------------------------------------------------
sub ProcessSearchDirectory
{
    # Add the found file to the list of files to process.
    push (@g_filestoprocess, $File::Find::name) if (-f $File::Find::name);
    return;
}

#-----------------------------------------------------------------------------
# Collect the search files. The argument is either a single file or a
# directory that is walked recursively; every file found is queued in
# @g_filestoprocess and then handed to AddInputFilesToMD5Hash.
#-----------------------------------------------------------------------------
sub ProcessSearchFiles
{
    # Get the passed in search file or directory to process.
    my ($searchfile) = @_;

    # If the user specified a directory.
    if (-d $searchfile)
    {
        # Process all files in the search directory.
        find ({ wanted => \&ProcessSearchDirectory, no_chdir => 1 },
              $searchfile);
    }
    else
    {
        # Add the single file to the array.
        push (@g_filestoprocess, $searchfile);
    }

    # Add all the input files to the MD5 hash for later processing.
    AddInputFilesToMD5Hash();
    return;
}

#-----------------------------------------------------------------------------
# Read in a file of hashes that we can later use for matching when we read
# in the blocks of the input file. One hash is expected per line. Dies if the
# file cannot be opened or a line is not a 32 digit hex string.
#-----------------------------------------------------------------------------
sub ReadMD5HashFile
{
    # Get the passed in filename to read.
    my ($md5file) = @_;

    # Open the file.
    open (my $md5fh, '<', $md5file) or die "Couldn't open $md5file: $!\n";

    # Read in the MD5 file specified by the user.
    while (my $line = <$md5fh>)
    {
        # Remove all whitespace including the CR LF at the end.
        $line =~ s/\s+//g;

        # Blank lines carry no hash; ignore them.
        next if ($line eq "");

        # Check that the entry is exactly a hash.
        if ($line !~ /\A[a-f0-9]{32}\z/i)
        {
            # We are not going to continue until the user fixes this.
            die "$line from $md5file does not look like a valid MD5 hash.";
        }

        # md5_hex produces lower-case digits, so store the key the same way
        # or an upper-case entry could never be matched.
        $line = lc ($line);

        # There is a slight possibility that the hash already exists.
        if (exists $g_md5hashes{$line})
        {
            # Add the entry to the existing entry.
            $g_md5hashes{$line} .= ";" . $md5file;
        }
        else
        {
            # A new hash.
            $g_md5hashes{$line} = $md5file;
        }
    }

    # Done with the file now.
    close ($md5fh);
    return;
}

#-----------------------------------------------------------------------------
# Output the buffer in a nice formatted way. We do a lot of work to make sure
# we output all that the user requested in addition to highlighting searched
# for terms as appropriate.
#
# Also, if the user specifies a start and/or end/length then we pad the data
# that is output to retain the block positions of all the bytes. For example
# if you request to start halfway through a block, the data will be output
# with padding such that the next block starts correctly in the output.
#
#   $buffer      the bytes to output
#   $offset      byte offset of the start of the block in the input file
#   $padding     bytes skipped at the front of the block
#   $matchesref  hash ref whose keys are buffer positions to highlight
#-----------------------------------------------------------------------------
sub OutputFormattedBuffer
{
    # Get the passed in parameters.
    my ($buffer, $offset, $padding, $matchesref) = @_;
    my %matches = %$matchesref;

    # The real offset is the offset with the padding, which is usually 0.
    # The padding can be non-0 when the start happens within the current
    # buffer.
    $offset += $padding;

    # Look to see if the offset is on a line boundary. If it is not then
    # we need to figure out padding from the start of the line to where
    # the offset is.
    my $offsetrem = $offset % $gopt_linewidth;

    # Modify the offset value so that the math is easier to work with.
    $offset -= $offsetrem;

    # If there is some pre-padding required, fill the start of the buffer
    # with '*'. The '*' has no real significance other than it is easy to
    # see when debugging. It also needs to be printable for viewing and so
    # it doesn't get removed in ASCII buffer.
    $buffer = ('*' x $offsetrem) . $buffer if ($offsetrem > 0);

    # The block number is the offset divided by the block size. The offset
    # need not be a multiple of the block size when the start is inside a
    # block, so keep only the whole part.
    my $blocknum = int ($offset / $gopt_blocksize);

    # Remove non-printable characters from the ascii buffer.
    my $asciibuf = RemoveNonPrintableChars ($buffer);

    # Create the arrays based on the unpack information we determined.
    my @abuf = unpack $g_abufformat, $asciibuf;
    my @hbuf = unpack $g_hbufformat, $buffer;

    # For all of the lines in the block.
    for my $i (0 .. $g_linesperblock - 1)
    {
        # Might as well stop if there is no more to output.
        last if (!defined ($abuf[$i]) || (length ($abuf[$i]) == 0));

        # Create a temp hex buffer with spaces between each pair of hex chars.
        my $hbuftempspaces = $hbuf[$i];
        $hbuftempspaces =~ s/(..)/$1 /g;

        # Split the strings into arrays so that we can add highlights around
        # the text and then reconstitute the string later.
        my @abuftemparr = split //, $abuf[$i];
        my @hbuftemparr = split / /, $hbuftempspaces;

        # Current position is the current line x line width.
        my $cur = $i * $gopt_linewidth;

        # For each of the matches. The keys are left unsorted as the number
        # of keys should be fairly small.
        for my $j (keys %matches)
        {
            # We have to offset the match position with the offset.
            my $matchpos = $j + $offsetrem;

            # Only process entries that fall on this line.
            next if (($matchpos < $cur) ||
                     ($matchpos >= ($cur + $gopt_linewidth)));

            # Ignore a position past the end of a short final line so that
            # no element is invented just to carry highlight codes.
            my $col = $matchpos - $cur;
            next if ($col > $#abuftemparr);

            # Add the highlighting for ASCII and for hex.
            $abuftemparr[$col] =
                $gopt_highlight . $abuftemparr[$col] . $gopt_highoff;
            $hbuftemparr[$col] =
                $gopt_highlight . $hbuftemparr[$col] . $gopt_highoff;
        }

        # Reassemble the strings from the arrays so that we can output them.
        my $abuftemp = join '', @abuftemparr;
        my $hbuftemp = join ' ', @hbuftemparr;

        # Remove whitespace at the end of the hex buffer.
        $hbuftemp =~ s/\s$//;

        # Determine how many spaces are needed to pad after the current line.
        my $postspaces = ($gopt_linewidth - length ($abuf[$i])) * 3;

        # For the first line blank out the filler that was put before the
        # data. A byte takes three columns in the hex buffer and one in the
        # ASCII buffer, so each is replaced by the same number of spaces to
        # keep the columns lined up with the following lines.
        if (($i == 0) && ($offsetrem > 0))
        {
            substr ($hbuftemp, 0, $offsetrem * 3) = " " x ($offsetrem * 3);
            substr ($abuftemp, 0, $offsetrem) = " " x $offsetrem;
        }

        # Add the padding after the hex buffer. Don't need padding after the
        # ASCII buffer as it is the last item to be output on a line.
        $hbuftemp .= " " x $postspaces;

        # Holder for the list of variables to output. Only the information
        # the user wants is included. The offset is pushed once per enabled
        # offset column.
        my @outlist;
        push (@outlist, $blocknum) if ($gopt_outblocknum);
        push (@outlist, $offset) if ($gopt_outoffsetdec);
        push (@outlist, $offset) if ($gopt_outoffsethex);
        push (@outlist, $hbuftemp) if ($gopt_outhex);
        push (@outlist, $abuftemp) if ($gopt_outascii);

        # Do the output. sprintf nicely takes an array as the final argument
        # allowing us to dynamically pass the fields we want to output.
        my $outline = sprintf ($g_outformat, @outlist);
        print ("$outline\n");

        # Remove the highlighting for the PDF output as PDF does not support
        # ANSI escape characters. An empty or undefined marker is skipped:
        # searching for an empty string always succeeds and so would never
        # finish.
        my $pdfline = $outline;
        for my $marker ($gopt_highlight, $gopt_highoff)
        {
            next if (!defined ($marker) || (length ($marker) == 0));
            $pdfline =~ s/\Q$marker\E//g;
        }

        # Output to PDF small-like.
        DCULHelper::PrintLnPDFSmall ($pdfline);

        # Next line please.
        $offset += $gopt_linewidth;
    }

    # Output a new line between blocks if the user wants to have them
    # nicely separated.
    DCULHelper::PrintLn ("") if ($gopt_separateblocks);
    return;
}

#-----------------------------------------------------------------------------
# Output summary info on the metrics we obtain about the block read. The
# output is one TAB separated row per block, with a row of column headings
# before the first block.
#
# NOTE(review): the page title is printed on every call, i.e. once per block,
# not once per run - confirm that this is intended.
#-----------------------------------------------------------------------------
sub OutputSummaryInfo
{
    # Summary analysis page.
    DCULHelper::PrintPageTitle ("Summary Analysis");

    # Capture the passed in parameters.
    my ($buffer, $offset) = @_;

    # Get the block number.
    my $blocknum = $offset / $gopt_blocksize;

    # Analyze the passed in buffer.
    my %metrics = AnalyzeBuffer ($buffer);

    # The metrics are always output in sorted name order so that the values
    # line up under the headings.
    my @metricnames = sort keys %metrics;

    # The first time through we need to output the column headings.
    if ($g_outputheadings == 0)
    {
        # No need to do it again.
        $g_outputheadings = 1;

        # Block number and offset are not part of the hash.
        DCULHelper::PrintLn (join ("\t", "blocknum", "offset", @metricnames));
    }

    # Block number and offset are not part of the hash.
    DCULHelper::PrintLn (join ("\t", $blocknum, $offset,
                               @metrics{@metricnames}));
    return;
}

#-----------------------------------------------------------------------------
# Return various metrics of the buffer passed in.
#-----------------------------------------------------------------------------
sub AnalyzeBuffer
{
    # The buffer to analyze.
    my $buffer = $_[0];

    # Holders for the values of metrics.
my $allnulls = 0; my $newlinecount = 0; my $openhtmlcount = 0; my $closehtmlcount = 0; my $closetagcount = 0; my $lettercount = 0; my $longestalphanumcount = 0; my $jfifcount = 0; my $exifcount = 0; my $xmlcount = 0; my $unicode = 0; my $lowestval = 255; my $highestval = 0; # If we cannot find a non-null value, then the buffer only contains # nulls, so we don't need to do any other processing. if ($buffer !~ /[^\0]/) { # The buffer is all nulls. $allnulls = 1; $lowestval = 0; } else { # Capture various metrics about the buffer. while ($buffer =~ /\n/g) { $newlinecount++; } while ($buffer =~ /\/g) { $closetagcount++; } while ($buffer =~ /[a-zA-Z]/g) { $lettercount++; } while ($buffer =~ /jfif/ig) { $jfifcount++; } while ($buffer =~ /exif/ig) { $exifcount++; } while ($buffer =~ /xml/ig) { $xmlcount++; } # Look to see if there is an indication of a unicode buffer. $unicode = 1 if (($buffer =~ /^\xFF\xFE/) || ($buffer =~ /^\xFE\xFF/)); # Loop through buffer finding consecutive alpha numeric string. while ($buffer =~ /([a-zA-Z0-9 \r\n\!\@\#\$\%\^\&\*\(\)\?\-\;\:\<\>\\\/\,\.]+)/g) { # If the string we found is the longest so far. if (length ($1) > $longestalphanumcount) { # Set as the new longest sequence for this buffer. $longestalphanumcount = length ($1); } } # Create an array of characters for easy comparison. my @bufarray = unpack ("C*", $buffer); # Go through the entire contents of the buffer array. for (my $i = 0; $i < (scalar @bufarray); $i++) { # Set cur val if the lowest so far. $lowestval = $bufarray[$i] if ($bufarray[$i] < $lowestval); # Set cur val if the highest so far. $highestval = $bufarray[$i] if ($bufarray[$i] > $highestval); } } # Produce an MD5 hash of the buffer and determine if we have # a match in the MD5 hashes read in initially. my $md5 = md5_hex ($buffer); my $md5hitfile = $g_md5hashes{$md5} || ""; # Holder for the metrics we are going to return. my %metrics; # Set hash entries for each of the metrics. 
$metrics{"newlinecount"} = $newlinecount; $metrics{"openhtmlcount"} = $openhtmlcount; $metrics{"closehtmlcount"} = $closehtmlcount; $metrics{"closetagcount"} = $closetagcount; $metrics{"lettercount"} = $lettercount; $metrics{"longestalphanumcount"} = $longestalphanumcount; $metrics{"jfifcount"} = $jfifcount; $metrics{"exifcount"} = $exifcount; $metrics{"xmlcount"} = $xmlcount; $metrics{"lowestval"} = $lowestval; $metrics{"highestval"} = $highestval; $metrics{"allnulls"} = $allnulls; $metrics{"md5"} = $md5; $metrics{"md5hitfile"} = $md5hitfile; $metrics{"unicode"} = $unicode; # Return the hash we filled in. return %metrics; } #----------------------------------------------------------------------------- # Capture the first few bytes of each block from the input file. Store that # information in a pair of hashes, one for the ASCII representation and one # for the hex representation. #----------------------------------------------------------------------------- sub CaptureHeaders { # The buffer we are going to look at. my $buffer = $_[0]; # For the ASCII version, remove non-printable characters. my $asciibuf = RemoveNonPrintableChars ($buffer); # Get the data into some nice little strings. my $abuf = unpack "a" . $gopt_captureheadersize, $asciibuf; my $hbuf = unpack "H" . ($gopt_captureheadersize * 2), $buffer; # Add a count to the hashes for future analysis. $g_headers{$hbuf} += 1; $g_headersascii{$hbuf} = $abuf; } #----------------------------------------------------------------------------- # Output the information captured for the headers. #----------------------------------------------------------------------------- sub OutputHeaderInformation { # Invocation information page. DCULHelper::PrintPageTitle ("Header Analysis"); # Print the header information. DCULHelper::PrintLn ("Hex\tASCII\tOccurances"); # For all of the headers we captured. foreach my $key (keys %g_headers) { # If there were enough interesting identical headers. 
if ($g_headers{$key} >= $gop_numcommonheaders) { # Output the header information in hex and ASCII. DCULHelper::PrintLn ("$key\t$g_headersascii{$key}\t$g_headers{$key}"); } } } #----------------------------------------------------------------------------- # Replace non printable characters with '.' from the passed in buffer and # return the modified buffer. #----------------------------------------------------------------------------- sub RemoveNonPrintableChars { # Capture the passed in buffer parameter. my $buffer = $_[0]; # Replace all unsafe for output characters with a '.'; $buffer =~ s/[\x00-\x1F\x7F-\xFF]/\./g; # Return the buffer to the caller. return $buffer; } #----------------------------------------------------------------------------- # Determine what highlight to output when searching for matches. #----------------------------------------------------------------------------- sub DetermineHighlight { # If the user specified a highlight then use theirs. if (defined $gopt_highlight) { # Get the first character specified by the user. my $firstchar = substr ($gopt_highlight, 0, 1); # Default for turning off highlighting unless user doesn't want # any highlighting. $gopt_highoff = "\e\[0m"; # Pick the correct highlight string. 
switch ($firstchar) { case "r" { $gopt_highlight = "\e\[31m"; } case "R" { $gopt_highlight = "\e\[1\;31m"; } case "g" { $gopt_highlight = "\e\[32m"; } case "G" { $gopt_highlight = "\e\[1\;32m"; } case "y" { $gopt_highlight = "\e\[33m"; } case "Y" { $gopt_highlight = "\e\[1\;33m"; } case "b" { $gopt_highlight = "\e\[34m"; } case "B" { $gopt_highlight = "\e\[1\;34m"; } case "m" { $gopt_highlight = "\e\[35m"; } case "M" { $gopt_highlight = "\e\[1\;35m"; } case "c" { $gopt_highlight = "\e\[36m"; } case "C" { $gopt_highlight = "\e\[1\;36m"; } case "w" { $gopt_highlight = "\e\[37m"; } case "W" { $gopt_highlight = "\e\[1\;37m"; } case "i" { $gopt_highlight = "\e\[7m"; } case "0" { $gopt_highlight = ""; $gopt_highoff = ""; } else { $gopt_highlight = DEFHIGHLIGHT; } } } else { # Use defaults as the user didn't specify. $gopt_highlight = DEFHIGHLIGHT; $gopt_highoff = DEFUNHIGHLIGHT; } } #----------------------------------------------------------------------------- # Add all of the files specified for searching to a hash of MD5 hashes. The # user has asked us to search for either a particular file, or all files in a # directory. These files are contained in the @g_filestoprocess variable. # We go through and read the contents and create MD5 hashes of either just the # first block, or all blocks of the file. #----------------------------------------------------------------------------- sub AddInputFilesToMD5Hash { # For all of the files the user wants to search for. foreach my $fileprocess (@g_filestoprocess) { # Open the file in binary mode. my $fhin = IO::File->new ($fileprocess); die "Error reading $fileprocess\n" if !defined ($fhin); binmode ($fhin); # Start off at the first block and define the input buffer. my $blocknum = 0; my $buffer = ' ' x $gopt_blocksize; # Loop until we either have no more to read or read less than # a full block or we have done the first block. 
Note that it # is no use to MD5 hash a partial block as we only compare it to # full MD5 hashes on the other end. while ($fhin->sysread ($buffer, $gopt_blocksize) eq $gopt_blocksize) { # Get the MD5 hash of the current block. my $tempmd5 = md5_hex ($buffer); # There is a slight possibility that the hash already exists. if (exists $g_md5hashes{$tempmd5}) { # Append the details to the previous entry. $g_md5hashes{$tempmd5} = $g_md5hashes{$tempmd5}.";" .basename ($fileprocess).":$blocknum"; } else { # Add the hash followed by the block number of the file. $g_md5hashes{$tempmd5} = basename ($fileprocess).":$blocknum"; } # If the user only wants to show the first block exit from loop. last if ($gopt_firstblockonly ne 0); # Next block number please. $blocknum++; } # Close the input file. close ($fhin); } } #----------------------------------------------------------------------------- # Set the search parameters up based on the search string, unicode and case # sensitivity params. #----------------------------------------------------------------------------- sub ProcessSearchString { # Only if the user specified a search string. if (defined $gopt_searchstr) { # Look to see if the search string starts with 0x indicating # a hex search. if ($gopt_searchstr =~ /^$gopt_hexprefix(.*)$/i) { # Set flag to do hex strings and remove the 0x from the search # string. $gopt_searchstr = $1; $gopt_searchstr = join (" ", $gopt_searchstr =~ /.{1,2}/g); $g_searchtype = HEXSEARCH; } # Look to see if the search string starts with 'md5:' indicating # an md5 search. elsif ($gopt_searchstr =~ /^$gopt_md5prefix(.*)$/i) { # Set flag to do md5 strings and remove the md5: from the # search string. $gopt_searchstr = $1; $g_searchtype = MD5SEARCH; $g_md5hashes{$gopt_searchstr} = "SearchString"; } # If the user specified unicode, which doesn't make sense for hex # or md5 searches. elsif ($gopt_searchunicode eq 1) { # Add null's between all specified characters. 
$gopt_searchstr =~ s/(.)/$1\0/g; $gopt_searchstr =~ s/\0$//; } # If the user requested case-insensitive searching or we are doing # a hex/md5 search. The hex and md5 searches need to be case # insensitive as we are matching the values. if (($gopt_casesensitive ne 1) || ($g_searchtype eq HEXSEARCH) || ($g_searchtype eq MD5SEARCH)) { # Use extended regular expression syntax to force # case-insensitive searches. We use the extended syntax as # we can do that easily in data rather than hardcoding. $gopt_searchstr = "(?i)$gopt_searchstr"; } } # Otherwise, if an MD5 type search was specified. elsif ((defined $gopt_md5file) || (defined $gopt_searchfile)) { # This is an MD5 type search. so only a direct match works. $g_searchtype = MD5SEARCH; } } #----------------------------------------------------------------------------- # Create a hash of the matches made so that we can highlight the matches in # the output. We take the buffer that we are searching against and iterate # over it with the search string. #----------------------------------------------------------------------------- sub CreateMatchesHash { # Capture the passed in buffer parameter. my $buffer = $_[0]; # Holder for the matches we are returning. my %matches; # If the user specified a search string and we are not doing an # MD5 search where we don't highlight the hits. if ((defined $gopt_searchstr) && ($g_searchtype ne MD5SEARCH)) { # For all of the matches. while ($buffer =~ /$gopt_searchstr/gs) { # If there was a match. if (length ($&) > 0) { # Figure out the position based on pre and post match. my $starti = length ($`); my $endi = length ($&); # If the search type was hex we need to divide the indices # by 3 to account for 3 chars to represent a byte (we added # spaces between the 2 hex chars for alignment). if ($g_searchtype eq HEXSEARCH) { # Adjust. $starti = $starti /3; $endi = $endi /3; } # Fill the hash in for every single match entry to make # for a quick and easy lookup later on. 
for (my $i = $starti; $i < ($starti + $endi); $i++) { # Add the entry. $matches{$i} = 1; } } } } # Return the matches we found. return %matches; } #----------------------------------------------------------------------------- # Set the defaults for the optional parameters and canonicalize. #----------------------------------------------------------------------------- sub SetDefaults { # Set defaults unless the value has already been defined. $gopt_blocksize = DEFAULT_BLOCK_SIZE unless defined $gopt_blocksize; $gopt_linewidth = 32 unless defined $gopt_linewidth; $gopt_start = 0 unless defined $gopt_start; $gopt_end = -1 unless defined $gopt_end; $gopt_length = -1 unless defined $gopt_length; $gopt_output = "text" unless defined $gopt_output; $gopt_showall = 0 unless defined $gopt_showall; $gopt_casesensitive = 0 unless defined $gopt_casesensitive; $gopt_outblocknum = 1 unless defined $gopt_outblocknum; $gopt_outoffsetdec = 1 unless defined $gopt_outoffsetdec; $gopt_outoffsethex = 1 unless defined $gopt_outoffsethex; $gopt_outascii = 1 unless defined $gopt_outascii; $gopt_outhex = 1 unless defined $gopt_outhex; $gopt_separateblocks = 1 unless defined $gopt_separateblocks; $gopt_firstblockonly = 0 unless defined $gopt_firstblockonly; $gopt_author = "anonymous" unless defined $gopt_author; $gopt_reporttitle = "Untitled" unless defined $gopt_reporttitle; $gopt_outputpdf = 0 unless defined $gopt_outputpdf; $gopt_hexprefix = HEX_PREFIX unless defined $gopt_hexprefix; $gopt_md5prefix = MD5_PREFIX unless defined $gopt_md5prefix; $gopt_searchunicode = 0 unless defined $gopt_searchunicode; $gopt_outputheaders = 0 unless defined $gopt_outputheaders; $gopt_captureheadersize = CAPTURE_HEADER_SIZE unless defined $gopt_captureheadersize; $gop_numcommonheaders = NUM_COMMON_HEADERS unless defined $gop_numcommonheaders; } #----------------------------------------------------------------------------- # Setup all of the output options. 
#-----------------------------------------------------------------------------
sub SetOutputOptions {
    # The columns that can be output, in output order, each with the flag
    # that turns it on and the format used for it.
    my @columns = (
        [ $gopt_outblocknum,  "%06d" ],
        [ $gopt_outoffsetdec, "%09d" ],
        [ $gopt_outoffsethex, "%08X" ],
        [ $gopt_outhex,       "%s"   ],
        [ $gopt_outascii,     "%s"   ],
    );

    # Only add the format string for the chosen output options, each one
    # followed by the column separator.
    foreach my $column (@columns) {
        my ($enabled, $format) = @$column;
        $g_outformat .= "$format : " if ($enabled eq 1);
    }

    # Remove the nasty trailing " : ".
    $g_outformat =~ s/ : $//;

    # Make the output option lowercase to make comparison simpler.
    $gopt_output = lc ($gopt_output);

    # Lines per block is the block divided by the line width.
    $g_linesperblock = $gopt_blocksize / $gopt_linewidth;

    # Create the unpack formats for a whole block: one entry of line width
    # characters per line. Note that hex unpacking needs twice the count as
    # every byte becomes two hex digits.
    my $asciiline = 'a' . $gopt_linewidth;
    my $hexline = 'H' . ($gopt_linewidth * 2);
    $g_abufformat = $asciiline x $g_linesperblock;
    $g_hbufformat = $hexline x $g_linesperblock;

    # If we are outputting binary
    if ($gopt_output eq "binary") {
        # We are not going to output to PDF as well in binary mode.
        $gopt_outputpdf = 0;

        # Change the mode for standard out, otherwise DOS will try to mangle
        # the output (Perl Cookbook: 8.11. Processing Binary Files).
        binmode (STDOUT);
    }
}

#-----------------------------------------------------------------------------
# Check that we got all of the input parameters we needed and that the values
# are consistent.
#-----------------------------------------------------------------------------
# Dies with a message for the user on the first problem found; returns
# normally when all of the options are acceptable. The options are expected
# to have been given their defaults already, with -1 meaning that no end or
# length was specified.
sub CheckCommandLineOptions {
    # The input file.
    die "Must specify an input file.\n" unless defined ($gopt_file);
    die "$gopt_file must be a file.\n" unless (-f $gopt_file);

    # The type of output.
    die "--output must be 'text', 'binary', 'summary', or 'none'.\n"
        unless (($gopt_output eq OUTPUT_TEXT) ||
                ($gopt_output eq OUTPUT_BINARY) ||
                ($gopt_output eq OUTPUT_SUMMARY) ||
                ($gopt_output eq OUTPUT_NONE));

    # The range of the input file to look at.
    die "Cannot specify an end as well as a length.\n"
        if (($gopt_end != -1) && ($gopt_length != -1));
    die "End ($gopt_end) must be greater than or equal to Start ($gopt_start).\n"
        unless (($gopt_end >= $gopt_start) || ($gopt_end == -1));
    die "Start ($gopt_start) and End ($gopt_end) must both be >= 0.\n"
        unless (($gopt_start >= 0) && (($gopt_end >= 0) || ($gopt_end == -1)));
    die "Length ($gopt_length) must be >= 0.\n"
        unless (($gopt_length >= 0) || ($gopt_length == -1));

    # The files to search with.
    die "MD5 Input file ($gopt_md5file) must exist\n"
        if ((defined $gopt_md5file) && (! (-f $gopt_md5file)));
    die "Search Input file ($gopt_searchfile) must exist\n"
        if ((defined $gopt_searchfile) &&
            (! ((-f $gopt_searchfile) || (-d $gopt_searchfile))));
    die "Cannot specify input file for searching and a search string.\n"
        if (((defined $gopt_md5file) || (defined $gopt_searchfile)) &&
            (defined $gopt_searchstr));
    die "Must specify only one of MD5file and searchfile.\n"
        if ((defined $gopt_md5file) && (defined $gopt_searchfile));

    # The block size and line width. Both must be positive: a line width of
    # zero would make the modulus below fail and negative sizes make no
    # sense for reading or formatting.
    die "Line width ($gopt_linewidth) and block size ($gopt_blocksize) ".
        "must both be > 0.\n"
        unless (($gopt_linewidth > 0) && ($gopt_blocksize > 0));
    die "Line width cannot be larger than block size.\n"
        if ($gopt_linewidth > $gopt_blocksize);
    die "Line width ($gopt_linewidth) must divide evenly into ".
        "block size ($gopt_blocksize).\n"
        if (($gopt_blocksize % $gopt_linewidth) != 0);

    # First block only is an option for the search file.
    die "Cannot specify first block only if search file not specified.\n"
        if (($gopt_firstblockonly == 1) && (! defined $gopt_searchfile));
}

#-----------------------------------------------------------------------------
# Output the usage of this script.
#-----------------------------------------------------------------------------
# Prints the usage text to standard output. The required invocation and the
# list of command-line options are built by DCULHelper from %g_options and
# %g_optionhelp; the rest of the text is fixed apart from the script name.
# NOTE(review): the line breaks and indentation inside the here-document are
# a best-effort layout of the wording -- confirm against the intended usage
# screen.
sub PrintUsage {
    # Get the text for the required invocation.
    my $requiredinvocation = DCULHelper::GetRequiredOptionUsage (\%g_options);

    # Get the text for the options.
    my $commandlineoptions = DCULHelper::GetOptionUsage (\%g_options, \%g_optionhelp);

    # Everything up to the ENDOFUSAGE marker is printed, with the variables
    # interpolated.
    print<< "ENDOFUSAGE";
Usage: $g_thisscriptbasename $requiredinvocation [OPTIONS]

General usage tool for dissecting and examining disk image files. You can use
regular expressions to search the image as well as output sections of the
image in text or binary mode.

For more technical information please feel free to read the source code which
has lots of comments.

Command-line options:
$commandlineoptions

Examples:

  Show the contents of the image.dat file:

    $g_thisscriptbasename --file=image.dat

  Show the contents of the same file with width and block size searching for
  a JPEG indicator (jfif) and highlighting with bold blue:

    $g_thisscriptbasename --file=image.dat --linewidth=16 --blocksize=64 --searchstr=jfif --highlight=B

  Show the summary of the same file this time:

    $g_thisscriptbasename --file=image.dat --searchstr=jfif --output=summary

  Show the blocks that match the hashes contained in the md5.txt file:

    $g_thisscriptbasename --f=image.dat --md5file=md5.txt

  Show the header summary information only:

    $g_thisscriptbasename --f=image.dat --output=none --outputheaders

  Search for the string "the" at the start of a block:

    $g_thisscriptbasename --f=image.dat "--searchstr=^the"

  Effectively search for the equivalent of "th." in hex:
  (please note that regular expressions for hex searches are limited in
  this version of the script).

    $g_thisscriptbasename --f=image.dat "--searchstr=0x7468.."

ENDOFUSAGE
}

#-----------------------------------------------------------------------------
# Output the version information for this script.
#-----------------------------------------------------------------------------
# Prints the script name, the VERSION_NUMBER constant and the author and
# copyright details to standard output.
# NOTE(review): the line breaks inside the here-document are a best-effort
# layout of the wording -- confirm against the intended version screen.
sub PrintVersion {
    # The @{[ ... ]} construct lets the constant be interpolated into the
    # here-document.
    print<< "ENDOFVERSION";
$g_thisscriptbasename @{[ VERSION_NUMBER ]}
Written by John Goalby.

Part of the Data Carving Utility Library (DCUL).
http://www.sftsrc.com/DCUL/

Copyright (C) 2006 SoftSource Consulting.
ENDOFVERSION
}

#-----------------------------------------------------------------------------
# The end of the road.
#-----------------------------------------------------------------------------