/*************************************************************************************

This file is part of FragMend.

Written by Florian Buchholz, Glenn Henderson, David Horvath, and Jeff Jones.

Copyright (c) 2006, Florian Buchholz, Glenn Henderson, David Horvath, and Jeff Jones.
All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.     
    * Redistributions in binary form must reproduce the above copyright notice,
      this list of conditions and the following disclaimer in the documentation
      and/or other materials provided with the distribution.       
    * Neither the name of Florian Buchholz, Glenn Henderson, David Horvath, Jeff
      Jones, nor the names of its contributors may be used to endorse or promote
      products derived from this software without specific prior written
      permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*************************************************************************************/

import java.util.Vector;

public class OfficeUtilities {

	/**
	 * Tests whether the buffer starts with the 8-byte document file identifier
	 * D0 CF 11 E0 A1 B1 1A E1 (the signature of OLE2 compound documents).
	 *
	 * @param data bytes to inspect; may be null or shorter than the identifier
	 * @return true if the first eight bytes equal the identifier; false
	 *         otherwise, including when data is null or holds fewer than
	 *         eight bytes
	 */
	public static boolean containsHeader(byte[] data) {

		// A fragment shorter than the identifier cannot contain it. Answering
		// false here avoids an ArrayIndexOutOfBoundsException on short input.
		if (data == null || data.length < 8)
			return false;

		return ((data[0] == (byte) 0xd0) &&
			(data[1] == (byte) 0xcf) &&
			(data[2] == 0x11) &&
			(data[3] == (byte) 0xe0) &&
			(data[4] == (byte) 0xa1) &&
			(data[5] == (byte) 0xb1) &&
			(data[6] == 0x1a) &&
			(data[7] == (byte) 0xe1));

	}
	
	/** ASCII bytes of the marker text "Word.Document.8" that containsFooter searches for. */
	private static final byte[] WORD_DOCUMENT_MARKER = {
		'W', 'o', 'r', 'd', '.', 'D', 'o', 'c', 'u', 'm', 'e', 'n', 't', '.', '8'
	};

	/**
	 * Tests whether the buffer contains the ASCII text "Word.Document.8"
	 * anywhere within it.
	 *
	 * The search is done on the raw bytes, so the result does not depend on
	 * the platform default charset and no String copy of the buffer is made.
	 *
	 * @param data bytes to search; may be null or empty
	 * @return true if the marker bytes occur in data; false otherwise,
	 *         including when data is null or shorter than the marker
	 */
	public static boolean containsFooter(byte[] data) {

		if (data == null)
			return false;

		// Last offset at which a complete marker could still start.
		int last_start = data.length - WORD_DOCUMENT_MARKER.length;

		for (int start = 0; start <= last_start; start++) {

			int matched = 0;

			while (matched < WORD_DOCUMENT_MARKER.length
					&& data[start + matched] == WORD_DOCUMENT_MARKER[matched])
				matched++;

			if (matched == WORD_DOCUMENT_MARKER.length)
				return true;
		}

		return false;

	}
	
	/** The text "Root Entry" as 16-bit little-endian characters (20 bytes). */
	private static final byte[] ROOT_ENTRY_NAME = {
		'R', 0, 'o', 0, 'o', 0, 't', 0, ' ', 0,
		'E', 0, 'n', 0, 't', 0, 'r', 0, 'y', 0
	};

	/**
	 * Tests whether the buffer starts with the directory entry name
	 * "Root Entry", stored as 16-bit little-endian characters.
	 *
	 * @param data bytes to inspect; may be null or shorter than the name
	 * @return true if the first 20 bytes spell "Root Entry"; false otherwise,
	 *         including when data is null or holds fewer than 20 bytes
	 */
	public static boolean containsDirectory(byte[] data) {

		// Too short to hold the name: answer false instead of failing with an
		// ArrayIndexOutOfBoundsException.
		if (data == null || data.length < ROOT_ENTRY_NAME.length)
			return false;

		for (int i = 0; i < ROOT_ENTRY_NAME.length; i++) {
			if (data[i] != ROOT_ENTRY_NAME[i])
				return false;
		}

		return true;

	}

	/**
	 * Finds the length of the longest run of 4-byte words in which every word,
	 * as read by DataUtilities.getWordLittle, is exactly one greater than the
	 * word before it.
	 *
	 * Words are read at offsets 0, 4, 8, ...; trailing bytes that do not form
	 * a complete word are ignored. A word that does not continue a run starts
	 * a new run of length 1.
	 *
	 * @param data buffer to scan
	 * @return the longest run length found; the result is 1 even when the
	 *         buffer holds fewer than four bytes, because the run counter
	 *         starts at 1
	 */
	public static int getSequenceCount(byte[] data) {

		int longest_run = 0;
		int run_length = 1;
		long previous = 0;

		for (int offset = 0; offset < data.length - 3; offset += 4) {

			long word = DataUtilities.getWordLittle(data, offset);

			// The very first word only seeds the comparison.
			if (offset != 0) {
				if (word == previous + 1) {
					run_length++;
				}
				else {
					longest_run = Math.max(longest_run, run_length);
					run_length = 1;
				}
			}

			previous = word;
		}

		// The run still open at the end of the buffer counts as well.
		return Math.max(longest_run, run_length);

	}
	
	/**
	 * Parses the 512-byte document header at the start of the buffer, prints
	 * the header fields, allocation tables and root directory to standard
	 * output, and derives a document size from the highest sector in use.
	 *
	 * The size is computed as 512 + (max + 1) * sector_size, where max is the
	 * highest index in the sector allocation table (SAT) whose entry is not
	 * 0xffffffff.
	 *
	 * Only the master table entries stored in the header itself (offsets 76
	 * to 511) are read. If the header announces more SAT sectors than it
	 * lists, the remaining master table slots keep the value 0.
	 *
	 * @param data buffer that is expected to start with the document header
	 * @return the computed size in bytes, or -1 if the buffer is shorter than
	 *         512 bytes, does not start with the document file identifier,
	 *         announces an impossible SAT sector count, or a table or stream
	 *         lies outside the buffer
	 */
	public static int getSize(byte[] data) {
		
		int pos;		
		
		if (data.length < 512) {
			System.out.println("File too short to contain document header");
			return -1;
		}
		
		if  (!((data[0] == -48) &&   // 0xd0
			(data[1] == -49) && //0xcf
			(data[2] == 0x11) &&
			(data[3] == -32) && // 0xe0
			(data[4] == -95) && // 0xa1
			(data[5] == -79) && // 0xb1
			(data[6] == 0x1a) &&
			(data[7] == -31))) { // 0xe1
				System.out.println("No document file identifier found");
				return -1;
		}
		
		System.out.println("Byte order: " + String.format("0x%x", DataUtilities.getShortBig(data, 28)));
		
		// Sector sizes are stored as powers of two.
		int sector_size = 1 << DataUtilities.getShortLittle(data, 30);
		
		System.out.println("Document sector size: " + sector_size);
		
		int short_sector_size = 1 << DataUtilities.getShortLittle(data, 32);
		
		System.out.println("Document short sector size: " + short_sector_size);
		
		long sat_sector_count = DataUtilities.getWordLittle(data, 44);
		
		System.out.println("Sectors in sector allocation table: " + sat_sector_count);
		
		long directory_sid = DataUtilities.getWordLittle(data, 48);
		
		System.out.println("Directory stream starts at sector: " + directory_sid);
		
		long min_std_stream_size = DataUtilities.getWordLittle(data, 56);
		
		System.out.println("Minimum standard stream size: " + min_std_stream_size);
		
		long short_sector_table = DataUtilities.getWordLittle(data, 60);
		
		System.out.println("Short-sector allocation table starts at sector: " + short_sector_table);
		
		long short_sector_count = DataUtilities.getWordLittle(data, 64);
		
		System.out.println("Short sector count: " + short_sector_count);
		
		long master_sector_table = DataUtilities.getWordLittle(data, 68);
		
		System.out.println("Master sector allocation table starts at sector: " + master_sector_table);
		
		long master_sector_count = DataUtilities.getWordLittle(data, 72);
		
		System.out.println("Master sector count: " + master_sector_count);
		
		// The count becomes an array length below. A document needs at least
		// one SAT sector, and the table cannot have more sectors than the
		// buffer has bytes. Rejecting other values keeps a corrupt header from
		// aborting with an uncaught ArrayIndexOutOfBoundsException or
		// NegativeArraySizeException.
		if (sat_sector_count < 1 || sat_sector_count > data.length) {
			System.out.println("Invalid number of sectors in sector allocation table: " + sat_sector_count);
			return -1;
		}
		
		long last_sector = DataUtilities.getWordLittle(data, 76);
		
		System.out.println("Sectors in master allocation table:");
		
		long[] msat = new long[(int)sat_sector_count];
		int index = 0;
		boolean terminated = false;
		
		// Each pass stores the entry read on the previous pass and looks one
		// entry ahead; the list ends at the first 0xffffffff entry. The index
		// bound stops the loop when the header lists more entries than the
		// announced count.
		for (pos = 80; pos < 512 && index < msat.length; pos += 4) {
			
			long current_sector = DataUtilities.getWordLittle(data, pos);
						
			msat[index] = last_sector;
			index++;
			
			if (current_sector == 0xffffffffL) {
				System.out.println(last_sector);
				terminated = true;
				break;
			}
			
			System.out.print(last_sector + ", ");
			
			last_sector = current_sector;
			
		}
		
		if (!terminated) {
			if (index < msat.length) {
				// Every header slot is in use: the entry read from the final
				// slot is still pending and must be stored as well.
				msat[index] = last_sector;
				System.out.println(last_sector);
			}
			else {
				// More entries listed than announced: just end the printed list.
				System.out.println();
			}
		}
		
		try {
		
			/* load SAT */
			
			System.out.println("Loading SAT");
						
			long[] sat = getSAT(data, msat, sector_size);
			
			// Scan backwards for the last SAT entry that is not 0xffffffff.
			int max;
			
			for (max = sat.length-1; max > 0; max--)
				if (sat[max] != 0xffffffffL)
					break;
			
			System.out.println("Highest sector number found: " + max);
			
			/* load SSAT */
			
			long[] ssat = new long[0];
			
			if (short_sector_table != 0xfffffffeL) {
			
				System.out.println("\nLoading sectors for SSAT\n");
			
				long[] ssat_sectors = getStreamSectors(data, sat, short_sector_table);
			
				System.out.println("\nLoading SSAT\n");
			
				ssat = getSAT(data, ssat_sectors, sector_size);
			}
			else
				System.out.println("There is no short sector stream");
			
			/* Process root directory entry 0 */
			
			pos = 512 + ((int)directory_sid) * sector_size;
			
			long short_sector_stream_start = DataUtilities.getWordLittle(data, pos+116);
			
			System.out.println("Short sector stream id: " + short_sector_stream_start);
			
			long short_sector_stream_size = DataUtilities.getWordLittle(data, pos+120);
			
			System.out.println("Short sector stream size: " + short_sector_stream_size);

			long[] short_stream_sectors = getStreamSectors(data, sat, short_sector_stream_start);

			System.out.println("Short sector stream consists of sectors: ");
			printArrayRange(short_stream_sectors);
						
			// The stream content itself is not used here; the call is kept
			// because getStream reports sectors that lie outside the buffer.
			getStream(data, short_stream_sectors, (int)short_sector_stream_size, sector_size);
			
			System.out.println("Short stream data:\n");
			
			long[] dir_stream_sectors = getStreamSectors(data, sat, directory_sid);
			System.out.println("Root directory stream consists of sectors: ");
			printStreamSectors(dir_stream_sectors);

			printArrayRange(dir_stream_sectors);
			

			// Size 0 makes getStream read every sector of the chain in full.
			byte[] directory_data = getStream(data, dir_stream_sectors, 0, sector_size);

			System.out.println("Root directory:\n");
			
			processDirectory(directory_data, sat, ssat, min_std_stream_size, sector_size, data);
			
			return 512 + (max+1) * sector_size;
			
		}
		catch (IndexOutOfBoundsException ioobe) {
			// pos holds the last offset computed in this method, which is not
			// necessarily the offset whose access failed.
			System.out.println("Can't access byte " + pos + ". Data length: " + data.length);
		}
		
		return -1;
		
	}
	
	/**
	 * Builds an allocation table by concatenating the 4-byte entries of the
	 * listed sectors, printing every sector and every entry as it goes.
	 *
	 * A sector with id s is read from byte position 512 + s * sector_size.
	 *
	 * @param data        the whole document buffer
	 * @param msat_table  ids of the sectors holding the table, in order
	 * @param sector_size size of one sector in bytes
	 * @return the table entries in reading order
	 * @throws IndexOutOfBoundsException if a listed sector lies outside data
	 */
	private static long[] getSAT(byte[] data, long[] msat_table, int sector_size) throws IndexOutOfBoundsException {

		long[] entries = new long[msat_table.length * sector_size / 4];
		int next_entry = 0;

		for (long sid : msat_table) {

			int sector_start = 512 + ((int) sid) * sector_size;

			System.out.println("Getting SIDs from sector " + sid + " Byte position: " + sector_start);

			int offset = 0;

			while (offset < sector_size) {

				long value = DataUtilities.getWordLittle(data, sector_start + offset);

				entries[next_entry] = value;

				System.out.println("SAT Entry " + next_entry + ": " + value);

				next_entry++;
				offset += 4;
			}
		}

		return entries;

	}
	
	/**
	 * Follows a sector chain through an allocation table and returns the
	 * sector ids in chain order, beginning with the start sector.
	 *
	 * Entry i of the table names the sector that follows sector i; the value
	 * 0xfffffffe ends the chain. If the chain leaves the table, a message is
	 * printed and the ids collected so far are returned, including the id
	 * that could not be looked up. If the chain revisits any sector, a loop
	 * is reported and an empty array is returned.
	 *
	 * @param data      not used by this method
	 * @param sat       allocation table (SAT or SSAT) to follow
	 * @param start_sid id of the first sector of the stream
	 * @return the sector ids of the stream, or an empty array if the table
	 *         contains a loop
	 */
	private static long[] getStreamSectors(byte[] data, long[] sat, long start_sid) {
		
		Vector<Long> sids = new Vector<Long>();
		
		sids.add(Long.valueOf(start_sid));
		
		long next_sid = start_sid;
		
		// Marks every sector already on the chain, so that any cycle is
		// detected, not only one that leads back to the start sector.
		boolean[] visited = new boolean[sat.length];
		
		try {
		
			int index = (int)start_sid;
			
			visited[index] = true;
			next_sid = sat[index];
		
			while (next_sid != 0xfffffffeL) {
				
				index = (int)next_sid;
				
				if (index >= 0 && index < sat.length && visited[index]) {
					System.out.println("!!! Detected loop in (S)SAT table, aborting !!!");
					return new long[0];
				}
				
				sids.add(Long.valueOf(next_sid));
				
				// An id outside the table fails here and is reported below.
				visited[index] = true;
				next_sid = sat[index];
			}
		}
		catch (IndexOutOfBoundsException ioobe) {
		
			System.out.println("Getting stream SIDs: error accessing SAT entry " + next_sid);
			
		}
		
		long[] ret = new long[sids.size()];
		
		for (int i = 0; i < ret.length; i++)
			ret[i] = sids.elementAt(i).longValue();
		
		return ret;
		
	}
	
	/**
	 * Prints the sector ids on one line of standard output, separated by
	 * ", " and terminated by a newline character. Nothing is printed for an
	 * empty array.
	 *
	 * @param sectors sector ids to print
	 */
	private static void printStreamSectors(long[] sectors) {

		StringBuilder line = new StringBuilder();

		int last = sectors.length - 1;

		for (int i = 0; i <= last; i++) {
			line.append(sectors[i]);
			line.append(i == last ? "\n" : ", ");
		}

		System.out.print(line.toString());

	}
	
	/**
	 * Copies the content of a stream out of the document buffer by reading
	 * the listed sectors one after another.
	 *
	 * Reading stops as soon as size bytes have been copied, even if the
	 * sector list is longer. If a sector lies outside the buffer, the failure
	 * is printed and the bytes copied so far are returned; the rest of the
	 * result stays zero.
	 *
	 * @param data        the whole document buffer
	 * @param sectors     ids of the sectors that make up the stream, in order
	 * @param size        number of bytes to copy; 0 means every listed sector
	 *                    in full
	 * @param sector_size size of one sector in bytes
	 * @return an array of size bytes, or an empty array if size is negative
	 */
	private static byte[] getStream(byte[] data, long[] sectors, int size, int sector_size) {
		
		if (size == 0) 
			size = sectors.length * sector_size;
		
		// Callers narrow a 32-bit size field to int, so a corrupt or very
		// large size can arrive negative. Report it rather than failing with
		// an uncaught NegativeArraySizeException.
		if (size < 0) {
			System.out.println("Invalid stream size: " + size);
			return new byte[0];
		}
		
		byte[] ret = new byte[size];
		
		int counter = 0;
		int pos = 0;
		
		try {
			// Both loops stop once the stream is complete, so a sector list
			// longer than the stream no longer triggers a bogus out-of-bounds
			// report.
			for (int i = 0; i < sectors.length && counter < size; i++) {
				pos = getBytePos(sectors[i], sector_size);
				for (int j = 0; j < sector_size && counter < size; j++) {
					ret[counter] = data[pos+j];
					counter++;
				}
			}
		}
		catch (IndexOutOfBoundsException ioobe) {
			System.out.println(ioobe);
			System.out.println("Count: " + counter + " absolute byte: " + pos);
		}
		return ret;
	}
	
	/**
	 * Copies the content of a short stream by following its chain through
	 * the short-sector allocation table.
	 *
	 * Short sector s is read from byte position s * short_sector_size of
	 * data. The id of every sector read is appended to sectors. Reading stops
	 * at the end-of-chain value 0xfffffffe or as soon as size bytes have been
	 * copied. If the chain leaves the table or the buffer, the failure is
	 * printed to standard error and the bytes copied so far are returned.
	 *
	 * @param data              buffer holding the short sectors
	 * @param ssat              short-sector allocation table
	 * @param start_entry       id of the first short sector of the stream
	 * @param size              number of bytes to copy
	 * @param short_sector_size size of one short sector in bytes
	 * @param sectors           receives the ids of the sectors that were read
	 * @return an array of size bytes, or an empty array if size is negative
	 */
	private static byte[] getShortStream(byte[] data, long[] ssat, long start_entry, int size, int short_sector_size, Vector<Long> sectors) {
		
		// Report a negative size rather than failing with an uncaught
		// NegativeArraySizeException.
		if (size < 0) {
			System.err.println("Invalid short stream size: " + size);
			return new byte[0];
		}
		
		byte[] ret = new byte[size];
		
		long current_sector = start_entry;
		int counter = 0;
		int pos = 0;
		
		// A chain without repeats cannot be longer than the table. This bound
		// guarantees termination on a cyclic table even when no bytes are
		// copied per sector.
		int hops = 0;
		
		try {
		
			// Stop once the stream is complete, so that a chain longer than
			// the stream does not trigger a bogus out-of-bounds report.
			while (current_sector != 0xfffffffeL && counter < size && hops <= ssat.length) {
		
				pos = ((int)current_sector) * short_sector_size;
				for (int i = 0; i < short_sector_size && counter < size; i++) {
					ret[counter] = data[pos+i];
					counter++;
				}
				sectors.add(Long.valueOf(current_sector));
				current_sector = ssat[(int)current_sector];
				hops++;
			}
		}
		catch (IndexOutOfBoundsException ioobe) {
			System.err.println(ioobe);
		}
		return ret;
	}
	
	/**
	 * Prints every 128-byte entry of a directory stream and, for entries
	 * whose name contains "Workbook", prints the records of that stream.
	 *
	 * For each entry the name (64 bytes decoded as UTF-16LE), type, left,
	 * right and root node ids, first sector id and stream size are printed.
	 * For entries of type 1 or 2 the sector chain is printed as well; it is
	 * looked up in the short-sector table when the stream size is below
	 * min_std_stream_size and in the standard table otherwise.
	 *
	 * @param data                the directory stream
	 * @param sat                 sector allocation table
	 * @param ssat                short-sector allocation table
	 * @param min_std_stream_size streams smaller than this use the short-sector table
	 * @param sector_size         size of one standard sector in bytes
	 * @param stream              the whole document buffer, from which workbook data is read
	 */
	private static void processDirectory(byte[] data, long[] sat, long[]ssat, long min_std_stream_size, int sector_size, byte[] stream) {

		for (int base = 0, did = 0; base < data.length; base += 128, did++) {

			System.out.println("DID " + did);

			String name;

			try {
				name = new String(data, base, 64, "UTF-16LE");
			}
			catch (java.io.UnsupportedEncodingException uee) {
				return;
			}

			System.out.println("Name: " + name);

			byte entry_type = data[base + 66];

			System.out.println("Type: " + getDirEntryType(entry_type));

			System.out.println("Left: " + DataUtilities.getWordLittle(data, base + 68));
			System.out.println("Right: " + DataUtilities.getWordLittle(data, base + 72));
			System.out.println("Root node: " + DataUtilities.getWordLittle(data, base + 76));

			long first_sid = DataUtilities.getWordLittle(data, base + 116);

			System.out.println("SID: " + first_sid);

			long stream_size = DataUtilities.getWordLittle(data, base + 120);

			System.out.println("Stream size: " + stream_size);

			boolean has_chain = (entry_type == 1) || (entry_type == 2);

			if (has_chain) {

				// Small streams are chained through the short-sector table.
				boolean is_short = (int)stream_size < min_std_stream_size;

				long[] chain = getStreamSectors(data, is_short ? ssat : sat, first_sid);

				System.out.println((is_short ? "Short " : "") + "Stream consists of sectors ");
				printArrayRange(chain);

				if (name.matches(".*Workbook.*")) {

					// NOTE(review): getStream treats the ids as standard
					// sectors even when the chain came from the short-sector
					// table; confirm that short workbook streams cannot occur.
					byte[] workbook = getStream(stream, chain, (int) stream_size, sector_size);

					System.out.println("\nProcessing Excel Workbook:\n");
					processWorkBook(workbook, sector_size);
				}
			}

			System.out.print("\n");
		}

	}
	
	/**
	 * Walks the records of a workbook stream and prints one line per record:
	 * the byte offset, the sector the offset falls in, the record type name
	 * and the record size.
	 *
	 * Each record starts with a 2-byte type followed by a 2-byte size, both
	 * read with DataUtilities.getShortLittle; the next record starts 4 + size
	 * bytes later. The walk ends at the end of the stream, or as soon as a
	 * record header cannot be read, in which case the exception is printed.
	 *
	 * @param data        the workbook stream
	 * @param sector_size sector size used only to compute the printed sector number
	 */
	public static void processWorkBook(byte[] data, int sector_size) {

		try {

			int offset = 0;

			while (offset < data.length) {

				int record_type = DataUtilities.getShortLittle(data, offset);
				int record_size = DataUtilities.getShortLittle(data, offset + 2);
				int sector = offset / sector_size;

				System.out.println(offset + "(" + sector + ")" + ": Record type: " 
					+ getWorkBookRecordType(record_type) + " Size: " + record_size);

				// Skip the 4-byte record header plus the record body.
				offset += 4 + record_size;
			}
		}
		catch (IndexOutOfBoundsException ioobe) {
			System.out.println(ioobe);
		}
	}
	
	/**
	 * Converts a sector id into its byte position in the document buffer:
	 * 512 plus the id (narrowed to int) times the sector size.
	 *
	 * @param sid         sector id
	 * @param sector_size size of one sector in bytes
	 * @return byte position of the first byte of the sector
	 */
	private static int getBytePos(long sid, int sector_size) {
		int sector_index = (int) sid;
		int offset_in_sectors = sector_index * sector_size;
		return offset_in_sectors + 512;
	}
	
	/**
	 * Prints the values on one line of standard output, collapsing runs of
	 * consecutive values into "first-last" and separating the parts by ", ".
	 * For example 1, 2, 3, 7, 9, 10 is printed as "1-3, 7, 9-10". An empty
	 * array prints an empty line.
	 *
	 * @param values values to print, in the order given
	 */
	private static void printArrayRange(long[] values) {

		// Built in a StringBuilder so that long sector lists are not copied
		// again for every part appended.
		StringBuilder range = new StringBuilder();

		int start = 0;

		for (int i = 0; i < values.length; i++) {

			boolean last = (i + 1 >= values.length);

			// Still inside a run of consecutive values: keep going.
			if (!last && values[i] + 1 == values[i + 1])
				continue;

			if (start != i)
				range.append(values[start]).append("-").append(values[i]);
			else
				range.append(values[i]);

			if (!last)
				range.append(", ");

			start = i + 1;
		}

		System.out.println(range.toString());

	}
	
	/**
	 * Maps the type byte of a directory entry to a readable name.
	 *
	 * @param type type byte of the entry
	 * @return the name for types 0 to 5, "Unknown" for any other value
	 */
	private static String getDirEntryType(byte type) {

		// Indexed by type value.
		String[] names = {
			"Empty",
			"User storage",
			"User stream",
			"LockBytes",
			"Property",
			"Root storage"
		};

		if (type >= 0 && type < names.length)
			return names[type];

		return "Unknown";
	}
	
	private static String getWorkBookRecordType(int type) {
		
		switch(type) {
		
			case 0x0006: return "FORMULA";
			case 0x000a: return "EOF";
			case 0x000c: return "CALCCOUNT";
			case 0x000d: return "CALCMODE";
			case 0x000e: return "PRECISION";
			case 0x000f: return "REFMODE";
			case 0x0010: return "DELTA";
			case 0x0011: return "ITERATION";
			case 0x0012: return "PROTECT";
			case 0x0013: return "PASSWORD";
			case 0x0014: return "HEADER";
			case 0x0015: return "FOOTER";
			case 0x0017: return "EXTERNSHEET";
			case 0x0018: return "NAME";
			case 0x0019: return "WINDOWPROTECT";
			case 0x001a: return "VERTICALPAGEBREAKS";
			case 0x001b: return "HORIZONTALPAGEBREAKS";
			case 0x001c: return "NOTE";
			case 0x001d: return "SELECTION";
			case 0x0022: return "DATEMODE";
			case 0x0026: return "LEFTMARGIN";
			case 0x0027: return "RIGHTMARGIN";
			case 0x0028: return "TOPMARGIN";
			case 0x0029: return "BOTTOMMARGIN";
			case 0x002a: return "PRINTHEADERS";
			case 0x002b: return "PRINTGRIDLINES";
			case 0x002f: return "FILEPASS";
			case 0x0031: return "FONT";
			case 0x003c: return "CONTINUE";
			case 0x003d: return "WINDOW1";
			case 0x0040: return "BACKUP";
			case 0x0041: return "PANE";
			case 0x0042: return "CODEPAGE";
			case 0x0051: return "DCONREF";
			case 0x0055: return "DEFCOLWIDTH";
			case 0x0059: return "XCT";
			case 0x005a: return "CRN";
			case 0x005b: return "FILESHARING";
			case 0x005c: return "WRITEACCESS";
			case 0x005e: return "UNCALCED";
			case 0x005f: return "SAVERECALC";
			case 0x0063: return "OBJECTPROTECT";
			case 0x007d: return "COLINFO";
			case 0x0080: return "GUTS";
			case 0x0081: return "WSBOOL";
			case 0x0082: return "GRIDSET";
			case 0x0083: return "HCENTER";
			case 0x0084: return "VCENTER";
			case 0x0085: return "BOUNDSHEET";
			case 0x0086: return "WRITEPROT";
			case 0x008c: return "COUNTRY";
			case 0x008d: return "HIDEOBJ";
			case 0x0090: return "SORT";
			case 0x0092: return "PALETTE";
			case 0x0099: return "STANDARDWIDTH";
			case 0x00a0: return "SCL";
			case 0x00a1: return "SETUP";
			case 0x00bd: return "MULRK";
			case 0x00be: return "MULBLANK";
			case 0x00d6: return "RSTRING";
			case 0x00d7: return "DBCELL";
			case 0x00da: return "BOOKBOOL";
			case 0x00dd: return "SCENPROTECT";
			case 0x00e0: return "XF";
			case 0x00e5: return "MERGEDCELLS";
			case 0x00e9: return "BITMAP";
			case 0x00ef: return "PHONETIC";
			case 0x00fc: return "SST";
			case 0x00fd: return "LABELSST";
			case 0x00ff: return "EXTSST";
			case 0x015f: return "LABELRANGES";
			case 0x0160: return "USESELFS";
			case 0x0161: return "DSF";
			case 0x01ae: return "SUPBOOK";
			case 0x01b0: return "CONDFMT";
			case 0x01b2: return "DVAL";
			case 0x01b8: return "HLINK";
			case 0x01be: return "DV";
			case 0x0200: return "DIMENSIONS";
			case 0x0201: return "BLANK";
			case 0x0203: return "NUMBER";
			case 0x0204: return "LABEL";
			case 0x0205: return "BOOLERR";
			case 0x0207: return "STRING";
			case 0x0208: return "ROW";
			case 0x020b: return "INDEX";
			case 0x0221: return "ARRAY";
			case 0x0225: return "DEFAULTROWHEIGHT";
			case 0x0236: return "TABLEOP";
			case 0x023e: return "WINDOW2";
			case 0x027e: return "RK";
			case 0x0293: return "STYLE";
			case 0x041e: return "FORMAT";
			case 0x04bc: return "SHRFMLA";
			case 0x0800: return "QUICKTIP";
			case 0x0809: return "BOF";
			case 0x0862: return "SHEETLAYOUT";
			case 0x0867: return "SHEETPROTECTION";
			case 0x0868: return "RANGEPROTECTION";
			
		}
		
		return "UNKNOWN ("+type+")";
	}
	
}

