# E.V.O.: Search for Eden Decompressor # Written by Alchemic # 2011 Jul 22 # # # # A terse description of the compression format: # # LZSS with a 4096-byte sliding window. Depending on a header # byte, it is possible to copy 0-397 previously-seen bytes. # References to past data are relative to the current output # position. Control bits (which indicate whether the following # commands are pastcopies or literals) are grouped into eights # and packed into their own bytes. Compressed data is padded # to fit in an even number of bytes. (Word-aligned data?) # # # # In greater detail: # # - Compressed data is prefixed with a three-byte header: an # 8-bit integer related to the size of pastcopies, and a # 16-bit integer indicating the length of the data once # decompressed. # # - Following this is the compressed data, which is comprised # of blocks. Each block contains one control byte and eight # commands. The last block may have fewer than eight. # # - If the last block DOES have eight commands, a null control # byte (0x00) will follow. I think it's just a leftover of # how the programmers' compressor worked ("Write a control # byte after writing eight commands, update its bits as you # write the next eight"). Since there's no more data, it # serves no purpose except to waste 1-2 bytes of storage. # # - The control byte indicates which of the next eight commands # is pastcopy (zero) or literal (one). The control byte is # read one bit at a time, least significant to most (0x01, # 0x02, 0x04 ... 0x80). # # - The commands: # # Pastcopy (0) = [SSSSSSSS LLLLSSSS] <-- Normal # Pastcopy (0) = [SSSSSSSS LLLLSSSS PPPPPPPP] <-- Extended # Literal (1) = [NNNNNNNN] # # - Pastcopy is a bit complicated. Let's take it in parts. # # - PASTCOPY SOURCE # First, we read two bytes as a 16-bit integer. # The low twelve bits of this word (0x-SSS) are the source. # To get the actual source address, add one to the source # value, and subtract this number from the current writing # location. A source of 0 gets the previous byte, 1 gets # the byte before that, and so on. # # - PASTCOPY LENGTH # Look again at the initial 16-bit integer. # The high four bits (0xL---) are the length. # If the length is 0xF and the 0x80 bit of the first byte of # the header is set, it's an extended pastcopy. Read another # byte and add its value to the length. # Finally, regardless of type, add: # (First header byte & 0x7F) # To the length. # # - The maximum length of a pastcopy: # F = Maximum possible original L value # + FF = Maximum possible extended pastcopy value # + 7F = Maximum result of (First header byte & 0x7F) # ----- # 0x18D = 397 bytes # # - Literal is exactly what it says on the tin. The N argument # is one uncompressed byte. import sys import struct import array # Check for incorrect usage. argc = len(sys.argv) if argc < 3 or argc > 4: sys.stdout.write("Usage: ") sys.stdout.write("{0:s} ".format(sys.argv[0])) sys.stdout.write(" [outFile]\n") sys.exit(1) # Copy the arguments. romFile = sys.argv[1] startOffset = int(sys.argv[2], 16) outFile = None if argc == 4: outFile = sys.argv[3] # Open the ROM. romStream = open(romFile, "rb") romStream.seek(startOffset) # Prepare for decompression. firstByte = struct.unpack(">= 12 if (copyLength == 0xF) and bool(firstByte & 0x80): copyLength += struct.unpack("= decompSize: copyLength = decompSize - decompPos # Copy the past data. for i in range(copyLength): decomp[decompPos] = decomp[decompPos - copySource] decompPos += 1 # Prepare to handle the next control bit. controlMask <<= 1 if controlMask > 0x80: controlByte = struct.unpack("