# Terranigma Decompressor
# Written by Alchemic
# 2011 Sep 10
#
#
#
# A detailed description of the compression format:
#
#   - Compressed data is prefixed with a four-byte header:
#       * uint8   Mystery byte
#       * uint16  Length of the data, once decompressed
#       * uint8   First byte of the decompressed output
#
#   - Following this is the compressed data itself, which is
#     comprised of blocks. Each block contains one control byte
#     and a variable number of additional bytes, which act as
#     arguments for the control byte's commands.
#
#   - The control byte is read from the most significant bit to
#     the least (0x80, 0x40, 0x20 ... 0x01). Commands may be
#     split across multiple control bytes.
#
#   - The commands:
#
#       Literal    (1)    = [NNNNNNNN]
#       Pastcopy A (01)   = [SSSSSSSS SSSSSLLL]           <-- Normal
#       Pastcopy A (01)   = [SSSSSSSS SSSSSLLL XXXXXXXX]  <-- Zero-length case
#       Pastcopy B (00xx) = [SSSSSSSS]
#
#   - Literal is exactly what it says on the tin. The N argument
#     is one uncompressed byte.
#
#   - Pastcopy A reads a BIG-ENDIAN 16-bit integer. The high 13
#     bits indicate the pastcopy source, and the low 3 indicate
#     the pastcopy length.
#
#     e.g. Pastcopy A with [FF C5] as the argument word:
#
#       Source = [   11111 11111000]
#       This is sign-extended to a full 16 bits (OR 0xE000):
#       Source = [11111111 11111000]
#              = -8
#
#       Length = [101] = 5
#       Add 2 to the length
#       Length = 7
#
#   - Pastcopy A's behaviour changes if the given length is 0.
#     Read an additional byte - if it's nonzero, add 1 to it
#     to get the new pastcopy length; and if it's zero, we've
#     reached the end of the compressed data.
#
#   - Pastcopy B reads an 8-bit integer, which indicates the
#     pastcopy source. The length depends on the last two bits
#     of the actual command.
#
#     e.g. Pastcopy B via (0010), [F0]
#
#       Source = [         11110000]
#       This is sign-extended to a full 16 bits (OR 0xFF00):
#       Source = [11111111 11110000]
#              = -16
#
#       Length = (10) = 2
#       Add 2 to the length
#       Length = 4
#
#
#
# This program currently only decompresses data with a mystery
# byte of 0x00 or 0x01. Anecdotally, this covers most (all?) of
# Terranigma's compressed data.

import sys
import struct
import array


def _read_value(stream, fmt):
    """Read one struct-encoded value from *stream*.

    Raises EOFError instead of an opaque struct.error when the stream
    ends in the middle of the value.
    """
    size = struct.calcsize(fmt)
    raw = stream.read(size)
    if len(raw) != size:
        raise EOFError("Unexpected end of compressed data")
    return struct.unpack(fmt, raw)[0]


class _ControlBits(object):
    """Hands out command bits, most significant bit first.

    A new control byte is fetched from the stream only when a bit is
    actually needed, so control bytes and command arguments are consumed
    in the interleaved order the format requires.
    """

    def __init__(self, stream):
        self._stream = stream
        self._byte = 0x00
        self._mask = 0x00  # 0x00 means "no bits left, fetch a new byte"

    def next(self):
        """Return the next control bit as a bool."""
        if self._mask == 0x00:
            self._byte = _read_value(self._stream, "<B")
            self._mask = 0x80
        bit = (self._byte & self._mask) != 0
        self._mask >>= 1
        return bit


def decompress(stream):
    """Decompress one Terranigma data block from a binary stream.

    Reading starts at the stream's current position (the mystery byte of
    the header) and stops either at the end-of-data marker or as soon as
    the declared number of output bytes has been produced.

    Args:
        stream: binary file-like object positioned at the block header.

    Returns:
        array.array('B') holding exactly the declared number of bytes.
        Bytes never written (end marker reached early) stay 0x00.

    Raises:
        ValueError: unsupported mystery byte, or a pastcopy whose source
            lies before the start of the output.
        EOFError: the stream ended in the middle of the compressed data.
    """
    # Header - Mystery byte
    mysteryByte = _read_value(stream, "<B")
    if mysteryByte not in (0x00, 0x01):
        # NOTE(review): the original handling of other values was lost;
        # only 0x00 and 0x01 are documented as supported.
        raise ValueError(
            "Unsupported mystery byte: 0x{0:02X}".format(mysteryByte))

    # Header - Decompressed size
    # NOTE(review): little-endian is assumed here; only the Pastcopy A
    # word is documented as big-endian. Confirm against known-good data.
    decompSize = _read_value(stream, "<H")
    decomp = array.array("B", [0x00] * decompSize)
    decompPos = 0

    # Header - First byte of the output
    firstByte = _read_value(stream, "<B")
    if decompSize > 0:
        decomp[decompPos] = firstByte
        decompPos += 1

    bits = _ControlBits(stream)
    while decompPos < decompSize:
        if bits.next():
            # (1) - Literal case
            decomp[decompPos] = _read_value(stream, "<B")
            decompPos += 1
            continue

        if bits.next():
            # (01) - Pastcopy A case
            # Note that this word is big-endian
            pastCopy = _read_value(stream, ">H")

            # High 13 bits: source, sign-extended to a negative offset
            copySource = ((pastCopy >> 3) & 0x1FFF) - 0x2000

            # Low 3 bits: length
            copyLength = pastCopy & 0x0007
            if copyLength != 0:
                copyLength += 2
            else:
                # Zero-length case: an extra byte gives the real length,
                # and a zero there marks the end of the compressed data.
                copyLength = _read_value(stream, "<B")
                if copyLength == 0:
                    break
                copyLength += 1
        else:
            # (00xx) - Pastcopy B case; xx is a 2-bit length, plus 2
            copyLength = 2
            if bits.next():
                copyLength += 2
            if bits.next():
                copyLength += 1

            # 8-bit source, sign-extended to a negative offset
            copySource = _read_value(stream, "<B") - 0x100

        # A negative index would silently wrap around to the end of the
        # output buffer, so reject it explicitly.
        if decompPos + copySource < 0:
            raise ValueError(
                "Pastcopy source {0:d} at output position {1:d} lies "
                "before the start of the output".format(
                    copySource, decompPos))

        # Truncate copies that would exceed the declared size
        copyLength = min(copyLength, decompSize - decompPos)

        # Copy the past data. This must go byte by byte: source and
        # destination may overlap, which is how runs are encoded.
        for _ in range(copyLength):
            decomp[decompPos] = decomp[decompPos + copySource]
            decompPos += 1

    return decomp


def main(argv=None):
    """Command-line entry point; returns the process exit status.

    Usage: <script> <romFile> <startOffset> [outFile]
    where startOffset is given in hexadecimal.
    """
    if argv is None:
        argv = sys.argv

    # Check for incorrect usage.
    argc = len(argv)
    if argc < 3 or argc > 4:
        sys.stdout.write("Usage: ")
        sys.stdout.write("{0:s} ".format(argv[0]))
        sys.stdout.write("<romFile> <startOffset> [outFile]\n")
        return 1

    # Copy the arguments.
    romFile = argv[1]
    startOffset = int(argv[2], 16)
    outFile = argv[3] if argc == 4 else None

    # Open the ROM and decompress.
    with open(romFile, "rb") as romStream:
        romStream.seek(startOffset)
        try:
            decomp = decompress(romStream)
        except (ValueError, EOFError) as err:
            sys.stderr.write("Error: {0}\n".format(err))
            return 1
        lastOffset = romStream.tell() - 1

    # Report the last offset.
    sys.stdout.write(
        "Last offset read, inclusive: {0:X}\n".format(lastOffset))

    # Write the decompressed output, if appropriate.
    if outFile is not None:
        with open(outFile, "wb") as outStream:
            decomp.tofile(outStream)

    return 0


if __name__ == "__main__":
    sys.exit(main())