From 0544a39dc3cbefef2917be8d6a3b5c057da22a42 Mon Sep 17 00:00:00 2001 From: modeco80 Date: Fri, 8 Jul 2022 04:23:46 -0500 Subject: [PATCH] Init version of jmmt_tools --- .clang-format | 61 +++++++ .gitignore | 10 ++ CMakeLists.txt | 13 ++ README.md | 30 ++++ cmake/Policies.cmake | 45 +++++ include/jmmt/crc.h | 21 +++ include/jmmt/lzss.h | 39 +++++ include/jmmt/package.h | 92 ++++++++++ src/libjmmt/CMakeLists.txt | 13 ++ src/libjmmt/crc.cpp | 75 ++++++++ src/libjmmt/lzss.cpp | 114 +++++++++++++ src/tools/CMakeLists.txt | 17 ++ src/tools/jmmt_pack_extractor.cpp | 272 ++++++++++++++++++++++++++++++ src/tools/jmmt_renamer.cpp | 110 ++++++++++++ 14 files changed, 912 insertions(+) create mode 100755 .clang-format create mode 100644 .gitignore create mode 100755 CMakeLists.txt create mode 100644 README.md create mode 100755 cmake/Policies.cmake create mode 100644 include/jmmt/crc.h create mode 100644 include/jmmt/lzss.h create mode 100644 include/jmmt/package.h create mode 100644 src/libjmmt/CMakeLists.txt create mode 100644 src/libjmmt/crc.cpp create mode 100644 src/libjmmt/lzss.cpp create mode 100644 src/tools/CMakeLists.txt create mode 100644 src/tools/jmmt_pack_extractor.cpp create mode 100644 src/tools/jmmt_renamer.cpp diff --git a/.clang-format b/.clang-format new file mode 100755 index 0000000..343112f --- /dev/null +++ b/.clang-format @@ -0,0 +1,61 @@ +# Clang-Format file + +# google style is the closest unfortunately +BasedOnStyle: Google + +# force T* or T& +# rather than T * or T & +DerivePointerAlignment: false +PointerAlignment: Left + +# I think if these two aren't the same +# it won't indent with tabs even with UseTab set to Always +TabWidth: 4 +IndentWidth: 4 + +UseTab: Always + +IndentPPDirectives: BeforeHash + +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortFunctionsOnASingleLine: None +AllowShortIfStatementsOnASingleLine: Never + +BinPackArguments: true +BinPackParameters: true 
+BreakConstructorInitializers: BeforeColon +BreakStringLiterals: false + +# 130 columns is good but causes some weird issues I don't quite like +# especially in some codebases +#ColumnLimit: 130 +ColumnLimit: 0 +CompactNamespaces: false + +ConstructorInitializerAllOnOneLineOrOnePerLine: true +ContinuationIndentWidth: 0 + +# turning this on causes major issues with initalizer lists, +# turn it off +Cpp11BracedListStyle: false + +# this is turned on to allow something like: +# +# T MyTValue { +# initalizer list... +# }; +SpaceBeforeCpp11BracedList: true + +FixNamespaceComments: true + +NamespaceIndentation: All +ReflowComments: true + +SortIncludes: CaseInsensitive +SortUsingDeclarations: true + + +SpacesInSquareBrackets: false +SpaceBeforeParens: Never +SpacesBeforeTrailingComments: 1 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4dd14ed --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +/.idea +cmake-build-* +build/ + +# swap +*.kate-swp +*.swp + +# TEMPORARY!!!! +/attic \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100755 index 0000000..40f3eb1 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,13 @@ +cmake_minimum_required(VERSION 3.10) + +# Prohibit in-source tree builds. +if(" ${CMAKE_SOURCE_DIR}" STREQUAL " ${CMAKE_BINARY_DIR}") + message(FATAL_ERROR "In-source builds are strictly prohibited.") +endif() + +include(cmake/Policies.cmake) + +project(jmmt_tools) + +add_subdirectory(src/libjmmt) +add_subdirectory(src/tools) diff --git a/README.md b/README.md new file mode 100644 index 0000000..a5fd4eb --- /dev/null +++ b/README.md @@ -0,0 +1,30 @@ +## JMMT tools + +Tools to work with files for Jonny Moseley Mad Trix on the PS2. + +## The Libraries + +### libjmmt + +A library which has: +- the CRC32 hash implementation(s) used by the game +- the LZSS decompression implementation used by the game +- Documented package file structures + +Used by the following tools. 
+ +## The Tools + +### `jmmt_renamer` + +Renames the .dat files in data/ on the disc to filenames which are actually useful. + +### `jmmt_pack_extractor` + +Extractor for .pak files. Unlike the BMS script, this extractor takes into account several things about the format. + +Currently kind of crashy. Does extract a few archives in full though. + +### `jmmt_met_extractor` + +TODO diff --git a/cmake/Policies.cmake b/cmake/Policies.cmake new file mode 100755 index 0000000..e81dfc5 --- /dev/null +++ b/cmake/Policies.cmake @@ -0,0 +1,45 @@ +# CMake policy configuration + +if(POLICY CMP0026) + cmake_policy(SET CMP0026 NEW) +endif() + +if(POLICY CMP0042) + cmake_policy(SET CMP0042 NEW) # CMake 3.0+ (2.8.12): MacOS "@rpath" in target's install name +endif() + +if(POLICY CMP0046) + cmake_policy(SET CMP0046 NEW) # warn about non-existed dependencies +endif() + +if(POLICY CMP0051) + cmake_policy(SET CMP0051 NEW) +endif() + +if(POLICY CMP0054) # CMake 3.1: Only interpret if() arguments as variables or keywords when unquoted. + cmake_policy(SET CMP0054 NEW) +endif() + +if(POLICY CMP0056) + cmake_policy(SET CMP0056 NEW) # try_compile(): link flags +endif() + +if(POLICY CMP0066) + cmake_policy(SET CMP0066 NEW) # CMake 3.7: try_compile(): use per-config flags, like CMAKE_CXX_FLAGS_RELEASE +endif() + +if(POLICY CMP0067) + cmake_policy(SET CMP0067 NEW) # CMake 3.8: try_compile(): honor language standard variables (like C++11) +endif() + +if(POLICY CMP0068) + cmake_policy(SET CMP0068 NEW) # CMake 3.9+: `RPATH` settings on macOS do not affect `install_name`. +endif() + +if(POLICY CMP0075) + cmake_policy(SET CMP0075 NEW) # CMake 3.12+: Include file check macros honor `CMAKE_REQUIRED_LIBRARIES` +endif() + +if(POLICY CMP0077) + cmake_policy(SET CMP0077 NEW) # CMake 3.13+: option() honors normal variables. 
+endif() diff --git a/include/jmmt/crc.h b/include/jmmt/crc.h new file mode 100644 index 0000000..74b1b34 --- /dev/null +++ b/include/jmmt/crc.h @@ -0,0 +1,21 @@ +#ifndef JMMT_CRC_H +#define JMMT_CRC_H + +#include + +namespace jmmt { + /** + * Result type of HashString()/HashStringCase(). + */ + using crc32_t = std::uint32_t; + + crc32_t HashString(const char* s); + + /** + * Hash a case-sensitive string. + */ + crc32_t HashStringCase(const char* s); + +} // namespace jmmt + +#endif // JMMT_CRC_H diff --git a/include/jmmt/lzss.h b/include/jmmt/lzss.h new file mode 100644 index 0000000..476e5d8 --- /dev/null +++ b/include/jmmt/lzss.h @@ -0,0 +1,39 @@ +#ifndef JMMT_LZSS_H +#define JMMT_LZSS_H + +#include + +namespace jmmt { + + struct LzssHeader { + std::uint32_t next; // done to keep data layout consistent with PS2 + std::uint8_t cByteId; + std::uint8_t cHdrSize; // should be sizeof(LzssHeader) + std::uint8_t nMaxMatch; + std::uint8_t nFillByte; + std::uint16_t nRingSize; + std::uint16_t nErrorId; + std::uint32_t nUnCompressedBytes; + std::uint32_t nCompressedBytes; + std::uint32_t nCRC; + std::uint32_t nFileId; + std::uint32_t nCompressedDataCRC; + }; + + static_assert(sizeof(LzssHeader) == 0x20, "LzssHeader doesn't match game expectations, you are CERTAINLY breaking structures"); + + /** + * Decompress TECH LZSS data. + * + * \param[in,out] header LZSS header. Unused. Set to nullptr for now. + * \param[in] compressedInput LZSS compressed input data. + * \param[in] compressedLength Compressed length. + * \param[out] destBuffer Destination buffer. + * + * \return 0 on success. Non zero value means error. 
+ */ + int DecompressLzss(LzssHeader* header, std::uint8_t* compressedInput, std::int32_t compressedLength, std::uint8_t* destBuffer); + +} // namespace jmmt + +#endif diff --git a/include/jmmt/package.h b/include/jmmt/package.h new file mode 100644 index 0000000..e213490 --- /dev/null +++ b/include/jmmt/package.h @@ -0,0 +1,92 @@ +// JMMT PAK structures + +#ifndef JMMT_PACKAGE_H +#define JMMT_PACKAGE_H + +#include + +// for LzssHeader +#include + +namespace jmmt { + + template + struct BasicStructureWithMagic { + constexpr static TMagic TypeMagic = ValidMagic; + }; + + template + using FourCCMagic = BasicStructureWithMagic; + + // This is the "file header" of sorts. + struct PackageEofHeader { + std::uint32_t headerSize; + std::uint32_t debugInfoSize; + + std::uint32_t headerStartOffset; + }; + + struct PackageGroup : public FourCCMagic<0x50524750 /* 'PGRP' */> { + uint32_t magic; + uint32_t groupNameCrc; + + uint32_t fileCount; + uint32_t padding; // 0xcdcdcdcd - padding to 0x10 bytes + }; + + struct PackageFile : public FourCCMagic<0x4C494650 /* 'PFIL' */> { + uint32_t magic; + uint32_t unk[2]; // Don't know what these are? + + // Sequence number of the chunk. + // This represents the order of each chunk, + // presumably so order can just be whatever. + // + // However, the archives seem to order chunks for files + // in order, and doesn't start/interleave other files + // in between of files. + // + // In other words: this is a nice waste of 16 bits. + uint16_t chunkSequenceNumber; + + // Amount of chunks which need to be read + // from to read this file completely. + // + // 1 means this file starts and ends on this chunk. + uint16_t chunkAmount; + + // A CRC32 hash of the path of this file. + // Hashed with jmmt::HashString(). + uint32_t filenameCrc; + + uint32_t unk2[7]; // more unknown stuff I don't know about yet + + // Uncompressed size of this file chunk. Has a maximum of 65535 bytes. 
+ uint32_t chunkSize; + + // Offset where this file chunk should start, + // inside of a larger buffer. + uint32_t blockOffset; + + // Compressed (stored) size of this chunk. + uint32_t compressedChunkSize; + + // Offset inside of the package file where + // the compressed data blob starts. + uint32_t dataOffset; + + uint32_t fileSize; + + // TECH LZSS header. + // Used to (shocker) configure LZSS decompression. + // + // Duplicates a few things in the file. + LzssHeader lzssHeader; + }; + + static_assert(sizeof(PackageEofHeader) == 0xc, "PackageEofHeader has invalid size. Extractor 100% won't work, good job"); + static_assert(sizeof(PackageGroup) == 0x10, "PackageGroup has invalid size, extractor won't work"); + static_assert(sizeof(PackageFile) == 0x64, "PackageFile has invalid size, extractor won't work"); +} // namespace jmmt + +#endif // JMMT_PACKAGE_H diff --git a/src/libjmmt/CMakeLists.txt b/src/libjmmt/CMakeLists.txt new file mode 100644 index 0000000..bf6f6fb --- /dev/null +++ b/src/libjmmt/CMakeLists.txt @@ -0,0 +1,13 @@ + + +add_library(jmmt + crc.cpp + lzss.cpp + ) + +target_include_directories(jmmt PUBLIC ${PROJECT_SOURCE_DIR}/include) + +set_target_properties(jmmt PROPERTIES + CXX_STANDARD 20 + CXX_STANDARD_REQUIRED ON + ) \ No newline at end of file diff --git a/src/libjmmt/crc.cpp b/src/libjmmt/crc.cpp new file mode 100644 index 0000000..b94a351 --- /dev/null +++ b/src/libjmmt/crc.cpp @@ -0,0 +1,75 @@ +#include + +namespace jmmt { + + // Standard Ethernet-II CRC32 polynominal table. 
+ constinit static crc32_t Crc32Table[] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, + 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, + 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2, + 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, + 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c, + 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, + 0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106, + 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, + 0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, + 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7, + 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, + 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, + 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81, + 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, + 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84, + 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, + 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, + 0xf9b9df6f, 0x8ebeeff9, 
0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e, + 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, + 0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28, + 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f, + 0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, + 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69, + 0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, + 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, + 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693, + 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, + 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d + }; + + // TODO: + // - Use string_view, because C++20 (& it's cleaner) + + crc32_t HashString(const char* s) { + crc32_t crc = 0; + + while(*s) { + crc = Crc32Table[(crc ^ (*s++ & ~0x20)) & 0xff] ^ (crc >> 8); + } + return crc; + } + + // Hash a string which is case-sensitive. 
+	crc32_t HashStringCase(const char* s) {
+		crc32_t crc = 0;
+
+		while(*s) {
+			crc = Crc32Table[(crc ^ (*s++)) & 0xff] ^ (crc >> 8);
+		}
+		return crc;
+	}
+
+
+}
\ No newline at end of file
diff --git a/src/libjmmt/lzss.cpp b/src/libjmmt/lzss.cpp
new file mode 100644
index 0000000..e9a2e79
--- /dev/null
+++ b/src/libjmmt/lzss.cpp
@@ -0,0 +1,122 @@
+#include <jmmt/lzss.h>
+#include <cstdio>
+#include <cstring>
+
+#define LZSS_DEFAULT_RINGSIZE 512
+#define LZSS_DEFAULT_MATCHSIZE 66
+#define LZSS_EOF -1
+#define LZSS_CHUNKSIZE 256
+#define LZSS_RINGBITS 9
+#define LZSS_THRESHOLD 2
+#define LZSS_STALLBIT (30)
+
+#define LZSS_GETBYTE(in,inb,out) \
+	do { \
+		if( (inb) <= (nInputBufferIndex) ) \
+			(out) = -1; \
+		else \
+		{ \
+			/* std::printf("getting '%c'\n", *((in)+nInputBufferIndex)); */ \
+			(out) = *((uint8_t*)((in)+nInputBufferIndex)); \
+			nInputBufferIndex++; \
+		} \
+	}while(0)
+
+// this version logs what it's going to put and where.
+//#define LZSS_PUTBYTE(outp,outb) std::printf("LZSS_PUTBYTE(%llu, %x)\n", outp - oldptr, outb); \
+	*(outp)++ = (uint8_t)(outb)
+
+#define LZSS_PUTBYTE(outp,outb) *(outp)++ = (uint8_t)(outb)
+
+#define FileIO_ZeroMemory(dst, size) memset(dst, 0, size)
+
+namespace jmmt {
+
+	// Decodes a TECH LZSS stream with the fixed default parameters set up
+	// below (512-byte ring, 9 ring bits); `header` is not consulted.
+	// No output length is passed in, so destBuffer is written unchecked:
+	// the caller must size it for the decoded data. Always returns 0.
+	int DecompressLzss(LzssHeader* header, std::uint8_t* compressedInput, std::int32_t compressedLength, std::uint8_t* destBuffer) {
+		int32_t nRingIndex, nInSize, nInputBufferIndex, nRingBits, nRingSize;
+		uint8_t aRingBuffer[LZSS_DEFAULT_RINGSIZE], *pRingBuffer;
+		uint32_t nBitFlags = 0;
+
+		std::int32_t nInByte;
+
+		//auto* oldptr = destBuffer; // uncomment for logging version of LZSS_PUTBYTE
+
+		// TODO: this is where we might want to place header usage. You know, if we need to.
+		nBitFlags = 0;
+		nInputBufferIndex = 0;
+		nInSize = compressedLength;
+		nRingSize = LZSS_DEFAULT_RINGSIZE;
+		nRingBits = LZSS_RINGBITS;
+		nRingIndex = LZSS_DEFAULT_RINGSIZE - LZSS_DEFAULT_MATCHSIZE;
+
+		// Use stack allocated default ring buffer
+		pRingBuffer = &aRingBuffer[0];
+
+		FileIO_ZeroMemory(pRingBuffer, nRingSize);
+		//memset(pRingBuffer, ' ', nRingSize);
+
+
+		for(;;) { // get next 8 opcodes?
+			if(((nBitFlags >>= 1) & 256) == 0) {
+				LZSS_GETBYTE(compressedInput, nInSize, nInByte);
+				if(nInByte == -1)
+					break;
+
+				//std::printf("LZSS new opcodes\n");
+
+				// store 255 in upper word, when zero get next 8 opcodes
+				nBitFlags = nInByte | 0xff00;
+			}
+
+			// single char
+			if(nBitFlags & 1) {
+				LZSS_GETBYTE(compressedInput, nInSize, nInByte);
+				if(nInByte == -1)
+					break;
+
+				//std::printf("LZSS single char '%c'\n", nInByte);
+
+				LZSS_PUTBYTE(destBuffer, nInByte);
+				pRingBuffer[nRingIndex++] = (uint8_t)nInByte;
+				nRingIndex &= (nRingSize - 1);
+			}
+
+			// string
+			else { // get position & length pair (note: 1 bit of position is stored in length word)
+				int32_t i, j;
+				LZSS_GETBYTE(compressedInput, nInSize, i);
+				if(i == -1)
+					break;
+				LZSS_GETBYTE(compressedInput, nInSize, j);
+				// The input can end between the two bytes of a pair. Stop here
+				// rather than decode a position/length from the -1 sentinel.
+				if(j == -1)
+					break;
+
+				i |= ((j >> (16 - nRingBits)) << 8);
+				j = (j & (0x00FF >> (nRingBits - 8))) + LZSS_THRESHOLD;
+
+				// std::printf("LZSS string pos %d len %d\n", i , j);
+
+				//LZSS_VALIDATE(j <= LZSS_DEFAULT_MATCHSIZE, "Invalid match size for decompression");
+
+				for(int32_t k = 0; k <= j; ++k) {
+					nInByte = pRingBuffer[(i + k) & (nRingSize - 1)];
+
+					//std::printf("LZSS string byte '%c'\n", nInByte);
+
+					LZSS_PUTBYTE(destBuffer, nInByte);
+					pRingBuffer[nRingIndex++] = (uint8_t)nInByte;
+					nRingIndex &= (nRingSize - 1);
+				}
+			}
+		}
+
+		return 0;
+	}
+
+} // namespace jmmt
diff --git a/src/tools/CMakeLists.txt b/src/tools/CMakeLists.txt
new file mode 100644
index 0000000..16b6b90
--- /dev/null
+++ b/src/tools/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+add_executable(jmmt_renamer jmmt_renamer.cpp)
+target_link_libraries(jmmt_renamer PUBLIC jmmt)
+
+set_target_properties(jmmt_renamer PROPERTIES
+		CXX_STANDARD 20
+		CXX_STANDARD_REQUIRED ON
+		)
+
+
+add_executable(jmmt_pack_extractor jmmt_pack_extractor.cpp)
+target_link_libraries(jmmt_pack_extractor PUBLIC jmmt)
+
+set_target_properties(jmmt_pack_extractor PROPERTIES
+		CXX_STANDARD 20
+		CXX_STANDARD_REQUIRED ON
+		)
\ No newline at end of file
diff --git a/src/tools/jmmt_pack_extractor.cpp b/src/tools/jmmt_pack_extractor.cpp
new file mode 100644
index 0000000..7f7bbee
--- /dev/null
+++ b/src/tools/jmmt_pack_extractor.cpp
@@ -0,0 +1,310 @@
+// Package file extractor.
+// WIP, and a little crashy.
+// Yes, this code is messy, but I just wanted it to work after days of it not doing so.
+
+#include <jmmt/crc.h>
+#include <jmmt/lzss.h>
+#include <jmmt/package.h>
+
+#include <cstdint>
+#include <cstdlib>
+#include <exception>
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <map>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+namespace fs = std::filesystem;
+
+// Reads one T straight from the stream as raw bytes.
+// Throws if the stream is already bad; a short read is not detected here
+// and leaves the unread tail of T zeroed (callers rely on magic checks).
+template <class T>
+T LameRead(std::istream& is) {
+	if(!is)
+		throw std::runtime_error("stream is bad");
+
+	T t {};
+	is.read(reinterpret_cast<char*>(&t), sizeof(T));
+	return t;
+}
+
+
+/**
+ * Reads package files.
+ */
+struct PackageReader {
+
+	/**
+	 * Decompressed and un-split package file.
+	 */
+	struct DecompressedFile {
+		std::string filename;
+		std::vector<std::uint8_t> data;
+	};
+
+	explicit PackageReader(std::istream& is)
+		: is(is) {
+	}
+
+	/**
+	 * Reads the EOF header, the group header and the filename table,
+	 * then leaves the stream at the first PFIL entry.
+	 * Sets the invalid flag (see Invalid()) if the group magic is wrong
+	 * or the filename table cannot be read completely.
+	 */
+	void Init() {
+		is.seekg(-0xc, std::istream::end);
+
+		// Read the eof header
+		eofHeader = LameRead<jmmt::PackageEofHeader>(is);
+
+		// We ideally should be at the end of file after reading the eof header.
+		const auto fileSize = static_cast<std::streamoff>(is.tellg());
+
+		// Seek to the header start and read the pgrp.
+		is.seekg(static_cast<std::streamoff>(eofHeader.headerStartOffset), std::istream::beg);
+		group = LameRead<jmmt::PackageGroup>(is);
+
+		if(group.magic != jmmt::PackageGroup::TypeMagic) {
+			fileInvalid = true;
+			return;
+		}
+
+		// Read the string table, and hash every string out into a map.
+		// This is used to build our crc->filename mapping for this archive.
+		{
+			is.seekg(static_cast<std::streamoff>(eofHeader.headerStartOffset) + static_cast<std::streamoff>(eofHeader.headerSize), std::istream::beg);
+
+			auto ReadString = [&]() {
+				std::string s;
+
+				// get(char&) reports EOF through the stream state. The old
+				// `c = is.get()` squeezed EOF into a char that never equals
+				// '\0', so a truncated table appended to `s` forever.
+				for(char c {}; is.get(c) && c != '\0';)
+					s.push_back(c);
+
+				return s;
+			};
+
+			// seek ahead of the "header" of the debug info/string table,
+			// since we don't care about it (we read strings until we "stop". though
+			// it might be smart to trust it? idk.)
+			is.seekg(sizeof(std::uint32_t), std::istream::cur);
+
+			// The table ends where the EOF header begins. Compare with `<`
+			// (not `!=`) and watch the stream state so a table that does not
+			// end exactly on that boundary cannot loop forever.
+			const auto tableEnd = fileSize - static_cast<std::streamoff>(sizeof(eofHeader));
+
+			while(is && static_cast<std::streamoff>(is.tellg()) < tableEnd) {
+				auto string = ReadString();
+				if(!is)
+					break;
+
+				crcToFilename[jmmt::HashString(string.c_str())] = string;
+			}
+
+			if(!is) {
+				fileInvalid = true;
+				return;
+			}
+		}
+
+		// Go to the start of the first pfil (skipping the pgrp we just read)
+		// after we setup our map.
+		is.seekg(static_cast<std::streamoff>(eofHeader.headerStartOffset) + static_cast<std::streamoff>(sizeof(jmmt::PackageGroup)), std::istream::beg);
+	}
+
+	/**
+	 * \return true once any read found invalid package data, false otherwise.
+	 */
+	[[nodiscard]] bool Invalid() const {
+		return fileInvalid;
+	}
+
+	/**
+	 * Reads the next PFIL entry and decompresses its chunk into the work
+	 * buffer at the chunk's block offset. The stream position is restored
+	 * afterwards so the following entry can be read.
+	 * Sets the invalid flag instead of decompressing when the entry's magic,
+	 * placement or compressed data cannot be trusted.
+	 */
+	void ReadFileChunk() {
+		if(fileInvalid)
+			return;
+
+		currChunk = LameRead<jmmt::PackageFile>(is);
+		if(currChunk.magic != jmmt::PackageFile::TypeMagic) {
+			fileInvalid = true;
+			return;
+		}
+
+		// TODO: Implement CRC-based fallback, if required.
+		// It PROBABLY isn't.
+		currFileName = crcToFilename[currChunk.filenameCrc];
+
+		// chunkAmount includes this chunk. Guard 0 so the subtraction cannot
+		// wrap chunksLeft around to ~4 billion.
+		chunksLeft = currChunk.chunkAmount > 0 ? currChunk.chunkAmount - 1u : 0u;
+
+		// If we finished a file, the work buffer is empty.
+		if(fileWorkBuffer.empty())
+			fileWorkBuffer.resize(currChunk.fileSize);
+
+		// DecompressLzss() writes through a raw pointer with no length, so
+		// refuse chunks whose declared placement leaves the work buffer.
+		if(currChunk.blockOffset > fileWorkBuffer.size() || currChunk.chunkSize > fileWorkBuffer.size() - currChunk.blockOffset) {
+			fileInvalid = true;
+			return;
+		}
+
+		std::vector<std::uint8_t> compressedBuffer(currChunk.compressedChunkSize);
+
+		const auto old = is.tellg();
+
+		is.seekg(currChunk.dataOffset, std::istream::beg);
+
+		// Read and decompress where we need to, taking the block offset into account.
+		is.read(reinterpret_cast<char*>(compressedBuffer.data()), currChunk.compressedChunkSize);
+		if(!is) {
+			// Short read: the buffer is not the chunk's data, don't decode it.
+			fileInvalid = true;
+			return;
+		}
+
+		jmmt::DecompressLzss(nullptr, compressedBuffer.data(), currChunk.compressedChunkSize, fileWorkBuffer.data() + currChunk.blockOffset);
+
+		// Seek back to the old place the stream was before reading and decompress
+		is.seekg(old);
+	}
+
+	/**
+	 * Read a file from this package.
+	 * \param[in] cb Called when file is read
+	 */
+	template <class DoneCallback>
+	void ReadFile(DoneCallback&& cb) {
+		if(fileInvalid)
+			return;
+
+		// Read first file chunk.
+		// It's perfectly legal for this to be all we need to do.
+		ReadFileChunk();
+
+		// Read additional chunks, if required.
+		for(std::uint32_t i = 0; i < chunksLeft && !fileInvalid; ++i)
+			ReadFileChunk();
+
+		// Only hand over complete files: a failed chunk read leaves the work
+		// buffer empty or partially filled.
+		if(!fileInvalid)
+			cb(DecompressedFile { .filename = currFileName, .data = fileWorkBuffer });
+
+		fileWorkBuffer.clear();
+	}
+
+	/**
+	 * Read all possible files from this package.
+	 * Stops early once invalid data is found; check Invalid() afterwards.
+	 * \param[in] cb Called when file is read
+	 */
+	template <class DoneCallback>
+	void ReadFiles(DoneCallback&& cb) {
+		for(std::uint32_t i = 0; i < group.fileCount && !fileInvalid; ++i)
+			ReadFile(cb);
+	}
+
+	jmmt::PackageGroup& GetGroup() {
+		return group;
+	}
+
+   private:
+	std::istream& is;
+
+	// Set to true on any invalid file data.
+	bool fileInvalid = false;
+
+	jmmt::PackageEofHeader eofHeader {};
+
+	jmmt::PackageGroup group {};
+
+	/**
+	 * CRC->sensible filename map.
+	 */
+	std::map<jmmt::crc32_t, std::string> crcToFilename;
+
+	// file stuff
+	std::uint32_t chunksLeft {};
+
+	// The name of the file we are processing.
+	std::string currFileName;
+
+	// The current chunk the reader is reading.
+	jmmt::PackageFile currChunk {};
+
+	// File-sized work buffer used to store the file
+	// we're currently working on. Freed when a file is
+	// finished being extracted.
+	std::vector<std::uint8_t> fileWorkBuffer;
+};
+
+int main(int argc, char** argv) {
+	if(argc != 2) {
+		std::cout << "Usage: " << argv[0] << " [path 2 JMMT PAK file]\n";
+		return 1;
+	}
+
+	std::ifstream ifs(argv[1], std::ifstream::binary);
+
+	if(!ifs) {
+		std::cout << "Invalid file \"" << argv[1] << "\"\n";
+		return 1;
+	}
+
+	PackageReader reader(ifs);
+
+	// LameRead() (and the allocations sized from file fields) can throw;
+	// report that instead of letting the exception terminate the process.
+	try {
+		reader.Init();
+
+		if(reader.Invalid()) {
+			std::cout << "File \"" << argv[1] << "\" doesn't seem to be a PAK file.\n";
+			return 1;
+		}
+
+		auto path = fs::path(argv[1]).stem();
+
+		reader.ReadFiles([&](const auto& file) {
+			auto outpath = path / file.filename;
+			fs::create_directories(outpath.parent_path());
+
+			std::ofstream ofs(outpath.string(), std::ofstream::binary);
+			if(!ofs) {
+				std::cout << "Could not open \"" << outpath.string() << "\".\n";
+				std::exit(1);
+			}
+
+			ofs.write(reinterpret_cast<const char*>(file.data.data()), static_cast<std::streamsize>(file.data.size()));
+			ofs.close();
+
+			std::cout << "Wrote \"" << outpath.string() << "\".\n";
+		});
+	} catch(const std::exception& ex) {
+		std::cout << "Error while reading \"" << argv[1] << "\": " << ex.what() << "\n";
+		return 1;
+	}
+
+	// ReadFiles() stops at the first bad entry; don't report that as success.
+	if(reader.Invalid()) {
+		std::cout << "File \"" << argv[1] << "\" has invalid data; extraction stopped early.\n";
+		return 1;
+	}
+
+	return 0;
+}
diff --git a/src/tools/jmmt_renamer.cpp b/src/tools/jmmt_renamer.cpp
new file mode 100644
index 0000000..cd6e5bf
--- /dev/null
+++ b/src/tools/jmmt_renamer.cpp
@@ -0,0 +1,110 @@
+// JMMT .DAT file renamer utility thingy
+//
+// Renames the .DAT files in /DATA on the disc to
+// the original filenames, for easier identification,
+// less pain, and.. well just because a bunch of DAT
+// files is really stupid to go through every time.
+//
+// (C) 2022 modeco80.
+//
+// Usage:
+//
+// - Compile the tool (or scream at me for a binary)
+// - Run the tool in the DATA directory of the files
+// - ...
+// - Profit?
+
+#include <array>
+#include <cstdio>
+#include <exception>
+#include <filesystem>
+#include <string>
+#include <string_view>
+namespace fs = std::filesystem;
+
+#include <jmmt/crc.h>
+
+// These are the original filenames that the game tries to load,
+// extracted from the game binary.
+//
+// We could brute-force these, but since the game has them in executable,
+// it's a whole lot faster to just try every game filename and see
+// what sticks (& rename it if it does).
+constinit static std::array<std::string_view, 20> OriginalFilenames = {
+	// First loaded by the game
+	"package.toc",
+
+	// General packs
+	"config.pak",
+
+	// This file is referenced in the game files,
+	// but doesn't seem to exist anymore in the final build.
+	//"shell.pak",
+
+	"shell_character_select.pak",
+	"shell_main.pak",
+	"shell_title.pak",
+	"shell_venue.pak",
+	"shell_event.pak",
+	"shell_option.pak",
+	"win_screens.pak",
+
+	// Game levels
+	"SF_san_fran.pak",
+	"DC_washington.pak",
+	"MK_MT_KILI.pak",
+	"MP_MACHU_PIHU.pak",
+	"LV_Las_Vegas.pak",
+	"AN_ANTARTICA.pak",
+	"NP_Nepal.pak",
+	"TH_TAHOE.pak",
+	"VA_Valdez_alaska.pak",
+	"RV_Rome.pak",
+	"TR_training.pak"
+};
+
+std::string MakeDatFilename(const char* filename) {
+	char datFile[13] {};
+
+	// .DAT and .MET filenames are formatted like "[hex char * 8].DAT"
+	// The name component is the CRC32 of the original filename.
+	//
+	// At most 8 hex digits + ".DAT" + NUL, which is what the 13 bytes hold.
+	int res = std::snprintf(&datFile[0], 13, "%X.DAT", jmmt::HashString(filename));
+
+	// snprintf reports failure as any negative value (FIXME: probably throw exception)
+	if(res < 0)
+		return "";
+
+	return { &datFile[0], static_cast<std::size_t>(res) };
+}
+
+int main() {
+	int renamedFiles = 0;
+
+	for(auto filename : OriginalFilenames) {
+		auto datFile = MakeDatFilename(filename.data());
+
+		if(fs::exists(datFile)) {
+			// Try to rename the .DAT file to the game filename.
+			try {
+				fs::rename(datFile, filename);
+			} catch(const std::exception& ex) {
+				// If there's an error renaming, we already catch
+				// if the source .DAT file (that's supposed to exist)
+				// doesn't exist, so print the exception and exit.
+				std::printf("Got exception: %s\n", ex.what());
+				return 1;
+			}
+
+			std::printf("\"%s\" -> \"%s\"\n", datFile.c_str(), filename.data());
+			renamedFiles++;
+		} else {
+			// FIXME: should probably stop here?
+			std::printf("???? Generated hash filename \"%s\" (for \"%s\") which does not exist on disk\n", datFile.c_str(), filename.data());
+		}
+	}
+
+	std::printf("Renamed %d files successfully.\n", renamedFiles);
+	return 0;
+}