Try to optimize things a bit

PakReader no longer reads everything in ReadData(); it now only reads the header and TOC.

Files now need to be read in separately.

Adds progress bar to europa_pack_extractor.

Moved code around.
This commit is contained in:
Lily Tsuru 2022-09-07 04:07:40 -05:00
parent 5d03f49e21
commit fae513fdfe
20 changed files with 236 additions and 193 deletions

3
.gitmodules vendored
View File

@ -1,3 +1,6 @@
[submodule "third_party/libpixel"] [submodule "third_party/libpixel"]
path = third_party/libpixel path = third_party/libpixel
url = https://github.com/modeco80/libpixel.git url = https://github.com/modeco80/libpixel.git
[submodule "third_party/indicators"]
path = third_party/indicators
url = https://github.com/p-ranav/indicators

View File

@ -10,6 +10,7 @@ include(cmake/Policies.cmake)
project(EuropaTools) project(EuropaTools)
add_subdirectory(third_party/libpixel) add_subdirectory(third_party/libpixel)
add_subdirectory(third_party/indicators)
add_subdirectory(src/libeuropa) add_subdirectory(src/libeuropa)
add_subdirectory(src/tools) add_subdirectory(src/tools)

View File

@ -10,9 +10,9 @@ Provides archive IO utilities and the structures.
## The Tools ## The Tools
### `europa_pak_extractor` ### `eupak`
Extractor for .pak and .pmdl (they use 1:1 the same format) files. Swiss army knife for Europa packfiles.
TODO: this will be one `eupak` utility later, probably. Can create, extract, and show info on them.

View File

@ -7,6 +7,8 @@
// //
#pragma endian little #pragma endian little
// Big archives need a big pattern limit
#pragma max_patterns 0x40000
namespace europa { namespace europa {
@ -56,7 +58,7 @@ namespace europa {
struct PakFile { struct PakFile {
PakHeader header; PakHeader header;
//PakTocEntry toc[header.fileCount] @ header.tocOffset; PakTocEntry toc[header.fileCount] @ header.tocOffset;
}; };
} // namespace europa } // namespace europa

View File

@ -9,47 +9,43 @@
#ifndef EUROPA_IO_PAKFILE_H #ifndef EUROPA_IO_PAKFILE_H
#define EUROPA_IO_PAKFILE_H #define EUROPA_IO_PAKFILE_H
#include <vector>
#include <cstdint>
#include <europa/structs/Pak.h> #include <europa/structs/Pak.h>
#include <cstdint>
#include <vector>
namespace europa::io { namespace europa::io {
struct PakReader; struct PakReader;
struct PakWriter; struct PakWriter;
struct PakFile { struct PakFile {
using DataType = std::vector<std::uint8_t>;
using DataType = std::vector<std::uint8_t>; /**
* Get the file data.
*/
[[nodiscard]] const DataType& GetData() const;
/** /**
* Get the file data. * Get the TOC entry responsible.
*/ */
[[nodiscard]] const DataType& GetData() const; [[nodiscard]] const structs::PakTocEntry& GetTOCEntry() const;
/** void SetData(DataType&& data);
* Get the TOC entry responsible.
*/
[[nodiscard]] const structs::PakTocEntry& GetTOCEntry() const;
void SetData(DataType&& data); structs::PakTocEntry& GetTOCEntry();
structs::PakTocEntry& GetTOCEntry(); void FillTOCEntry();
void FillTOCEntry(); private:
friend PakReader;
friend PakWriter;
private: std::vector<std::uint8_t> data;
friend PakReader; structs::PakTocEntry tocData;
friend PakWriter; };
} // namespace europa::io
std::vector<std::uint8_t> data; #endif // EUROPA_IO_PAKFILE_H
structs::PakTocEntry tocData;
};
}
#endif //EUROPA_IO_PAKFILE_H

View File

@ -10,7 +10,6 @@
#define EUROPA_IO_PAKREADER_H #define EUROPA_IO_PAKREADER_H
#include <europa/io/PakFile.h> #include <europa/io/PakFile.h>
#include <europa/structs/Pak.h> #include <europa/structs/Pak.h>
#include <iosfwd> #include <iosfwd>
@ -20,16 +19,25 @@
namespace europa::io { namespace europa::io {
struct PakReader { struct PakReader {
using MapType = std::unordered_map<std::string, PakFile>;
explicit PakReader(std::istream& is); explicit PakReader(std::istream& is);
void ReadData(); void ReadData();
void ReadFiles();
/**
* Read in a specific file.
*/
void ReadFile(const std::string& file);
bool Invalid() const { bool Invalid() const {
return invalid; return invalid;
} }
const std::unordered_map<std::string, PakFile>& GetFiles() const; MapType& GetFiles();
const MapType& GetFiles() const;
private: private:
std::istream& stream; std::istream& stream;
@ -37,7 +45,7 @@ namespace europa::io {
structs::PakHeader header {}; structs::PakHeader header {};
std::unordered_map<std::string, PakFile> files; MapType files;
}; };
} // namespace europa::io } // namespace europa::io

View File

@ -9,12 +9,9 @@
#ifndef EUROPA_IO_PAKWRITER_H #ifndef EUROPA_IO_PAKWRITER_H
#define EUROPA_IO_PAKWRITER_H #define EUROPA_IO_PAKWRITER_H
#include <europa/io/PakFile.h> #include <europa/io/PakFile.h>
#include <iosfwd> #include <iosfwd>
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
@ -24,24 +21,20 @@ namespace europa::io {
* Writer for package files. * Writer for package files.
*/ */
struct PakWriter { struct PakWriter {
void Init(structs::PakVersion version);
void Init(); std::unordered_map<std::string, PakFile>& GetFiles();
void AddFile(const std::string& path, const PakFile& data);
void RemoveFile(const std::string& path);
/** /**
* Write the resulting archive to the given output stream. * Write the resulting archive to the given output stream.
*/ */
void Write(std::ostream& os); void Write(std::ostream& os);
private: private:
structs::PakHeader pakHeader{}; structs::PakHeader pakHeader {};
std::unordered_map<std::string, PakFile> archiveFiles; std::unordered_map<std::string, PakFile> archiveFiles;
}; };
} // namespace europa::io
}
#endif // EUROPA_IO_PAKWRITER_H #endif // EUROPA_IO_PAKWRITER_H

View File

@ -53,20 +53,47 @@ namespace europa::structs {
return sizeof(magic) + static_cast<std::size_t>(headerSize); return sizeof(magic) + static_cast<std::size_t>(headerSize);
} }
void Init(PakVersion ver) { /**
// clear any junk * Initialize this header (used when writing).
memset(this, 0, sizeof(PakHeader)); */
void Init(PakVersion ver) noexcept {
// clear any junk
memset(this, 0, sizeof(PakHeader));
// Copy important things. // Copy important things.
std::memcpy(&magic[0], &VALID_MAGIC[0], sizeof(VALID_MAGIC)); std::memcpy(&magic[0], &VALID_MAGIC[0], sizeof(VALID_MAGIC));
headerSize = sizeof(PakHeader) - (sizeof(PakHeader::VALID_MAGIC) - 1);
// Set archive version // Set proper header size.
version = ver; headerSize = sizeof(PakHeader) - (sizeof(PakHeader::VALID_MAGIC) - 1);
}
// Set archive version
version = ver;
}
/**
 * Check whether this header passes validation.
 *
 * @return true when the magic matches VALID_MAGIC and the version
 *         is one of the accepted ones, false otherwise.
 */
[[nodiscard]] bool Valid() const noexcept {
    // A header whose magic does not match is rejected outright.
    const bool magicMatches = std::strcmp(magic, VALID_MAGIC) == 0;
    if(!magicMatches)
        return false;

    // Only the versions listed here are accepted;
    // every other value makes the header invalid.
    return version == PakVersion::Starfighter || version == PakVersion::Ver2;
}
}; };
// A Toc entry (without string. Needs to be read in seperately) // A Toc entry (without string. Needs to be read in separately)
struct [[gnu::packed]] PakTocEntry { struct [[gnu::packed]] PakTocEntry {
u32 offset; u32 offset;
u32 size; u32 size;
@ -76,8 +103,8 @@ namespace europa::structs {
u32 unk3; u32 unk3;
}; };
static_assert(sizeof(PakHeader) == 0x29, "PakHeader wrong size!!"); static_assert(sizeof(PakHeader) == 0x29, "PakHeader wrong size!!");
static_assert(sizeof(PakHeader) - (sizeof(PakHeader::VALID_MAGIC) - 1) == 0x1a, "PakHeader::headerSize will be invalid"); static_assert(sizeof(PakHeader) - (sizeof(PakHeader::VALID_MAGIC) - 1) == 0x1a, "PakHeader::headerSize will be invalid when writing archives.");
static_assert(sizeof(PakTocEntry) == 0xc, "PakTocEntry wrong size!"); static_assert(sizeof(PakTocEntry) == 0xc, "PakTocEntry wrong size!");
} // namespace europa::structs } // namespace europa::structs

View File

@ -16,12 +16,12 @@ namespace europa::util {
/** /**
* A compile-time string. Usable as a C++20 cNTTP. * A compile-time string. Usable as a C++20 cNTTP.
*/ */
template<std::size_t N> template <std::size_t N>
struct FixedString { struct FixedString {
char buf[N + 1]{}; char buf[N + 1] {};
constexpr FixedString(const char* s) { // NOLINT constexpr FixedString(const char* s) { // NOLINT
for (unsigned i = 0; i != N; ++i) for(unsigned i = 0; i != N; ++i)
buf[i] = s[i]; buf[i] = s[i];
} }
@ -34,9 +34,9 @@ namespace europa::util {
} }
}; };
template<std::size_t N> template <std::size_t N>
FixedString(char const (&)[N]) -> FixedString<N - 1>; FixedString(char const (&)[N]) -> FixedString<N - 1>;
} } // namespace europa::util
#endif // EUROPA_UTIL_FIXEDSTRING_H #endif // EUROPA_UTIL_FIXEDSTRING_H

View File

@ -9,17 +9,17 @@
#ifndef EUROPA_FOURCC_H #ifndef EUROPA_FOURCC_H
#define EUROPA_FOURCC_H #define EUROPA_FOURCC_H
#include <bit>
#include <europa/util/FixedString.h> #include <europa/util/FixedString.h>
#include <bit>
namespace europa::util { namespace europa::util {
/** /**
* A multi-endian, compile-time FourCC generator. * A multi-endian, compile-time FourCC generator.
* You love to see it. * You love to see it.
*/ */
template<FixedString fccString, std::endian Endian = std::endian::little> template <FixedString fccString, std::endian Endian = std::endian::little>
consteval std::uint32_t FourCC() { consteval std::uint32_t FourCC() {
static_assert(fccString.Length() == 4, "Provided string is not a FourCC"); static_assert(fccString.Length() == 4, "Provided string is not a FourCC");
@ -36,6 +36,6 @@ namespace europa::util {
return 0xffffffff; return 0xffffffff;
} }
} } // namespace europa::util
#endif // EUROPA_FOURCC_H #endif // EUROPA_FOURCC_H

View File

@ -23,4 +23,4 @@ set_target_properties(libeuropa PROPERTIES
# Projects which libeuropa depends on # Projects which libeuropa depends on
target_link_libraries(libeuropa PUBLIC target_link_libraries(libeuropa PUBLIC
pixel::libpixel pixel::libpixel
) )

View File

@ -10,25 +10,24 @@
namespace europa::io { namespace europa::io {
const PakFile::DataType& PakFile::GetData() const { const PakFile::DataType& PakFile::GetData() const {
return data; return data;
} }
const structs::PakTocEntry& PakFile::GetTOCEntry() const { const structs::PakTocEntry& PakFile::GetTOCEntry() const {
return tocData; return tocData;
} }
structs::PakTocEntry& PakFile::GetTOCEntry() {
return tocData;
}
structs::PakTocEntry& PakFile::GetTOCEntry() { void PakFile::SetData(PakFile::DataType&& newData) {
return tocData; data = std::move(newData);
} }
void PakFile::SetData(PakFile::DataType&& newData) { void PakFile::FillTOCEntry() {
data = std::move(newData); tocData.size = static_cast<std::uint32_t>(data.size());
} }
void PakFile::FillTOCEntry() { } // namespace europa::io
tocData.size = static_cast<std::uint32_t>(data.size());
}
}

View File

@ -20,57 +20,51 @@ namespace europa::io {
} }
void PakReader::ReadData() { void PakReader::ReadData() {
auto ReadHeader = [&]() { header = impl::ReadStreamType<structs::PakHeader>(stream);
header = impl::ReadStreamType<structs::PakHeader>(stream);
};
auto ReadTocEntry = [&]() { if(!header.Valid()) {
invalid = true;
return;
}
// Read the archive TOC
stream.seekg(header.tocOffset, std::istream::beg);
for(auto i = 0; i < header.fileCount; ++i) {
// The first part of the TOC entry is a VLE string, // The first part of the TOC entry is a VLE string,
// which we don't store inside the type (because we can't) // which we don't store inside the type (because we can't)
// //
// Read this in first. // Read this in first.
auto filename = impl::ReadPString(stream); auto filename = impl::ReadPString(stream);
files[filename].GetTOCEntry() = impl::ReadStreamType<structs::PakTocEntry>(stream); files[filename].GetTOCEntry() = impl::ReadStreamType<structs::PakTocEntry>(stream);
};
ReadHeader();
// Validate the archive header
if(std::strcmp(header.magic, structs::PakHeader::VALID_MAGIC) != 0) {
invalid = true;
return;
}
switch(header.version) {
case structs::PakVersion::Starfighter:
case structs::PakVersion::Ver2:
break;
default:
invalid = true;
return;
}
stream.seekg(header.tocOffset, std::istream::beg);
// Read the archive TOC
for(auto i = 0; i < header.fileCount; ++i)
ReadTocEntry();
// Read all file data in
for(auto& [filename, file] : files) {
auto& toc = file.GetTOCEntry();
file.data.resize(toc.size);
stream.seekg(toc.offset, std::istream::beg);
stream.read(reinterpret_cast<char*>(&file.data[0]), toc.size);
} }
} }
const std::unordered_map<std::string, PakFile>& PakReader::GetFiles() const { void PakReader::ReadFiles() {
for(auto& [filename, file] : files)
ReadFile(filename);
}
/**
 * Read the data of a single archived file from the stream.
 *
 * Names which are not present in the file map are ignored, and
 * files which already hold data are left untouched.
 *
 * @param file Name of the file to read, as stored in the file map.
 */
void PakReader::ReadFile(const std::string& file) {
    // Look the entry up with find() rather than operator[], which would
    // silently insert an empty entry for a name that isn't in the map.
    const auto it = files.find(file);
    if(it == files.end())
        return;

    auto& fileObject = it->second;

    // This file was already read in, or has data
    // the user may not want to overwrite.
    if(!fileObject.data.empty())
        return;

    const auto& toc = fileObject.GetTOCEntry();

    // Nothing to read for a zero-sized file. Returning here also
    // avoids indexing into an empty vector.
    if(toc.size == 0)
        return;

    fileObject.data.resize(toc.size);
    stream.seekg(toc.offset, std::istream::beg);
    stream.read(reinterpret_cast<char*>(fileObject.data.data()), static_cast<std::streamsize>(toc.size));
}
PakReader::MapType& PakReader::GetFiles() {
return files; return files;
} }
/**
 * Get read-only access to the reader's file map.
 */
const PakReader::MapType& PakReader::GetFiles() const {
return files;
}
} // namespace europa::io } // namespace europa::io

View File

@ -7,56 +7,58 @@
// //
#include <europa/io/PakWriter.h> #include <europa/io/PakWriter.h>
#include <iostream> #include <iostream>
#include "StreamUtils.h" #include "StreamUtils.h"
namespace europa::io { namespace europa::io {
/**
 * Initialize the writer for the given archive version.
 */
void PakWriter::Init(structs::PakVersion version) {
// For now, all this does is initialize the archive header.
pakHeader.Init(version);
}
void PakWriter::Init() { std::unordered_map<std::string, PakFile>& PakWriter::GetFiles() {
// for now. return archiveFiles;
pakHeader.Init(structs::PakVersion::Starfighter); }
}
void PakWriter::AddFile(const std::string &path, const PakFile& data) { void PakWriter::Write(std::ostream& os) {
archiveFiles[path] = data; // Set up the header a bit more...
} pakHeader.fileCount = archiveFiles.size();
void PakWriter::RemoveFile(const std::string &path) { // Leave space for the header
archiveFiles.erase(path); os.seekp(sizeof(structs::PakHeader), std::ostream::beg);
}
void PakWriter::Write(std::ostream &os) { // Seek forwards for version 2 PAKs, as the only
// Set up the header a bit more... // difference seems to be
pakHeader.fileCount = archiveFiles.size(); if(pakHeader.version == structs::PakVersion::Ver2) {
os.seekp(6, std::ostream::cur);
}
// Leave space for the header // Write file data
os.seekp(sizeof(structs::PakHeader), std::ostream::beg); for(auto& [filename, file] : archiveFiles) {
file.GetTOCEntry().offset = os.tellp();
os.write(reinterpret_cast<const char*>(file.GetData().data()), file.GetData().size());
}
// Write file data pakHeader.tocOffset = os.tellp();
for (auto &[filename, file]: archiveFiles) {
file.GetTOCEntry().offset = os.tellp();
os.write(reinterpret_cast<const char *>(file.GetData().data()), file.GetData().size());
}
pakHeader.tocOffset = os.tellp(); // Write the TOC
for(auto& [filename, file] : archiveFiles) {
file.FillTOCEntry();
// Write the TOC // Write the pstring
for (auto &[filename, file]: archiveFiles) { os.put(static_cast<char>(filename.length() + 1));
file.FillTOCEntry(); for(const auto c : filename)
os.put(c);
os.put('\0');
// Write the pstring impl::WriteStreamType(os, file.GetTOCEntry());
os.put(static_cast<char>(filename.length() + 1)); }
for (const auto c: filename)
os.put(c);
os.put('\0');
impl::WriteStreamType(os, file.GetTOCEntry()); os.seekp(0, std::ostream::beg);
} impl::WriteStreamType(os, pakHeader);
}
os.seekp(0, std::ostream::beg); } // namespace europa::io
impl::WriteStreamType(os, pakHeader);
}
}

View File

@ -19,7 +19,6 @@ namespace europa::io::impl {
is.read(&buffer[0], size); is.read(&buffer[0], size);
} }
void WriteStreamTypeImpl(std::ostream& os, const char* buffer, std::size_t buffer_size) { void WriteStreamTypeImpl(std::ostream& os, const char* buffer, std::size_t buffer_size) {
os.write(&buffer[0], buffer_size); os.write(&buffer[0], buffer_size);
} }

View File

@ -17,7 +17,7 @@ namespace europa::io::impl {
namespace detail { namespace detail {
void ReadStreamTypeImpl(std::istream& is, char* buffer, std::size_t size); void ReadStreamTypeImpl(std::istream& is, char* buffer, std::size_t size);
void WriteStreamTypeImpl(std::ostream& os, const char* buffer, std::size_t buffer_size); void WriteStreamTypeImpl(std::ostream& os, const char* buffer, std::size_t buffer_size);
} } // namespace detail
// This is lame. But it works :) // This is lame. But it works :)
template <class T> template <class T>

View File

@ -7,10 +7,11 @@
// //
#include <europa/io/YatfReader.h> #include <europa/io/YatfReader.h>
#include "StreamUtils.h"
#include <vector> #include <vector>
#include "StreamUtils.h"
namespace europa::io { namespace europa::io {
YatfReader::YatfReader(std::istream& is) YatfReader::YatfReader(std::istream& is)
@ -56,6 +57,4 @@ namespace europa::io {
return header; return header;
} }
} // namespace europa::io
}

View File

@ -1,6 +1,8 @@
add_executable(europa_pack_extractor europa_pack_extractor.cpp) add_executable(europa_pack_extractor europa_pack_extractor.cpp)
target_link_libraries(europa_pack_extractor PUBLIC libeuropa) target_link_libraries(europa_pack_extractor PUBLIC libeuropa
indicators::indicators
)
set_target_properties(europa_pack_extractor PROPERTIES set_target_properties(europa_pack_extractor PROPERTIES
CXX_STANDARD 20 CXX_STANDARD 20

View File

@ -10,6 +10,8 @@
#include <filesystem> #include <filesystem>
#include <fstream> #include <fstream>
#include <indicators/cursor_control.hpp>
#include <indicators/progress_bar.hpp>
#include <iostream> #include <iostream>
namespace fs = std::filesystem; namespace fs = std::filesystem;
@ -38,6 +40,19 @@ int main(int argc, char** argv) {
return 1; return 1;
} }
indicators::ProgressBar progress {
indicators::option::BarWidth { 50 },
indicators::option::ForegroundColor { indicators::Color::green },
indicators::option::MaxProgress { reader.GetFiles().size() },
indicators::option::ShowPercentage { true },
indicators::option::ShowElapsedTime { true },
indicators::option::ShowRemainingTime { true },
indicators::option::PrefixText { "Extracting archive " }
};
indicators::show_console_cursor(false);
for(auto& [filename, file] : reader.GetFiles()) { for(auto& [filename, file] : reader.GetFiles()) {
auto nameCopy = filename; auto nameCopy = filename;
@ -50,11 +65,15 @@ int main(int argc, char** argv) {
} }
#endif #endif
progress.set_option(indicators::option::PostfixText { filename });
auto outpath = (baseDirectory / nameCopy); auto outpath = (baseDirectory / nameCopy);
if(!fs::exists(outpath.parent_path())) if(!fs::exists(outpath.parent_path()))
fs::create_directories(outpath.parent_path()); fs::create_directories(outpath.parent_path());
reader.ReadFile(filename);
std::ofstream ofs(outpath.string(), std::ofstream::binary); std::ofstream ofs(outpath.string(), std::ofstream::binary);
if(!ofs) { if(!ofs) {
@ -63,10 +82,9 @@ int main(int argc, char** argv) {
} }
ofs.write(reinterpret_cast<const char*>(file.GetData().data()), static_cast<std::streampos>(file.GetTOCEntry().size)); ofs.write(reinterpret_cast<const char*>(file.GetData().data()), static_cast<std::streampos>(file.GetTOCEntry().size));
ofs.close(); progress.tick();
std::cout << "Wrote \"" << outpath.string() << "\" to disk.\n";
} }
indicators::show_console_cursor(true);
return 0; return 0;
} }

View File

@ -11,32 +11,32 @@
#include <europa/io/PakReader.h> #include <europa/io/PakReader.h>
#include <europa/io/PakWriter.h> #include <europa/io/PakWriter.h>
#include <iostream>
#include <fstream> #include <fstream>
#include <iostream>
int main(int argc, char** argv) { int main(int argc, char** argv) {
std::ifstream ifs(argv[1], std::ifstream::binary); std::ifstream ifs(argv[1], std::ifstream::binary);
std::ofstream ofs("new_archive.pak", std::ofstream::binary); std::ofstream ofs("new_archive.pak", std::ofstream::binary);
europa::io::PakWriter writer; europa::io::PakWriter writer;
writer.Init(); writer.Init(europa::structs::PakVersion::Ver2);
// Read pak data and vomit it into the writer. // Read pak data and vomit it into the writer.
// This will temporarily consume 2x the memory (so about 240mb for the biggest paks I've seen), // This will temporarily consume 2x the memory (so about 240mb for the biggest paks I've seen),
// but the writer will contain the first copy, // but the writer will contain the first copy,
// until it's cleared. // until it's cleared.
{ {
europa::io::PakReader reader(ifs); europa::io::PakReader reader(ifs);
reader.ReadData(); reader.ReadData();
for (auto &[filename, file]: reader.GetFiles()) { for(auto& [filename, file] : reader.GetFiles()) {
writer.AddFile(filename, file); writer.AddFile(filename, file);
} }
} }
writer.Write(ofs); writer.Write(ofs);
std::cout << "Wrote regurgitated archive to new.pak!\n"; std::cout << "Wrote regurgitated archive to new.pak!\n";
return 0; return 0;
} }