Try to optimize things a bit

PakReader no longer reads everything in ReadData(); it now reads only the header and TOC.

File data now needs to be read in separately, with ReadFile() or ReadFiles().

Adds progress bar to europa_pack_extractor.

Moved code around.
This commit is contained in:
Lily Tsuru 2022-09-07 04:07:40 -05:00
parent 5d03f49e21
commit fae513fdfe
20 changed files with 236 additions and 193 deletions

3
.gitmodules vendored
View File

@ -1,3 +1,6 @@
[submodule "third_party/libpixel"]
path = third_party/libpixel
url = https://github.com/modeco80/libpixel.git
[submodule "third_party/indicators"]
path = third_party/indicators
url = https://github.com/p-ranav/indicators

View File

@ -10,6 +10,7 @@ include(cmake/Policies.cmake)
project(EuropaTools)
add_subdirectory(third_party/libpixel)
add_subdirectory(third_party/indicators)
add_subdirectory(src/libeuropa)
add_subdirectory(src/tools)

View File

@ -10,9 +10,9 @@ Provides archive IO utilities and the structures.
## The Tools
### `europa_pak_extractor`
### `eupak`
Extractor for .pak and .pmdl (they use 1:1 the same format) files.
Swiss army knife for Europa packfiles.
TODO: this will be one `eupak` utility later, probably.
Can create, extract, and show info on them.

View File

@ -7,6 +7,8 @@
//
#pragma endian little
// Big archives need a big pattern limit
#pragma max_patterns 0x40000
namespace europa {
@ -56,7 +58,7 @@ namespace europa {
struct PakFile {
PakHeader header;
//PakTocEntry toc[header.fileCount] @ header.tocOffset;
PakTocEntry toc[header.fileCount] @ header.tocOffset;
};
} // namespace europa

View File

@ -9,47 +9,43 @@
#ifndef EUROPA_IO_PAKFILE_H
#define EUROPA_IO_PAKFILE_H
#include <vector>
#include <cstdint>
#include <europa/structs/Pak.h>
#include <cstdint>
#include <vector>
namespace europa::io {
struct PakReader;
struct PakWriter;
struct PakReader;
struct PakWriter;
struct PakFile {
struct PakFile {
using DataType = std::vector<std::uint8_t>;
using DataType = std::vector<std::uint8_t>;
/**
* Get the file data.
*/
[[nodiscard]] const DataType& GetData() const;
/**
* Get the file data.
*/
[[nodiscard]] const DataType& GetData() const;
/**
* Get the TOC entry responsible.
*/
[[nodiscard]] const structs::PakTocEntry& GetTOCEntry() const;
/**
* Get the TOC entry responsible.
*/
[[nodiscard]] const structs::PakTocEntry& GetTOCEntry() const;
void SetData(DataType&& data);
void SetData(DataType&& data);
structs::PakTocEntry& GetTOCEntry();
structs::PakTocEntry& GetTOCEntry();
void FillTOCEntry();
void FillTOCEntry();
private:
friend PakReader;
friend PakWriter;
private:
friend PakReader;
friend PakWriter;
std::vector<std::uint8_t> data;
structs::PakTocEntry tocData;
};
} // namespace europa::io
std::vector<std::uint8_t> data;
structs::PakTocEntry tocData;
};
}
#endif //EUROPA_IO_PAKFILE_H
#endif // EUROPA_IO_PAKFILE_H

View File

@ -10,7 +10,6 @@
#define EUROPA_IO_PAKREADER_H
#include <europa/io/PakFile.h>
#include <europa/structs/Pak.h>
#include <iosfwd>
@ -20,16 +19,25 @@
namespace europa::io {
struct PakReader {
using MapType = std::unordered_map<std::string, PakFile>;
explicit PakReader(std::istream& is);
void ReadData();
void ReadFiles();
/**
* Read in a specific file.
*/
void ReadFile(const std::string& file);
bool Invalid() const {
return invalid;
}
const std::unordered_map<std::string, PakFile>& GetFiles() const;
MapType& GetFiles();
const MapType& GetFiles() const;
private:
std::istream& stream;
@ -37,7 +45,7 @@ namespace europa::io {
structs::PakHeader header {};
std::unordered_map<std::string, PakFile> files;
MapType files;
};
} // namespace europa::io

View File

@ -9,12 +9,9 @@
#ifndef EUROPA_IO_PAKWRITER_H
#define EUROPA_IO_PAKWRITER_H
#include <europa/io/PakFile.h>
#include <iosfwd>
#include <string>
#include <unordered_map>
@ -24,24 +21,20 @@ namespace europa::io {
* Writer for package files.
*/
struct PakWriter {
void Init(structs::PakVersion version);
void Init();
void AddFile(const std::string& path, const PakFile& data);
void RemoveFile(const std::string& path);
std::unordered_map<std::string, PakFile>& GetFiles();
/**
* Write the resulting archive to the given output stream.
*/
void Write(std::ostream& os);
private:
structs::PakHeader pakHeader{};
std::unordered_map<std::string, PakFile> archiveFiles;
private:
structs::PakHeader pakHeader {};
std::unordered_map<std::string, PakFile> archiveFiles;
};
}
} // namespace europa::io
#endif // EUROPA_IO_PAKWRITER_H

View File

@ -53,20 +53,47 @@ namespace europa::structs {
return sizeof(magic) + static_cast<std::size_t>(headerSize);
}
void Init(PakVersion ver) {
// clear any junk
memset(this, 0, sizeof(PakHeader));
/**
* Initialize this header (used when writing).
*/
void Init(PakVersion ver) noexcept {
// clear any junk
memset(this, 0, sizeof(PakHeader));
// Copy important things.
std::memcpy(&magic[0], &VALID_MAGIC[0], sizeof(VALID_MAGIC));
headerSize = sizeof(PakHeader) - (sizeof(PakHeader::VALID_MAGIC) - 1);
// Copy important things.
std::memcpy(&magic[0], &VALID_MAGIC[0], sizeof(VALID_MAGIC));
// Set archive version
version = ver;
}
// Set proper header size.
headerSize = sizeof(PakHeader) - (sizeof(PakHeader::VALID_MAGIC) - 1);
// Set archive version
version = ver;
}
/**
 * Check whether this header is one we can work with.
 *
 * Returns true only when the magic equals VALID_MAGIC and the
 * version is one of the supported ones; returns false otherwise.
 */
[[nodiscard]] bool Valid() const noexcept {
	// A header whose magic does not match is rejected outright.
	const bool magicMatches = std::strcmp(magic, VALID_MAGIC) == 0;
	if(!magicMatches)
		return false;

	// Only these versions are supported; any other
	// value makes the header invalid.
	return version == PakVersion::Starfighter || version == PakVersion::Ver2;
}
};
// A Toc entry (without string. Needs to be read in seperately)
// A Toc entry (without string. Needs to be read in separately)
struct [[gnu::packed]] PakTocEntry {
u32 offset;
u32 size;
@ -76,8 +103,8 @@ namespace europa::structs {
u32 unk3;
};
static_assert(sizeof(PakHeader) == 0x29, "PakHeader wrong size!!");
static_assert(sizeof(PakHeader) - (sizeof(PakHeader::VALID_MAGIC) - 1) == 0x1a, "PakHeader::headerSize will be invalid");
static_assert(sizeof(PakHeader) == 0x29, "PakHeader wrong size!!");
static_assert(sizeof(PakHeader) - (sizeof(PakHeader::VALID_MAGIC) - 1) == 0x1a, "PakHeader::headerSize will be invalid when writing archives.");
static_assert(sizeof(PakTocEntry) == 0xc, "PakTocEntry wrong size!");
} // namespace europa::structs

View File

@ -16,12 +16,12 @@ namespace europa::util {
/**
* A compile-time string. Usable as a C++20 cNTTP.
*/
template<std::size_t N>
template <std::size_t N>
struct FixedString {
char buf[N + 1]{};
char buf[N + 1] {};
constexpr FixedString(const char* s) { // NOLINT
for (unsigned i = 0; i != N; ++i)
for(unsigned i = 0; i != N; ++i)
buf[i] = s[i];
}
@ -34,9 +34,9 @@ namespace europa::util {
}
};
template<std::size_t N>
template <std::size_t N>
FixedString(char const (&)[N]) -> FixedString<N - 1>;
}
} // namespace europa::util
#endif // EUROPA_UTIL_FIXEDSTRING_H

View File

@ -9,17 +9,17 @@
#ifndef EUROPA_FOURCC_H
#define EUROPA_FOURCC_H
#include <bit>
#include <europa/util/FixedString.h>
#include <bit>
namespace europa::util {
/**
* A multi-endian, compile-time FourCC generator.
* You love to see it.
*/
template<FixedString fccString, std::endian Endian = std::endian::little>
template <FixedString fccString, std::endian Endian = std::endian::little>
consteval std::uint32_t FourCC() {
static_assert(fccString.Length() == 4, "Provided string is not a FourCC");
@ -36,6 +36,6 @@ namespace europa::util {
return 0xffffffff;
}
}
} // namespace europa::util
#endif // EUROPA_FOURCC_H

View File

@ -23,4 +23,4 @@ set_target_properties(libeuropa PROPERTIES
# Projects which libeuropa depends on
target_link_libraries(libeuropa PUBLIC
pixel::libpixel
)
)

View File

@ -10,25 +10,24 @@
namespace europa::io {
const PakFile::DataType& PakFile::GetData() const {
return data;
}
const PakFile::DataType& PakFile::GetData() const {
return data;
}
const structs::PakTocEntry& PakFile::GetTOCEntry() const {
return tocData;
}
const structs::PakTocEntry& PakFile::GetTOCEntry() const {
return tocData;
}
structs::PakTocEntry& PakFile::GetTOCEntry() {
return tocData;
}
structs::PakTocEntry& PakFile::GetTOCEntry() {
return tocData;
}
void PakFile::SetData(PakFile::DataType&& newData) {
data = std::move(newData);
}
void PakFile::SetData(PakFile::DataType&& newData) {
data = std::move(newData);
}
void PakFile::FillTOCEntry() {
tocData.size = static_cast<std::uint32_t>(data.size());
}
void PakFile::FillTOCEntry() {
tocData.size = static_cast<std::uint32_t>(data.size());
}
}
} // namespace europa::io

View File

@ -20,57 +20,51 @@ namespace europa::io {
}
void PakReader::ReadData() {
auto ReadHeader = [&]() {
header = impl::ReadStreamType<structs::PakHeader>(stream);
};
header = impl::ReadStreamType<structs::PakHeader>(stream);
auto ReadTocEntry = [&]() {
if(!header.Valid()) {
invalid = true;
return;
}
// Read the archive TOC
stream.seekg(header.tocOffset, std::istream::beg);
for(auto i = 0; i < header.fileCount; ++i) {
// The first part of the TOC entry is a VLE string,
// which we don't store inside the type (because we can't)
//
// Read this in first.
auto filename = impl::ReadPString(stream);
files[filename].GetTOCEntry() = impl::ReadStreamType<structs::PakTocEntry>(stream);
};
ReadHeader();
// Validate the archive header
if(std::strcmp(header.magic, structs::PakHeader::VALID_MAGIC) != 0) {
invalid = true;
return;
}
switch(header.version) {
case structs::PakVersion::Starfighter:
case structs::PakVersion::Ver2:
break;
default:
invalid = true;
return;
}
stream.seekg(header.tocOffset, std::istream::beg);
// Read the archive TOC
for(auto i = 0; i < header.fileCount; ++i)
ReadTocEntry();
// Read all file data in
for(auto& [filename, file] : files) {
auto& toc = file.GetTOCEntry();
file.data.resize(toc.size);
stream.seekg(toc.offset, std::istream::beg);
stream.read(reinterpret_cast<char*>(&file.data[0]), toc.size);
}
}
const std::unordered_map<std::string, PakFile>& PakReader::GetFiles() const {
void PakReader::ReadFiles() {
for(auto& [filename, file] : files)
ReadFile(filename);
}
/**
 * Read in the data of a specific file.
 *
 * Does nothing when the name is not in the file map, when the
 * entry already holds data, or when the TOC entry has a size of 0.
 */
void PakReader::ReadFile(const std::string& file) {
	// Look the entry up with find() rather than operator[]:
	// operator[] would insert an empty entry for an unknown name,
	// which can also rehash the map while a caller is iterating it.
	const auto it = files.find(file);
	if(it == files.end())
		return;

	auto& fileObject = it->second;

	// This file was already read in, or has data
	// the user may not want to overwrite.
	if(!fileObject.data.empty())
		return;

	const auto& toc = fileObject.GetTOCEntry();

	// Nothing to read for an empty file. This also avoids
	// indexing into an empty vector below.
	if(toc.size == 0)
		return;

	fileObject.data.resize(toc.size);

	stream.seekg(toc.offset, std::istream::beg);
	stream.read(reinterpret_cast<char*>(fileObject.data.data()), toc.size);
}
/**
 * Get mutable access to the reader's map of file names to PakFile objects.
 */
PakReader::MapType& PakReader::GetFiles() {
return files;
}
/**
 * Get read-only access to the reader's map of file names to PakFile objects.
 */
const PakReader::MapType& PakReader::GetFiles() const {
return files;
}
} // namespace europa::io

View File

@ -7,56 +7,58 @@
//
#include <europa/io/PakWriter.h>
#include <iostream>
#include "StreamUtils.h"
namespace europa::io {
void PakWriter::Init(structs::PakVersion version) {
// for now.
pakHeader.Init(version);
}
void PakWriter::Init() {
// for now.
pakHeader.Init(structs::PakVersion::Starfighter);
}
std::unordered_map<std::string, PakFile>& PakWriter::GetFiles() {
return archiveFiles;
}
void PakWriter::AddFile(const std::string &path, const PakFile& data) {
archiveFiles[path] = data;
}
void PakWriter::Write(std::ostream& os) {
// Set up the header a bit more...
pakHeader.fileCount = archiveFiles.size();
void PakWriter::RemoveFile(const std::string &path) {
archiveFiles.erase(path);
}
// Leave space for the header
os.seekp(sizeof(structs::PakHeader), std::ostream::beg);
void PakWriter::Write(std::ostream &os) {
// Set up the header a bit more...
pakHeader.fileCount = archiveFiles.size();
// Seek forwards for version 2 PAKs, as the only
// difference seems to be 6 extra bytes after the header.
if(pakHeader.version == structs::PakVersion::Ver2) {
os.seekp(6, std::ostream::cur);
}
// Leave space for the header
os.seekp(sizeof(structs::PakHeader), std::ostream::beg);
// Write file data
for(auto& [filename, file] : archiveFiles) {
file.GetTOCEntry().offset = os.tellp();
os.write(reinterpret_cast<const char*>(file.GetData().data()), file.GetData().size());
}
// Write file data
for (auto &[filename, file]: archiveFiles) {
file.GetTOCEntry().offset = os.tellp();
os.write(reinterpret_cast<const char *>(file.GetData().data()), file.GetData().size());
}
pakHeader.tocOffset = os.tellp();
pakHeader.tocOffset = os.tellp();
// Write the TOC
for(auto& [filename, file] : archiveFiles) {
file.FillTOCEntry();
// Write the TOC
for (auto &[filename, file]: archiveFiles) {
file.FillTOCEntry();
// Write the pstring
os.put(static_cast<char>(filename.length() + 1));
for(const auto c : filename)
os.put(c);
os.put('\0');
// Write the pstring
os.put(static_cast<char>(filename.length() + 1));
for (const auto c: filename)
os.put(c);
os.put('\0');
impl::WriteStreamType(os, file.GetTOCEntry());
}
impl::WriteStreamType(os, file.GetTOCEntry());
}
os.seekp(0, std::ostream::beg);
impl::WriteStreamType(os, pakHeader);
}
os.seekp(0, std::ostream::beg);
impl::WriteStreamType(os, pakHeader);
}
}
} // namespace europa::io

View File

@ -19,7 +19,6 @@ namespace europa::io::impl {
is.read(&buffer[0], size);
}
// Write buffer_size bytes, starting at buffer, to the given output stream.
void WriteStreamTypeImpl(std::ostream& os, const char* buffer, std::size_t buffer_size) {
	const auto byteCount = static_cast<std::streamsize>(buffer_size);
	os.write(buffer, byteCount);
}

View File

@ -17,7 +17,7 @@ namespace europa::io::impl {
namespace detail {
void ReadStreamTypeImpl(std::istream& is, char* buffer, std::size_t size);
void WriteStreamTypeImpl(std::ostream& os, const char* buffer, std::size_t buffer_size);
}
} // namespace detail
// This is lame. But it works :)
template <class T>

View File

@ -7,10 +7,11 @@
//
#include <europa/io/YatfReader.h>
#include "StreamUtils.h"
#include <vector>
#include "StreamUtils.h"
namespace europa::io {
YatfReader::YatfReader(std::istream& is)
@ -56,6 +57,4 @@ namespace europa::io {
return header;
}
}
} // namespace europa::io

View File

@ -1,6 +1,8 @@
add_executable(europa_pack_extractor europa_pack_extractor.cpp)
target_link_libraries(europa_pack_extractor PUBLIC libeuropa)
target_link_libraries(europa_pack_extractor PUBLIC libeuropa
indicators::indicators
)
set_target_properties(europa_pack_extractor PROPERTIES
CXX_STANDARD 20

View File

@ -10,6 +10,8 @@
#include <filesystem>
#include <fstream>
#include <indicators/cursor_control.hpp>
#include <indicators/progress_bar.hpp>
#include <iostream>
namespace fs = std::filesystem;
@ -38,6 +40,19 @@ int main(int argc, char** argv) {
return 1;
}
indicators::ProgressBar progress {
indicators::option::BarWidth { 50 },
indicators::option::ForegroundColor { indicators::Color::green },
indicators::option::MaxProgress { reader.GetFiles().size() },
indicators::option::ShowPercentage { true },
indicators::option::ShowElapsedTime { true },
indicators::option::ShowRemainingTime { true },
indicators::option::PrefixText { "Extracting archive " }
};
indicators::show_console_cursor(false);
for(auto& [filename, file] : reader.GetFiles()) {
auto nameCopy = filename;
@ -50,11 +65,15 @@ int main(int argc, char** argv) {
}
#endif
progress.set_option(indicators::option::PostfixText { filename });
auto outpath = (baseDirectory / nameCopy);
if(!fs::exists(outpath.parent_path()))
fs::create_directories(outpath.parent_path());
reader.ReadFile(filename);
std::ofstream ofs(outpath.string(), std::ofstream::binary);
if(!ofs) {
@ -63,10 +82,9 @@ int main(int argc, char** argv) {
}
ofs.write(reinterpret_cast<const char*>(file.GetData().data()), static_cast<std::streampos>(file.GetTOCEntry().size));
ofs.close();
std::cout << "Wrote \"" << outpath.string() << "\" to disk.\n";
progress.tick();
}
indicators::show_console_cursor(true);
return 0;
}

View File

@ -11,32 +11,32 @@
#include <europa/io/PakReader.h>
#include <europa/io/PakWriter.h>
#include <iostream>
#include <fstream>
#include <iostream>
int main(int argc, char** argv) {
std::ifstream ifs(argv[1], std::ifstream::binary);
std::ofstream ofs("new_archive.pak", std::ofstream::binary);
std::ifstream ifs(argv[1], std::ifstream::binary);
std::ofstream ofs("new_archive.pak", std::ofstream::binary);
europa::io::PakWriter writer;
europa::io::PakWriter writer;
writer.Init();
writer.Init(europa::structs::PakVersion::Ver2);
// Read pak data and vomit it into the writer.
// This will temporarily consume 2x the memory (so about 240mb for the biggest paks I've seen),
// but the writer will contain the first copy,
// until it's cleared.
{
europa::io::PakReader reader(ifs);
reader.ReadData();
// Read pak data and vomit it into the writer.
// This will temporarily consume 2x the memory (so about 240mb for the biggest paks I've seen),
// but the writer will contain the first copy,
// until it's cleared.
{
europa::io::PakReader reader(ifs);
reader.ReadData();
for (auto &[filename, file]: reader.GetFiles()) {
writer.AddFile(filename, file);
}
}
for(auto& [filename, file] : reader.GetFiles()) {
writer.AddFile(filename, file);
}
}
writer.Write(ofs);
writer.Write(ofs);
std::cout << "Wrote regurgitated archive to new.pak!\n";
return 0;
std::cout << "Wrote regurgitated archive to new.pak!\n";
return 0;
}