*: Remove map flattening copy

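Lowers peak memory use: PakWriter::Write no longer copies every file out of the unordered_map into a temporary vector before sorting (callers now hand over the vector directly). This may also noticeably improve performance. Yay!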
This commit is contained in:
Lily Tsuru 2022-09-22 19:15:06 -05:00
parent cf9d84cb24
commit e698d1da3b
3 changed files with 15 additions and 61 deletions

View File

@@ -13,7 +13,7 @@
#include <europa/io/PakProgressReportSink.hpp> #include <europa/io/PakProgressReportSink.hpp>
#include <iosfwd> #include <iosfwd>
#include <string> #include <string>
#include <unordered_map> #include <utility>
namespace europa::io { namespace europa::io {
@@ -21,21 +21,19 @@ namespace europa::io {
* Writer for package files. * Writer for package files.
*/ */
struct PakWriter { struct PakWriter {
using FlattenedType = std::pair<std::string, PakFile>;
void Init(structs::PakHeader::Version version); void Init(structs::PakHeader::Version version);
// TODO: accessor for header const structs::PakHeader& GetHeader() const { return pakHeader; }
// use flattened vector format anyhow (less allocs, higher perf)
std::unordered_map<std::string, PakFile>& GetFiles();
/** /**
* Write the resulting archive to the given output stream. * Write the resulting archive to the given output stream.
*/ */
void Write(std::ostream& os, PakProgressReportSink& sink); void Write(std::ostream& os, std::vector<FlattenedType>&& vec, PakProgressReportSink& sink);
private: private:
structs::PakHeader pakHeader {}; structs::PakHeader pakHeader {};
std::unordered_map<std::string, PakFile> archiveFiles;
}; };
} // namespace europa::io } // namespace europa::io

View File

@@ -20,10 +20,6 @@ namespace europa::io {
pakHeader.Init(version); pakHeader.Init(version);
} }
std::unordered_map<std::string, PakFile>& PakWriter::GetFiles() {
return archiveFiles;
}
// move to a util/ header // move to a util/ header
template<class T> template<class T>
@@ -31,53 +27,17 @@ namespace europa::io {
return (-value) & alignment - 1; return (-value) & alignment - 1;
} }
/**
* Class functor for flattening a map.
*/
template<class Map>
struct MapFlatten {
/**
* Storage type to store one key -> value pair.
*/
using FlattenedType = std::pair<typename Map::key_type, typename Map::mapped_type>;
using ArrayType = std::vector<FlattenedType>;
constexpr explicit MapFlatten(Map& mapToFlatten)
: map(mapToFlatten) {
}
ArrayType operator()() const {
ArrayType arr;
arr.reserve(map.size());
for(auto& [ key, value ] : map)
arr.emplace_back(std::make_pair(key, value));
return arr;
}
private:
Map& map;
};
// TODO: // TODO:
// - Composable operations (WriteTOC, WriteFile, WriteHeader) // - Composable operations (WriteTOC, WriteFile, WriteHeader)
// - Add IProgressReportSink reporting
void PakWriter::Write(std::ostream& os, PakProgressReportSink& sink) { void PakWriter::Write(std::ostream& os, std::vector<FlattenedType>&& vec, PakProgressReportSink& sink) {
// This essentially converts our map we use for faster insertion std::vector<FlattenedType> sortedFiles = std::move(vec);
// into a flat array we can sort easily.
//
// NB: this copies by value, so during this function we use 2x the ram.
// doesn't seem to be a big problem though.
auto sortedFiles = MapFlatten{archiveFiles}();
// Sort the flattened array by file size, the biggest first. // Sort the flattened array by file size, the biggest first.
// Doesn't seem to help (neither does name length) // Doesn't seem to help (neither does name length)
std::ranges::sort(sortedFiles, std::greater{}, [](const decltype(MapFlatten{archiveFiles})::FlattenedType& elem) { std::ranges::sort(sortedFiles, std::greater{}, [](const FlattenedType& elem) {
return std::get<1>(elem).GetTOCEntry().size; return elem.second.GetTOCEntry().size;
}); });
// Leave space for the header // Leave space for the header
@@ -91,9 +51,6 @@ namespace europa::io {
// Write file data // Write file data
for(auto& [filename, file] : sortedFiles) { for(auto& [filename, file] : sortedFiles) {
//std::cout << "PakWriteFile \"" << filename << "\"\n Size " << file.GetTOCEntry().size << "\n";
sink.OnEvent({ sink.OnEvent({
PakProgressReportSink::FileEvent::Type::FileBeginWrite, PakProgressReportSink::FileEvent::Type::FileBeginWrite,
filename filename
@@ -105,7 +62,6 @@ namespace europa::io {
// Flush on file writing // Flush on file writing
os.flush(); os.flush();
sink.OnEvent({ sink.OnEvent({
PakProgressReportSink::FileEvent::Type::FileEndWrite, PakProgressReportSink::FileEvent::Type::FileEndWrite,
filename filename
@@ -114,7 +70,6 @@ namespace europa::io {
pakHeader.tocOffset = os.tellp(); pakHeader.tocOffset = os.tellp();
sink.OnEvent({ sink.OnEvent({
PakProgressReportSink::PakEvent::Type::WritingToc PakProgressReportSink::PakEvent::Type::WritingToc
}); });
@@ -139,7 +94,7 @@ namespace europa::io {
// Fill out the rest of the header. // Fill out the rest of the header.
pakHeader.fileCount = archiveFiles.size(); pakHeader.fileCount = sortedFiles.size();
pakHeader.tocSize = static_cast<std::uint32_t>(os.tellp()) - (pakHeader.tocOffset - 1); pakHeader.tocSize = static_cast<std::uint32_t>(os.tellp()) - (pakHeader.tocOffset - 1);

View File

@@ -108,6 +108,9 @@ namespace eupak::tasks {
// TODO: use time to write in the header // TODO: use time to write in the header
// also: is there any point to verbosity? could add archive written size ig // also: is there any point to verbosity? could add archive written size ig
std::vector<europa::io::PakWriter::FlattenedType> files;
files.reserve(fileCount);
for(auto& ent : fs::recursive_directory_iterator(args.inputDirectory)) { for(auto& ent : fs::recursive_directory_iterator(args.inputDirectory)) {
if(ent.is_directory()) if(ent.is_directory())
continue; continue;
@@ -144,8 +147,7 @@ namespace eupak::tasks {
file.GetTOCEntry().creationUnixTime = static_cast<std::uint32_t>(lastModified.time_since_epoch().count()); file.GetTOCEntry().creationUnixTime = static_cast<std::uint32_t>(lastModified.time_since_epoch().count());
writer.GetFiles()[relativePathName] = std::move(file); files.emplace_back(std::make_pair(relativePathName, std::move(file)));
progress.tick(); progress.tick();
currFile++; currFile++;
} }
@@ -159,10 +161,9 @@ namespace eupak::tasks {
return 1; return 1;
} }
CreateArchiveReportSink sink(fileCount); CreateArchiveReportSink sink(fileCount);
writer.Write(ofs, sink); writer.Write(ofs, std::move(files), sink);
return 0; return 0;
} }