From 4c7a283e55b635417b71d4de2a34c8528a207967 Mon Sep 17 00:00:00 2001 From: Andreas Hansson Date: Thu, 30 May 2013 12:53:53 -0400 Subject: base: Avoid size limitation on protobuf coded streams This patch changes how the streams are created to avoid the size limitation on the coded streams. As we only read/write a single message at a time, there is never any message larger than a few bytes. However, the coded stream eventually complains that its internal counter reaches 64+ MByte if the total file size exceeds this value. Based on suggestions in the protobuf discussion forums, the coded stream is now created for every message that is read/written. The result is that the internal byte count never goes above tens of bytes, and we can read/write any size file that the underlying file I/O can handle. --- src/proto/protoio.cc | 62 ++++++++++++++++++++++++++++++---------------------- src/proto/protoio.hh | 12 +++++----- 2 files changed, 42 insertions(+), 32 deletions(-) (limited to 'src/proto') diff --git a/src/proto/protoio.cc b/src/proto/protoio.cc index 76364794f..70e3fcb44 100644 --- a/src/proto/protoio.cc +++ b/src/proto/protoio.cc @@ -45,7 +45,7 @@ using namespace google::protobuf; ProtoOutputStream::ProtoOutputStream(const string& filename) : fileStream(filename.c_str(), ios::out | ios::binary | ios::trunc), - zeroCopyStream(NULL), gzipStream(NULL), codedStream(NULL) + wrappedFileStream(NULL), gzipStream(NULL), zeroCopyStream(NULL) { if (!fileStream.good()) panic("Could not open %s for writing\n", filename); @@ -53,17 +53,18 @@ ProtoOutputStream::ProtoOutputStream(const string& filename) : // Wrap the output file in a zero copy stream, that in turn is // wrapped in a gzip stream if the filename ends with .gz. 
The // latter stream is in turn wrapped in a coded stream - zeroCopyStream = new io::OstreamOutputStream(&fileStream); + wrappedFileStream = new io::OstreamOutputStream(&fileStream); if (filename.find_last_of('.') != string::npos && filename.substr(filename.find_last_of('.') + 1) == "gz") { - gzipStream = new io::GzipOutputStream(zeroCopyStream); - codedStream = new io::CodedOutputStream(gzipStream); + gzipStream = new io::GzipOutputStream(wrappedFileStream); + zeroCopyStream = gzipStream; } else { - codedStream = new io::CodedOutputStream(zeroCopyStream); + zeroCopyStream = wrappedFileStream; } // Write the magic number to the file - codedStream->WriteLittleEndian32(magicNumber); + io::CodedOutputStream codedStream(zeroCopyStream); + codedStream.WriteLittleEndian32(magicNumber); // Note that each type of stream (packet, instruction etc) should // add its own header and perform the appropriate checks @@ -71,29 +72,32 @@ ProtoOutputStream::ProtoOutputStream(const string& filename) : ProtoOutputStream::~ProtoOutputStream() { - delete codedStream; // As the compression is optional, see if the stream exists if (gzipStream != NULL) delete gzipStream; - delete zeroCopyStream; + delete wrappedFileStream; fileStream.close(); } void ProtoOutputStream::write(const Message& msg) { + // Due to the byte limit of the coded stream we create it for + // every single mesage (based on forum discussions around the size + // limitation) + io::CodedOutputStream codedStream(zeroCopyStream); + // Write the size of the message to the stream - codedStream->WriteVarint32(msg.ByteSize()); + codedStream.WriteVarint32(msg.ByteSize()); // Write the message itself to the stream - if (!msg.SerializeToCodedStream(codedStream)) - panic("Unable to write message to coded stream\n"); + msg.SerializeWithCachedSizes(&codedStream); } ProtoInputStream::ProtoInputStream(const string& filename) : fileStream(filename.c_str(), ios::in | ios::binary), fileName(filename), useGzip(false), - zeroCopyStream(NULL), 
gzipStream(NULL), codedStream(NULL) + wrappedFileStream(NULL), gzipStream(NULL), zeroCopyStream(NULL) { if (!fileStream.good()) panic("Could not open %s for reading\n", filename); @@ -114,22 +118,23 @@ void ProtoInputStream::createStreams() { // All streams should be NULL at this point - assert(zeroCopyStream == NULL && gzipStream == NULL && - codedStream == NULL); + assert(wrappedFileStream == NULL && gzipStream == NULL && + zeroCopyStream == NULL); // Wrap the input file in a zero copy stream, that in turn is // wrapped in a gzip stream if the filename ends with .gz. The // latter stream is in turn wrapped in a coded stream - zeroCopyStream = new io::IstreamInputStream(&fileStream); + wrappedFileStream = new io::IstreamInputStream(&fileStream); if (useGzip) { - gzipStream = new io::GzipInputStream(zeroCopyStream); - codedStream = new io::CodedInputStream(gzipStream); + gzipStream = new io::GzipInputStream(wrappedFileStream); + zeroCopyStream = gzipStream; } else { - codedStream = new io::CodedInputStream(zeroCopyStream); + zeroCopyStream = wrappedFileStream; } uint32_t magic_check; - if (!codedStream->ReadLittleEndian32(&magic_check) || + io::CodedInputStream codedStream(zeroCopyStream); + if (!codedStream.ReadLittleEndian32(&magic_check) || magic_check != magicNumber) panic("Input file %s is not a valid gem5 proto format.\n", fileName); @@ -138,14 +143,14 @@ ProtoInputStream::createStreams() void ProtoInputStream::destroyStreams() { - delete codedStream; - codedStream = NULL; // As the compression is optional, see if the stream exists if (gzipStream != NULL) { delete gzipStream; gzipStream = NULL; } - delete zeroCopyStream; + delete wrappedFileStream; + wrappedFileStream = NULL; + zeroCopyStream = NULL; } @@ -173,10 +178,15 @@ ProtoInputStream::read(Message& msg) // Read a message from the stream by getting the size, using it as // a limit when parsing the message, then popping the limit again uint32_t size; - if (codedStream->ReadVarint32(&size)) { - 
io::CodedInputStream::Limit limit = codedStream->PushLimit(size); - if (msg.ParseFromCodedStream(codedStream)) { - codedStream->PopLimit(limit); + + // Due to the byte limit of the coded stream we create it for + // every single mesage (based on forum discussions around the size + // limitation) + io::CodedInputStream codedStream(zeroCopyStream); + if (codedStream.ReadVarint32(&size)) { + io::CodedInputStream::Limit limit = codedStream.PushLimit(size); + if (msg.ParseFromCodedStream(&codedStream)) { + codedStream.PopLimit(limit); // All went well, the message is parsed and the limit is // popped again return true; diff --git a/src/proto/protoio.hh b/src/proto/protoio.hh index d5c6a4bac..27bb336c4 100644 --- a/src/proto/protoio.hh +++ b/src/proto/protoio.hh @@ -122,13 +122,13 @@ class ProtoOutputStream : public ProtoStream std::ofstream fileStream; /// Zero Copy stream wrapping the STL output stream - google::protobuf::io::OstreamOutputStream* zeroCopyStream; + google::protobuf::io::OstreamOutputStream* wrappedFileStream; /// Optional Gzip stream to wrap the Zero Copy stream google::protobuf::io::GzipOutputStream* gzipStream; - /// Top-level coded stream that messages are written to - google::protobuf::io::CodedOutputStream* codedStream; + /// Top-level zero-copy stream, either with compression or not + google::protobuf::io::ZeroCopyOutputStream* zeroCopyStream; }; @@ -193,13 +193,13 @@ class ProtoInputStream : public ProtoStream bool useGzip; /// Zero Copy stream wrapping the STL input stream - google::protobuf::io::IstreamInputStream* zeroCopyStream; + google::protobuf::io::IstreamInputStream* wrappedFileStream; /// Optional Gzip stream to wrap the Zero Copy stream google::protobuf::io::GzipInputStream* gzipStream; - /// Top-level coded stream that messages are read from - google::protobuf::io::CodedInputStream* codedStream; + /// Top-level zero-copy stream, either with compression or not + google::protobuf::io::ZeroCopyInputStream* zeroCopyStream; }; -- cgit 
v1.2.3