diff options
author | Amin Hassani <ahassani@google.com> | 2018-02-16 18:01:50 +0000 |
---|---|---|
committer | android-build-merger <android-build-merger@google.com> | 2018-02-16 18:01:50 +0000 |
commit | 4758cebeb47746c632e4f28d6ec6223fd13d65bf (patch) | |
tree | ea9645c305d87ee19f0625d25fd701bf1ced917b | |
parent | da44b95cb9ebbdd5de31f9a48cdf3bc8a8d44185 (diff) | |
parent | 5135f0dc3c086b44d1af84513a81a2d3df6486a9 (diff) | |
download | platform_external_puffin-4758cebeb47746c632e4f28d6ec6223fd13d65bf.tar.gz platform_external_puffin-4758cebeb47746c632e4f28d6ec6223fd13d65bf.tar.bz2 platform_external_puffin-4758cebeb47746c632e4f28d6ec6223fd13d65bf.zip |
Parse gzip stream am: 4a212ed06e
am: 5135f0dc3c
Change-Id: I05296f73a3992857de11877fb27510e676d9962e
-rw-r--r-- | src/include/puffin/utils.h | 5 | ||||
-rw-r--r-- | src/main.cc | 6 | ||||
-rw-r--r-- | src/utils.cc | 72 | ||||
-rw-r--r-- | src/utils_unittest.cc | 45 |
4 files changed, 127 insertions, 1 deletions
diff --git a/src/include/puffin/utils.h b/src/include/puffin/utils.h index d5f1832..4ff9c16 100644 --- a/src/include/puffin/utils.h +++ b/src/include/puffin/utils.h @@ -42,6 +42,11 @@ bool LocateDeflatesInZlibBlocks(const std::string& file_path, const std::vector<ByteExtent>& zlibs, std::vector<BitExtent>* deflates); +// Searches for deflate locations in a gzip file. The results are +// saved in |deflate_blocks|. +bool LocateDeflatesInGzip(const Buffer& data, + std::vector<ByteExtent>* deflate_blocks); + // Search for the deflates in a zip archive, and put the result in // |deflate_blocks|. bool LocateDeflatesInZipArchive(const Buffer& data, diff --git a/src/main.cc b/src/main.cc index 06d76a5..43ed7e9 100644 --- a/src/main.cc +++ b/src/main.cc @@ -143,7 +143,11 @@ int main(int argc, char** argv) { src_stream, zlibs, &src_deflates_bit), -1); } else if (FLAGS_src_file_type == "gzip") { - // TODO(ahassani): Implement gzip format parsing + puffin::Buffer src_data(stream_size); + TEST_AND_RETURN_VALUE(src_stream->Read(src_data.data(), src_data.size()), + -1); + TEST_AND_RETURN_VALUE( + puffin::LocateDeflatesInGzip(src_data, &src_deflates_byte), -1); } else if (FLAGS_src_file_type == "zip") { puffin::Buffer src_data(stream_size); TEST_AND_RETURN_VALUE(src_stream->Read(src_data.data(), src_data.size()), diff --git a/src/utils.cc b/src/utils.cc index 35a39e9..ba10570 100644 --- a/src/utils.cc +++ b/src/utils.cc @@ -176,6 +176,78 @@ bool LocateDeflatesInZlibBlocks(const string& file_path, return LocateDeflatesInZlibBlocks(src, zlibs, deflates); } +// For more information about gzip format, refer to RFC 1952 located at: +// https://www.ietf.org/rfc/rfc1952.txt +bool LocateDeflatesInGzip(const Buffer& data, + vector<ByteExtent>* deflate_blocks) { + size_t member_start = 0; + while (member_start < data.size()) { + // Each member entry has the following format + // 0 1 0x1F + // 1 1 0x8B + // 2 1 compression method (8 denotes deflate) + // 3 1 set of flags + // 4 4 
modification time + // 8 1 extra flags + // 9 1 operating system + TEST_AND_RETURN_FALSE(member_start + 10 <= data.size()); + TEST_AND_RETURN_FALSE(data[member_start + 0] == 0x1F); + TEST_AND_RETURN_FALSE(data[member_start + 1] == 0x8B); + TEST_AND_RETURN_FALSE(data[member_start + 2] == 8); + + size_t offset = member_start + 10; + int flag = data[member_start + 3]; + // Extra field + if (flag & 4) { + TEST_AND_RETURN_FALSE(offset + 2 <= data.size()); + uint16_t extra_length = data[offset++]; + extra_length |= static_cast<uint16_t>(data[offset++]) << 8; + TEST_AND_RETURN_FALSE(offset + extra_length <= data.size()); + offset += extra_length; + } + // File name field + if (flag & 8) { + while (true) { + TEST_AND_RETURN_FALSE(offset + 1 <= data.size()); + if (data[offset++] == 0) { + break; + } + } + } + // File comment field + if (flag & 16) { + while (true) { + TEST_AND_RETURN_FALSE(offset + 1 <= data.size()); + if (data[offset++] == 0) { + break; + } + } + } + // CRC16 field + if (flag & 2) { + offset += 2; + } + + size_t compressed_size, uncompressed_size; + TEST_AND_RETURN_FALSE(CalculateSizeOfDeflateBlock( + data, offset, &compressed_size, &uncompressed_size)); + TEST_AND_RETURN_FALSE(offset + compressed_size <= data.size()); + deflate_blocks->push_back(ByteExtent(offset, compressed_size)); + offset += compressed_size; + + // Ignore CRC32; + TEST_AND_RETURN_FALSE(offset + 8 <= data.size()); + offset += 4; + uint32_t u_size = 0; + for (size_t i = 0; i < 4; i++) { + u_size |= static_cast<uint32_t>(data[offset++]) << (i * 8); + } + TEST_AND_RETURN_FALSE(uncompressed_size % (1 << 31) == u_size); + member_start = offset; + } + return true; +} + // For more information about the zip format, refer to // https://support.pkware.com/display/PKZIP/APPNOTE bool LocateDeflatesInZipArchive(const Buffer& data, diff --git a/src/utils_unittest.cc b/src/utils_unittest.cc index 10a4c0d..55f2fa1 100644 --- a/src/utils_unittest.cc +++ b/src/utils_unittest.cc @@ -47,6 +47,32 @@ const 
uint8_t kZipEntryWithDataDescriptor[] = { 0x2e, 0x00, 0xb4, 0xa0, 0xf2, 0x36, 0x06, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00}; +// echo "0123456789" > test1.txt && echo "9876543210" > test2.txt && +// gzip -kf test1.txt test2.txt && cat test1.txt.gz test2.txt.gz | +// hexdump -v -e '12/1 "0x%02x, " "\n"' +const uint8_t kGzipEntryWithMultipleMembers[] = { + 0x1f, 0x8b, 0x08, 0x08, 0x77, 0xd5, 0x84, 0x5a, 0x00, 0x03, 0x74, 0x65, + 0x73, 0x74, 0x31, 0x2e, 0x74, 0x78, 0x74, 0x00, 0x33, 0x30, 0x34, 0x32, + 0x36, 0x31, 0x35, 0x33, 0xb7, 0xb0, 0xe4, 0x02, 0x00, 0xd1, 0xe5, 0x76, + 0x40, 0x0b, 0x00, 0x00, 0x00, 0x1f, 0x8b, 0x08, 0x08, 0x77, 0xd5, 0x84, + 0x5a, 0x00, 0x03, 0x74, 0x65, 0x73, 0x74, 0x32, 0x2e, 0x74, 0x78, 0x74, + 0x00, 0xb3, 0xb4, 0x30, 0x37, 0x33, 0x35, 0x31, 0x36, 0x32, 0x34, 0xe0, + 0x02, 0x00, 0x20, 0x9c, 0x5f, 0x89, 0x0b, 0x00, 0x00, 0x00}; + +// echo "0123456789" > test1.txt && gzip -kf test1.txt && cat test1.txt.gz | +// hexdump -v -e '12/1 "0x%02x, " "\n"' +// And manually insert extra field with two byte length (10) followed by: +// echo "extrafield" | hexdump -v -e '12/1 "0x%02x, " "\n"' +// Then change the fourth byte of array to 0x0c to enable the extra field. 
+const uint8_t kGzipEntryWithExtraField[] = { + 0x1f, 0x8b, 0x08, 0x0c, 0xcf, 0x0e, 0x86, 0x5a, 0x00, 0x03, + // Extra field begin + 0x0A, 0x00, 0x65, 0x78, 0x74, 0x72, 0x61, 0x66, 0x69, 0x65, 0x6c, 0x64, + // Extra field end + 0x74, 0x65, 0x73, 0x74, 0x31, 0x2e, 0x74, 0x78, 0x74, 0x00, 0x33, 0x30, + 0x34, 0x32, 0x36, 0x31, 0x35, 0x33, 0xb7, 0xb0, 0xe4, 0x02, 0x00, 0xd1, + 0xe5, 0x76, 0x40, 0x0b, 0x00, 0x00, 0x00}; + void FindDeflatesInZlibBlocks(const Buffer& src, const vector<ByteExtent>& zlibs, const vector<BitExtent>& deflates) { @@ -125,4 +151,23 @@ TEST(UtilsTest, LocateDeflatesInZipArchiveErrorChecks) { EXPECT_EQ(static_cast<size_t>(0), deflates_incomplete.size()); } +TEST(UtilsTest, LocateDeflatesInGzip) { + Buffer gzip_data(kGzipEntryWithMultipleMembers, + std::end(kGzipEntryWithMultipleMembers)); + vector<ByteExtent> deflates; + EXPECT_TRUE(LocateDeflatesInGzip(gzip_data, &deflates)); + EXPECT_EQ(static_cast<size_t>(2), deflates.size()); + EXPECT_EQ(ByteExtent(20, 13), deflates[0]); + EXPECT_EQ(ByteExtent(61, 13), deflates[1]); +} + +TEST(UtilsTest, LocateDeflatesInGzipWithExtraField) { + Buffer gzip_data(kGzipEntryWithExtraField, + std::end(kGzipEntryWithExtraField)); + vector<ByteExtent> deflates; + EXPECT_TRUE(LocateDeflatesInGzip(gzip_data, &deflates)); + EXPECT_EQ(static_cast<size_t>(1), deflates.size()); + EXPECT_EQ(ByteExtent(32, 13), deflates[0]); +} + } // namespace puffin |