From 55fd293dfe89c7b1f856bc60cac643ee72d5ef22 Mon Sep 17 00:00:00 2001
From: Elliott Hughes
Date: Sun, 28 May 2017 22:59:04 -0700
Subject: Add libziparchive-based unzip.

Bug: N/A
Test: new toybox unzip.test
Change-Id: I00be388578be9a0a0390a9e2ecfac664c6ab39ca
---
 include/ziparchive/zip_archive.h   |   9 +
 libziparchive/Android.bp           |  33 +++-
 libziparchive/unzip.cpp            | 380 +++++++++++++++++++++++++++++++++++++
 libziparchive/zip_archive.cc       |  22 +++
 libziparchive/zip_archive_common.h |   4 +-
 libziparchive/zip_writer_test.cc   |  17 +-
 6 files changed, 442 insertions(+), 23 deletions(-)
 create mode 100644 libziparchive/unzip.cpp

diff --git a/include/ziparchive/zip_archive.h b/include/ziparchive/zip_archive.h
index 31fc2dfc9..ece86931a 100644
--- a/include/ziparchive/zip_archive.h
+++ b/include/ziparchive/zip_archive.h
@@ -71,8 +71,17 @@ struct ZipEntry {
   // Modification time. The zipfile format specifies
   // that the first two little endian bytes contain the time
   // and the last two little endian bytes contain the date.
+  // See `GetModificationTime`.
+  // TODO: should be overridden by extra time field, if present.
   uint32_t mod_time;
 
+  // Returns `mod_time` as a broken-down struct tm.
+  struct tm GetModificationTime() const;
+
+  // Suggested Unix mode for this entry, from the zip archive if created on
+  // Unix, or a default otherwise.
+  mode_t unix_mode;
+
   // 1 if this entry contains a data descriptor segment, 0
   // otherwise.
   uint8_t has_data_descriptor;
diff --git a/libziparchive/Android.bp b/libziparchive/Android.bp
index 287a99c60..1084d59e5 100644
--- a/libziparchive/Android.bp
+++ b/libziparchive/Android.bp
@@ -52,17 +52,25 @@ cc_defaults {
     ],
 }
 
-
 cc_library {
     name: "libziparchive",
     host_supported: true,
-    vendor_available:true,
+    vendor_available: true,
 
-    defaults: ["libziparchive_defaults", "libziparchive_flags"],
-    shared_libs: ["liblog", "libbase"],
+    defaults: [
+        "libziparchive_defaults",
+        "libziparchive_flags",
+    ],
+    shared_libs: [
+        "liblog",
+        "libbase",
+    ],
     target: {
         android: {
-            shared_libs: ["libz", "libutils"],
+            shared_libs: [
+                "libz",
+                "libutils",
+            ],
         },
         host: {
             static_libs: ["libutils"],
@@ -88,7 +96,10 @@ cc_library {
     name: "libziparchive-host",
     host_supported: true,
     device_supported: false,
-    defaults: ["libziparchive_defaults", "libziparchive_flags"],
+    defaults: [
+        "libziparchive_defaults",
+        "libziparchive_flags",
+    ],
     shared_libs: ["libz-host"],
     static_libs: ["libutils"],
 }
@@ -150,3 +161,13 @@ cc_benchmark {
         },
     },
 }
+
+cc_binary {
+    name: "unzip",
+    defaults: ["libziparchive_flags"],
+    srcs: ["unzip.cpp"],
+    shared_libs: [
+        "libbase",
+        "libziparchive",
+    ],
+}
diff --git a/libziparchive/unzip.cpp b/libziparchive/unzip.cpp
new file mode 100644
index 000000000..6756007af
--- /dev/null
+++ b/libziparchive/unzip.cpp
@@ -0,0 +1,380 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <memory>
+#include <set>
+#include <string>
+
+#include <android-base/file.h>
+#include <android-base/strings.h>
+#include <ziparchive/zip_archive.h>
+
+// What to do when the file we're about to extract already exists.
+enum OverwriteMode {
+  kAlways,
+  kNever,
+  kPrompt,
+};
+
+static OverwriteMode overwrite_mode = kPrompt;
+static const char* flag_d = nullptr;
+static bool flag_l = false;
+static bool flag_p = false;
+static bool flag_q = false;
+static bool flag_v = false;
+static const char* archive_name = nullptr;
+static std::set<std::string> includes;
+static std::set<std::string> excludes;
+static uint64_t total_uncompressed_length = 0;
+static uint64_t total_compressed_length = 0;
+static size_t file_count = 0;
+
+// Returns true if the entry `name` should be skipped: either it was excluded
+// with -x, or an explicit include list was given and `name` isn't on it.
+static bool Filter(const std::string& name) {
+  if (!excludes.empty() && excludes.find(name) != excludes.end()) return true;
+  if (!includes.empty() && includes.find(name) == includes.end()) return true;
+  return false;
+}
+
+// Creates `path` and any missing parent directories. Returns false, with errno
+// set by the failing mkdir, if a directory could not be created.
+static bool MakeDirectoryHierarchy(const std::string& path) {
+  // stat rather than lstat because a symbolic link to a directory is fine too.
+  struct stat sb;
+  if (stat(path.c_str(), &sb) != -1 && S_ISDIR(sb.st_mode)) return true;
+
+  // Ensure the parent directories exist first.
+  if (!MakeDirectoryHierarchy(android::base::Dirname(path))) return false;
+
+  // Then try to create this directory.
+  return (mkdir(path.c_str(), 0777) != -1);
+}
+
+// Returns the percentage by which `compressed` is smaller than `uncompressed`,
+// or 0 for an empty entry (avoiding a division by zero).
+static int CompressionRatio(int64_t uncompressed, int64_t compressed) {
+  if (uncompressed == 0) return 0;
+  return static_cast<int>((100LL * (uncompressed - compressed)) / uncompressed);
+}
+
+// Prints the archive name (unless -q) and, when listing, the column headings.
+static void MaybeShowHeader() {
+  if (!flag_q) printf("Archive: %s\n", archive_name);
+  if (flag_v) {
+    printf(
+        " Length Method Size Cmpr Date Time CRC-32 Name\n"
+        "-------- ------ ------- ---- ---------- ----- -------- ----\n");
+  } else if (flag_l) {
+    printf(
+        " Length Date Time Name\n"
+        "--------- ---------- ----- ----\n");
+  }
+}
+
+// Prints the totals line(s) when listing; prints nothing when extracting.
+static void MaybeShowFooter() {
+  if (flag_v) {
+    // The totals are uint64_t, so they need the unsigned PRIu64 conversions.
+    printf(
+        "-------- ------- --- -------\n"
+        "%8" PRIu64 " %8" PRIu64 " %3d%% %zu file%s\n",
+        total_uncompressed_length, total_compressed_length,
+        CompressionRatio(total_uncompressed_length, total_compressed_length), file_count,
+        (file_count == 1) ? "" : "s");
+  } else if (flag_l) {
+    printf(
+        "--------- -------\n"
+        "%9" PRIu64 " %zu file%s\n",
+        total_uncompressed_length, file_count, (file_count == 1) ? "" : "s");
+  }
+}
+
+// Asks the user whether to overwrite `dst`. Returns true to overwrite. The
+// answers [A]ll and [N]one also update `overwrite_mode` so that we don't ask
+// again.
+static bool PromptOverwrite(const std::string& dst) {
+  // TODO: [r]ename not implemented because it doesn't seem useful.
+  printf("replace %s? [y]es, [n]o, [A]ll, [N]one: ", dst.c_str());
+  fflush(stdout);
+  while (true) {
+    char* line = nullptr;
+    size_t n = 0;
+    if (getline(&line, &n, stdin) == -1) {
+      // getline can allocate a buffer even when it fails.
+      free(line);
+      // A zero status makes error() print the message and return instead of
+      // exiting, so we really do fall back to [N]one as the message says.
+      error(0, 0, "(EOF/read error; assuming [N]one...)");
+      overwrite_mode = kNever;
+      return false;
+    }
+    // A successful getline always returns at least one character, so the
+    // first byte is valid. Any unrecognized answer just reads another line.
+    char cmd = line[0];
+    free(line);
+    switch (cmd) {
+      case 'y':
+        return true;
+      case 'n':
+        return false;
+      case 'A':
+        overwrite_mode = kAlways;
+        return true;
+      case 'N':
+        overwrite_mode = kNever;
+        return false;
+    }
+  }
+}
+
+// Writes the uncompressed contents of `entry` to stdout (for -p).
+static void ExtractToPipe(ZipArchiveHandle zah, ZipEntry& entry, const std::string& name) {
+  // We need to extract to memory because ExtractEntryToFile insists on
+  // being able to seek and truncate, and you can't do that with stdout.
+  std::unique_ptr<uint8_t[]> buffer(new uint8_t[entry.uncompressed_length]);
+  int err = ExtractToMemory(zah, &entry, buffer.get(), entry.uncompressed_length);
+  if (err < 0) {
+    error(1, 0, "failed to extract %s: %s", name.c_str(), ErrorCodeString(err));
+  }
+  if (!android::base::WriteFully(1, buffer.get(), entry.uncompressed_length)) {
+    error(1, errno, "failed to write %s to stdout", name.c_str());
+  }
+}
+
+// Returns true if `name` is absolute or contains a ".." path component, and so
+// could refer to something outside the directory we're extracting into.
+static bool IsBadFilename(const std::string& name) {
+  return android::base::StartsWith(name, "/") || android::base::StartsWith(name, "../") ||
+         name.find("/../") != std::string::npos || name == ".." ||
+         android::base::EndsWith(name, "/..");
+}
+
+// Extracts `entry` to the file or directory `name`, relative to the current
+// directory (main has already changed into any -d directory).
+static void ExtractOne(ZipArchiveHandle zah, ZipEntry& entry, const std::string& name) {
+  // Bad filename?
+  if (IsBadFilename(name)) error(1, 0, "bad filename %s", name.c_str());
+
+  // Where are we actually extracting to (for human-readable output)?
+  std::string dst;
+  if (flag_d) {
+    dst = flag_d;
+    if (!android::base::EndsWith(dst, "/")) dst += '/';
+  }
+  dst += name;
+
+  // Ensure the directory hierarchy exists.
+  if (!MakeDirectoryHierarchy(android::base::Dirname(name))) {
+    error(1, errno, "couldn't create directory hierarchy for %s", dst.c_str());
+  }
+
+  // An entry in a zip file can just be a directory itself.
+  if (android::base::EndsWith(name, "/")) {
+    if (mkdir(name.c_str(), entry.unix_mode) == -1) {
+      // If the directory already exists, that's fine.
+      if (errno == EEXIST) {
+        struct stat sb;
+        if (stat(name.c_str(), &sb) != -1 && S_ISDIR(sb.st_mode)) return;
+      }
+      error(1, errno, "couldn't extract directory %s", dst.c_str());
+    }
+    return;
+  }
+
+  // Create the file.
+  int fd = open(name.c_str(), O_CREAT | O_WRONLY | O_CLOEXEC | O_EXCL, entry.unix_mode);
+  if (fd == -1 && errno == EEXIST) {
+    if (overwrite_mode == kNever) return;
+    if (overwrite_mode == kPrompt && !PromptOverwrite(dst)) return;
+    // Either overwrite_mode is kAlways or the user consented to this specific case.
+    fd = open(name.c_str(), O_WRONLY | O_CREAT | O_CLOEXEC | O_TRUNC, entry.unix_mode);
+  }
+  if (fd == -1) error(1, errno, "couldn't create file %s", dst.c_str());
+
+  // Actually extract into the file.
+  if (!flag_q) printf(" inflating: %s\n", dst.c_str());
+  int err = ExtractEntryToFile(zah, &entry, fd);
+  if (err < 0) error(1, 0, "failed to extract %s: %s", dst.c_str(), ErrorCodeString(err));
+  close(fd);
+}
+
+// Prints one line of -l or -v output for `entry`.
+static void ListOne(const ZipEntry& entry, const std::string& name) {
+  tm t = entry.GetModificationTime();
+  char time[32];
+  snprintf(time, sizeof(time), "%04d-%02d-%02d %02d:%02d", t.tm_year + 1900, t.tm_mon + 1,
+           t.tm_mday, t.tm_hour, t.tm_min);
+  if (flag_v) {
+    printf("%8d %s %7d %3d%% %s %08x %s\n", entry.uncompressed_length,
+           (entry.method == kCompressStored) ? "Stored" : "Defl:N", entry.compressed_length,
+           CompressionRatio(entry.uncompressed_length, entry.compressed_length), time, entry.crc32,
+           name.c_str());
+  } else {
+    printf("%9d %s %s\n", entry.uncompressed_length, time, name.c_str());
+  }
+}
+
+// Lists or extracts a single entry, depending on the flags, and adds it to the
+// running totals shown in the footer.
+static void ProcessOne(ZipArchiveHandle zah, ZipEntry& entry, const std::string& name) {
+  if (flag_l || flag_v) {
+    // -l or -lv or -lq or -v.
+    ListOne(entry, name);
+  } else {
+    // Actually extract.
+    if (flag_p) {
+      ExtractToPipe(zah, entry, name);
+    } else {
+      ExtractOne(zah, entry, name);
+    }
+  }
+  total_uncompressed_length += entry.uncompressed_length;
+  total_compressed_length += entry.compressed_length;
+  ++file_count;
+}
+
+// Lists or extracts every entry in the archive that passes the filter.
+static void ProcessAll(ZipArchiveHandle zah) {
+  MaybeShowHeader();
+
+  // libziparchive iteration order doesn't match the central directory.
+  // We could sort, but that would cost extra and wouldn't match either.
+  void* cookie;
+  int err = StartIteration(zah, &cookie, nullptr, nullptr);
+  if (err != 0) {
+    error(1, 0, "couldn't iterate %s: %s", archive_name, ErrorCodeString(err));
+  }
+
+  ZipEntry entry;
+  ZipString string;
+  while ((err = Next(cookie, &entry, &string)) >= 0) {
+    std::string name(string.name, string.name + string.name_length);
+    if (!Filter(name)) ProcessOne(zah, entry, name);
+  }
+
+  if (err < -1) error(1, 0, "failed iterating %s: %s", archive_name, ErrorCodeString(err));
+  EndIteration(cookie);
+
+  MaybeShowFooter();
+}
+
+// Shows usage and exits: the full help on stdout with success when `full`,
+// otherwise just the usage line on stderr with failure.
+static void ShowHelp(bool full) {
+  fprintf(full ? stdout : stderr, "usage: unzip [-d DIR] [-lnopqv] ZIP [FILE...] [-x FILE...]\n");
+  if (!full) exit(EXIT_FAILURE);
+
+  printf(
+      "\n"
+      "Extract FILEs from ZIP archive. Default is all files.\n"
+      "\n"
+      "-d DIR Extract into DIR\n"
+      "-l List contents (-lq excludes archive name, -lv is verbose)\n"
+      "-n Never overwrite files (default: prompt)\n"
+      "-o Always overwrite files\n"
+      "-p Pipe to stdout\n"
+      "-q Quiet\n"
+      "-v List contents verbosely\n"
+      "-x FILE Exclude files\n");
+  exit(EXIT_SUCCESS);
+}
+
+// Parses the command line, opens the archive, then lists or extracts it.
+int main(int argc, char* argv[]) {
+  static struct option opts[] = {
+      {"help", no_argument, nullptr, 'h'},
+      // getopt_long requires the array to end with an all-zero element.
+      {nullptr, 0, nullptr, 0},
+  };
+  bool saw_x = false;
+  int opt;
+  while ((opt = getopt_long(argc, argv, "-d:hlnopqvx", opts, nullptr)) != -1) {
+    switch (opt) {
+      case 'd':
+        flag_d = optarg;
+        break;
+      case 'h':
+        ShowHelp(true);
+        break;
+      case 'l':
+        flag_l = true;
+        break;
+      case 'n':
+        overwrite_mode = kNever;
+        break;
+      case 'o':
+        overwrite_mode = kAlways;
+        break;
+      case 'p':
+        flag_p = flag_q = true;
+        break;
+      case 'q':
+        flag_q = true;
+        break;
+      case 'v':
+        flag_v = true;
+        break;
+      case 'x':
+        saw_x = true;
+        break;
+      case 1:
+        // -x swallows all following arguments, so we use '-' in the getopt
+        // string and collect files here.
+        if (!archive_name) {
+          archive_name = optarg;
+        } else if (saw_x) {
+          excludes.insert(optarg);
+        } else {
+          includes.insert(optarg);
+        }
+        break;
+      default:
+        ShowHelp(false);
+    }
+  }
+
+  if (!archive_name) error(1, 0, "missing archive filename");
+
+  // We can't support "-" to unzip from stdin because libziparchive relies on mmap.
+  ZipArchiveHandle zah;
+  int32_t err;
+  if ((err = OpenArchive(archive_name, &zah)) != 0) {
+    error(1, 0, "couldn't open %s: %s", archive_name, ErrorCodeString(err));
+  }
+
+  // Implement -d by changing into that directory.
+  // We'll create implicit directories based on paths in the zip file, but we
+  // require that the -d directory already exists.
+  if (flag_d && chdir(flag_d) == -1) error(1, errno, "couldn't chdir to %s", flag_d);
+
+  ProcessAll(zah);
+
+  CloseArchive(zah);
+  return 0;
+}
diff --git a/libziparchive/zip_archive.cc b/libziparchive/zip_archive.cc
index 246575f81..6860c4e2d 100644
--- a/libziparchive/zip_archive.cc
+++ b/libziparchive/zip_archive.cc
@@ -27,6 +27,7 @@
 #include <limits.h>
 #include <stdlib.h>
 #include <string.h>
+#include <time.h>
 #include <unistd.h>
 
 #include <memory>
@@ -589,6 +590,13 @@ static int32_t FindEntry(const ZipArchive* archive, const int ent,
     data->has_data_descriptor = 1;
   }
 
+  // 4.4.2.1: the upper byte of `version_made_by` gives the source OS. Unix is 3.
+  if ((cdr->version_made_by >> 8) == 3) {
+    data->unix_mode = (cdr->external_file_attributes >> 16) & 0xffff;
+  } else {
+    data->unix_mode = 0777;
+  }
+
   // Check that the local file header name matches the declared
   // name in the central directory.
if (lfh->file_name_length == nameLen) { @@ -1227,3 +1235,17 @@ bool ZipArchive::InitializeCentralDirectory(const char* debug_file_name, off64_t } return true; } + +tm ZipEntry::GetModificationTime() const { /* Unpacks the bit fields of `mod_time` into a struct tm; every other tm member is left zero. */ + tm t = {}; + + t.tm_hour = (mod_time >> 11) & 0x1f; + t.tm_min = (mod_time >> 5) & 0x3f; + t.tm_sec = (mod_time & 0x1f) << 1; /* stored in units of two seconds */ + + t.tm_year = ((mod_time >> 25) & 0x7f) + 80; /* field counts from 1980, tm_year from 1900 */ + t.tm_mon = ((mod_time >> 21) & 0xf) - 1; /* field is 1-based, tm_mon is 0-based */ + t.tm_mday = (mod_time >> 16) & 0x1f; + + return t; +} diff --git a/libziparchive/zip_archive_common.h b/libziparchive/zip_archive_common.h index ca4250942..bc1ebb429 100644 --- a/libziparchive/zip_archive_common.h +++ b/libziparchive/zip_archive_common.h @@ -73,7 +73,7 @@ struct CentralDirectoryRecord { // The start of record signature. Must be |kSignature|. uint32_t record_signature; - // Tool version. Ignored by this implementation. + // Source tool version. Top byte gives source OS. uint16_t version_made_by; // Tool version. Ignored by this implementation. uint16_t version_needed; @@ -106,7 +106,7 @@ struct CentralDirectoryRecord { uint16_t file_start_disk; // File attributes. Ignored by this implementation. uint16_t internal_file_attributes; - // File attributes. Ignored by this implementation. + // File attributes. For archives created on Unix, the top bits are the mode. uint32_t external_file_attributes; // The offset to the local file header for this entry, from the // beginning of this archive. 
diff --git a/libziparchive/zip_writer_test.cc b/libziparchive/zip_writer_test.cc index 5b526a4f8..9ad025255 100644 --- a/libziparchive/zip_writer_test.cc +++ b/libziparchive/zip_writer_test.cc @@ -135,17 +135,6 @@ TEST_F(zipwriter, WriteUncompressedZipFileWithAlignedFlag) { CloseArchive(handle); } -static void ConvertZipTimeToTm(uint32_t& zip_time, struct tm* tm) { - memset(tm, 0, sizeof(struct tm)); - tm->tm_hour = (zip_time >> 11) & 0x1f; - tm->tm_min = (zip_time >> 5) & 0x3f; - tm->tm_sec = (zip_time & 0x1f) << 1; - - tm->tm_year = ((zip_time >> 25) & 0x7f) + 80; - tm->tm_mon = ((zip_time >> 21) & 0xf) - 1; - tm->tm_mday = (zip_time >> 16) & 0x1f; -} - static struct tm MakeTm() { struct tm tm; memset(&tm, 0, sizeof(struct tm)); @@ -177,8 +166,7 @@ TEST_F(zipwriter, WriteUncompressedZipFileWithAlignedFlagAndTime) { ASSERT_EQ(0, FindEntry(handle, ZipString("align.txt"), &data)); EXPECT_EQ(0, data.offset & 0x03); - struct tm mod; - ConvertZipTimeToTm(data.mod_time, &mod); + struct tm mod = data.GetModificationTime(); EXPECT_EQ(tm.tm_sec, mod.tm_sec); EXPECT_EQ(tm.tm_min, mod.tm_min); EXPECT_EQ(tm.tm_hour, mod.tm_hour); @@ -228,8 +216,7 @@ TEST_F(zipwriter, WriteUncompressedZipFileWithAlignedValueAndTime) { ASSERT_EQ(0, FindEntry(handle, ZipString("align.txt"), &data)); EXPECT_EQ(0, data.offset & 0xfff); - struct tm mod; - ConvertZipTimeToTm(data.mod_time, &mod); + struct tm mod = data.GetModificationTime(); EXPECT_EQ(tm.tm_sec, mod.tm_sec); EXPECT_EQ(tm.tm_min, mod.tm_min); EXPECT_EQ(tm.tm_hour, mod.tm_hour); -- cgit v1.2.3