about summary refs log tree commit diff stats
path: root/cmd
diff options
context:
space:
mode:
author Sasha Smundak <asmundak@google.com> 2019-07-16 18:45:24 -0700
committer Sasha Smundak <asmundak@google.com> 2019-08-06 14:23:35 -0700
commit 1459a9259e28904f0d5f6da85a6612b59c611005 (patch)
tree 116f686beca7e46fe4e5e8592aede10cda3ab916 /cmd
parent d62b4af8b7648acd961e927e4b8d5d7eb97a9dba (diff)
download build_soong-1459a9259e28904f0d5f6da85a6612b59c611005.tar.gz
build_soong-1459a9259e28904f0d5f6da85a6612b59c611005.tar.bz2
build_soong-1459a9259e28904f0d5f6da85a6612b59c611005.zip
Allow reading zipfile list from a file.
The Android build generates 180K+ zip files that are to be merged, with the result passed to the indexing pipeline to generate the source code cross-reference. Their names cannot be passed on the command line, hence the need for this change. It required extensive changes to the code, as it can no longer be assumed that all the input files can be kept open. Bug: 121267023 Test: internal + treehugger + combine index files Change-Id: I0a829f96ff7187ad967fb6b8cede387501ec57cc
Diffstat (limited to 'cmd')
-rw-r--r-- cmd/merge_zips/Android.bp 1
-rw-r--r-- cmd/merge_zips/merge_zips.go 906
-rw-r--r-- cmd/merge_zips/merge_zips_test.go 100
3 files changed, 680 insertions, 327 deletions
diff --git a/cmd/merge_zips/Android.bp b/cmd/merge_zips/Android.bp
index ab658fd0..f70c86eb 100644
--- a/cmd/merge_zips/Android.bp
+++ b/cmd/merge_zips/Android.bp
@@ -18,6 +18,7 @@ blueprint_go_binary {
"android-archive-zip",
"blueprint-pathtools",
"soong-jar",
+ "soong-zip",
],
srcs: [
"merge_zips.go",
diff --git a/cmd/merge_zips/merge_zips.go b/cmd/merge_zips/merge_zips.go
index 68fe2592..27179cb2 100644
--- a/cmd/merge_zips/merge_zips.go
+++ b/cmd/merge_zips/merge_zips.go
@@ -30,457 +30,723 @@ import (
"android/soong/jar"
"android/soong/third_party/zip"
+ soongZip "android/soong/zip"
)
-type fileList []string
+// Input zip: we can open it, close it, and obtain an array of entries
+type InputZip interface {
+ Name() string
+ Open() error
+ Close() error
+ Entries() []*zip.File
+ IsOpen() bool
+}
-func (f *fileList) String() string {
- return `""`
+// An entry that can be written to the output zip
+type ZipEntryContents interface {
+ String() string
+ IsDir() bool
+ CRC32() uint32
+ Size() uint64
+ WriteToZip(dest string, zw *zip.Writer) error
}
-func (f *fileList) Set(name string) error {
- *f = append(*f, filepath.Clean(name))
+// a ZipEntryFromZip is a ZipEntryContents that pulls its content from another zip
+// identified by the input zip and the index of the entry in its entries array
+type ZipEntryFromZip struct {
+ inputZip InputZip
+ index int
+ name string
+ isDir bool
+ crc32 uint32
+ size uint64
+}
- return nil
+func NewZipEntryFromZip(inputZip InputZip, entryIndex int) *ZipEntryFromZip {
+ fi := inputZip.Entries()[entryIndex]
+ newEntry := ZipEntryFromZip{inputZip: inputZip,
+ index: entryIndex,
+ name: fi.Name,
+ isDir: fi.FileInfo().IsDir(),
+ crc32: fi.CRC32,
+ size: fi.UncompressedSize64,
+ }
+ return &newEntry
}
-type zipsToNotStripSet map[string]bool
+func (ze ZipEntryFromZip) String() string {
+ return fmt.Sprintf("%s!%s", ze.inputZip.Name(), ze.name)
+}
-func (s zipsToNotStripSet) String() string {
- return `""`
+func (ze ZipEntryFromZip) IsDir() bool {
+ return ze.isDir
}
-func (s zipsToNotStripSet) Set(zip_path string) error {
- s[zip_path] = true
+func (ze ZipEntryFromZip) CRC32() uint32 {
+ return ze.crc32
+}
- return nil
+func (ze ZipEntryFromZip) Size() uint64 {
+ return ze.size
}
-var (
- sortEntries = flag.Bool("s", false, "sort entries (defaults to the order from the input zip files)")
- emulateJar = flag.Bool("j", false, "sort zip entries using jar ordering (META-INF first)")
- emulatePar = flag.Bool("p", false, "merge zip entries based on par format")
- stripDirs fileList
- stripFiles fileList
- zipsToNotStrip = make(zipsToNotStripSet)
- stripDirEntries = flag.Bool("D", false, "strip directory entries from the output zip file")
- manifest = flag.String("m", "", "manifest file to insert in jar")
- pyMain = flag.String("pm", "", "__main__.py file to insert in par")
- prefix = flag.String("prefix", "", "A file to prefix to the zip file")
- ignoreDuplicates = flag.Bool("ignore-duplicates", false, "take each entry from the first zip it exists in and don't warn")
-)
+func (ze ZipEntryFromZip) WriteToZip(dest string, zw *zip.Writer) error {
+ if err := ze.inputZip.Open(); err != nil {
+ return err
+ }
+ return zw.CopyFrom(ze.inputZip.Entries()[ze.index], dest)
+}
-func init() {
- flag.Var(&stripDirs, "stripDir", "directories to be excluded from the output zip, accepts wildcards")
- flag.Var(&stripFiles, "stripFile", "files to be excluded from the output zip, accepts wildcards")
- flag.Var(&zipsToNotStrip, "zipToNotStrip", "the input zip file which is not applicable for stripping")
+// a ZipEntryFromBuffer is a ZipEntryContents that pulls its content from a []byte
+type ZipEntryFromBuffer struct {
+ fh *zip.FileHeader
+ content []byte
}
-func main() {
- flag.Usage = func() {
- fmt.Fprintln(os.Stderr, "usage: merge_zips [-jpsD] [-m manifest] [--prefix script] [-pm __main__.py] output [inputs...]")
- flag.PrintDefaults()
- }
+func (be ZipEntryFromBuffer) String() string {
+ return "internal buffer"
+}
- // parse args
- flag.Parse()
- args := flag.Args()
- if len(args) < 1 {
- flag.Usage()
- os.Exit(1)
- }
- outputPath := args[0]
- inputs := args[1:]
+func (be ZipEntryFromBuffer) IsDir() bool {
+ return be.fh.FileInfo().IsDir()
+}
- log.SetFlags(log.Lshortfile)
+func (be ZipEntryFromBuffer) CRC32() uint32 {
+ return crc32.ChecksumIEEE(be.content)
+}
- // make writer
- output, err := os.Create(outputPath)
+func (be ZipEntryFromBuffer) Size() uint64 {
+ return uint64(len(be.content))
+}
+
+func (be ZipEntryFromBuffer) WriteToZip(dest string, zw *zip.Writer) error {
+ w, err := zw.CreateHeader(be.fh)
if err != nil {
- log.Fatal(err)
+ return err
}
- defer output.Close()
- var offset int64
- if *prefix != "" {
- prefixFile, err := os.Open(*prefix)
- if err != nil {
- log.Fatal(err)
- }
- offset, err = io.Copy(output, prefixFile)
+ if !be.IsDir() {
+ _, err = w.Write(be.content)
if err != nil {
- log.Fatal(err)
+ return err
}
}
- writer := zip.NewWriter(output)
- defer func() {
- err := writer.Close()
- if err != nil {
- log.Fatal(err)
- }
- }()
- writer.SetOffset(offset)
+ return nil
+}
- // make readers
- readers := []namedZipReader{}
- for _, input := range inputs {
- reader, err := zip.OpenReader(input)
- if err != nil {
- log.Fatal(err)
- }
- defer reader.Close()
- namedReader := namedZipReader{path: input, reader: &reader.Reader}
- readers = append(readers, namedReader)
- }
+// Processing state.
+type OutputZip struct {
+ outputWriter *zip.Writer
+ stripDirEntries bool
+ emulateJar bool
+ sortEntries bool
+ ignoreDuplicates bool
+ excludeDirs []string
+ excludeFiles []string
+ sourceByDest map[string]ZipEntryContents
+}
- if *manifest != "" && !*emulateJar {
- log.Fatal(errors.New("must specify -j when specifying a manifest via -m"))
+func NewOutputZip(outputWriter *zip.Writer, sortEntries, emulateJar, stripDirEntries, ignoreDuplicates bool) *OutputZip {
+ return &OutputZip{
+ outputWriter: outputWriter,
+ stripDirEntries: stripDirEntries,
+ emulateJar: emulateJar,
+ sortEntries: sortEntries,
+ sourceByDest: make(map[string]ZipEntryContents, 0),
+ ignoreDuplicates: ignoreDuplicates,
}
+}
- if *pyMain != "" && !*emulatePar {
- log.Fatal(errors.New("must specify -p when specifying a Python __main__.py via -pm"))
+func (oz *OutputZip) setExcludeDirs(excludeDirs []string) {
+ oz.excludeDirs = make([]string, len(excludeDirs))
+ for i, dir := range excludeDirs {
+ oz.excludeDirs[i] = filepath.Clean(dir)
}
+}
- // do merge
- err = mergeZips(readers, writer, *manifest, *pyMain, *sortEntries, *emulateJar, *emulatePar,
- *stripDirEntries, *ignoreDuplicates, []string(stripFiles), []string(stripDirs), map[string]bool(zipsToNotStrip))
- if err != nil {
- log.Fatal(err)
+func (oz *OutputZip) setExcludeFiles(excludeFiles []string) {
+ oz.excludeFiles = excludeFiles
+}
+
+// Adds an entry with the given name whose source is the given ZipEntryContents. Returns the existing
+// ZipEntryContents (and leaves it in place) if an entry with the given name already exists.
+func (oz *OutputZip) addZipEntry(name string, source ZipEntryContents) (ZipEntryContents, error) {
+ if existingSource, exists := oz.sourceByDest[name]; exists {
+ return existingSource, nil
+ }
+ oz.sourceByDest[name] = source
+ // Delay writing an entry if entries need to be rearranged.
+ if oz.emulateJar || oz.sortEntries {
+ return nil, nil
}
+ return nil, source.WriteToZip(name, oz.outputWriter)
}
-// a namedZipReader reads a .zip file and can say which file it's reading
-type namedZipReader struct {
- path string
- reader *zip.Reader
+// Adds an entry for the manifest (META-INF/MANIFEST.MF) from the given file
+func (oz *OutputZip) addManifest(manifestPath string) error {
+ if !oz.stripDirEntries {
+ if _, err := oz.addZipEntry(jar.MetaDir, ZipEntryFromBuffer{jar.MetaDirFileHeader(), nil}); err != nil {
+ return err
+ }
+ }
+ contents, err := ioutil.ReadFile(manifestPath)
+ if err == nil {
+ fh, buf, err := jar.ManifestFileContents(contents)
+ if err == nil {
+ _, err = oz.addZipEntry(jar.ManifestFile, ZipEntryFromBuffer{fh, buf})
+ }
+ }
+ return err
}
-// a zipEntryPath refers to a file contained in a zip
-type zipEntryPath struct {
- zipName string
- entryName string
+// Adds an entry with given name and contents read from given file
+func (oz *OutputZip) addZipEntryFromFile(name string, path string) error {
+ buf, err := ioutil.ReadFile(path)
+ if err == nil {
+ fh := &zip.FileHeader{
+ Name: name,
+ Method: zip.Store,
+ UncompressedSize64: uint64(len(buf)),
+ }
+ fh.SetMode(0700)
+ fh.SetModTime(jar.DefaultTime)
+ _, err = oz.addZipEntry(name, ZipEntryFromBuffer{fh, buf})
+ }
+ return err
}
-func (p zipEntryPath) String() string {
- return p.zipName + "/" + p.entryName
+func (oz *OutputZip) addEmptyEntry(entry string) error {
+ var emptyBuf []byte
+ fh := &zip.FileHeader{
+ Name: entry,
+ Method: zip.Store,
+ UncompressedSize64: uint64(len(emptyBuf)),
+ }
+ fh.SetMode(0700)
+ fh.SetModTime(jar.DefaultTime)
+ _, err := oz.addZipEntry(entry, ZipEntryFromBuffer{fh, emptyBuf})
+ return err
}
-// a zipEntry is a zipSource that pulls its content from another zip
-type zipEntry struct {
- path zipEntryPath
- content *zip.File
+// Returns true if given entry is to be excluded
+func (oz *OutputZip) isEntryExcluded(name string) bool {
+ for _, dir := range oz.excludeDirs {
+ dir = filepath.Clean(dir)
+ patterns := []string{
+ dir + "/", // the directory itself
+ dir + "/**/*", // files recursively in the directory
+ dir + "/**/*/", // directories recursively in the directory
+ }
+
+ for _, pattern := range patterns {
+ match, err := pathtools.Match(pattern, name)
+ if err != nil {
+ panic(fmt.Errorf("%s: %s", err.Error(), pattern))
+ }
+ if match {
+ if oz.emulateJar {
+ // When merging jar files, don't strip META-INF/MANIFEST.MF even if stripping META-INF is
+ // requested.
+ // TODO(ccross): which files does this affect?
+ if name != jar.MetaDir && name != jar.ManifestFile {
+ return true
+ }
+ }
+ return true
+ }
+ }
+ }
+
+ for _, pattern := range oz.excludeFiles {
+ match, err := pathtools.Match(pattern, name)
+ if err != nil {
+ panic(fmt.Errorf("%s: %s", err.Error(), pattern))
+ }
+ if match {
+ return true
+ }
+ }
+ return false
}
-func (ze zipEntry) String() string {
- return ze.path.String()
+// Creates a zip entry whose contents is an entry from the given input zip.
+func (oz *OutputZip) copyEntry(inputZip InputZip, index int) error {
+ entry := NewZipEntryFromZip(inputZip, index)
+ if oz.stripDirEntries && entry.IsDir() {
+ return nil
+ }
+ existingEntry, err := oz.addZipEntry(entry.name, entry)
+ if err != nil {
+ return err
+ }
+ if existingEntry == nil {
+ return nil
+ }
+
+ // File types should match
+ if existingEntry.IsDir() != entry.IsDir() {
+ return fmt.Errorf("Directory/file mismatch at %v from %v and %v\n",
+ entry.name, existingEntry, entry)
+ }
+
+ if oz.ignoreDuplicates ||
+ // Skip manifest and module info files that are not from the first input file
+ (oz.emulateJar && entry.name == jar.ManifestFile || entry.name == jar.ModuleInfoClass) ||
+ // Identical entries
+ (existingEntry.CRC32() == entry.CRC32() && existingEntry.Size() == entry.Size()) ||
+ // Directory entries
+ entry.IsDir() {
+ return nil
+ }
+
+ return fmt.Errorf("Duplicate path %v found in %v and %v\n", entry.name, existingEntry, inputZip.Name())
}
-func (ze zipEntry) IsDir() bool {
- return ze.content.FileInfo().IsDir()
+func (oz *OutputZip) entriesArray() []string {
+ entries := make([]string, len(oz.sourceByDest))
+ i := 0
+ for entry := range oz.sourceByDest {
+ entries[i] = entry
+ i++
+ }
+ return entries
}
-func (ze zipEntry) CRC32() uint32 {
- return ze.content.FileHeader.CRC32
+func (oz *OutputZip) jarSorted() []string {
+ entries := oz.entriesArray()
+ sort.SliceStable(entries, func(i, j int) bool { return jar.EntryNamesLess(entries[i], entries[j]) })
+ return entries
}
-func (ze zipEntry) Size() uint64 {
- return ze.content.FileHeader.UncompressedSize64
+func (oz *OutputZip) alphanumericSorted() []string {
+ entries := oz.entriesArray()
+ sort.Strings(entries)
+ return entries
}
-func (ze zipEntry) WriteToZip(dest string, zw *zip.Writer) error {
- return zw.CopyFrom(ze.content, dest)
+func (oz *OutputZip) writeEntries(entries []string) error {
+ for _, entry := range entries {
+ source, _ := oz.sourceByDest[entry]
+ if err := source.WriteToZip(entry, oz.outputWriter); err != nil {
+ return err
+ }
+ }
+ return nil
}
-// a bufferEntry is a zipSource that pulls its content from a []byte
-type bufferEntry struct {
- fh *zip.FileHeader
- content []byte
+func (oz *OutputZip) getUninitializedPythonPackages(inputZips []InputZip) ([]string, error) {
+ // the runfiles packages need to be populated with "__init__.py".
+ // the runfiles dirs have been treated as packages.
+ allPackages := make(map[string]bool)
+ initedPackages := make(map[string]bool)
+ getPackage := func(path string) string {
+ ret := filepath.Dir(path)
+ // filepath.Dir("abc") -> "." and filepath.Dir("/abc") -> "/".
+ if ret == "." || ret == "/" {
+ return ""
+ }
+ return ret
+ }
+
+ // Put existing __init__.py files into a set first. This set is used to prevent
+ // generated __init__.py files from overwriting existing ones.
+ for _, inputZip := range inputZips {
+ if err := inputZip.Open(); err != nil {
+ return nil, err
+ }
+ for _, file := range inputZip.Entries() {
+ pyPkg := getPackage(file.Name)
+ if filepath.Base(file.Name) == "__init__.py" {
+ if _, found := initedPackages[pyPkg]; found {
+ panic(fmt.Errorf("found __init__.py path duplicates during pars merging: %q", file.Name))
+ }
+ initedPackages[pyPkg] = true
+ }
+ for pyPkg != "" {
+ if _, found := allPackages[pyPkg]; found {
+ break
+ }
+ allPackages[pyPkg] = true
+ pyPkg = getPackage(pyPkg)
+ }
+ }
+ }
+ noInitPackages := make([]string, 0)
+ for pyPkg := range allPackages {
+ if _, found := initedPackages[pyPkg]; !found {
+ noInitPackages = append(noInitPackages, pyPkg)
+ }
+ }
+ return noInitPackages, nil
}
-func (be bufferEntry) String() string {
- return "internal buffer"
+// An InputZip owned by the InputZipsManager. Opened ManagedInputZips are chained in opening order.
+type ManagedInputZip struct {
+ owner *InputZipsManager
+ realInputZip InputZip
+ older *ManagedInputZip
+ newer *ManagedInputZip
}
-func (be bufferEntry) IsDir() bool {
- return be.fh.FileInfo().IsDir()
+// Maintains the array of ManagedInputZips, keeping track of the open ones. When an InputZip is opened,
+// it may close some other InputZip to limit the number of open ones.
+type InputZipsManager struct {
+ inputZips []*ManagedInputZip
+ nOpenZips int
+ maxOpenZips int
+ openInputZips *ManagedInputZip
}
-func (be bufferEntry) CRC32() uint32 {
- return crc32.ChecksumIEEE(be.content)
+func (miz *ManagedInputZip) unlink() {
+ olderMiz := miz.older
+ newerMiz := miz.newer
+ if newerMiz.older != miz || olderMiz.newer != miz {
+ panic(fmt.Errorf("removing %p:%#v: broken list between %p:%#v and %p:%#v",
+ miz, miz, newerMiz, newerMiz, olderMiz, olderMiz))
+ }
+ olderMiz.newer = newerMiz
+ newerMiz.older = olderMiz
+ miz.newer = nil
+ miz.older = nil
}
-func (be bufferEntry) Size() uint64 {
- return uint64(len(be.content))
+func (miz *ManagedInputZip) link(olderMiz *ManagedInputZip) {
+ if olderMiz.newer != nil || olderMiz.older != nil {
+ panic(fmt.Errorf("inputZip is already open"))
+ }
+ oldOlderMiz := miz.older
+ if oldOlderMiz.newer != miz {
+ panic(fmt.Errorf("broken list between %p:%#v and %p:%#v", miz, oldOlderMiz))
+ }
+ miz.older = olderMiz
+ olderMiz.older = oldOlderMiz
+ oldOlderMiz.newer = olderMiz
+ olderMiz.newer = miz
}
-func (be bufferEntry) WriteToZip(dest string, zw *zip.Writer) error {
- w, err := zw.CreateHeader(be.fh)
- if err != nil {
- return err
+func NewInputZipsManager(nInputZips, maxOpenZips int) *InputZipsManager {
+ if maxOpenZips < 3 {
+ panic(fmt.Errorf("open zips limit should be above 3"))
}
+ // In the dummy element .older points to the most recently opened InputZip, and .newer points to the oldest.
+ head := new(ManagedInputZip)
+ head.older = head
+ head.newer = head
+ return &InputZipsManager{
+ inputZips: make([]*ManagedInputZip, 0, nInputZips),
+ maxOpenZips: maxOpenZips,
+ openInputZips: head,
+ }
+}
- if !be.IsDir() {
- _, err = w.Write(be.content)
- if err != nil {
+// InputZip factory
+func (izm *InputZipsManager) Manage(inz InputZip) InputZip {
+ iz := &ManagedInputZip{owner: izm, realInputZip: inz}
+ izm.inputZips = append(izm.inputZips, iz)
+ return iz
+}
+
+// Opens or reopens ManagedInputZip.
+func (izm *InputZipsManager) reopen(miz *ManagedInputZip) error {
+ if miz.realInputZip.IsOpen() {
+ if miz != izm.openInputZips {
+ miz.unlink()
+ izm.openInputZips.link(miz)
+ }
+ return nil
+ }
+ if izm.nOpenZips >= izm.maxOpenZips {
+ if err := izm.close(izm.openInputZips.older); err != nil {
return err
}
}
+ if err := miz.realInputZip.Open(); err != nil {
+ return err
+ }
+ izm.openInputZips.link(miz)
+ izm.nOpenZips++
+ return nil
+}
+func (izm *InputZipsManager) close(miz *ManagedInputZip) error {
+ if miz.IsOpen() {
+ err := miz.realInputZip.Close()
+ izm.nOpenZips--
+ miz.unlink()
+ return err
+ }
return nil
}
-type zipSource interface {
- String() string
- IsDir() bool
- CRC32() uint32
- Size() uint64
- WriteToZip(dest string, zw *zip.Writer) error
+// Checks that openInputZips deque is valid
+func (izm *InputZipsManager) checkOpenZipsDeque() {
+ nReallyOpen := 0
+ el := izm.openInputZips
+ for {
+ elNext := el.older
+ if elNext.newer != el {
+ panic(fmt.Errorf("Element:\n %p: %v\nNext:\n %p %v", el, el, elNext, elNext))
+ }
+ if elNext == izm.openInputZips {
+ break
+ }
+ el = elNext
+ if !el.IsOpen() {
+ panic(fmt.Errorf("Found unopened element"))
+ }
+ nReallyOpen++
+ if nReallyOpen > izm.nOpenZips {
+ panic(fmt.Errorf("found %d open zips, should be %d", nReallyOpen, izm.nOpenZips))
+ }
+ }
+ if nReallyOpen > izm.nOpenZips {
+ panic(fmt.Errorf("found %d open zips, should be %d", nReallyOpen, izm.nOpenZips))
+ }
}
-// a fileMapping specifies to copy a zip entry from one place to another
-type fileMapping struct {
- dest string
- source zipSource
+func (miz *ManagedInputZip) Name() string {
+ return miz.realInputZip.Name()
}
-func mergeZips(readers []namedZipReader, writer *zip.Writer, manifest, pyMain string,
- sortEntries, emulateJar, emulatePar, stripDirEntries, ignoreDuplicates bool,
- stripFiles, stripDirs []string, zipsToNotStrip map[string]bool) error {
+func (miz *ManagedInputZip) Open() error {
+ return miz.owner.reopen(miz)
+}
- sourceByDest := make(map[string]zipSource, 0)
- orderedMappings := []fileMapping{}
+func (miz *ManagedInputZip) Close() error {
+ return miz.owner.close(miz)
+}
- // if dest already exists returns a non-null zipSource for the existing source
- addMapping := func(dest string, source zipSource) zipSource {
- mapKey := filepath.Clean(dest)
- if existingSource, exists := sourceByDest[mapKey]; exists {
- return existingSource
- }
+func (miz *ManagedInputZip) IsOpen() bool {
+ return miz.realInputZip.IsOpen()
+}
- sourceByDest[mapKey] = source
- orderedMappings = append(orderedMappings, fileMapping{source: source, dest: dest})
- return nil
+func (miz *ManagedInputZip) Entries() []*zip.File {
+ if !miz.IsOpen() {
+ panic(fmt.Errorf("%s: is not open", miz.Name()))
}
+ return miz.realInputZip.Entries()
+}
- if manifest != "" {
- if !stripDirEntries {
- dirHeader := jar.MetaDirFileHeader()
- dirSource := bufferEntry{dirHeader, nil}
- addMapping(jar.MetaDir, dirSource)
- }
+// Actual processing.
+func mergeZips(inputZips []InputZip, writer *zip.Writer, manifest, pyMain string,
+ sortEntries, emulateJar, emulatePar, stripDirEntries, ignoreDuplicates bool,
+ excludeFiles, excludeDirs []string, zipsToNotStrip map[string]bool) error {
- contents, err := ioutil.ReadFile(manifest)
- if err != nil {
+ out := NewOutputZip(writer, sortEntries, emulateJar, stripDirEntries, ignoreDuplicates)
+ out.setExcludeFiles(excludeFiles)
+ out.setExcludeDirs(excludeDirs)
+ if manifest != "" {
+ if err := out.addManifest(manifest); err != nil {
return err
}
-
- fh, buf, err := jar.ManifestFileContents(contents)
- if err != nil {
+ }
+ if pyMain != "" {
+ if err := out.addZipEntryFromFile("__main__.py", pyMain); err != nil {
return err
}
-
- fileSource := bufferEntry{fh, buf}
- addMapping(jar.ManifestFile, fileSource)
}
- if pyMain != "" {
- buf, err := ioutil.ReadFile(pyMain)
+ if emulatePar {
+ noInitPackages, err := out.getUninitializedPythonPackages(inputZips)
if err != nil {
return err
}
- fh := &zip.FileHeader{
- Name: "__main__.py",
- Method: zip.Store,
- UncompressedSize64: uint64(len(buf)),
+ for _, uninitializedPyPackage := range noInitPackages {
+ if err = out.addEmptyEntry(filepath.Join(uninitializedPyPackage, "__init__.py")); err != nil {
+ return err
+ }
}
- fh.SetMode(0700)
- fh.SetModTime(jar.DefaultTime)
- fileSource := bufferEntry{fh, buf}
- addMapping("__main__.py", fileSource)
}
- if emulatePar {
- // the runfiles packages needs to be populated with "__init__.py".
- newPyPkgs := []string{}
- // the runfiles dirs have been treated as packages.
- existingPyPkgSet := make(map[string]bool)
- // put existing __init__.py files to a set first. This set is used for preventing
- // generated __init__.py files from overwriting existing ones.
- for _, namedReader := range readers {
- for _, file := range namedReader.reader.File {
- if filepath.Base(file.Name) != "__init__.py" {
- continue
- }
- pyPkg := pathBeforeLastSlash(file.Name)
- if _, found := existingPyPkgSet[pyPkg]; found {
- panic(fmt.Errorf("found __init__.py path duplicates during pars merging: %q.", file.Name))
- } else {
- existingPyPkgSet[pyPkg] = true
- }
- }
+ // Finally, add entries from all the input zips.
+ for _, inputZip := range inputZips {
+ _, copyFully := zipsToNotStrip[inputZip.Name()]
+ if err := inputZip.Open(); err != nil {
+ return err
}
- for _, namedReader := range readers {
- for _, file := range namedReader.reader.File {
- var parentPath string /* the path after trimming last "/" */
- if filepath.Base(file.Name) == "__init__.py" {
- // for existing __init__.py files, we should trim last "/" for twice.
- // eg. a/b/c/__init__.py ---> a/b
- parentPath = pathBeforeLastSlash(pathBeforeLastSlash(file.Name))
- } else {
- parentPath = pathBeforeLastSlash(file.Name)
+
+ for i, entry := range inputZip.Entries() {
+ if copyFully || !out.isEntryExcluded(entry.Name) {
+ if err := out.copyEntry(inputZip, i); err != nil {
+ return err
}
- populateNewPyPkgs(parentPath, existingPyPkgSet, &newPyPkgs)
}
}
- for _, pkg := range newPyPkgs {
- var emptyBuf []byte
- fh := &zip.FileHeader{
- Name: filepath.Join(pkg, "__init__.py"),
- Method: zip.Store,
- UncompressedSize64: uint64(len(emptyBuf)),
+ // Unless we need to rearrange the entries, the input zip can now be closed.
+ if !(emulateJar || sortEntries) {
+ if err := inputZip.Close(); err != nil {
+ return err
}
- fh.SetMode(0700)
- fh.SetModTime(jar.DefaultTime)
- fileSource := bufferEntry{fh, emptyBuf}
- addMapping(filepath.Join(pkg, "__init__.py"), fileSource)
}
}
- for _, namedReader := range readers {
- _, skipStripThisZip := zipsToNotStrip[namedReader.path]
- for _, file := range namedReader.reader.File {
- if !skipStripThisZip {
- if skip, err := shouldStripEntry(emulateJar, stripFiles, stripDirs, file.Name); err != nil {
- return err
- } else if skip {
- continue
- }
- }
- if stripDirEntries && file.FileInfo().IsDir() {
- continue
- }
+ if emulateJar {
+ return out.writeEntries(out.jarSorted())
+ } else if sortEntries {
+ return out.writeEntries(out.alphanumericSorted())
+ }
+ return nil
+}
- // check for other files or directories destined for the same path
- dest := file.Name
+// Process command line
+type fileList []string
- // make a new entry to add
- source := zipEntry{path: zipEntryPath{zipName: namedReader.path, entryName: file.Name}, content: file}
+func (f *fileList) String() string {
+ return `""`
+}
- if existingSource := addMapping(dest, source); existingSource != nil {
- // handle duplicates
- if existingSource.IsDir() != source.IsDir() {
- return fmt.Errorf("Directory/file mismatch at %v from %v and %v\n",
- dest, existingSource, source)
- }
+func (f *fileList) Set(name string) error {
+ *f = append(*f, filepath.Clean(name))
- if ignoreDuplicates {
- continue
- }
+ return nil
+}
- if emulateJar &&
- file.Name == jar.ManifestFile || file.Name == jar.ModuleInfoClass {
- // Skip manifest and module info files that are not from the first input file
- continue
- }
+type zipsToNotStripSet map[string]bool
- if source.IsDir() {
- continue
- }
+func (s zipsToNotStripSet) String() string {
+ return `""`
+}
- if existingSource.CRC32() == source.CRC32() && existingSource.Size() == source.Size() {
- continue
- }
+func (s zipsToNotStripSet) Set(path string) error {
+ s[path] = true
+ return nil
+}
- return fmt.Errorf("Duplicate path %v found in %v and %v\n",
- dest, existingSource, source)
- }
- }
- }
+var (
+ sortEntries = flag.Bool("s", false, "sort entries (defaults to the order from the input zip files)")
+ emulateJar = flag.Bool("j", false, "sort zip entries using jar ordering (META-INF first)")
+ emulatePar = flag.Bool("p", false, "merge zip entries based on par format")
+ excludeDirs fileList
+ excludeFiles fileList
+ zipsToNotStrip = make(zipsToNotStripSet)
+ stripDirEntries = flag.Bool("D", false, "strip directory entries from the output zip file")
+ manifest = flag.String("m", "", "manifest file to insert in jar")
+ pyMain = flag.String("pm", "", "__main__.py file to insert in par")
+ prefix = flag.String("prefix", "", "A file to prefix to the zip file")
+ ignoreDuplicates = flag.Bool("ignore-duplicates", false, "take each entry from the first zip it exists in and don't warn")
+)
- if emulateJar {
- jarSort(orderedMappings)
- } else if sortEntries {
- alphanumericSort(orderedMappings)
- }
+func init() {
+ flag.Var(&excludeDirs, "stripDir", "directories to be excluded from the output zip, accepts wildcards")
+ flag.Var(&excludeFiles, "stripFile", "files to be excluded from the output zip, accepts wildcards")
+ flag.Var(&zipsToNotStrip, "zipToNotStrip", "the input zip file which is not applicable for stripping")
+}
- for _, entry := range orderedMappings {
- if err := entry.source.WriteToZip(entry.dest, writer); err != nil {
- return err
- }
- }
+type FileInputZip struct {
+ name string
+ reader *zip.ReadCloser
+}
+func (fiz *FileInputZip) Name() string {
+ return fiz.name
+}
+
+func (fiz *FileInputZip) Close() error {
+ if fiz.IsOpen() {
+ reader := fiz.reader
+ fiz.reader = nil
+ return reader.Close()
+ }
return nil
}
-// Sets the given directory and all its ancestor directories as Python packages.
-func populateNewPyPkgs(pkgPath string, existingPyPkgSet map[string]bool, newPyPkgs *[]string) {
- for pkgPath != "" {
- if _, found := existingPyPkgSet[pkgPath]; !found {
- existingPyPkgSet[pkgPath] = true
- *newPyPkgs = append(*newPyPkgs, pkgPath)
- // Gets its ancestor directory by trimming last slash.
- pkgPath = pathBeforeLastSlash(pkgPath)
- } else {
- break
- }
+func (fiz *FileInputZip) Entries() []*zip.File {
+ if !fiz.IsOpen() {
+ panic(fmt.Errorf("%s: is not open", fiz.Name()))
}
+ return fiz.reader.File
+}
+
+func (fiz *FileInputZip) IsOpen() bool {
+ return fiz.reader != nil
}
-func pathBeforeLastSlash(path string) string {
- ret := filepath.Dir(path)
- // filepath.Dir("abc") -> "." and filepath.Dir("/abc") -> "/".
- if ret == "." || ret == "/" {
- return ""
+func (fiz *FileInputZip) Open() error {
+ if fiz.IsOpen() {
+ return nil
}
- return ret
+ var err error
+ fiz.reader, err = zip.OpenReader(fiz.Name())
+ return err
}
-func shouldStripEntry(emulateJar bool, stripFiles, stripDirs []string, name string) (bool, error) {
- for _, dir := range stripDirs {
- dir = filepath.Clean(dir)
- patterns := []string{
- dir + "/", // the directory itself
- dir + "/**/*", // files recursively in the directory
- dir + "/**/*/", // directories recursively in the directory
- }
+func main() {
+ flag.Usage = func() {
+ fmt.Fprintln(os.Stderr, "usage: merge_zips [-jpsD] [-m manifest] [--prefix script] [-pm __main__.py] OutputZip [inputs...]")
+ flag.PrintDefaults()
+ }
- for _, pattern := range patterns {
- match, err := pathtools.Match(pattern, name)
+ // parse args
+ flag.Parse()
+ args := flag.Args()
+ if len(args) < 1 {
+ flag.Usage()
+ os.Exit(1)
+ }
+ outputPath := args[0]
+ inputs := make([]string, 0)
+ for _, input := range args[1:] {
+ if input[0] == '@' {
+ bytes, err := ioutil.ReadFile(input[1:])
if err != nil {
- return false, fmt.Errorf("%s: %s", err.Error(), pattern)
- } else if match {
- if emulateJar {
- // When merging jar files, don't strip META-INF/MANIFEST.MF even if stripping META-INF is
- // requested.
- // TODO(ccross): which files does this affect?
- if name != jar.MetaDir && name != jar.ManifestFile {
- return true, nil
- }
- }
- return true, nil
+ log.Fatal(err)
}
+ inputs = append(inputs, soongZip.ReadRespFile(bytes)...)
+ continue
}
+ inputs = append(inputs, input)
+ continue
}
- for _, pattern := range stripFiles {
- if match, err := pathtools.Match(pattern, name); err != nil {
- return false, fmt.Errorf("%s: %s", err.Error(), pattern)
- } else if match {
- return true, nil
+ log.SetFlags(log.Lshortfile)
+
+ // make writer
+ outputZip, err := os.Create(outputPath)
+ if err != nil {
+ log.Fatal(err)
+ }
+ defer outputZip.Close()
+
+ var offset int64
+ if *prefix != "" {
+ prefixFile, err := os.Open(*prefix)
+ if err != nil {
+ log.Fatal(err)
+ }
+ offset, err = io.Copy(outputZip, prefixFile)
+ if err != nil {
+ log.Fatal(err)
}
}
- return false, nil
-}
-func jarSort(files []fileMapping) {
- sort.SliceStable(files, func(i, j int) bool {
- return jar.EntryNamesLess(files[i].dest, files[j].dest)
- })
-}
+ writer := zip.NewWriter(outputZip)
+ defer func() {
+ err := writer.Close()
+ if err != nil {
+ log.Fatal(err)
+ }
+ }()
+ writer.SetOffset(offset)
+
+ if *manifest != "" && !*emulateJar {
+ log.Fatal(errors.New("must specify -j when specifying a manifest via -m"))
+ }
+
+ if *pyMain != "" && !*emulatePar {
+ log.Fatal(errors.New("must specify -p when specifying a Python __main__.py via -pm"))
+ }
-func alphanumericSort(files []fileMapping) {
- sort.SliceStable(files, func(i, j int) bool {
- return files[i].dest < files[j].dest
- })
+ // do merge
+ inputZipsManager := NewInputZipsManager(len(inputs), 1000)
+ inputZips := make([]InputZip, len(inputs))
+ for i, input := range inputs {
+ inputZips[i] = inputZipsManager.Manage(&FileInputZip{name: input})
+ }
+ err = mergeZips(inputZips, writer, *manifest, *pyMain, *sortEntries, *emulateJar, *emulatePar,
+ *stripDirEntries, *ignoreDuplicates, []string(excludeFiles), []string(excludeDirs),
+ map[string]bool(zipsToNotStrip))
+ if err != nil {
+ log.Fatal(err)
+ }
}
diff --git a/cmd/merge_zips/merge_zips_test.go b/cmd/merge_zips/merge_zips_test.go
index dbde2705..cb584360 100644
--- a/cmd/merge_zips/merge_zips_test.go
+++ b/cmd/merge_zips/merge_zips_test.go
@@ -51,6 +51,39 @@ var (
moduleInfoFile = testZipEntry{jar.ModuleInfoClass, 0755, []byte("module-info")}
)
+type testInputZip struct {
+ name string
+ entries []testZipEntry
+ reader *zip.Reader
+}
+
+func (tiz *testInputZip) Name() string {
+ return tiz.name
+}
+
+func (tiz *testInputZip) Open() error {
+ if tiz.reader == nil {
+ tiz.reader = testZipEntriesToZipReader(tiz.entries)
+ }
+ return nil
+}
+
+func (tiz *testInputZip) Close() error {
+ tiz.reader = nil
+ return nil
+}
+
+func (tiz *testInputZip) Entries() []*zip.File {
+ if tiz.reader == nil {
+ panic(fmt.Errorf("%s: should be open to get entries", tiz.Name()))
+ }
+ return tiz.reader.File
+}
+
+func (tiz *testInputZip) IsOpen() bool {
+ return tiz.reader != nil
+}
+
func TestMergeZips(t *testing.T) {
testCases := []struct {
name string
@@ -207,13 +240,9 @@ func TestMergeZips(t *testing.T) {
for _, test := range testCases {
t.Run(test.name, func(t *testing.T) {
- var readers []namedZipReader
+ inputZips := make([]InputZip, len(test.in))
for i, in := range test.in {
- r := testZipEntriesToZipReader(in)
- readers = append(readers, namedZipReader{
- path: "in" + strconv.Itoa(i),
- reader: r,
- })
+ inputZips[i] = &testInputZip{name: "in" + strconv.Itoa(i), entries: in}
}
want := testZipEntriesToBuf(test.out)
@@ -221,7 +250,7 @@ func TestMergeZips(t *testing.T) {
out := &bytes.Buffer{}
writer := zip.NewWriter(out)
- err := mergeZips(readers, writer, "", "",
+ err := mergeZips(inputZips, writer, "", "",
test.sort, test.jar, false, test.stripDirEntries, test.ignoreDuplicates,
test.stripFiles, test.stripDirs, test.zipsToNotStrip)
@@ -304,3 +333,60 @@ func dumpZip(buf []byte) string {
return ret
}
+
+type DummyInpuZip struct {
+ isOpen bool
+}
+
+func (diz *DummyInpuZip) Name() string {
+ return "dummy"
+}
+
+func (diz *DummyInpuZip) Open() error {
+ diz.isOpen = true
+ return nil
+}
+
+func (diz *DummyInpuZip) Close() error {
+ diz.isOpen = false
+ return nil
+}
+
+func (DummyInpuZip) Entries() []*zip.File {
+ panic("implement me")
+}
+
+func (diz *DummyInpuZip) IsOpen() bool {
+ return diz.isOpen
+}
+
+func TestInputZipsManager(t *testing.T) {
+ const nInputZips = 20
+ const nMaxOpenZips = 10
+ izm := NewInputZipsManager(20, 10)
+ managedZips := make([]InputZip, nInputZips)
+ for i := 0; i < nInputZips; i++ {
+ managedZips[i] = izm.Manage(&DummyInpuZip{})
+ }
+
+ t.Run("InputZipsManager", func(t *testing.T) {
+ for i, iz := range managedZips {
+ if err := iz.Open(); err != nil {
+ t.Fatalf("Step %d: open failed: %s", i, err)
+ return
+ }
+ if izm.nOpenZips > nMaxOpenZips {
+ t.Errorf("Step %d: should be <=%d open zips", i, nMaxOpenZips)
+ }
+ }
+ if !managedZips[nInputZips-1].IsOpen() {
+ t.Error("The last input should stay open")
+ }
+ for _, iz := range managedZips {
+ iz.Close()
+ }
+ if izm.nOpenZips > 0 {
+ t.Error("Some input zips are still open")
+ }
+ })
+}