author      Dan Willemsen <dwillemsen@google.com>    2016-08-04 15:43:03 -0700
committer   Dan Willemsen <dwillemsen@google.com>    2016-08-11 01:30:16 -0700
commit      017d8937fa4536eafa306e99bf83c01d9f3d263e (patch)
tree        f44542ea160142ab5a2b5aade26ff58b9a44ad51 /cmd
parent      3bf1a085050c797f4af31353325c9ee69ca6c02f (diff)
download    build_soong-017d8937fa4536eafa306e99bf83c01d9f3d263e.tar.gz
            build_soong-017d8937fa4536eafa306e99bf83c01d9f3d263e.tar.bz2
            build_soong-017d8937fa4536eafa306e99bf83c01d9f3d263e.zip
soong_jar: Parallel compression
This compresses multiple files in parallel, and splits larger files
(5MB+) into smaller chunks (1MB) so that the chunks can also be
compressed in parallel. Recombining the chunks adds a small size
overhead, but it is only a few bytes per chunk, which is negligible
for a 1MB chunk.

As rough numbers, with everything in the page cache, this can compress
~4GB (1000 files) down to 1GB in 6.5 seconds, versus 120 seconds with
the non-parallel soong_jar and 150 seconds with zip.

Go's DEFLATE implementation still compresses a bit worse than zip's --
about 3.5% larger file sizes -- but for most of our "dist" targets that
is fine.
Change-Id: Ie4886c7d0f954ace46e599156e35fea7e74d6dd7
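The idea described in the commit message -- split the input into 1MB blocks, compress each block on its own goroutine using the previous 32KB as a preset dictionary, sync-flush every block except the last, and concatenate the results into one valid DEFLATE stream -- can be sketched independently of soong_jar. The standalone Go program below is only an illustration of that idea under those assumptions; the helper name compressChunked and the demo data are invented here and are not part of the change.

// parallel_deflate_sketch.go -- illustrative only, not part of this patch.
package main

import (
	"bytes"
	"compress/flate"
	"fmt"
	"io/ioutil"
	"sync"
)

const blockSize = 1 * 1024 * 1024 // 1MB chunks, as in the commit message
const window = 32 * 1024          // DEFLATE window used as the per-chunk dictionary

// compressChunked compresses data as independent 1MB blocks produced by
// separate goroutines, then concatenates the flushed streams.
func compressChunked(data []byte, level int) ([]byte, error) {
	var blocks [][]byte
	for start := 0; start < len(data); start += blockSize {
		end := start + blockSize
		if end > len(data) {
			end = len(data)
		}
		blocks = append(blocks, data[start:end])
	}

	outs := make([]*bytes.Buffer, len(blocks))
	errs := make([]error, len(blocks))
	var wg sync.WaitGroup
	for i := range blocks {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			buf := new(bytes.Buffer)
			// Prime each block with the previous 32KB so back-references
			// across the chunk boundary still resolve after concatenation.
			var dict []byte
			if start := i * blockSize; start >= window {
				dict = data[start-window : start]
			}
			fw, err := flate.NewWriterDict(buf, level, dict)
			if err != nil {
				errs[i] = err
				return
			}
			if _, err := fw.Write(blocks[i]); err != nil {
				errs[i] = err
				return
			}
			if i == len(blocks)-1 {
				errs[i] = fw.Close() // final block terminates the stream
			} else {
				errs[i] = fw.Flush() // sync flush: a few bytes of overhead per chunk
			}
			outs[i] = buf
		}(i)
	}
	wg.Wait()

	var combined bytes.Buffer
	for i := range blocks {
		if errs[i] != nil {
			return nil, errs[i]
		}
		combined.Write(outs[i].Bytes())
	}
	return combined.Bytes(), nil
}

func main() {
	data := bytes.Repeat([]byte("soong_jar parallel compression demo "), 200000)
	out, err := compressChunked(data, 5)
	if err != nil {
		panic(err)
	}
	// Verify the concatenated chunks decode as one valid DEFLATE stream.
	rt, err := ioutil.ReadAll(flate.NewReader(bytes.NewReader(out)))
	if err != nil || !bytes.Equal(rt, data) {
		panic("round trip failed")
	}
	fmt.Printf("%d -> %d bytes\n", len(data), len(out))
}

The round-trip check at the end illustrates the "few bytes per chunk" claim: the only extra bytes are the empty stored block that each Flush emits.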
Diffstat (limited to 'cmd')
-rw-r--r--  cmd/soong_jar/rate_limit.go  115
-rw-r--r--  cmd/soong_jar/soong_jar.go   444
2 files changed, 522 insertions, 37 deletions
diff --git a/cmd/soong_jar/rate_limit.go b/cmd/soong_jar/rate_limit.go
new file mode 100644
index 00000000..9e95bc14
--- /dev/null
+++ b/cmd/soong_jar/rate_limit.go
@@ -0,0 +1,115 @@
+// Copyright 2016 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+    "runtime"
+)
+
+type RateLimit struct {
+    requests chan struct{}
+    finished chan int
+    released chan int
+    stop     chan struct{}
+}
+
+// NewRateLimit starts a new rate limiter with maxExecs number of executions
+// allowed to happen at a time. If maxExecs is <= 0, it will default to the
+// number of logical CPUs on the system.
+//
+// With Finish and Release, we'll keep track of outstanding buffer sizes to be
+// written. If that size goes above maxMem, we'll prevent starting new
+// executions.
+//
+// The total memory use may be higher due to current executions. This just
+// prevents runaway memory use due to slower writes.
+func NewRateLimit(maxExecs int, maxMem int64) *RateLimit {
+    if maxExecs <= 0 {
+        maxExecs = runtime.NumCPU()
+    }
+    if maxMem <= 0 {
+        // Default to 512MB
+        maxMem = 512 * 1024 * 1024
+    }
+
+    ret := &RateLimit{
+        requests: make(chan struct{}),
+
+        // Let all of the pending executions mark themselves as finished,
+        // even if our goroutine isn't processing input.
+        finished: make(chan int, maxExecs),
+
+        released: make(chan int),
+        stop:     make(chan struct{}),
+    }
+
+    go ret.goFunc(maxExecs, maxMem)
+
+    return ret
+}
+
+// RequestExecution blocks until another execution can be allowed to run.
+func (r *RateLimit) RequestExecution() Execution {
+    <-r.requests
+    return r.finished
+}
+
+type Execution chan<- int
+
+// Finish will mark your execution as finished, and allow another request to be
+// approved.
+//
+// bufferSize may be specified to count memory buffer sizes, and must be
+// matched with calls to RateLimit.Release to mark the buffers as released.
+func (e Execution) Finish(bufferSize int) {
+    e <- bufferSize
+}
+
+// Call Release when finished with a buffer recorded with Finish.
+func (r *RateLimit) Release(bufferSize int) {
+    r.released <- bufferSize
+}
+
+// Stop the background goroutine
+func (r *RateLimit) Stop() {
+    close(r.stop)
+}
+
+func (r *RateLimit) goFunc(maxExecs int, maxMem int64) {
+    var curExecs int
+    var curMemory int64
+
+    for {
+        var requests chan struct{}
+        if curExecs < maxExecs && curMemory < maxMem {
+            requests = r.requests
+        }
+
+        select {
+        case requests <- struct{}{}:
+            curExecs++
+        case amount := <-r.finished:
+            curExecs--
+            curMemory += int64(amount)
+            if curExecs < 0 {
+                panic("curExecs < 0")
+            }
+        case amount := <-r.released:
+            curMemory -= int64(amount)
+        case <-r.stop:
+            return
+        }
+    }
+}
diff --git a/cmd/soong_jar/soong_jar.go b/cmd/soong_jar/soong_jar.go
index d3ff5faa..bca0dba9 100644
--- a/cmd/soong_jar/soong_jar.go
+++ b/cmd/soong_jar/soong_jar.go
@@ -15,17 +15,44 @@
 package main
 
 import (
-    "archive/zip"
+    "bytes"
+    "compress/flate"
     "flag"
     "fmt"
+    "hash/crc32"
     "io"
     "io/ioutil"
     "os"
     "path/filepath"
+    "runtime"
+    "runtime/pprof"
+    "runtime/trace"
     "strings"
+    "sync"
     "time"
+
+    "android/soong/third_party/zip"
 )
 
+// Block size used during parallel compression of a single file.
+const parallelBlockSize = 1 * 1024 * 1024 // 1MB
+
+// Minimum file size to use parallel compression. It requires more
+// flate.Writer allocations, since we can't change the dictionary
+// during Reset
+const minParallelFileSize = parallelBlockSize * 6
+
+// Size of the ZIP compression window (32KB)
+const windowSize = 32 * 1024
+
+type nopCloser struct {
+    io.Writer
+}
+
+func (nopCloser) Close() error {
+    return nil
+}
+
 type fileArg struct {
     relativeRoot, file string
 }
@@ -54,8 +81,13 @@ var (
     manifest     = flag.String("m", "", "input manifest file name")
     directories  = flag.Bool("d", false, "include directories in jar")
     relativeRoot = flag.String("C", "", "path to use as relative root of files in next -f or -l argument")
+    parallelJobs = flag.Int("j", runtime.NumCPU(), "number of parallel threads to use")
+    compLevel    = flag.Int("L", 5, "deflate compression level (0-9)")
     listFiles fileArgs
     files     fileArgs
+
+    cpuProfile = flag.String("cpuprofile", "", "write cpu profile to file")
+    traceFile  = flag.String("trace", "", "write trace to file")
 )
 
 func init() {
@@ -74,12 +106,51 @@ type zipWriter struct {
     createdDirs map[string]bool
     directories bool
 
-    w *zip.Writer
+    errors   chan error
+    writeOps chan chan *zipEntry
+
+    rateLimit *RateLimit
+
+    compressorPool sync.Pool
+    compLevel      int
+}
+
+type zipEntry struct {
+    fh *zip.FileHeader
+
+    // List of delayed io.Reader
+    futureReaders chan chan io.Reader
 }
 
 func main() {
     flag.Parse()
 
+    if *cpuProfile != "" {
+        f, err := os.Create(*cpuProfile)
+        if err != nil {
+            fmt.Fprintln(os.Stderr, err.Error())
+            os.Exit(1)
+        }
+        defer f.Close()
+        pprof.StartCPUProfile(f)
+        defer pprof.StopCPUProfile()
+    }
+
+    if *traceFile != "" {
+        f, err := os.Create(*traceFile)
+        if err != nil {
+            fmt.Fprintln(os.Stderr, err.Error())
+            os.Exit(1)
+        }
+        defer f.Close()
+        err = trace.Start(f)
+        if err != nil {
+            fmt.Fprintln(os.Stderr, err.Error())
+            os.Exit(1)
+        }
+        defer trace.Stop()
+    }
+
     if *out == "" {
         fmt.Fprintf(os.Stderr, "error: -o is required\n")
         usage()
@@ -89,9 +160,9 @@ func main() {
         time:        time.Date(2009, 1, 1, 0, 0, 0, 0, time.UTC),
         createdDirs: make(map[string]bool),
         directories: *directories,
+        compLevel:   *compLevel,
     }
 
-    // TODO: Go's zip implementation doesn't support increasing the compression level yet
     err := w.write(*out, listFiles, *manifest)
     if err != nil {
         fmt.Fprintln(os.Stderr, err.Error())
@@ -112,31 +183,138 @@ func (z *zipWriter) write(out string, listFiles fileArgs, manifest string) error
         }
     }()
 
-    z.w = zip.NewWriter(f)
-    defer z.w.Close()
-
-    for _, listFile := range listFiles {
-        err = z.writeListFile(listFile)
-        if err != nil {
-            return err
-        }
-    }
-
-    for _, file := range files {
-        err = z.writeRelFile(file.relativeRoot, file.file)
-        if err != nil {
-            return err
-        }
-    }
-
-    if manifest != "" {
-        err = z.writeFile("META-INF/MANIFEST.MF", manifest)
-        if err != nil {
-            return err
-        }
-    }
-
-    return nil
+    z.errors = make(chan error)
+    defer close(z.errors)
+
+    // This channel size can be essentially unlimited -- it's used as a fifo
+    // queue to decouple the CPU and IO loads. Directories don't require any
+    // compression time, but still cost some IO. Similar with small files that
+    // can be very fast to compress. Some files that are more difficult to
+    // compress won't take a correspondingly longer time writing out.
+    //
+    // The optimum size here depends on your CPU and IO characteristics, and
+    // the layout of your zip file. 1000 was chosen mostly at random as
+    // something that worked reasonably well for a test file.
+    //
+    // The RateLimit object will put the upper bounds on the number of
+    // parallel compressions and outstanding buffers.
+    z.writeOps = make(chan chan *zipEntry, 1000)
+    z.rateLimit = NewRateLimit(*parallelJobs, 0)
+    defer z.rateLimit.Stop()
+
+    go func() {
+        var err error
+        defer close(z.writeOps)
+
+        for _, listFile := range listFiles {
+            err = z.writeListFile(listFile)
+            if err != nil {
+                z.errors <- err
+                return
+            }
+        }
+
+        for _, file := range files {
+            err = z.writeRelFile(file.relativeRoot, file.file)
+            if err != nil {
+                z.errors <- err
+                return
+            }
+        }
+
+        if manifest != "" {
+            err = z.writeFile("META-INF/MANIFEST.MF", manifest)
+            if err != nil {
+                z.errors <- err
+                return
+            }
+        }
+    }()
+
+    zipw := zip.NewWriter(f)
+
+    var currentWriteOpChan chan *zipEntry
+    var currentWriter io.WriteCloser
+    var currentReaders chan chan io.Reader
+    var currentReader chan io.Reader
+    var done bool
+
+    for !done {
+        var writeOpsChan chan chan *zipEntry
+        var writeOpChan chan *zipEntry
+        var readersChan chan chan io.Reader
+
+        if currentReader != nil {
+            // Only read and process errors
+        } else if currentReaders != nil {
+            readersChan = currentReaders
+        } else if currentWriteOpChan != nil {
+            writeOpChan = currentWriteOpChan
+        } else {
+            writeOpsChan = z.writeOps
+        }
+
+        select {
+        case writeOp, ok := <-writeOpsChan:
+            if !ok {
+                done = true
+            }
+
+            currentWriteOpChan = writeOp
+
+        case op := <-writeOpChan:
+            currentWriteOpChan = nil
+
+            if op.fh.Method == zip.Deflate {
+                currentWriter, err = zipw.CreateCompressedHeader(op.fh)
+            } else {
+                var zw io.Writer
+                zw, err = zipw.CreateHeader(op.fh)
+                currentWriter = nopCloser{zw}
+            }
+            if err != nil {
+                return err
+            }
+
+            currentReaders = op.futureReaders
+            if op.futureReaders == nil {
+                currentWriter.Close()
+                currentWriter = nil
+            }
+
+        case futureReader, ok := <-readersChan:
+            if !ok {
+                // Done with reading
+                currentWriter.Close()
+                currentWriter = nil
+                currentReaders = nil
+            }
+
+            currentReader = futureReader
+
+        case reader := <-currentReader:
+            var count int64
+            count, err = io.Copy(currentWriter, reader)
+            if err != nil {
+                return err
+            }
+            z.rateLimit.Release(int(count))
+
+            currentReader = nil
+
+        case err = <-z.errors:
+            return err
+        }
+    }
+
+    // One last chance to catch an error
+    select {
+    case err = <-z.errors:
+        return err
+    default:
+        zipw.Close()
+        return nil
+    }
 }
 
 func (z *zipWriter) writeListFile(listFile fileArg) error {
@@ -178,6 +356,8 @@ func (z *zipWriter) writeRelFile(root, file string) error {
 }
 
 func (z *zipWriter) writeFile(rel, file string) error {
+    var fileSize int64
+
     if s, err := os.Lstat(file); err != nil {
         return err
     } else if s.IsDir() {
@@ -189,6 +369,8 @@ func (z *zipWriter) writeFile(rel, file string) error {
         return z.writeSymlink(rel, file)
     } else if !s.Mode().IsRegular() {
         return fmt.Errorf("%s is not a file, directory, or symlink", file)
+    } else {
+        fileSize = s.Size()
     }
 
     if z.directories {
@@ -199,29 +381,201 @@ func (z *zipWriter) writeFile(rel, file string) error {
         }
     }
 
-    fileHeader := &zip.FileHeader{
-        Name:   rel,
-        Method: zip.Deflate,
-    }
-    fileHeader.SetModTime(z.time)
-
-    out, err := z.w.CreateHeader(fileHeader)
-    if err != nil {
-        return err
-    }
-
-    in, err := os.Open(file)
-    if err != nil {
-        return err
-    }
-    defer in.Close()
-
-    _, err = io.Copy(out, in)
-    if err != nil {
-        return err
-    }
-
-    return nil
+    compressChan := make(chan *zipEntry, 1)
+    z.writeOps <- compressChan
+
+    // Pre-fill a zipEntry, it will be sent in the compressChan once
+    // we're sure about the Method and CRC.
+    ze := &zipEntry{
+        fh: &zip.FileHeader{
+            Name:   rel,
+            Method: zip.Deflate,
+
+            UncompressedSize64: uint64(fileSize),
+        },
+    }
+    ze.fh.SetModTime(z.time)
+
+    r, err := os.Open(file)
+    if err != nil {
+        return err
+    }
+
+    exec := z.rateLimit.RequestExecution()
+
+    if fileSize >= minParallelFileSize {
+        wg := new(sync.WaitGroup)
+
+        // Allocate enough buffer to hold all readers. We'll limit
+        // this based on actual buffer sizes in RateLimit.
+        ze.futureReaders = make(chan chan io.Reader, (fileSize/parallelBlockSize)+1)
+
+        // Calculate the CRC in the background, since reading the entire
+        // file could take a while.
+        //
+        // We could split this up into chunks as well, but it's faster
+        // than the compression. Due to the Go Zip API, we also need to
+        // know the result before we can begin writing the compressed
+        // data out to the zipfile.
+        wg.Add(1)
+        go z.crcFile(r, ze, exec, compressChan, wg)
+
+        for start := int64(0); start < fileSize; start += parallelBlockSize {
+            sr := io.NewSectionReader(r, start, parallelBlockSize)
+            resultChan := make(chan io.Reader, 1)
+            ze.futureReaders <- resultChan
+
+            exec := z.rateLimit.RequestExecution()
+
+            last := !(start+parallelBlockSize < fileSize)
+            var dict []byte
+            if start >= windowSize {
+                dict, err = ioutil.ReadAll(io.NewSectionReader(r, start-windowSize, windowSize))
+            }
+
+            wg.Add(1)
+            go z.compressPartialFile(sr, dict, last, exec, resultChan, wg)
+        }
+
+        close(ze.futureReaders)
+
+        // Close the file handle after all readers are done
+        go func(wg *sync.WaitGroup, f *os.File) {
+            wg.Wait()
+            f.Close()
+        }(wg, r)
+    } else {
+        go z.compressWholeFile(rel, r, exec, compressChan)
+    }
+
+    return nil
+}
+
+func (z *zipWriter) crcFile(r io.Reader, ze *zipEntry, exec Execution, resultChan chan *zipEntry, wg *sync.WaitGroup) {
+    defer wg.Done()
+    defer exec.Finish(0)
+
+    crc := crc32.NewIEEE()
+    _, err := io.Copy(crc, r)
+    if err != nil {
+        z.errors <- err
+        return
+    }
+
+    ze.fh.CRC32 = crc.Sum32()
+    resultChan <- ze
+    close(resultChan)
+}
+
+func (z *zipWriter) compressPartialFile(r io.Reader, dict []byte, last bool, exec Execution, resultChan chan io.Reader, wg *sync.WaitGroup) {
+    defer wg.Done()
+
+    result, err := z.compressBlock(r, dict, last)
+    if err != nil {
+        z.errors <- err
+        return
+    }
+
+    exec.Finish(result.Len())
+    resultChan <- result
+}
+
+func (z *zipWriter) compressBlock(r io.Reader, dict []byte, last bool) (*bytes.Buffer, error) {
+    buf := new(bytes.Buffer)
+    var fw *flate.Writer
+    var err error
+    if len(dict) > 0 {
+        // There's no way to Reset a Writer with a new dictionary, so
+        // don't use the Pool
+        fw, err = flate.NewWriterDict(buf, z.compLevel, dict)
+    } else {
+        var ok bool
+        if fw, ok = z.compressorPool.Get().(*flate.Writer); ok {
+            fw.Reset(buf)
+        } else {
+            fw, err = flate.NewWriter(buf, z.compLevel)
+        }
+        defer z.compressorPool.Put(fw)
+    }
+    if err != nil {
+        return nil, err
+    }
+
+    _, err = io.Copy(fw, r)
+    if err != nil {
+        return nil, err
+    }
+    if last {
+        fw.Close()
+    } else {
+        fw.Flush()
+    }
+
+    return buf, nil
+}
+
+func (z *zipWriter) compressWholeFile(rel string, r *os.File, exec Execution, compressChan chan *zipEntry) {
+    var bufSize int
+
+    defer r.Close()
+
+    fileHeader := &zip.FileHeader{
+        Name:   rel,
+        Method: zip.Deflate,
+    }
+    fileHeader.SetModTime(z.time)
+
+    crc := crc32.NewIEEE()
+    count, err := io.Copy(crc, r)
+    if err != nil {
+        z.errors <- err
+        return
+    }
+
+    fileHeader.CRC32 = crc.Sum32()
+    fileHeader.UncompressedSize64 = uint64(count)
+
+    _, err = r.Seek(0, 0)
+    if err != nil {
+        z.errors <- err
+        return
+    }
+
+    compressed, err := z.compressBlock(r, nil, true)
+
+    ze := &zipEntry{
+        fh:            fileHeader,
+        futureReaders: make(chan chan io.Reader, 1),
+    }
+    futureReader := make(chan io.Reader, 1)
+    ze.futureReaders <- futureReader
+    close(ze.futureReaders)
+
+    if uint64(compressed.Len()) < ze.fh.UncompressedSize64 {
+        futureReader <- compressed
+        bufSize = compressed.Len()
+    } else {
+        _, err = r.Seek(0, 0)
+        if err != nil {
+            z.errors <- err
+            return
+        }
+
+        buf, err := ioutil.ReadAll(r)
+        if err != nil {
+            z.errors <- err
+            return
+        }
+
+        ze.fh.Method = zip.Store
+        futureReader <- bytes.NewReader(buf)
+        bufSize = int(ze.fh.UncompressedSize64)
+    }
+    exec.Finish(bufSize)
+    close(futureReader)
+
+    compressChan <- ze
+    close(compressChan)
 }
 
 func (z *zipWriter) writeDirectory(dir string) error {
@@ -238,10 +592,12 @@ func (z *zipWriter) writeDirectory(dir string) error {
         dirHeader.SetMode(0700 | os.ModeDir)
         dirHeader.SetModTime(z.time)
 
-        _, err := z.w.CreateHeader(dirHeader)
-        if err != nil {
-            return err
+        ze := make(chan *zipEntry, 1)
+        ze <- &zipEntry{
+            fh: dirHeader,
         }
+        close(ze)
+        z.writeOps <- ze
 
         dir, _ = filepath.Split(dir)
     }
@@ -263,16 +619,30 @@ func (z *zipWriter) writeSymlink(rel, file string) error {
     fileHeader.SetModTime(z.time)
     fileHeader.SetMode(0700 | os.ModeSymlink)
 
-    out, err := z.w.CreateHeader(fileHeader)
+    dest, err := os.Readlink(file)
     if err != nil {
         return err
     }
 
-    dest, err := os.Readlink(file)
-    if err != nil {
-        return err
+    ze := make(chan *zipEntry, 1)
+    futureReaders := make(chan chan io.Reader, 1)
+    futureReader := make(chan io.Reader, 1)
+    futureReaders <- futureReader
+    close(futureReaders)
+    futureReader <- bytes.NewBufferString(dest)
+    close(futureReader)
+
+    // We didn't ask for permission to execute, since this should be very
+    // short, but we still need to increment the outstanding buffer sizes,
+    // since the read will decrement the buffer size.
+    z.rateLimit.Release(-len(dest))
+
+    ze <- &zipEntry{
+        fh:            fileHeader,
+        futureReaders: futureReaders,
    }
+    close(ze)
+    z.writeOps <- ze
 
-    _, err = io.WriteString(out, dest)
-    return err
+    return nil
 }
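As a reader's aid, this is roughly how the new RateLimit/Execution API in cmd/soong_jar/rate_limit.go is meant to be driven, going by its doc comments: request a slot before starting a compression, call Finish with the size of the produced buffer, and call Release once that buffer has been written out. The sketch below is illustrative only and assumes rate_limit.go is compiled into the same package (it carries its own main purely so the example is runnable); the string payloads stand in for real compressed output.

// ratelimit_usage_sketch.go -- hypothetical usage example, not part of this patch.
package main

import (
	"bytes"
	"fmt"
	"sync"
)

func main() {
	// Defaults: NumCPU concurrent executions, 512MB of outstanding buffers.
	limit := NewRateLimit(0, 0)
	defer limit.Stop()

	results := make(chan *bytes.Buffer, 16)
	var wg sync.WaitGroup

	for i := 0; i < 100; i++ {
		// Blocks until an execution slot (and buffer memory) is available.
		exec := limit.RequestExecution()
		wg.Add(1)
		go func(i int, exec Execution) {
			defer wg.Done()
			buf := bytes.NewBufferString(fmt.Sprintf("compressed chunk %d", i))
			// Mark the execution finished and record the buffer as outstanding.
			exec.Finish(buf.Len())
			results <- buf
		}(i, exec)
	}

	go func() {
		wg.Wait()
		close(results)
	}()

	for buf := range results {
		// The writer drains buffers and releases their accounted memory.
		limit.Release(buf.Len())
	}
}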