// Copyright 2015 The Bazel Authors. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package com.google.devtools.build.android.ziputils; import static com.google.devtools.build.android.ziputils.DataDescriptor.EXTCRC; import static com.google.devtools.build.android.ziputils.DataDescriptor.EXTLEN; import static com.google.devtools.build.android.ziputils.DataDescriptor.EXTSIZ; import static com.google.devtools.build.android.ziputils.DirectoryEntry.CENCRC; import static com.google.devtools.build.android.ziputils.DirectoryEntry.CENLEN; import static com.google.devtools.build.android.ziputils.DirectoryEntry.CENSIZ; import static com.google.devtools.build.android.ziputils.DirectoryEntry.CENTIM; import static com.google.devtools.build.android.ziputils.LocalFileHeader.LOCFLG; import static com.google.devtools.build.android.ziputils.LocalFileHeader.LOCTIM; import static java.nio.charset.StandardCharsets.UTF_8; import com.google.common.base.Preconditions; import com.google.common.base.Predicate; import com.google.common.base.Predicates; import com.google.common.collect.Sets; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeSet; /** * Extracts entries from a set of input archives, and copies them to N output archive of * approximately equal size, while attempting to split archives on package (directory) boundaries. * Optionally, accept a list of entries to be added to the first output archive, splitting * remaining entries by package boundaries. */ public class SplitZip implements EntryHandler { private boolean verbose = false; private boolean splitDexFiles = false; private final List<ZipIn> inputs; private final List<ZipOut> outputs; private String filterFile; private InputStream filterInputStream; private String resourceFile; private Date date; private DosTime dosTime; // Internal state variables: private boolean finished = false; private Set<String> filter; private ZipOut[] zipOuts; private ZipOut resourceOut; private final Map<String, ZipOut> assignments = new HashMap<>(); private final Map<String, CentralDirectory> centralDirectories; private final Set<String> classes = new TreeSet<>(); private Predicate<String> inputFilter = Predicates.alwaysTrue(); /** * Creates an un-configured {@code SplitZip} instance. */ public SplitZip() { inputs = new ArrayList<>(); outputs = new ArrayList<>(); centralDirectories = new HashMap<>(); } /** * Configures a resource file. By default, resources are output in the initial shard. * If a resource file is specified, resources are written to this instead. * @param resourceFile in not {@code null}, the name of a file in which to output resources. * @return this object. */ public SplitZip setResourceFile(String resourceFile) { this.resourceFile = resourceFile; return this; } // Package private for testing with mock file SplitZip setResourceFile(ZipOut resOut) { resourceOut = resOut; return this; } /** * Gets the name of the resource output file. If no resource output file is configured, resources * are output in the initial shard. * @return the name of the resource output file, or {@code null} if no file has been configured. */ public String getResourceFile() { return resourceFile; } /** * Configures a file containing a list of files to be included in the first output archive. * * @param clFile path of class file list. * @return this object */ public SplitZip setMainClassListFile(String clFile) { filterFile = clFile; return this; } // Package private for testing with mock file SplitZip setMainClassListFile(InputStream clInputStream) { filterInputStream = clInputStream; return this; } /** * Gets the path of the file listing the content of the initial shard. * @return return path of file list file, or {@code null} if not set. */ public String getMainClassListFile() { return filterFile; } /** * Configures verbose mode. * * @param flag set to {@code true} to turn on verbose mode. * @return this object */ public SplitZip setVerbose(boolean flag) { verbose = flag; return this; } /** * Gets the verbosity mode. * @return {@code true} iff verbose mode is enabled */ public boolean isVerbose() { return verbose; } /** * Configures whether to split .dex files along with .class files. * * @param flag {@code true} will split .dex files; {@code false} treats them as resources */ public SplitZip setSplitDexedClasses(boolean flag) { splitDexFiles = flag; return this; } /** * Sets date to overwrite timestamp of copied entries. Setting the date to {@code null} means * using the date and time information in the input file. Set an explicit date to override. * * @param date modified date and time to set for entries in output. * @return this object. */ public SplitZip setEntryDate(Date date) { this.date = date; this.dosTime = date == null ? null : new DosTime(date); return this; } /** * Sets date to {@link DosTime#DOS_EPOCH}. * @return this object. */ public SplitZip useDefaultEntryDate() { this.date = DosTime.DOS_EPOCH; this.dosTime = DosTime.EPOCH; return this; } /** * Gets the entry modified date. */ public Date getEntryDate() { return date; } /** * Configures multiple input file locations. * * @param inputs list of input locations. * @return this object * @throws java.io.IOException */ public SplitZip addInputs(Iterable<String> inputs) throws IOException { for (String i : inputs) { addInput(i); } return this; } /** * Configures an input location. An input file must be a zip archive. * * @param filename path for an input location. * @return this object * @throws java.io.IOException */ public SplitZip addInput(String filename) throws IOException { if (filename != null) { inputs.add(new ZipIn(new FileInputStream(filename).getChannel(), filename)); } return this; } // Package private, for testing using mock file system. SplitZip addInput(ZipIn in) throws IOException { Preconditions.checkNotNull(in); inputs.add(in); return this; } /** * Configures multiple output file locations. * * @param outputs list of output files. * @return this object * @throws java.io.IOException */ public SplitZip addOutputs(Iterable<String> outputs) throws IOException { for (String o : outputs) { addOutput(o); } return this; } /** * Configures an output location. * * @param output path for an output location. * @return this object * @throws java.io.IOException */ public SplitZip addOutput(String output) throws IOException { Preconditions.checkNotNull(output); outputs.add(new ZipOut(new FileOutputStream(output, false).getChannel(), output)); return this; } // Package private for testing with mock file SplitZip addOutput(ZipOut output) throws IOException { Preconditions.checkNotNull(output); outputs.add(output); return this; } /** * Set a predicate to only include files with matching filenames in any of the outputs. <b>Other * zip entries are dropped</b>, regardless of whether they're classes or resources and regardless * of whether they're listed in {@link #setMainClassListFile}. */ public SplitZip setInputFilter(Predicate<String> inputFilter) { this.inputFilter = Preconditions.checkNotNull(inputFilter); return this; } /** * Executes this {@code SplitZip}, reading content from the configured input locations, creating * the specified number of archives, in the configured output directory. * * @return this object * @throws java.io.IOException */ public SplitZip run() throws IOException { verbose("SplitZip: Splitting in: " + outputs.size()); verbose("SplitZip: with filter: " + filterFile); checkConfig(); // Prepare output files zipOuts = outputs.toArray(new ZipOut[outputs.size()]); if (resourceFile != null) { resourceOut = new ZipOut(new FileOutputStream(resourceFile, false).getChannel(), resourceFile); } else if (resourceOut == null) { // may have been set for testing resourceOut = zipOuts[0]; } // Read directories of input files for (ZipIn zip : inputs) { zip.endOfCentralDirectory(); centralDirectories.put(zip.getFilename(), zip.centralDirectory()); zip.centralDirectory(); } // Assign input entries to output files split(); // Copy entries to the assigned output files for (ZipIn zip : inputs) { zip.scanEntries(this); } return this; } /** * Copies an entry to the assigned output files. Called for each entry in the input files. * @param in * @param header * @param dirEntry * @param data * @throws IOException */ @Override public void handle(ZipIn in, LocalFileHeader header, DirectoryEntry dirEntry, ByteBuffer data) throws IOException { ZipOut out = assignments.remove(normalizedFilename(header.getFilename())); if (out == null) { // Skip unassigned file; includes a file with the same name as a previously processed one. // This in particular picks the first .class or .dex file encountered for a given class name // and drops any file not matched by inputFilter. return; } if (dirEntry == null) { // Shouldn't get here, as there should be no assignment. System.out.println("Warning: no directory entry"); return; } // Clone directory entry DirectoryEntry entryOut = out.nextEntry(dirEntry); if (dosTime != null) { // Overwrite time stamp header.set(LOCTIM, dosTime.time); entryOut.set(CENTIM, dosTime.time); } out.write(header); out.write(data); if ((header.get(LOCFLG) & LocalFileHeader.SIZE_MASKED_FLAG) != 0) { // Instead of this, we could fix the header with the size information // from the directory entry. For now, keep the entry encoded as-is. DataDescriptor desc = DataDescriptor.allocate() .set(EXTCRC, dirEntry.get(CENCRC)) .set(EXTSIZ, dirEntry.get(CENSIZ)) .set(EXTLEN, dirEntry.get(CENLEN)); out.write(desc); } } /** * Writes any remaining output data to the output stream. * * @throws IOException if the output stream or the filter throws an IOException * @throws IllegalStateException if this method was already called earlier */ public void finish() throws IOException { checkNotFinished(); finished = true; if (resourceOut != null) { resourceOut.finish(); } for (ZipOut zo : zipOuts) { zo.finish(); } } /** * Writes any remaining output data to the output stream and closes it. * * @throws IOException if the output stream or the filter throws an IOException */ public void close() throws IOException { if (!finished) { finish(); } if (resourceOut != null) { resourceOut.close(); } for (ZipOut zo : zipOuts) { zo.close(); } } private void checkNotFinished() { if (finished) { throw new IllegalStateException(); } } /** * Validates configuration before execution. */ private void checkConfig() throws IOException { if (outputs.size() < 1) { throw new IllegalStateException("Require at least one output file"); } filter = filterFile == null && filterInputStream == null ? null : readPaths(filterFile); } /** * Parses the entries and assign each entry to an output file. */ private void split() { for (ZipIn in : inputs) { CentralDirectory cdir = centralDirectories.get(in.getFilename()); for (DirectoryEntry entry : cdir.list()) { String filename = normalizedFilename(entry.getFilename()); if (!inputFilter.apply(filename)) { continue; } if (filename.endsWith(".class")) { // Only pass classes to the splitter, so that it can do the best job // possible distributing them across output files. classes.add(filename); } else if (!filename.endsWith("/")) { // Non class files (resources) are either assigned to the first // output file, or to a specified resource output file. assignments.put(filename, resourceOut); } } } Splitter splitter = new Splitter(outputs.size(), classes.size()); if (filter != null) { // Assign files in the filter to the first output file. splitter.assign(Sets.filter(filter, inputFilter)); splitter.nextShard(); // minimal initial shard } for (String path : classes) { // Use normalized filename so the filter file doesn't have to change int assignment = splitter.assign(path); Preconditions.checkState(assignment >= 0 && assignment < zipOuts.length); assignments.put(path, zipOuts[assignment]); } } private String normalizedFilename(String filename) { if (splitDexFiles && filename.endsWith(".class.dex")) { // suffix generated by DexBuilder return filename.substring(0, filename.length() - ".dex".length()); } return filename; } /** * Reads paths of classes required in first shard. For testing purposes, this relies * on the file system configured for the {@code Zip} library class. */ private Set<String> readPaths(String fileName) throws IOException { Set<String> paths = new HashSet<>(); BufferedReader reader = null; try { if (filterInputStream == null) { filterInputStream = new FileInputStream(fileName); } reader = new BufferedReader(new InputStreamReader(filterInputStream, UTF_8)); String line; while (null != (line = reader.readLine())) { paths.add(fixPath(line)); } return paths; } finally { if (reader != null) { reader.close(); } } } // TODO(bazel-team): Got this from 'dx'. I'm not sure we need this part. Keep it for now, // to make sure we read the main dex list the exact same way that dx would. private String fixPath(String path) { if (File.separatorChar == '\\') { path = path.replace('\\', '/'); } int index = path.lastIndexOf("/./"); if (index != -1) { return path.substring(index + 3); } if (path.startsWith("./")) { return path.substring(2); } return path; } private void verbose(String msg) { if (verbose) { System.out.println(msg); } } }