/*
 * (C) Copyright 2006-2015 Nuxeo SA (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Bogdan Stefanescu
 *     Thierry Delprat
 *     Florent Guillaume
 */
package org.nuxeo.ecm.core.io.impl.plugins;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Enumeration;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipInputStream;

import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.input.CloseShieldInputStream;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.io.SAXReader;
import org.nuxeo.common.utils.Path;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.api.Blobs;
import org.nuxeo.ecm.core.api.impl.blob.ZipEntryBlob;
import org.nuxeo.ecm.core.io.ExportConstants;
import org.nuxeo.ecm.core.io.ExportedDocument;
import org.nuxeo.ecm.core.io.impl.AbstractDocumentReader;
import org.nuxeo.ecm.core.io.impl.DWord;
import org.nuxeo.ecm.core.io.impl.ExportedDocumentImpl;

/**
 * Reads nuxeo archives generated using {@link NuxeoArchiveWriter}.
 * <p>
 * If you need to read a CoreIO XML Archive that was not directly generated by {@link NuxeoArchiveWriter} or that was
 * modified you need to use the NuxeoArchiveReader(File) constructor.
 * <p>
 * This implementation holds either a {@link ZipInputStream}, in which case it is assumed to have been generated by
 * {@link NuxeoArchiveWriter} and has a special format (TODO DOCUMENT), or it holds a {@link ZipFile} that may be used
 * in a random-access manner.
 */
public class NuxeoArchiveReader extends AbstractDocumentReader {

    /** Sequential source; only set in stream mode (when {@link #zipFile} is null). */
    private ZipInputStream in;

    /** Whether {@link #close()} must close {@link #in}, i.e. whether this reader owns the stream. */
    private boolean inMustBeClosed;

    /** Random-access source; only set by the {@link File} constructor. */
    private ZipFile zipFile;

    /** Sorted names of the entries of {@link #zipFile} that have not been consumed yet. */
    private List<String> zipIndex;

    /** Files backing the blobs created in stream mode; they are deleted by {@link #close()}. */
    private final Collection<File> filesToDelete = new ArrayList<>();

    /**
     * Create a {@link NuxeoArchiveReader} from an {@link InputStream}.
     * <p>
     * The InputStream must point to an archive that was generated by {@link NuxeoArchiveWriter}.
     * <p>
     * The stream is wrapped in a {@link ZipInputStream} owned by this reader: it is closed by {@link #close()}.
     *
     * @param in InputStream pointing an archive that was generated by NuxeoArchiveWriter
     * @throws IOException if the stream cannot be read or does not start with the archive marker entry
     */
    public NuxeoArchiveReader(InputStream in) throws IOException {
        this(new ZipInputStream(in), true);
        // the ZipInputStream wrapper was created here, so this reader is responsible for closing it
        inMustBeClosed = true;
    }

    /**
     * Protected constructor used by {@link ZipReader}. Must not close the stream when done.
     *
     * @param in the zip stream to read from; it stays owned by the caller and is never closed by this reader
     * @param checkMarker whether the first entry of the stream must be consumed and verified to be the marker file
     * @throws IOException if {@code checkMarker} is set and the marker entry is missing or cannot be read
     */
    protected NuxeoArchiveReader(ZipInputStream in, boolean checkMarker) throws IOException {
        this.in = in;
        // the stream belongs to the caller, whatever the value of checkMarker
        inMustBeClosed = false;
        if (checkMarker) {
            checkMarker();
        }
    }

    /**
     * Create a {@link NuxeoArchiveReader} from a {@link File}.
     * <p>
     * This constructor is different from others because it allows the input zip file to have been generated by
     * another engine than {@link NuxeoArchiveWriter}.
     * <p>
     * In particular, you can use this constructor on a Zip Archive that was manually modified.
     *
     * @param file a Zip archive
     * @throws IOException if the file cannot be opened as a zip or does not contain the archive marker entry
     */
    public NuxeoArchiveReader(File file) throws IOException {
        this.zipFile = new ZipFile(file);
        try {
            buildOrderedZipIndex();
            checkMarker();
        } catch (IOException | RuntimeException e) {
            // the caller never gets a reference to this reader, so release the file handle here
            IOUtils.closeQuietly(zipFile);
            throw e;
        }
    }

    /**
     * Fills {@link #zipIndex} with the names of all the entries of the zip file, sorted in natural String order.
     * <p>
     * Sorting places a directory entry immediately before the entries whose name starts with it, which is what
     * {@link #readZip()} relies on.
     */
    protected void buildOrderedZipIndex() {
        zipIndex = new ArrayList<>();
        Enumeration<? extends ZipEntry> entries = zipFile.entries();
        while (entries.hasMoreElements()) {
            ZipEntry entry = entries.nextElement();
            zipIndex.add(entry.getName());
        }
        Collections.sort(zipIndex, new Comparator<String>() {
            @Override
            public int compare(String spath1, String spath2) {
                return spath1.compareTo(spath2);
            }
        });
    }

    /**
     * Reads the next document of the archive.
     *
     * @return the next document, or {@code null} when the archive is exhausted
     * @throws IOException if the archive is invalid or cannot be read
     */
    @Override
    public ExportedDocument read() throws IOException {
        if (zipFile != null) {
            return readZip();
        } else {
            return readOrderedStream();
        }
    }

    /**
     * Reads the next document in random-access (zip file) mode, consuming its entries from the index.
     *
     * @return the next document, or {@code null} when the index is exhausted
     * @throws IOException if a top-level file entry is neither the marker nor the root document, or on read error
     */
    protected ExportedDocument readZip() throws IOException {
        if (zipIndex.isEmpty()) {
            return null;
        }
        String idxname = zipIndex.remove(0);
        ZipEntry entry = zipFile.getEntry(idxname);
        if (entry == null) {
            return null;
        }

        if (!entry.isDirectory()) {
            if (entry.getName().equals(ExportConstants.MARKER_FILE)) {
                // the marker carries no document: skip it
                return read();
            } else if (entry.getName().equals(ExportConstants.DOCUMENT_FILE)) {
                // the repository ROOT! TODO: how to handle root? it doesn't
                // have a dir ..
                ExportedDocument xdoc = new ExportedDocumentImpl();
                xdoc.setPath(new Path("/"));
                xdoc.setDocument(loadXML(entry));
                return xdoc;
            } else {
                throw new IOException("Invalid Nuxeo archive on entry " + entry.getName());
            }
        }

        // find the direct children entry that are part of the same document
        // since archive is modifiable we can not rely on the Extra bits thing
        List<String> childEntries = new ArrayList<>();
        int depth = new Path(idxname).removeTrailingSeparator().segmentCount();
        for (String path : zipIndex) {
            if (path.startsWith(idxname)) {
                int subdepth = new Path(path).removeTrailingSeparator().segmentCount();
                if (subdepth != depth + 1 || zipFile.getEntry(path).isDirectory()) {
                    // deeper descendants and sub-directories belong to other documents
                    continue;
                }
                childEntries.add(path);
            } else {
                // the index is sorted: once the prefix no longer matches, there are no more descendants
                break;
            }
        }

        if (childEntries.isEmpty()) {
            return read(); // empty dir -> try next directory
        }
        String name = entry.getName();
        ExportedDocument xdoc = new ExportedDocumentImpl();
        xdoc.setPath(new Path(name).removeTrailingSeparator());
        for (String childEntryName : childEntries) {
            // consume the child so that it is not visited again by a later read
            int i = zipIndex.indexOf(childEntryName);
            idxname = zipIndex.remove(i);
            entry = zipFile.getEntry(idxname);
            name = entry.getName();
            if (name.endsWith(ExportConstants.DOCUMENT_FILE)) {
                xdoc.setDocument(loadXML(entry));
            } else if (name.endsWith(".xml")) { // external doc file
                xdoc.putDocument(FilenameUtils.getBaseName(entry.getName()), loadXML(entry));
            } else { // should be a blob
                xdoc.putBlob(FilenameUtils.getName(entry.getName()), createBlob(entry));
            }
        }
        return xdoc;
    }

    /**
     * Reads the next document in sequential (stream) mode.
     * <p>
     * Each directory entry is expected to carry, in its extra field, the number of file entries that follow it and
     * belong to the same document (see {@link #getFilesCount(ZipEntry)}).
     *
     * @return the next document, or {@code null} at the end of the stream
     * @throws IOException if the archive is invalid, ends before all announced entries were read, or on read error
     */
    protected ExportedDocument readOrderedStream() throws IOException {
        ZipEntry entry = in.getNextEntry();
        if (entry == null) {
            return null;
        }
        if (!entry.isDirectory()) {
            if (entry.getName().equals(ExportConstants.MARKER_FILE)) {
                // the marker carries no document: skip it
                return read();
            } else if (entry.getName().equals(ExportConstants.DOCUMENT_FILE)) {
                // the repository ROOT! TODO: how to handle root? it doesn't
                // have a dir ..
                ExportedDocument xdoc = new ExportedDocumentImpl();
                xdoc.setPath(new Path("/"));
                xdoc.setDocument(loadXML(entry));
                return xdoc;
            } else {
                throw new IOException("Invalid Nuxeo archive");
            }
        }
        int count = getFilesCount(entry);
        if (count == 0) {
            return read(); // empty dir -> try next directory
        }
        String dirName = entry.getName();
        ExportedDocument xdoc = new ExportedDocumentImpl();
        xdoc.setPath(new Path(dirName).removeTrailingSeparator());
        for (int i = 0; i < count; i++) {
            entry = in.getNextEntry();
            if (entry == null) {
                // truncated archive: fewer entries than announced by the directory extra field
                throw new IOException("Invalid Nuxeo archive - unexpected end of zip while reading " + dirName);
            }
            String name = entry.getName();
            if (name.endsWith(ExportConstants.DOCUMENT_FILE)) {
                xdoc.setDocument(loadXML(entry));
            } else if (name.endsWith(".xml")) { // external doc file
                xdoc.putDocument(FilenameUtils.getBaseName(entry.getName()), loadXML(entry));
            } else { // should be a blob
                xdoc.putBlob(FilenameUtils.getName(entry.getName()), createBlob(entry));
            }
        }
        return xdoc;
    }

    /**
     * Releases the resources held by this reader: the zip file if any, the input stream if it is owned by this
     * reader, and the files backing the blobs that were created in stream mode.
     */
    @Override
    public void close() {
        IOUtils.closeQuietly(zipFile);
        if (inMustBeClosed) {
            IOUtils.closeQuietly(in);
        }
        for (File file : filesToDelete) {
            file.delete();
        }
    }

    /**
     * Gets the number of file entries announced by a directory entry through its extra field.
     *
     * @param entry a directory entry
     * @return the decoded count, or 0 if the entry has no extra field
     * @throws IOException if the extra field is present but is not exactly 4 bytes long
     */
    private static int getFilesCount(ZipEntry entry) throws IOException {
        byte[] bytes = entry.getExtra();
        if (bytes == null) {
            return 0;
        } else if (bytes.length != 4) {
            throw new IOException("Invalid Nuxeo Archive");
        } else {
            return new DWord(bytes).getInt();
        }
    }

    /**
     * Parses the content of the given entry as an XML document.
     * <p>
     * In zip file mode the content is fetched from the zip file; in stream mode it is read from the current position
     * of the input stream, which must therefore be positioned on that entry.
     *
     * @param entry the entry to parse
     * @return the parsed document
     * @throws IOException if the entry cannot be read or is not well-formed XML
     */
    private Document loadXML(ZipEntry entry) throws IOException {
        try {
            // NOTE(review): the parser is used with its default configuration; if archives may come from untrusted
            // sources, consider disabling DTDs / external entities on this SAXReader - TODO confirm
            SAXReader saxReader = new SAXReader();
            if (zipFile != null) {
                try (InputStream stream = zipFile.getInputStream(entry)) {
                    return saxReader.read(stream);
                }
            } else {
                // SAXReader.read always closes the stream, but we don't want that
                // so wrap it in a CloseShieldInputStream
                try (CloseShieldInputStream ncin = new CloseShieldInputStream(in)) {
                    return saxReader.read(ncin);
                }
            }
        } catch (DocumentException e) {
            throw new IOException("Failed to read zip entry: " + entry.getName(), e);
        }
    }

    /**
     * Creates a blob for the content of the given entry.
     * <p>
     * In zip file mode the blob references the entry in the zip file. In stream mode the content is copied out of
     * the stream, and the file backing the resulting blob is registered for deletion by {@link #close()}.
     *
     * @param entry the entry holding the blob content
     * @return the blob
     * @throws IOException if the content cannot be read
     */
    private Blob createBlob(ZipEntry entry) throws IOException {
        if (zipFile != null) {
            return new ZipEntryBlob(zipFile, entry);
        } else {
            // should decompress since this is a generic stream
            // FileBlob always closes the stream, but we don't want that
            // so wrap it in a CloseShieldInputStream
            try (CloseShieldInputStream ncin = new CloseShieldInputStream(in)) {
                Blob blob = Blobs.createBlob(ncin);
                filesToDelete.add(blob.getFile());
                return blob;
            }
        }
    }

    /**
     * Checks that the archive contains the marker file.
     * <p>
     * In stream mode this consumes the first entry, which must be the marker. In zip file mode the marker may be
     * anywhere in the index.
     *
     * @throws IOException if the marker is not found
     */
    private void checkMarker() throws IOException {
        if (zipFile == null) {
            ZipEntry entry = in.getNextEntry();
            if (entry == null) {
                throw new IOException("Not a valid Nuxeo Archive - no marker file found (unexpected end of zip)");
            }
            if (!isMarkerEntry(entry)) {
                throw new IOException("Not a valid Nuxeo Archive - no marker file found");
            }
        } else {
            if (!zipIndex.contains(ExportConstants.MARKER_FILE)) {
                throw new IOException("Not a valid Nuxeo Archive - no marker file found");
            }
        }
    }

    /**
     * Tests whether the given entry is the archive marker file.
     *
     * @param entry the entry to test
     * @return {@code true} if the entry name equals {@link ExportConstants#MARKER_FILE}
     */
    public static boolean isMarkerEntry(ZipEntry entry) {
        return entry.getName().equals(ExportConstants.MARKER_FILE);
    }

}