/* * Copyright (C) 2014 Indeed Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except * in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing permissions and * limitations under the License. */ package com.indeed.imhotep.archive; import com.google.common.io.ByteStreams; import com.indeed.util.io.Files; import com.indeed.imhotep.archive.FileMetadata; import com.indeed.imhotep.archive.SquallArchiveReader; import com.indeed.imhotep.archive.SquallArchiveWriter; import com.indeed.imhotep.archive.compression.SquallArchiveCompressor; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import org.junit.Test; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Random; import static com.indeed.imhotep.archive.compression.SquallArchiveCompressor.GZIP; import static com.indeed.imhotep.archive.compression.SquallArchiveCompressor.NONE; import static com.indeed.imhotep.archive.compression.SquallArchiveCompressor.SNAPPY; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; /** * @author jsgroth */ public class TestSquallArchive { @Test public void test() throws IOException { final FileSystem fs = new NicerLocalFileSystem(); final Path tempDir = new Path(getTempDir()); fs.mkdirs(tempDir); try { for (SquallArchiveCompressor compressor : Arrays.asList(NONE, GZIP, SNAPPY)) { final String localTempDir = getTempDir(); try { doTheTest(fs, tempDir, new File(localTempDir), compressor); } finally { Files.delete(localTempDir); } } } finally { fs.delete(tempDir, true); } fs.mkdirs(tempDir); try { final String localTempDir = getTempDir(); try { testDirectories(fs, tempDir, new File(localTempDir)); } finally { Files.delete(localTempDir); } } finally { fs.delete(tempDir, true); } } private static String getTempDir() { return com.google.common.io.Files.createTempDir().getAbsolutePath(); } private static void testDirectories(FileSystem fs, Path tempDir, File localTempDir) throws IOException { final File localArchiveDir = new File(localTempDir, "tmp"); if (!localArchiveDir.mkdir()) throw new IOException(); final OutputStream os = new FileOutputStream(new File(localArchiveDir, "tempfile")); for (int i = 1; i <= 10; ++i) { os.write(i); } os.close(); final SquallArchiveWriter writer = new SquallArchiveWriter(fs, tempDir, true); writer.appendDirectory(localArchiveDir); writer.commit(); checkDirectory(fs, tempDir, localTempDir, localArchiveDir); writer.commit(); // no-op checkDirectory(fs, tempDir, localTempDir, localArchiveDir); final SquallArchiveWriter writer2 = new SquallArchiveWriter(fs, tempDir, true); writer2.batchAppendDirectory(localTempDir); checkDirectory(fs, tempDir, localTempDir, localArchiveDir); } private static void checkDirectory(FileSystem fs, Path tempDir, File localTempDir, File localArchiveDir) throws IOException { Files.delete(localArchiveDir.getAbsolutePath()); final SquallArchiveReader reader = new SquallArchiveReader(fs, tempDir); reader.copyToLocal("tmp/tempfile", localTempDir.getAbsolutePath()); assertTrue(ByteStreams.equal( com.google.common.io.Files.newInputStreamSupplier(new File(localArchiveDir, "tempfile")), ByteStreams.newInputStreamSupplier(new byte[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) )); Files.delete(localArchiveDir.getAbsolutePath()); reader.copyAllToLocal(localTempDir.getAbsolutePath()); assertTrue(ByteStreams.equal( com.google.common.io.Files.newInputStreamSupplier(new File(localArchiveDir, "tempfile")), ByteStreams.newInputStreamSupplier(new byte[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) )); } private static void doTheTest(FileSystem fs, Path tempDir, File localTempDir, SquallArchiveCompressor compressor) throws IOException { final Random rand = new Random(); final List<File> tempFiles = new ArrayList<File>(); for (int i = 0; i < 10; ++i) { final File tempFile = new File(localTempDir, "tempfile" + i); tempFiles.add(tempFile); final int len = rand.nextInt(1024) + 1024; final OutputStream os = new FileOutputStream(tempFile); for (int j = 0; j < len; ++j) { os.write(rand.nextInt(256)); } os.close(); } Collections.shuffle(tempFiles); long bytesWritten = writeHalfTheFiles(fs, tempDir, tempFiles, compressor); if (compressor == NONE) { assertEquals(bytesWritten, getArchiveBytesWritten(fs, tempDir)); } final File localTempDir2 = new File(getTempDir()); try { readHalfTheFiles(fs, tempDir, localTempDir2, tempFiles); } finally { Files.delete(localTempDir2.getAbsolutePath()); } bytesWritten += writeTheOtherHalf(fs, tempDir, tempFiles, compressor); if (compressor == NONE) { assertEquals(bytesWritten, getArchiveBytesWritten(fs, tempDir)); } if (!localTempDir2.mkdirs()) throw new IOException(); try { readAllTheFiles(fs, tempDir, localTempDir2, tempFiles); } finally { Files.delete(localTempDir2.getAbsolutePath()); } } private static long getArchiveBytesWritten(FileSystem fs, Path tempDir) throws IOException { long sum = 0; for (final FileStatus status : fs.listStatus(tempDir, new PathFilter() { @Override public boolean accept(Path path) { return SquallArchiveWriter.ARCHIVE_FILENAME_PATTERN.matcher(path.getName()).matches(); } })) { sum += status.getLen(); } return sum; } private static void readAllTheFiles(FileSystem fs, Path tempDir, File localTempDir2, List<File> tempFiles) throws IOException { final SquallArchiveReader reader = new SquallArchiveReader(fs, tempDir); final List<FileMetadata> metadata = reader.readMetadata(); assertEquals(metadata.size(), tempFiles.size()); for (int i = 0; i < metadata.size(); ++i) { final FileMetadata file = metadata.get(i); final File tf = tempFiles.get(i); assertEquals(tf.getName(), file.getFilename()); reader.copyToLocal(file, localTempDir2); assertTrue(com.google.common.io.Files.equal(tf, new File(localTempDir2, file.getFilename()))); } for (final File f : localTempDir2.listFiles()) { if (!f.delete()) throw new IOException(); } reader.copyToLocal("tempfile3", localTempDir2.getAbsolutePath()); assertTrue(com.google.common.io.Files.equal(new File(localTempDir2, "tempfile3"), findTempFile3(tempFiles))); } private static File findTempFile3(List<File> tempFiles) { for (final File f : tempFiles) { if (f.getName().endsWith("3")) return f; } throw new AssertionError("wtf"); } private static void readHalfTheFiles(FileSystem fs, Path tempDir, File localTempDir2, List<File> tempFiles) throws IOException { final SquallArchiveReader reader = new SquallArchiveReader(fs, tempDir); final List<FileMetadata> metadata = reader.readMetadata(); assertEquals(metadata.size(), tempFiles.size() / 2); for (int i = 0; i < metadata.size(); ++i) { final FileMetadata file = metadata.get(i); final File tf = tempFiles.get(i); assertEquals(tf.getName(), file.getFilename()); reader.copyToLocal(file, localTempDir2); assertTrue(ByteStreams.equal( com.google.common.io.Files.newInputStreamSupplier(tf), com.google.common.io.Files.newInputStreamSupplier(new File(localTempDir2, file.getFilename())) )); } } private static long writeTheOtherHalf(FileSystem fs, Path tempDir, List<File> tempFiles, SquallArchiveCompressor compressor) throws IOException { final SquallArchiveWriter writer = new SquallArchiveWriter(fs, tempDir, false, compressor); long expectedLen = 0L; for (int i = tempFiles.size() / 2; i < tempFiles.size(); ++i) { final File file = tempFiles.get(i); writer.appendFile(file); expectedLen += file.length(); } writer.commit(); return expectedLen; } private static long writeHalfTheFiles(FileSystem fs, Path tempDir, List<File> tempFiles, SquallArchiveCompressor compressor) throws IOException { final SquallArchiveWriter writer = new SquallArchiveWriter(fs, tempDir, true, compressor); long expectedLen = 0L; for (int i = 0; i < tempFiles.size() / 2; ++i) { final File file = tempFiles.get(i); writer.appendFile(file); expectedLen += file.length(); } writer.commit(); return expectedLen; } }