/*
* Copyright (C) 2014 Indeed Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the
* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.indeed.imhotep.archive;
import com.google.common.base.Charsets;
import com.indeed.util.io.Files;
import com.indeed.imhotep.archive.compression.SquallArchiveCompressor;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.security.DigestInputStream;
import java.util.ArrayList;
import java.util.List;
/**
 * Reads a Squall archive stored under a directory of a Hadoop {@link FileSystem}.
 *
 * <p>The archive directory contains a tab-separated {@code metadata.txt} listing the archived
 * files, plus one or more archive data files ({@code archive.bin} unless a metadata line names a
 * different one) from which individual files are extracted by offset and size.
 *
 * @author jsgroth
 */
public class SquallArchiveReader {
    private static final Logger log = Logger.getLogger(SquallArchiveReader.class);

    /** Total number of attempts made to open {@code metadata.txt} before giving up. */
    private static final int METADATA_READ_ATTEMPTS = 3;
    /** Pause between attempts to open {@code metadata.txt}, in milliseconds. */
    private static final long METADATA_RETRY_DELAY_MILLIS = 1000L;
    /** Total number of attempts made to copy a single file before giving up. */
    private static final int COPY_ATTEMPTS = 3;
    /** Pause between attempts to copy a single file, in milliseconds. */
    private static final long COPY_RETRY_DELAY_MILLIS = 10000L;

    private final FileSystem fs;
    private final Path path;

    /**
     * create a SquallArchiveReader
     *
     * @param fs a file system
     * @param path the directory where the archive is located
     */
    public SquallArchiveReader(FileSystem fs, Path path) {
        this.fs = fs;
        this.path = path;
    }

    /**
     * get a list of all files contained in the metadata for this archive
     *
     * <p>Opening {@code metadata.txt} is retried on {@link FileNotFoundException}, sleeping
     * between attempts; the exception from the final attempt is rethrown.
     *
     * @return a list of file metadata, one entry per line of {@code metadata.txt}
     * @throws IOException if there is an IO problem or a metadata line is malformed
     * @throws RuntimeException if the thread is interrupted while waiting to retry; the thread's
     *         interrupt status is restored before the exception is thrown
     */
    public List<FileMetadata> readMetadata() throws IOException {
        int retries = METADATA_READ_ATTEMPTS;
        while (true) {
            try {
                final BufferedReader r = new BufferedReader(
                        new InputStreamReader(fs.open(new Path(path, "metadata.txt")), Charsets.UTF_8));
                try {
                    final List<FileMetadata> ret = new ArrayList<FileMetadata>();
                    for (String line = r.readLine(); line != null; line = r.readLine()) {
                        ret.add(parseMetadata(line));
                    }
                    return ret;
                } finally {
                    r.close();
                }
            } catch (FileNotFoundException e) {
                if (--retries == 0) {
                    throw e;
                }
                try {
                    Thread.sleep(METADATA_RETRY_DELAY_MILLIS);
                } catch (InterruptedException ie) {
                    // Restore the flag so code further up the stack can still observe the interrupt.
                    Thread.currentThread().interrupt();
                    throw new RuntimeException(ie);
                }
            }
        }
    }

    /**
     * Parses one tab-separated line of {@code metadata.txt}.
     *
     * <p>Fields, in order: filename, size, timestamp, checksum, start offset, and optionally a
     * compressor key and an archive filename. A missing compressor key means
     * {@link SquallArchiveCompressor#NONE}; a missing archive filename means {@code archive.bin}.
     *
     * @param line a single metadata line
     * @return the parsed metadata
     * @throws IOException if the line has fewer than five fields or a numeric field is not a
     *         valid long
     */
    private static FileMetadata parseMetadata(String line) throws IOException {
        final String[] split = line.split("\t");
        if (split.length < 5) {
            throw new IOException("malformed metadata line: " + line);
        }
        final String filename = split[0];
        final long size;
        final long timestamp;
        final long startOffset;
        try {
            size = Long.parseLong(split[1]);
            timestamp = Long.parseLong(split[2]);
            startOffset = Long.parseLong(split[4]);
        } catch (NumberFormatException e) {
            // Report bad numbers the same way as a short line instead of leaking an unchecked exception.
            throw new IOException("malformed metadata line: " + line, e);
        }
        final String checksum = split[3];
        final SquallArchiveCompressor compressor = split.length > 5
                ? SquallArchiveCompressor.fromKey(split[5])
                : SquallArchiveCompressor.NONE;
        final String archiveFilename = split.length > 6 ? split[6] : "archive.bin";
        return new FileMetadata(filename, size, timestamp, checksum, startOffset, compressor, archiveFilename);
    }

    /**
     * copies every file in the archive to a local directory
     *
     * @param localDir the directory to copy files into
     * @throws IOException if there is an IO problem
     */
    public void copyAllToLocal(String localDir) throws IOException {
        copyAllToLocal(new File(localDir), new AcceptAllFileMetadataFilter());
    }

    /**
     * copies every file in the archive to a local directory
     *
     * @param localDir the directory to copy files into
     * @throws IOException if there is an IO problem
     */
    public void copyAllToLocal(File localDir) throws IOException {
        copyAllToLocal(localDir, new AcceptAllFileMetadataFilter());
    }

    /**
     * copies every file in the archive that is accepted by the filter to a local directory
     *
     * @param localDir the directory to copy files into
     * @param filter a function specifying which files should be copied
     * @throws IOException if there is an IO problem
     */
    public void copyAllToLocal(File localDir, FileMetadataFilter filter) throws IOException {
        for (final FileMetadata metadata : readMetadata()) {
            if (filter.accept(metadata)) {
                copyToLocal(metadata, localDir);
            }
        }
    }

    /**
     * copy a file from this archive to a local directory
     *
     * @param filename the file to copy
     * @param localDir the directory to copy into
     * @throws IOException if the given file is not in the archive or if there is an IO problem
     */
    public void copyToLocal(String filename, String localDir) throws IOException {
        final List<FileMetadata> metadataList = readMetadata();
        for (final FileMetadata metadata : metadataList) {
            if (filename.equals(metadata.getFilename())) {
                copyToLocal(metadata, new File(localDir));
                return;
            }
        }
        throw new FileNotFoundException("this archive does not have a file named " + filename);
    }

    /**
     * copy a file from this archive to a local directory
     *
     * <p>The copy is retried on {@link IOException} (which includes checksum mismatches),
     * sleeping between attempts; the exception from the final attempt is rethrown. If the thread
     * is interrupted while waiting to retry, its interrupt status is restored and the most recent
     * {@link IOException} is rethrown without further attempts.
     *
     * @param file the metadata for the file to copy
     * @param localDir the directory to copy into
     * @throws IOException if there is an IO problem
     */
    public void copyToLocal(FileMetadata file, File localDir) throws IOException {
        int retries = COPY_ATTEMPTS;
        while (true) {
            try {
                tryCopyToLocal(file, localDir);
                break;
            } catch (IOException e) {
                // Pass the exception as the throwable argument so the stack trace is logged.
                log.error("failed to copy " + file.getFilename() + " from archive " + path, e);
                if (--retries == 0) {
                    throw e;
                }
                try {
                    Thread.sleep(COPY_RETRY_DELAY_MILLIS);
                } catch (InterruptedException ie) {
                    log.error("interrupted while waiting to retry copy of " + file.getFilename(), ie);
                    // Restore the flag and stop retrying: an interrupted thread should not keep looping.
                    Thread.currentThread().interrupt();
                    throw e;
                }
            }
        }
    }

    /**
     * Makes a single attempt to extract one file from the archive into a local directory.
     *
     * <p>Creates {@code localDir} and any parent directories implied by slashes in the archived
     * filename, seeks to the file's start offset in its archive data file, streams
     * {@code file.getSize()} bytes through the file's compressor into the target file, and then
     * compares the MD5 digest of the bytes read against the checksum recorded in the metadata.
     *
     * @param file the metadata for the file to copy
     * @param localDir the directory to copy into
     * @throws IOException if a directory cannot be created, there is an IO problem, or the
     *         checksum does not match the metadata
     */
    public void tryCopyToLocal(FileMetadata file, File localDir) throws IOException {
        if (!localDir.exists() && !localDir.mkdirs()) {
            throw new IOException("could not create directory " + localDir);
        }

        final String fullFilename = file.getFilename();
        final File targetFile;
        if (fullFilename.contains("/")) {
            // Archived names use '/' separators; rebuild the parent path for the local platform.
            final int lastSlash = fullFilename.lastIndexOf('/');
            final String[] parentDirs = fullFilename.substring(0, lastSlash).split("/");
            final String fullParentPath = Files.buildPath(parentDirs);
            final File parentFile = new File(localDir, fullParentPath);
            if ((!parentFile.exists() && !parentFile.mkdirs()) || (parentFile.exists() && !parentFile.isDirectory())) {
                throw new IOException("unable to create directory " + parentFile.getAbsolutePath());
            }
            targetFile = new File(parentFile, fullFilename.substring(lastSlash + 1));
        } else {
            targetFile = new File(localDir, file.getFilename());
        }

        final Path archivePath = new Path(path, file.getArchiveFilename());
        final SquallArchiveCompressor compressor = file.getCompressor();
        final FSDataInputStream is = fs.open(archivePath);
        try {
            is.seek(file.getStartOffset());
            final DigestInputStream digestStream =
                    new DigestInputStream(compressor.newInputStream(is), ArchiveUtils.getMD5Digest());
            final OutputStream os = new BufferedOutputStream(new FileOutputStream(targetFile));
            try {
                ArchiveUtils.streamCopy(digestStream, os, file.getSize());
            } finally {
                // Close even when the copy fails so a retried copy does not leak a file handle.
                os.close();
            }
            final String checksum = ArchiveUtils.toHex(digestStream.getMessageDigest().digest());
            if (!checksum.equals(file.getChecksum())) {
                throw new IOException("invalid checksum for file " + fullFilename + " in archive " + path + ": file checksum = " + checksum + ", checksum in metadata = " + file.getChecksum());
            }
        } finally {
            is.close();
        }
    }
}