/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tajo.storage;
import com.google.common.collect.Lists;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.net.util.Base64;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.tajo.catalog.Schema;
import org.apache.tajo.catalog.TableMeta;
import org.apache.tajo.catalog.proto.CatalogProtos;
import org.apache.tajo.conf.TajoConf;
import org.apache.tajo.storage.fragment.FileFragment;
import org.apache.tajo.storage.fragment.Fragment;
import org.apache.tajo.storage.fragment.FragmentConvertor;
import org.apache.tajo.util.Bytes;
import org.apache.tajo.util.FileUtil;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import static org.apache.tajo.catalog.proto.CatalogProtos.FragmentProto;
public abstract class AbstractStorageManager {
private final Log LOG = LogFactory.getLog(AbstractStorageManager.class);
protected final TajoConf conf;
protected final FileSystem fs;
protected final Path tableBaseDir;
protected final boolean blocksMetadataEnabled;
/**
* Cache of scanner handlers for each storage type.
*/
protected static final Map<String, Class<? extends Scanner>> SCANNER_HANDLER_CACHE
= new ConcurrentHashMap<String, Class<? extends Scanner>>();
/**
* Cache of appender handlers for each storage type.
*/
protected static final Map<String, Class<? extends FileAppender>> APPENDER_HANDLER_CACHE
= new ConcurrentHashMap<String, Class<? extends FileAppender>>();
/**
* Cache of constructors for each class. Pins the classes so they
* can't be garbage collected while this cache holds them.
*/
private static final Map<Class<?>, Constructor<?>> CONSTRUCTOR_CACHE =
new ConcurrentHashMap<Class<?>, Constructor<?>>();
public abstract Class<? extends Scanner> getScannerClass(CatalogProtos.StoreType storeType) throws IOException;
public abstract Scanner getScanner(TableMeta meta, Schema schema, Fragment fragment, Schema target) throws IOException;
protected AbstractStorageManager(TajoConf conf) throws IOException {
this.conf = conf;
this.tableBaseDir = TajoConf.getWarehouseDir(conf);
this.fs = tableBaseDir.getFileSystem(conf);
this.blocksMetadataEnabled = conf.getBoolean(DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED,
DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED_DEFAULT);
if (!this.blocksMetadataEnabled)
LOG.warn("does not support block metadata. ('dfs.datanode.hdfs-blocks-metadata.enabled')");
}
public Scanner getFileScanner(TableMeta meta, Schema schema, Path path)
throws IOException {
FileSystem fs = path.getFileSystem(conf);
FileStatus status = fs.getFileStatus(path);
FileFragment fragment = new FileFragment(path.getName(), path, 0, status.getLen());
return getScanner(meta, schema, fragment);
}
public Scanner getScanner(TableMeta meta, Schema schema, FragmentProto fragment) throws IOException {
return getScanner(meta, schema, FragmentConvertor.convert(conf, meta.getStoreType(), fragment), schema);
}
public Scanner getScanner(TableMeta meta, Schema schema, FragmentProto fragment, Schema target) throws IOException {
return getScanner(meta, schema, FragmentConvertor.convert(conf, meta.getStoreType(), fragment), target);
}
public Scanner getScanner(TableMeta meta, Schema schema, Fragment fragment) throws IOException {
return getScanner(meta, schema, fragment, schema);
}
public FileSystem getFileSystem() {
return this.fs;
}
public Path getWarehouseDir() {
return this.tableBaseDir;
}
public void delete(Path tablePath) throws IOException {
FileSystem fs = tablePath.getFileSystem(conf);
fs.delete(tablePath, true);
}
public boolean exists(Path path) throws IOException {
FileSystem fileSystem = path.getFileSystem(conf);
return fileSystem.exists(path);
}
/**
* Deletes only the data contained in the given path, not the path itself.
*
* @param path the path whose contents are deleted
* @throws IOException
*/
public void deleteData(Path path) throws IOException {
FileSystem fileSystem = path.getFileSystem(conf);
FileStatus[] fileLists = fileSystem.listStatus(path);
for (FileStatus status : fileLists) {
fileSystem.delete(status.getPath(), true);
}
}
public Path getTablePath(String tableName) {
return new Path(tableBaseDir, tableName);
}
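/**
 * Returns an {@link Appender} that writes to the given path. The appender
 * implementation class is resolved from the configuration key
 * <code>tajo.storage.appender-handler.&lt;store type&gt;.class</code> and cached per store type.
 *
 * <p>A minimal usage sketch (the handler class below is hypothetical; real deployments
 * register whichever FileAppender implementation matches the store type):</p>
 * <pre>{@code
 *   // conf.setClass("tajo.storage.appender-handler.csv.class",
 *   //     MyCsvAppender.class, FileAppender.class);   // hypothetical registration
 *   Appender appender = storageManager.getAppender(meta, schema, outputPath);
 *   appender.init();
 *   // write tuples with appender.addTuple(...), then flush() and close()
 * }</pre>
 */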
public Appender getAppender(TableMeta meta, Schema schema, Path path)
throws IOException {
Appender appender;
Class<? extends FileAppender> appenderClass;
String handlerName = meta.getStoreType().name().toLowerCase();
appenderClass = APPENDER_HANDLER_CACHE.get(handlerName);
if (appenderClass == null) {
appenderClass = conf.getClass(
String.format("tajo.storage.appender-handler.%s.class", handlerName), null,
FileAppender.class);
if (appenderClass == null) {
throw new IOException("Unknown Storage Type: " + meta.getStoreType());
}
// ConcurrentHashMap rejects null values, so only a resolved class is cached.
APPENDER_HANDLER_CACHE.put(handlerName, appenderClass);
}
appender = newAppenderInstance(appenderClass, conf, meta, schema, path);
return appender;
}
public TableMeta getTableMeta(Path tablePath) throws IOException {
TableMeta meta;
FileSystem fs = tablePath.getFileSystem(conf);
Path tableMetaPath = new Path(tablePath, ".meta");
if (!fs.exists(tableMetaPath)) {
throw new FileNotFoundException(".meta file not found in " + tablePath.toString());
}
FSDataInputStream tableMetaIn = fs.open(tableMetaPath);
CatalogProtos.TableProto tableProto = (CatalogProtos.TableProto) FileUtil.loadProto(tableMetaIn,
CatalogProtos.TableProto.getDefaultInstance());
meta = new TableMeta(tableProto);
return meta;
}
public FileFragment[] split(String tableName) throws IOException {
Path tablePath = new Path(tableBaseDir, tableName);
return split(tableName, tablePath, fs.getDefaultBlockSize());
}
public FileFragment[] split(String tableName, long fragmentSize) throws IOException {
Path tablePath = new Path(tableBaseDir, tableName);
return split(tableName, tablePath, fragmentSize);
}
public FileFragment[] splitBroadcastTable(Path tablePath) throws IOException {
FileSystem fs = tablePath.getFileSystem(conf);
List<FileFragment> listTablets = new ArrayList<FileFragment>();
FileFragment tablet;
FileStatus[] fileLists = fs.listStatus(tablePath);
for (FileStatus file : fileLists) {
tablet = new FileFragment(tablePath.getName(), file.getPath(), 0, file.getLen());
listTablets.add(tablet);
}
FileFragment[] tablets = new FileFragment[listTablets.size()];
listTablets.toArray(tablets);
return tablets;
}
public FileFragment[] split(Path tablePath) throws IOException {
FileSystem fs = tablePath.getFileSystem(conf);
return split(tablePath.getName(), tablePath, fs.getDefaultBlockSize());
}
public FileFragment[] split(String tableName, Path tablePath) throws IOException {
return split(tableName, tablePath, fs.getDefaultBlockSize());
}
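/**
 * Splits every file under <code>tablePath</code> into {@link FileFragment}s of at most
 * <code>size</code> bytes. For example, with a 128 MB fragment size, a 300 MB file
 * yields three fragments covering bytes [0, 128 MB), [128 MB, 256 MB), and [256 MB, 300 MB).
 */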
private FileFragment[] split(String tableName, Path tablePath, long size)
throws IOException {
FileSystem fs = tablePath.getFileSystem(conf);
long defaultBlockSize = size;
List<FileFragment> listTablets = new ArrayList<FileFragment>();
FileFragment tablet;
FileStatus[] fileLists = fs.listStatus(tablePath);
for (FileStatus file : fileLists) {
long remainFileSize = file.getLen();
long start = 0;
if (remainFileSize > defaultBlockSize) {
while (remainFileSize > defaultBlockSize) {
tablet = new FileFragment(tableName, file.getPath(), start, defaultBlockSize);
listTablets.add(tablet);
start += defaultBlockSize;
remainFileSize -= defaultBlockSize;
}
listTablets.add(new FileFragment(tableName, file.getPath(), start, remainFileSize));
} else {
listTablets.add(new FileFragment(tableName, file.getPath(), 0, remainFileSize));
}
}
FileFragment[] tablets = new FileFragment[listTablets.size()];
listTablets.toArray(tablets);
return tablets;
}
public static FileFragment[] splitNG(Configuration conf, String tableName, TableMeta meta,
Path tablePath, long size)
throws IOException {
FileSystem fs = tablePath.getFileSystem(conf);
long defaultBlockSize = size;
List<FileFragment> listTablets = new ArrayList<FileFragment>();
FileFragment tablet;
FileStatus[] fileLists = fs.listStatus(tablePath);
for (FileStatus file : fileLists) {
long remainFileSize = file.getLen();
long start = 0;
if (remainFileSize > defaultBlockSize) {
while (remainFileSize > defaultBlockSize) {
tablet = new FileFragment(tableName, file.getPath(), start, defaultBlockSize);
listTablets.add(tablet);
start += defaultBlockSize;
remainFileSize -= defaultBlockSize;
}
listTablets.add(new FileFragment(tableName, file.getPath(), start, remainFileSize));
} else {
listTablets.add(new FileFragment(tableName, file.getPath(), 0, remainFileSize));
}
}
FileFragment[] tablets = new FileFragment[listTablets.size()];
listTablets.toArray(tablets);
return tablets;
}
public long calculateSize(Path tablePath) throws IOException {
FileSystem fs = tablePath.getFileSystem(conf);
long totalSize = 0;
if (fs.exists(tablePath)) {
totalSize = fs.getContentSummary(tablePath).getLength();
}
return totalSize;
}
/////////////////////////////////////////////////////////////////////////////
// FileInputFormat Area
/////////////////////////////////////////////////////////////////////////////
private static final PathFilter hiddenFileFilter = new PathFilter() {
public boolean accept(Path p) {
String name = p.getName();
return !name.startsWith("_") && !name.startsWith(".");
}
};
/**
* Proxy PathFilter that accepts a path only if all filters given in the
* constructor do. Used by listStatus() to apply the built-in
* hiddenFileFilter together with a user-provided one (if any); see the
* illustration after the class.
*/
private static class MultiPathFilter implements PathFilter {
private List<PathFilter> filters;
public MultiPathFilter(List<PathFilter> filters) {
this.filters = filters;
}
public boolean accept(Path path) {
for (PathFilter filter : filters) {
if (!filter.accept(path)) {
return false;
}
}
return true;
}
}
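/*
 * Illustration of how the filters compose (the user filter and paths are hypothetical;
 * listStatus() below installs only hiddenFileFilter by default):
 *
 *   PathFilter parquetOnly = new PathFilter() {   // hypothetical user-provided filter
 *     public boolean accept(Path p) { return p.getName().endsWith(".parquet"); }
 *   };
 *   PathFilter combined = new MultiPathFilter(Lists.newArrayList(hiddenFileFilter, parquetOnly));
 *   combined.accept(new Path("/warehouse/t1/part-000.parquet"));   // true
 *   combined.accept(new Path("/warehouse/t1/.part-000.crc"));      // false (hidden file)
 */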
/**
* List input directories.
* Subclasses may override to, e.g., select only files matching a regular
* expression (a sketch of such an override follows the method).
*
* @return a list of FileStatus objects for the matched files
* @throws IOException if an input path does not exist or matches no files
*/
protected List<FileStatus> listStatus(Path path) throws IOException {
List<FileStatus> result = new ArrayList<FileStatus>();
Path[] dirs = new Path[]{path};
if (dirs.length == 0) {
throw new IOException("No input paths specified in job");
}
List<IOException> errors = new ArrayList<IOException>();
// creates a MultiPathFilter with the hiddenFileFilter and the
// user provided one (if any).
List<PathFilter> filters = new ArrayList<PathFilter>();
filters.add(hiddenFileFilter);
PathFilter inputFilter = new MultiPathFilter(filters);
for (int i = 0; i < dirs.length; ++i) {
Path p = dirs[i];
FileSystem fs = p.getFileSystem(conf);
FileStatus[] matches = fs.globStatus(p, inputFilter);
if (matches == null) {
errors.add(new IOException("Input path does not exist: " + p));
} else if (matches.length == 0) {
errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
} else {
for (FileStatus globStat : matches) {
if (globStat.isDirectory()) {
for (FileStatus stat : fs.listStatus(globStat.getPath(),
inputFilter)) {
result.add(stat);
}
} else {
result.add(globStat);
}
}
}
}
if (!errors.isEmpty()) {
throw new InvalidInputException(errors);
}
LOG.info("Total input paths to process : " + result.size());
return result;
}
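/*
 * A sketch of the kind of override mentioned in the Javadoc of listStatus(); the
 * subclass and the file-name pattern are illustrative only:
 *
 *   @Override
 *   protected List<FileStatus> listStatus(Path path) throws IOException {
 *     List<FileStatus> selected = new ArrayList<FileStatus>();
 *     for (FileStatus stat : super.listStatus(path)) {
 *       if (stat.getPath().getName().matches(".*\\.tbl")) {   // hypothetical pattern
 *         selected.add(stat);
 *       }
 *     }
 *     return selected;
 *   }
 */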
/**
* Is the given file splittable? Usually true, but if the file is
* stream compressed, it will not be.
* <p/>
* <code>FileInputFormat</code> implementations can override this and return
* <code>false</code> to ensure that individual input files are never split up,
* so that a single task processes an entire file.
*
* @param filename the file name to check
* @return <code>true</code> if this file is splittable
*/
protected boolean isSplittable(TableMeta meta, Schema schema, Path filename) throws IOException {
Scanner scanner = getFileScanner(meta, schema, filename);
return scanner.isSplittable();
}
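/**
 * Clamps the block size into the range [minSize, maxSize], i.e. returns
 * <code>max(minSize, min(maxSize, blockSize))</code>. For example, with minSize = 1,
 * maxSize = Long.MAX_VALUE and a 64 MB block, the split size is 64 MB; raising
 * minSize to 128 MB would raise the split size to 128 MB.
 */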
protected long computeSplitSize(long blockSize, long minSize,
long maxSize) {
return Math.max(minSize, Math.min(maxSize, blockSize));
}
private static final double SPLIT_SLOP = 1.1; // 10% slop
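/**
 * Returns the index of the block that contains the given byte offset. For example,
 * with 128 MB blocks, offset 0 maps to block 0 and offset 130 MB maps to block 1.
 *
 * @throws IllegalArgumentException if the offset lies beyond the end of the file
 */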
protected int getBlockIndex(BlockLocation[] blkLocations,
long offset) {
for (int i = 0; i < blkLocations.length; i++) {
// is the offset inside this block?
if ((blkLocations[i].getOffset() <= offset) &&
(offset < blkLocations[i].getOffset() + blkLocations[i].getLength())) {
return i;
}
}
BlockLocation last = blkLocations[blkLocations.length - 1];
long fileLength = last.getOffset() + last.getLength() - 1;
throw new IllegalArgumentException("Offset " + offset +
" is outside of file (0.." +
fileLength + ")");
}
/**
* A factory that makes the split for this class. It can be overridden
* by subclasses to create sub-types.
*/
protected FileFragment makeSplit(String fragmentId, TableMeta meta, Path file, long start, long length) {
return new FileFragment(fragmentId, file, start, length);
}
protected FileFragment makeSplit(String fragmentId, TableMeta meta, Path file, long start, long length,
String[] hosts) {
return new FileFragment(fragmentId, file, start, length, hosts);
}
protected FileFragment makeSplit(String fragmentId, TableMeta meta, Path file, BlockLocation blockLocation,
int[] diskIds) throws IOException {
return new FileFragment(fragmentId, file, blockLocation, diskIds);
}
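/*
 * makeNonSplit() below assigns the whole file to the hosts holding the most of its
 * blocks. An illustrative example (hypothetical host names), assuming every block is
 * replicated to three hosts:
 *
 *   blocks per host: {nodeA=4, nodeB=3, nodeC=2, nodeD=1}
 *   hosts.length = blkLocations[0].getHosts().length = 3
 *   fragment hosts = ["nodeA", "nodeB", "nodeC"]
 */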
// For non-splittable files, e.g., a gzip-compressed text file.
protected FileFragment makeNonSplit(String fragmentId, TableMeta meta, Path file, long start, long length,
BlockLocation[] blkLocations) throws IOException {
Map<String, Integer> hostsBlockMap = new HashMap<String, Integer>();
for (BlockLocation blockLocation : blkLocations) {
for (String host : blockLocation.getHosts()) {
if (hostsBlockMap.containsKey(host)) {
hostsBlockMap.put(host, hostsBlockMap.get(host) + 1);
} else {
hostsBlockMap.put(host, 1);
}
}
}
List<Map.Entry<String, Integer>> entries = new ArrayList<Map.Entry<String, Integer>>(hostsBlockMap.entrySet());
Collections.sort(entries, new Comparator<Map.Entry<String, Integer>>() {
@Override
public int compare(Map.Entry<String, Integer> v1, Map.Entry<String, Integer> v2) {
return v1.getValue().compareTo(v2.getValue());
}
});
String[] hosts = new String[blkLocations[0].getHosts().length];
for (int i = 0; i < hosts.length; i++) {
Map.Entry<String, Integer> entry = entries.get((entries.size() - 1) - i);
hosts[i] = entry.getKey();
}
return new FileFragment(fragmentId, file, start, length, hosts);
}
/**
* Get the minimum split size
*
* @return the minimum number of bytes that can be in a split
*/
public long getMinSplitSize() {
return conf.getLongVar(TajoConf.ConfVars.MINIMUM_SPLIT_SIZE);
}
/**
* Get disk ids by decoding the Base64-encoded bytes of the given volume ids.
*/
private int[] getDiskIds(VolumeId[] volumeIds) {
int[] diskIds = new int[volumeIds.length];
for (int i = 0; i < volumeIds.length; i++) {
int diskId = -1;
if (volumeIds[i] != null && volumeIds[i].isValid()) {
String volumeIdString = volumeIds[i].toString();
byte[] volumeIdBytes = Base64.decodeBase64(volumeIdString);
if (volumeIdBytes.length == 4) {
diskId = Bytes.toInt(volumeIdBytes);
} else if (volumeIdBytes.length == 1) {
diskId = (int) volumeIdBytes[0]; // support hadoop-2.0.2
}
}
diskIds[i] = diskId;
}
return diskIds;
}
/**
* Generate a map from each host to the set of disk volume ids on which it stores fragment data.
*
*/
private Map<String, Set<Integer>> getVolumeMap(List<FileFragment> frags) {
Map<String, Set<Integer>> volumeMap = new HashMap<String, Set<Integer>>();
for (FileFragment frag : frags) {
String[] hosts = frag.getHosts();
int[] diskIds = frag.getDiskIds();
for (int i = 0; i < hosts.length; i++) {
Set<Integer> volumeList = volumeMap.get(hosts[i]);
if (volumeList == null) {
volumeList = new HashSet<Integer>();
volumeMap.put(hosts[i], volumeList);
}
if (diskIds.length > 0 && diskIds[i] > -1) {
volumeList.add(diskIds[i]);
}
}
}
return volumeMap;
}
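/*
 * A minimal sketch of how getSplits() below is typically driven; every name other
 * than the methods of this class is illustrative:
 *
 *   TableMeta meta = storageManager.getTableMeta(tablePath);
 *   Schema schema = ...;   // obtained from the catalog
 *   List<FileFragment> fragments = storageManager.getSplits("t1", meta, schema, tablePath);
 *   for (FileFragment fragment : fragments) {
 *     // schedule the fragment on one of fragment.getHosts()
 *   }
 *
 * Splittable files are divided roughly along block boundaries (with up to SPLIT_SLOP
 * slack on the last split), while a non-splittable file becomes a single fragment.
 */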
/**
* Generate the list of files and make them into FileFragments.
*
* @throws IOException
*/
public List<FileFragment> getSplits(String tableName, TableMeta meta, Schema schema, Path inputPath) throws IOException {
// generate splits
List<FileFragment> splits = new ArrayList<FileFragment>();
FileSystem fs = inputPath.getFileSystem(conf);
List<FileStatus> files;
if (fs.isFile(inputPath)) {
files = Lists.newArrayList(fs.getFileStatus(inputPath));
} else {
files = listStatus(inputPath);
}
for (FileStatus file : files) {
Path path = file.getPath();
long length = file.getLen();
if (length > 0) {
BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
boolean splittable = isSplittable(meta, schema, path);
if (blocksMetadataEnabled && fs instanceof DistributedFileSystem) {
// DistributedFileSystem can report block storage (disk volume) locations
BlockStorageLocation[] blockStorageLocations = ((DistributedFileSystem) fs)
.getFileBlockStorageLocations(Arrays.asList(blkLocations));
if (splittable) {
for (BlockStorageLocation blockStorageLocation : blockStorageLocations) {
splits.add(makeSplit(tableName, meta, path, blockStorageLocation, getDiskIds(blockStorageLocation
.getVolumeIds())));
}
} else { // Non splittable
long blockSize = blockStorageLocations[0].getLength();
if (blockSize >= length) {
for (BlockStorageLocation blockStorageLocation : blockStorageLocations) {
splits.add(makeSplit(tableName, meta, path, blockStorageLocation, getDiskIds(blockStorageLocation
.getVolumeIds())));
}
} else {
splits.add(makeNonSplit(tableName, meta, path, 0, length, blockStorageLocations));
}
}
} else {
if (splittable) {
long minSize = Math.max(getMinSplitSize(), 1);
long blockSize = file.getBlockSize(); // the S3N REST API reports a block size, but exposes only a single block location
long splitSize = Math.max(minSize, blockSize);
long bytesRemaining = length;
// for S3, split by the computed split size rather than by actual block boundaries
while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
splits.add(makeSplit(tableName, meta, path, length - bytesRemaining, splitSize,
blkLocations[blkIndex].getHosts()));
bytesRemaining -= splitSize;
}
if (bytesRemaining > 0) {
int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
splits.add(makeSplit(tableName, meta, path, length - bytesRemaining, bytesRemaining,
blkLocations[blkIndex].getHosts()));
}
} else { // Non splittable
splits.add(makeNonSplit(tableName, meta, path, 0, length, blkLocations));
}
}
} else {
//for zero length files
splits.add(makeSplit(tableName, meta, path, 0, length));
}
}
LOG.info("Total # of splits: " + splits.size());
return splits;
}
private static class InvalidInputException extends IOException {
List<IOException> errors;
public InvalidInputException(List<IOException> errors) {
this.errors = errors;
}
@Override
public String getMessage() {
StringBuilder sb = new StringBuilder();
int messageLimit = Math.min(errors.size(), 10);
for (int i = 0; i < messageLimit; i++) {
sb.append(errors.get(i).getMessage()).append("\n");
}
if (messageLimit < errors.size())
sb.append("skipped .....").append("\n");
return sb.toString();
}
}
private static final Class<?>[] DEFAULT_SCANNER_PARAMS = {
Configuration.class,
Schema.class,
TableMeta.class,
FileFragment.class
};
private static final Class<?>[] DEFAULT_APPENDER_PARAMS = {
Configuration.class,
Schema.class,
TableMeta.class,
Path.class
};
/**
* create a scanner instance.
*/
public static <T> T newScannerInstance(Class<T> theClass, Configuration conf, Schema schema, TableMeta meta,
Fragment fragment) {
T result;
try {
Constructor<T> meth = (Constructor<T>) CONSTRUCTOR_CACHE.get(theClass);
if (meth == null) {
meth = theClass.getDeclaredConstructor(DEFAULT_SCANNER_PARAMS);
meth.setAccessible(true);
CONSTRUCTOR_CACHE.put(theClass, meth);
}
result = meth.newInstance(new Object[]{conf, schema, meta, fragment});
} catch (Exception e) {
throw new RuntimeException(e);
}
return result;
}
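/*
 * newScannerInstance() above and newAppenderInstance() below reflectively resolve a
 * (Configuration, Schema, TableMeta, Fragment/Path) constructor and cache it in
 * CONSTRUCTOR_CACHE. An illustrative scanner use, assuming the usual Scanner lifecycle
 * of init()/next()/close():
 *
 *   Class<? extends Scanner> clazz = storageManager.getScannerClass(meta.getStoreType());
 *   Scanner scanner = newScannerInstance(clazz, conf, schema, meta, fragment);
 *   scanner.init();
 *   Tuple tuple;
 *   while ((tuple = scanner.next()) != null) {
 *     // process the tuple
 *   }
 *   scanner.close();
 */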
/**
* create an appender instance.
*/
public static <T> T newAppenderInstance(Class<T> theClass, Configuration conf, TableMeta meta, Schema schema,
Path path) {
T result;
try {
Constructor<T> meth = (Constructor<T>) CONSTRUCTOR_CACHE.get(theClass);
if (meth == null) {
meth = theClass.getDeclaredConstructor(DEFAULT_APPENDER_PARAMS);
meth.setAccessible(true);
CONSTRUCTOR_CACHE.put(theClass, meth);
}
result = meth.newInstance(new Object[]{conf, schema, meta, path});
} catch (Exception e) {
throw new RuntimeException(e);
}
return result;
}
}