/*
* Copyright 2013 Cloudera.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kitesdk.data.spi.filesystem;
import com.google.common.collect.Lists;
import org.apache.avro.Schema;
import org.apache.hadoop.fs.Path;
import org.kitesdk.data.impl.Accessor;
import org.kitesdk.data.spi.FieldPartitioner;
import org.kitesdk.data.spi.StorageKey;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
/**
* An implementation of {@link PathConversion} that builds a key from the start of the path to the end
* instead of starting at the end and building backwards. The benefit of this implementation is that
* partial keys can be created without traversing the entire tree.
*/
public class PartialPathConversion extends PathConversion{

  /** Directory separator used in Hadoop path strings. */
  private static final String SEPARATOR = "/";

  /** Root location that is stripped from incoming paths before they are parsed. */
  private final Path rootPath;

  /**
   * Creates a conversion for paths that are located under {@code rootDirectory}.
   *
   * @param rootDirectory the root location from which the path is being evaluated
   * @param schema The schema for the payload dataset
   */
  public PartialPathConversion(Path rootDirectory, Schema schema){
    super(schema);
    this.rootPath = rootDirectory;
  }

  /**
   * Fills {@code storage} with the values encoded in the directory names of
   * {@code fromPath}, reading the names from the first segment below the root
   * to the last one. The n-th directory name is converted with the n-th field
   * partitioner of the partition strategy of {@code storage}, so a path that
   * covers only the leading partitions produces only the leading values.
   *
   * @param fromPath the path to convert; the root directory is removed first
   *                 when the path is located under it
   * @param storage the key whose values are replaced
   * @return the {@code storage} instance that was passed in
   * @throws IndexOutOfBoundsException if the path has more directory names
   *         than the partition strategy has field partitioners
   */
  public StorageKey toKey(Path fromPath, StorageKey storage) {
    final List<FieldPartitioner> partitioners =
        Accessor.getDefault().getFieldPartitioners(storage.getPartitionStrategy());

    final List<String> segments = splitSegments(stripRoot(fromPath.toString()));

    // One value per directory name, in path order.
    final List<Object> values = Lists.newArrayListWithCapacity(segments.size());
    int index = 0;
    for (String segment : segments) {
      values.add(valueForDirname(
          (FieldPartitioner<?, ?>) partitioners.get(index), segment));
      index++;
    }

    storage.replaceValues(values);
    return storage;
  }

  /**
   * Removes the root directory prefix from {@code path}. The prefix is only
   * removed when it ends on a directory boundary, so that a sibling such as
   * {@code /data/foobar} is not mistaken for a child of {@code /data/foo}.
   *
   * @param path the string form of the path being converted
   * @return the part of {@code path} after the root, or {@code path} itself
   *         when it is not located under the root
   */
  private String stripRoot(String path) {
    final String root = rootPath.toString();
    if (!path.startsWith(root)) {
      return path;
    }
    final String remainder = path.substring(root.length());
    final boolean onBoundary = remainder.isEmpty()
        || remainder.startsWith(SEPARATOR)
        || root.endsWith(SEPARATOR);
    return onBoundary ? remainder : path;
  }

  /**
   * Splits {@code relativePath} into its non-empty directory names, ordered
   * from the first segment to the last.
   *
   * @param relativePath the path string left after the root was removed
   * @return the directory names; empty when {@code relativePath} is empty
   */
  private static List<String> splitSegments(String relativePath) {
    final LinkedList<String> segments = new LinkedList<String>();
    if (relativePath.isEmpty()) {
      return segments;
    }
    // Walking up with getParent() visits the last segment first, so each name
    // is prepended to end up with first -> last order.
    for (Path current = new Path(relativePath);
         current != null;
         current = current.getParent()) {
      final String name = current.getName();
      if (!name.isEmpty()) {
        segments.addFirst(name);
      }
    }
    return segments;
  }
}