/*
* Copyright 2015 Cloudera.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kitesdk.data.spi.s3;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.kitesdk.data.DatasetIOException;
import org.kitesdk.data.DatasetOperationException;
import org.kitesdk.data.spi.DatasetRepository;
import org.kitesdk.data.spi.DefaultConfiguration;
import org.kitesdk.data.spi.Loadable;
import org.kitesdk.data.spi.OptionBuilder;
import org.kitesdk.data.spi.Registration;
import org.kitesdk.data.spi.URIPattern;
import org.kitesdk.data.spi.filesystem.FileSystemDatasetRepository;
/**
* A Loader implementation to register URIs for S3.
*/
public class Loader implements Loadable {
  /** Port value passed to {@link URI} when the match contains no usable port. */
  private static final int UNSPECIFIED_PORT = -1;
  /**
   * This class builds configured instances of
   * {@code FileSystemDatasetRepository} from a Map of options. This is for the
   * URI system.
   */
  private static class URIBuilder implements OptionBuilder<DatasetRepository> {
    /**
     * Builds a {@link FileSystemDatasetRepository} rooted at the path captured
     * by the URI pattern, backed by the S3 {@link FileSystem} for the match.
     *
     * @param match pattern-match options (scheme, host/bucket, credentials, path)
     * @return a repository rooted at the matched path
     * @throws DatasetIOException if the FileSystem cannot be obtained
     */
    @Override
    public DatasetRepository getFromOptions(Map<String, String> match) {
      String path = match.get("path");
      // An absent or empty path means the bucket root.
      final Path root = (path == null || path.isEmpty()) ?
          new Path("/") : new Path("/", path);
      Configuration conf = DefaultConfiguration.get();
      FileSystem fs;
      try {
        fs = FileSystem.get(fileSystemURI(match), conf);
      } catch (IOException e) {
        // "Incomplete HDFS URI, no host" => add a helpful suggestion.
        // getMessage() may be null (e.g. cause-only exceptions), so guard
        // before inspecting it to avoid masking the IOException with an NPE.
        String message = e.getMessage();
        if (message != null && message.startsWith("Incomplete")) {
          throw new DatasetIOException("Could not get a FileSystem: " +
              "make sure the credentials for " + match.get(URIPattern.SCHEME) +
              " URIs are configured.", e);
        }
        throw new DatasetIOException("Could not get a FileSystem", e);
      }
      return new FileSystemDatasetRepository.Builder()
          .configuration(new Configuration(conf)) // make a modifiable copy
          .rootDirectory(fs.makeQualified(root))
          .build();
    }
  }
  /**
   * Registers s3n and s3a URI patterns with the {@link Registration} system so
   * dataset URIs like {@code s3a://bucket/path/ns/name} resolve to repositories.
   *
   * @throws DatasetIOException if the default Hadoop configuration cannot load
   */
  @Override
  public void load() {
    try {
      // load hdfs-site.xml by loading HdfsConfiguration
      FileSystem.getLocal(DefaultConfiguration.get());
    } catch (IOException e) {
      throw new DatasetIOException("Cannot load default config", e);
    }
    OptionBuilder<DatasetRepository> builder = new URIBuilder();
    // username and secret are the same; host is the bucket
    Registration.register(
        new URIPattern("s3n:/*path"),
        new URIPattern("s3n:/*path/:namespace/:dataset"),
        builder);
    Registration.register(
        new URIPattern("s3a:/*path"),
        new URIPattern("s3a:/*path/:namespace/:dataset"),
        builder);
  }
  /**
   * Builds the FileSystem URI (scheme://user:pass@bucket:port/) from a
   * pattern match. Credentials and port are optional; an unparseable port is
   * treated as unspecified.
   *
   * @param match pattern-match options
   * @return a URI identifying the S3 FileSystem to connect to
   * @throws DatasetOperationException if the assembled URI is malformed (a bug)
   */
  private static URI fileSystemURI(Map<String, String> match) {
    final String userInfo;
    if (match.containsKey(URIPattern.USERNAME)) {
      if (match.containsKey(URIPattern.PASSWORD)) {
        userInfo = match.get(URIPattern.USERNAME) + ":" +
            match.get(URIPattern.PASSWORD);
      } else {
        userInfo = match.get(URIPattern.USERNAME);
      }
    } else {
      userInfo = null;
    }
    try {
      int port = UNSPECIFIED_PORT;
      if (match.containsKey(URIPattern.PORT)) {
        try {
          port = Integer.parseInt(match.get(URIPattern.PORT));
        } catch (NumberFormatException e) {
          // fall back to the default port rather than failing the match
          port = UNSPECIFIED_PORT;
        }
      }
      return new URI(match.get(URIPattern.SCHEME), userInfo,
          match.get(URIPattern.HOST), port, "/", null, null);
    } catch (URISyntaxException ex) {
      throw new DatasetOperationException("[BUG] Could not build FS URI", ex);
    }
  }
}