package edu.washington.escience.myria.io;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Objects;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.fasterxml.jackson.annotation.JsonProperty;
import edu.washington.escience.myria.coordinator.CatalogException;
/**
 * A {@link DataSink} whose destination is identified by an HDFS or S3 URI.
 *
 * <p>URIs given with the {@code s3} scheme are rewritten to {@code s3a} at construction time so
 * that the Hadoop S3A FileSystem is used. Any other scheme (or a URI with no scheme at all) is
 * rejected.
 */
public class UriSink implements DataSink {
  /** Required for Java serialization. */
  private static final long serialVersionUID = 1L;

  /** Destination URI; after construction its scheme is either {@code hdfs} or {@code s3a}. */
  @JsonProperty private final URI uri;

  /**
   * Builds a sink for the given destination.
   *
   * @param uri the destination as a string; must use the {@code hdfs}, {@code s3} or {@code s3a}
   *     scheme
   * @throws NullPointerException if {@code uri} is null
   * @throws IllegalArgumentException if {@code uri} cannot be parsed as a URI
   * @throws CatalogException if the URI has no scheme, or a scheme other than the ones listed
   *     above
   * @throws URISyntaxException if the {@code s3a} form of an {@code s3} URI cannot be constructed
   */
  public UriSink(@JsonProperty(value = "uri", required = true) final String uri)
      throws CatalogException, URISyntaxException {
    URI target = URI.create(Objects.requireNonNull(uri, "Parameter uri cannot be null"));

    // getScheme() is null for a relative (scheme-less) URI. The comparisons below are written
    // constant-first so that case ends in the CatalogException rather than a bare NPE.
    final String scheme = target.getScheme();

    if ("s3".equals(scheme)) {
      /* Force using the Hadoop S3A FileSystem */
      target =
          new URI(
              "s3a",
              target.getUserInfo(),
              target.getHost(),
              target.getPort(),
              target.getPath(),
              target.getQuery(),
              target.getFragment());
    } else if (!"hdfs".equals(scheme) && !"s3a".equals(scheme)) {
      throw new CatalogException("URI must be an HDFS or S3 URI");
    }

    this.uri = target;
  }

  /**
   * Opens a stream for writing to the destination URI.
   *
   * <p>A fresh default Hadoop {@link Configuration} is used to look up the {@link FileSystem}
   * for the URI, and the file at the URI's path is created through it.
   *
   * @return the stream returned by {@link FileSystem#create(Path)}; the caller is responsible
   *     for closing it
   * @throws IOException if the file system cannot be obtained or the file cannot be created
   */
  @Override
  public OutputStream getOutputStream() throws IOException {
    final Configuration conf = new Configuration();
    final FileSystem fs = FileSystem.get(uri, conf);
    return fs.create(new Path(uri));
  }
}