package water.api;
import java.io.*;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import water.*;
import water.api.RequestServer.API_VERSION;
import water.fvec.Frame;
import water.persist.PersistHdfs;
import water.util.FSUtils;
import water.util.Log;
public class ExportFiles extends Request2 {
static final int API_WEAVER=1; // This file has auto-gen'd doc & json fields
static public DocGen.FieldDoc[] DOC_FIELDS; // Initialized from Auto-Gen code.
// This Request supports the HTML 'GET' command, and this is the help text
// for GET.
static final String DOC_GET =
"Export a Frame from H2O onto a file system (local disk or HDFS).";
@Override
public API_VERSION[] supportedVersions() { return SUPPORTS_ONLY_V2; }
@API(help="Key to an existing H2O Frame (or ValueArray).", required=true,filter=Default.class)
Key src_key;
@API(help="Path to a file on either local disk of connected node or HDFS.", required=true,filter=GeneralFile.class,gridable=false)
String path;
@API(help="Overwrite existing files.", required=false,filter=Default.class,gridable=false)
boolean force = false;
public static String link(Key k, String content){
return "<a href='/2/ExportFiles.query?src_key=" + k.toString() + "'>" + content + "</a>";
}
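  // For example, link(Key.make("my_frame.hex"), "Export") renders as
  //   <a href='/2/ExportFiles.query?src_key=my_frame.hex'>Export</a>
  // ("my_frame.hex" is an illustrative key name).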
  // Set when the export target is local disk (used to prefix log/HTML output with the node address).
  boolean _local = false;

  /**
   * Iterates over fields and their annotations, and creates argument handlers.
   */
  @Override protected void registered(API_VERSION version) { super.registered(version); }
@Override protected Response serve() {
try {
// pull everything local
Log.info("ExportFiles processing (" + path + ")");
      Value v = DKV.get(src_key);
      if( v == null ) throw new IllegalArgumentException(src_key + " not found.");
      Object value = v.get();
      // create a stream to read the entire Frame as CSV
      if( !(value instanceof Frame) ) throw new UnsupportedOperationException("Can only export Frames.");
InputStream csv = ((Frame) value).toCSV(true);
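      // Dispatch on the URI scheme: hdfs:// and s3n:// paths are written through the Hadoop
      // FileSystem layer, anything else is treated as a path on this node's local disk.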
String p2 = path.toLowerCase();
if( p2.startsWith("hdfs://" ) ) serveHdfs(csv);
else if( p2.startsWith("s3n://" ) ) serveHdfs(csv);
else serveLocalDisk(csv);
return RequestBuilders.Response.done(this);
} catch (Throwable t) {
return RequestBuilders.Response.error(t);
}
}
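  /** Streams the CSV to an HDFS (or S3N) target resolved through the Hadoop FileSystem API. */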
protected void serveHdfs(InputStream csv) throws IOException {
if (FSUtils.isBareS3NBucketWithoutTrailingSlash(path)) { path += "/"; }
Path p = new Path(path);
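    // Resolve the Hadoop FileSystem implementation for the path's URI using H2O's
    // Hadoop configuration (PersistHdfs.CONF).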
org.apache.hadoop.fs.FileSystem fs = org.apache.hadoop.fs.FileSystem.get(p.toUri(), PersistHdfs.CONF);
if( !force && fs.exists(p) ) throw new IllegalArgumentException("File " + path + " already exists.");
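    // Create any missing parent directories for the target file before writing.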
fs.mkdirs(p.getParent());
FSDataOutputStream s = fs.create(p);
byte[] buffer = new byte[1024];
    try {
      // Copy the CSV stream to the target file in 1 KB chunks.
      int len;
      while( (len = csv.read(buffer)) > 0 ) {
        s.write(buffer, 0, len);
      }
      Log.info("Key '" + src_key + "' was written to " + path + ".");
    } finally {
      s.close();
      csv.close();
    }
}
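  /** Streams the CSV to a file on the local disk of the node answering this request. */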
private void serveLocalDisk(InputStream csv) throws IOException {
_local = true;
OutputStream output = null;
try {
File f = new File(path);
if( !force && f.exists() ) throw new IllegalArgumentException("File " + path + " already exists.");
      output = new FileOutputStream(f);
      // Copy the CSV stream to the local file in 1 KB chunks.
      byte[] buffer = new byte[1024];
      int len;
      while( (len = csv.read(buffer)) > 0 ) {
        output.write(buffer, 0, len);
      }
      Log.info("Key '" + src_key + "' was written to " +
          (_local && H2O.CLOUD.size() > 1 ? H2O.SELF_ADDRESS + ":" : "") + path + ".");
    } finally {
      if( output != null ) output.close();
      csv.close();
    }
}
@Override public boolean toHTML( StringBuilder sb ) {
DocGen.HTML.section(sb, "Export done. Key '" + src_key.toString() +
"' was written to " + (_local && H2O.CLOUD.size() > 1 ? H2O.SELF_ADDRESS + ":" : "") + path.toString());
return true;
}
}