package sizzle.functions;

import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Files and other OS resources.
 *
 * All intrinsics are exposed as static methods annotated with
 * {@link FunctionSpec}. File access goes through a single shared
 * {@link FileSystem} handle that is created once, when this class is
 * initialized.
 *
 * @author anthonyu
 *
 */
public class SizzleFileIntrinsics {
    /** Size, in bytes, of the chunk buffer used when copying file contents. */
    private static final int BUFFER_SIZE = 4096;

    /**
     * File system used to resolve every path passed to {@link #load(String)}.
     * Obtained from a default-constructed {@link Configuration}.
     */
    private static final FileSystem fs;

    static {
        try {
            fs = FileSystem.get(new Configuration());
        } catch (final IOException e) {
            // A static initializer cannot throw a checked exception, so wrap
            // it; the original exception is preserved as the cause.
            throw new RuntimeException(e.getClass().getSimpleName() + " caught", e);
        }
    }

    /**
     * Return the entire contents of the named file as an uninterpreted array of
     * bytes.
     *
     * The whole file is buffered in memory before being returned.
     *
     * @param file
     *            A {@link String} containing the name of the file
     *
     * @return An array of byte containing the contents of the named file
     *
     * @throws IOException
     *             if the file cannot be opened, read or closed
     */
    @FunctionSpec(name = "load", returnType = "bytes", formalParameters = { "string" })
    public static byte[] load(final String file) throws IOException {
        final InputStream in = new BufferedInputStream(fs.open(new Path(file)));

        try {
            // ByteArrayOutputStream holds no OS resources and its close() is a
            // no-op, so it does not need its own try/finally.
            final ByteArrayOutputStream out = new ByteArrayOutputStream();
            final byte[] buf = new byte[BUFFER_SIZE];

            // End of stream is signalled by -1; testing for that sentinel
            // (rather than "> 0") avoids truncating the result should a read
            // ever report zero bytes.
            int len;
            while ((len = in.read(buf)) != -1) {
                out.write(buf, 0, len);
            }

            return out.toByteArray();
        } finally {
            in.close();
        }
    }

    /**
     * Return the contents of the named environment variable as a string.
     *
     * The original description of this intrinsic states that the raw data is
     * interpreted as UTF-8 in the same manner as the default conversion from
     * bytes to string; this implementation simply returns whatever
     * {@link System#getenv(String)} yields.
     *
     * @param variable
     *            A {@link String} containing the name of the desired
     *            environment variable
     *
     * @return A {@link String} containing the contents of the desired
     *         environment variable, or <code>null</code> if the variable is
     *         not defined
     *
     * @throws IOException
     *             never thrown by this implementation; the declaration is
     *             retained so existing callers keep compiling
     */
    @FunctionSpec(name = "getenv", returnType = "string", formalParameters = { "string" })
    public static String getenv(final String variable) throws IOException {
        return System.getenv(variable);
    }

    // TODO: implement {set,get,lock}additionalinput
    /*
     * getadditionalinput: function(variable: string): bytes;
     *
     * A map of strings to bytes may be provided to Proc by the process running
     * szl. Return the bytes mapped to by the argument. Never returns undef.
     *
     *
     * setadditionalinput: function(label: string, value: bytes);
     *
     * Stores a (label, value) pair. Never returns undef.
     *
     *
     * lockadditionalinput: function();
     *
     * Prevents further calls to setadditionalinput for this record. Never
     * returns undef.
     *
     *
     * type resourcestats = { initializedavailablemem: int,
     * initializedallocatedmem: int, initializedusertime: time,
     * initializedsystemtime: time, availablemem: int, allocatedmem: int,
     * usertime: time, systemtime: time };
     *
     *
     * getresourcestats: function(): resourcestats;
     *
     * Return a tuple of type resourcestats containing resource usage
     * statistics. The first set of numbers reports the statistics after static
     * initialization. The second set reports the values consumed by processing
     * the current input record. The availablemem figure reports total size of
     * the heap; allocatedmem is the amount in use on the heap. Memory is
     * measured in bytes, and time is measured in microseconds. Availability and
     * accuracy of these values is implementation dependent. Never returns
     * undef.
     */

    // TODO: implement database intrinsics
    /*
     * The database intrinsics are not implemented. They are provided as a
     * recommendation for any future implementation of database access.
     *
     *
     * type SQL_DB = int; # basic type
     *
     * dbconnect: function(dbspec: string, defaultspec: string): int;
     *
     * Connects to a database with the dbspecs and returns a db object. It is
     * recommended to declare the db object as static so only one connection is
     * made per worker. Returns undef only if an error occurs.
     *
     *
     * dbquery: function(db: int, query: string): array of array of string;
     *
     * Executes a sql query on the given database object. Returns an array of
     * array of string, each array of string representing one row of results.
     * For most queries such as SELECT statements, the results can be declared
     * as static to avoid excessive queries on the database. Returns undef only
     * if an error occurs.
     */
}