/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.pig.scripting.jruby; import java.io.IOException; import java.util.HashSet; import java.util.Set; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.Arrays; import org.apache.pig.impl.logicalLayer.schema.Schema; import org.apache.pig.impl.util.Utils; import org.apache.pig.data.DataType; import org.apache.pig.parser.ParserException; import org.apache.pig.impl.logicalLayer.FrontendException; import org.jruby.Ruby; import org.jruby.RubyHash; import org.jruby.RubyArray; import org.jruby.RubyClass; import org.jruby.RubyFixnum; import org.jruby.RubyModule; import org.jruby.RubyObject; import org.jruby.RubyRange; import org.jruby.RubyString; import org.jruby.RubySymbol; import org.jruby.anno.JRubyClass; import org.jruby.anno.JRubyMethod; import org.jruby.runtime.ObjectAllocator; import org.jruby.runtime.ThreadContext; import org.jruby.runtime.Block; import org.jruby.runtime.builtin.IRubyObject; //TODO implement all of the merge functions /** * This class encapsulated a native Schema object, and provides a more convenient * interface for manipulating Schemas. It hides the Schema/FieldSchema distinction * from the user, and tries to present a cleaner, more Ruby-esque API to the user. * For general information on JRuby's API definition annotations, * see {@link RubyDataBag}. */ @JRubyClass(name = "Schema") public class RubySchema extends RubyObject { private static final long serialVersionUID = 1L; /** * This is a pattern used in the conversion from ruby arguments to a valid Schema. It detects * cases where there is a bag, map, or tuple without being followed by {}, [], or () respectively. * It is used for convenience. */ private static final Pattern bmtPattern = Pattern.compile("(?:\\S+:)?(bag|map|tuple)\\s*(?:,|$)", Pattern.CASE_INSENSITIVE); /** * This is the encapsulated Schema object. */ private Schema internalSchema; private static final ObjectAllocator ALLOCATOR = new ObjectAllocator() { public IRubyObject allocate(Ruby runtime, RubyClass klass) { return new RubySchema(runtime, klass); } }; /** * This method registers the class with the given runtime. * * @param runtime an instance of the Ruby runtime * @return a RubyClass object with metadata about the registered class */ public static RubyClass define(Ruby runtime) { RubyClass result = runtime.defineClass("Schema",runtime.getObject(), ALLOCATOR); result.kindOf = new RubyModule.KindOf() { public boolean isKindOf(IRubyObject obj, RubyModule type) { return obj instanceof RubySchema; } }; result.includeModule(runtime.getEnumerable()); result.defineAnnotatedMethods(RubySchema.class); return result; } protected RubySchema(final Ruby ruby, RubyClass rc) { super(ruby,rc); internalSchema = new Schema(); } /** * This constructor sets the encapsulated Schema to be equal to * the given Schema. If copy is true, it is set equal to a copy. * If it is false, it is set directly equal. * * @param ruby an instance of the ruby runtime * @param rc an instance of the class object with meatadata * @param s a Schema to encapsulate * @param copy a boolean value. If true, s will be copied and the copy * will be encapsulated. If false, it will be encapsulated * directly. */ protected RubySchema(final Ruby ruby, RubyClass rc, Schema s, boolean copy) { super(ruby,rc); if (copy) { internalSchema = new Schema(s); } else { internalSchema = s; } } /** * This constructor sets the encapsulated Schema to be equal to the * given Schema. * * @param ruby an instance of the ruby runtime * @param rc an instance of the class object with meatadata * @param s a Schema to encapsulate */ protected RubySchema(final Ruby ruby, RubyClass rc, Schema s) { this(ruby, rc, s, true); } /** * This constructor is provided for convenience and sets the * internal Schema equal to the result of a call to * {@link Utils#getSchemaFromString}. * * @param ruby an instance of the ruby runtime * @param rc an instance of the class object with meatadata * @param s a String which will be passed to * {@link Utils#getSchemaFromString} */ protected RubySchema(final Ruby ruby, RubyClass rc, String s) { super(ruby, rc); try { internalSchema = Utils.getSchemaFromString(s); } catch (ParserException e) { throw new RuntimeException("Error converting String to Schema: " + s, e); } } /** * The ruby initializer accepts any number of arguments. With no arguments, * it will return an empty Schema object. It can accept any number of arguments. * To understand the valid arguments, see the documentation for {@link #rubyArgToSchema}. * * @param args a varargs which can take any number of valid arguments to * {@link #rubyArgToSchema} * @return the initialized RubySchema */ @JRubyMethod(rest = true) public RubySchema initialize(IRubyObject[] args) { internalSchema = new Schema(); for (IRubyObject arg : args) { Schema rs = rubyArgToSchema(arg); for (Schema.FieldSchema i : rs.getFields()) internalSchema.add(i); } RubySchema.fixSchemaNames(internalSchema); return this; } /** * This is a static helper method to create a null aliased bytearray Schema. * This is useful in cases where you do not want the output to have an explicit * name, which {@link Utils#getSchemaFromString} will assign. * * @param context the context the method is being executed in * @param self an instance of the RubyClass with metadata on * the Ruby class object this method is being * statically invoked against * @return a null-aliased bytearray schema */ @JRubyMethod(meta = true, name = {"by", "bytearray"}) public static RubySchema nullBytearray(ThreadContext context, IRubyObject self) { return makeNullAliasRubySchema(context, DataType.BYTEARRAY); } /** * This is a static helper method to create a null aliased Boolean Schema. * This is useful in cases where you do not want the output to have an explicit * name, which {@link Utils#getSchemaFromString} will assign. * * @param context the context the method is being executed in * @param self an instance of the RubyClass with metadata on * the Ruby class object this method is being * statically invoked against * @return a null-aliased bytearray schema */ @JRubyMethod(meta = true, name = {"bool", "boolean"}) public static RubySchema nullBoolean(ThreadContext context, IRubyObject self) { return makeNullAliasRubySchema(context, DataType.BOOLEAN); } /** * This is a static helper method to create a null aliased chararray Schema. * This is useful in cases where you do not want the output to have an explicit * name, which {@link Utils#getSchemaFromString} will assign. * * @param context the context the method is being executed in * @param self an instance of the RubyClass with metadata on * the Ruby class object this method is being * statically invoked against * @return a null-aliased bytearray schema */ @JRubyMethod(meta = true, name = {"c", "chararray"}) public static RubySchema nullChararray(ThreadContext context, IRubyObject self) { return makeNullAliasRubySchema(context, DataType.CHARARRAY); } /** * This is a static helper method to create a null aliased long Schema. * This is useful in cases where you do not want the output to have an explicit * name, which {@link Utils#getSchemaFromString} will assign. * * @param context the context the method is being executed in * @param self an instance of the RubyClass with metadata on * the Ruby class object this method is being * statically invoked against * @return a null-aliased bytearray schema */ @JRubyMethod(meta = true, name = {"l", "long"}) public static RubySchema nullLong(ThreadContext context, IRubyObject self) { return makeNullAliasRubySchema(context, DataType.LONG); } /** * This is a static helper method to create a null aliased int Schema. * This is useful in cases where you do not want the output to have an explicit * name, which {@link Utils#getSchemaFromString} will assign. * * @param context the context the method is being executed in * @param self an instance of the RubyClass with metadata on * the Ruby class object this method is being * statically invoked against * @return a null-aliased bytearray schema */ @JRubyMethod(meta = true, name = {"i", "int"}) public static RubySchema nullInt(ThreadContext context, IRubyObject self) { return makeNullAliasRubySchema(context, DataType.INTEGER); } /** * This is a static helper method to create a null aliased double Schema. * This is useful in cases where you do not want the output to have an explicit * name, which {@link Utils#getSchemaFromString} will assign. * * @param context the context the method is being executed in * @param self an instance of the RubyClass with metadata on * the Ruby class object this method is being * statically invoked against * @return a null-aliased bytearray schema */ @JRubyMethod(meta = true, name = {"d", "double"}) public static RubySchema nullDouble(ThreadContext context, IRubyObject self) { return makeNullAliasRubySchema(context, DataType.DOUBLE); } /** * This is a static helper method to create a null aliased float Schema. * This is useful in cases where you do not want the output to have an explicit * name, which {@link Utils#getSchemaFromString} will assign. * * @param context the context the method is being executed in * @param self an instance of the RubyClass with metadata on * the Ruby class object this method is being * statically invoked against * @return a null-aliased bytearray schema */ @JRubyMethod(meta = true, name = {"f", "float"}) public static RubySchema nullFloate(ThreadContext context, IRubyObject self) { return makeNullAliasRubySchema(context, DataType.FLOAT); } /** * This is a static helper method to create a null aliased datetime Schema. * This is useful in cases where you do not want the output to have an explicit * name, which {@link Utils#getSchemaFromString} will assign. * * @param context the context the method is being executed in * @param self an instance of the RubyClass with metadata on * the Ruby class object this method is being * statically invoked against * @return a null-aliased bytearray schema */ @JRubyMethod(meta = true, name = {"dt", "datetime"}) public static RubySchema nullDateTime(ThreadContext context, IRubyObject self) { return makeNullAliasRubySchema(context, DataType.DATETIME); } /** * This is a static helper method to create a null aliased tuple Schema. * This is useful in cases where you do not want the output to have an explicit * name, which {@link Utils#getSchemaFromString} will assign. * * @param context the context the method is being executed in * @param self an instance of the RubyClass with metadata on * the Ruby class object this method is being * statically invoked against * @return a null-aliased bytearray schema */ @JRubyMethod(meta = true, name = {"t", "tuple"}) public static RubySchema nullTuple(ThreadContext context, IRubyObject self) { return makeNullAliasRubySchema(context, DataType.TUPLE); } /** * This is a static helper method to create a null aliased bag Schema. * This is useful in cases where you do not want the output to have an explicit * name, which {@link Utils#getSchemaFromString} will assign. * * @param context the context the method is being executed in * @param self an instance of the RubyClass with metadata on * the Ruby class object this method is being * statically invoked against * @return a null-aliased bytearray schema */ @JRubyMethod(meta = true, name = {"b", "bag"}) public static RubySchema nullBag(ThreadContext context, IRubyObject self) { return makeNullAliasRubySchema(context, DataType.BAG); } /** * This is a static helper method to create a null aliased map Schema. * This is useful in cases where you do not want the output to have an explicit * name, which {@link Utils#getSchemaFromString} will assign. * * @param context the context the method is being executed in * @param self an instance of the RubyClass with metadata on * the Ruby class object this method is being * statically invoked against * @return a null-aliased bytearray schema */ @JRubyMethod(meta = true, name = {"m", "map"}) public static RubySchema nullMap(ThreadContext context, IRubyObject self) { return makeNullAliasRubySchema(context, DataType.MAP); } /** * This is a helper method to generate a RubySchema of the given type without an alias. * * @param context the context the method is being executed in * @param type the DataType.PIGTYPE value to make the Schema from * @return a RubySchema object encapsulated a Schema of the specified type */ private static RubySchema makeNullAliasRubySchema(ThreadContext context, byte type) { Ruby runtime = context.getRuntime(); return new RubySchema(runtime, runtime.getClass("Schema"), new Schema(new Schema.FieldSchema(null, type))); } /** * This is a helper function which converts objects into Schema objects. The valid * options are as follows: * <p> * A RubyString, which will have {@link Utils#getSchemaFromString} called on it, and * it will be added. * <p> * A RubySchema, which will be added directly. IMPORTANT NOTE: since this API abstracts * away from the distinction between Schema/FieldSchema, its important to understand * how a Schema is added to another. In this case, the FieldSchema is pulled directly * out of the given Schema. Thus, where in Pig a Schema.FieldSchema might be passed around, * internally to this class, generally a Schema will be passed around encapsulating it. * <p> * A list will create the Schema for a Tuple whose elements will be the elements of the * list. Each element will be subjected to the same rules applied here. * <p> * A hash in the form of:<br> * <code>{"name:tuple"=>["x:int","y:int,z:int"], "name2:bag"=>["a:chararray"]}</code><br> * The keys must be a tuple, bag, or map, and the value must be an array. * * @param arg an object (generally an IRubyObject or String) to convert. See above for the rules on valid arguments * @return the Schema constructed for the given argument */ public static Schema rubyArgToSchema(Object arg) { try { /** * Given a String or a RubyString, calls {@link Utils#getSchemaFromString}. * Additionally, as a convenience to the user, this method uses a regex to * detect any case where a schema declaration of "bag", "tuple", or "map" * does not have the trailing "{}", "()", or "[]" that * {@link Utils#getSchemaFromString} requires. */ if (arg instanceof String || arg instanceof RubyString) { String s = arg.toString(); Matcher m = bmtPattern.matcher(s); while (m.find()) { String type = m.group(1); String inter = s.substring(0, m.start(1)); if (type.equalsIgnoreCase("bag")) { inter += "{}"; } else if (type.equalsIgnoreCase("map")) { inter += "[]"; } else if (type.equalsIgnoreCase("tuple")) { inter += "()"; } else { throw new RuntimeException("Arriving here should be impossible"); } s = inter + s.substring(m.end(1)); m = bmtPattern.matcher(s); } return Utils.getSchemaFromString(s); // In the case of a RubySchema, can just return the encapsulated Schema } else if (arg instanceof RubySchema) { return ((RubySchema)arg).getInternalSchema(); // In the case of a RubyArray, the elements of the array are passed to this // method, and they will be treated as elements of a Tuple Schema. } else if (arg instanceof RubyArray) { RubyArray ary = (RubyArray)arg; Schema s = new Schema(); for (Object o : ary) { Schema ts = rubyArgToSchema(o); for (Schema.FieldSchema fs : ts.getFields()) { s.add(fs); } } return new Schema(new Schema.FieldSchema("tuple_0", s, DataType.TUPLE)); /** * In the case of a RubyHash, the key serves defines a Schema that will encapsulate * other elements. This mainly is for the convenience of being able to name * bags, maps, and tuples while easily being able to have interchangeable elements. * The key will be given to this method, but must return a singular map, tuple, or * bag, or an error will be thrown. The value to that key must be an array, and * each element will be passed to this method and then added to the Schema for * the key. */ } else if (arg instanceof RubyHash) { RubyHash hash = (RubyHash)arg; Schema hashSchema = new Schema(); for (Object o : hash.keySet()) { Schema s = rubyArgToSchema(o); if (s.size() != 1) { throw new RuntimeException("Hash key must be singular"); } Schema.FieldSchema fs = s.getField(0); Object v = hash.get(o); if (v instanceof RubyArray) { byte type = fs.type; if (type == DataType.BAG) { fs.schema = rubyArgToSchema(v); } else if (type == DataType.TUPLE || type == DataType.MAP) { fs.schema = rubyArgToSchema(v).getField(0).schema; } else { throw new RuntimeException("Hash key must be tuple map or bag"); } } else { throw new RuntimeException("Hash value must be an Array"); } hashSchema.add(fs); } return hashSchema; } else { throw new RuntimeException("Bad argument given to rubyToSchema: " + arg + (arg != null ? " class type " + arg.getClass().toString() : "")); } } catch (IOException e) { throw new RuntimeException("Error converting ruby to Schema: " + arg, e); } } /** * This is a ruby method which takes a name and an array of arguments and constructs a Tuple schema * from them. * * @param context the context the method is being executed in * @param self the RubyClass for the Class object this was invoked on * @param arg1 the name for the RubySchema * @param arg2 a list of arguments to instantiate the new RubySchema * @return the new Tuple RubySchema */ @JRubyMethod(meta = true, name = {"t", "tuple"}) public static RubySchema tuple(ThreadContext context, IRubyObject self, IRubyObject arg1, IRubyObject arg2) { RubySchema rs = tuple(context, self, arg2); rs.setNameIf(arg1); return rs; } /** * This is a ruby method which takes an array of arguments and constructs a Tuple schema from them. The name * will be set automatically. * * @param context the context the method is being executed in * @param self the RubyClass for the Class object this was invoked on * @param arg a list of arguments to instantiate the new RubySchema * @return the new RubySchema */ @JRubyMethod(meta = true, name = {"t", "tuple"}) public static RubySchema tuple(ThreadContext context, IRubyObject self, IRubyObject arg) { if (arg instanceof RubyArray) { Schema s = rubyArgToSchema(arg); Ruby runtime = context.getRuntime(); return new RubySchema(runtime, runtime.getClass("Schema"), s); } else { throw new RuntimeException("Bad argument given to Schema.tuple"); } } /** * This is a ruby method which takes a name and an array of arguments and constructs a Map schema * from them. * * @param context the context the method is being executed in * @param self the RubyClass for the Class object this was invoked on * @param arg1 the name for the RubySchema * @param arg2 a list of arguments to instantiate the new RubySchema * @return the new RubySchema */ @JRubyMethod(meta = true, name = {"m", "map"}) public static RubySchema map(ThreadContext context, IRubyObject self, IRubyObject arg1, IRubyObject arg2) { RubySchema rs = map(context, self, arg2); rs.setNameIf(arg1); return rs; } /** * This is a ruby method which takes an array of arguments and constructs a Map schema from them. The name * will be set automatically. * * @param context the context the method is being executed in * @param self the RubyClass for the Class object this was invoked on * @param arg a list of arguments to instantiate the new RubySchema * @return the new RubySchema */ @JRubyMethod(meta = true, name = {"m", "map"}) public static RubySchema map(ThreadContext context, IRubyObject self, IRubyObject arg) { Schema s = tuple(context, self, arg).getInternalSchema(); Ruby runtime = context.getRuntime(); try { return new RubySchema(runtime, runtime.getClass("Schema"), new Schema(new Schema.FieldSchema("map_0", s.getField(0).schema, DataType.MAP))); } catch (FrontendException e) { throw new RuntimeException("Error making map", e); } } /** * This is a ruby method which takes a name and an array of arguments and constructs a Bag schema * from them. * * @param context the context the method is being executed in * @param self the RubyClass for the Class object this was invoked on * @param arg1 the name for the RubySchema * @param arg2 a list of arguments to instantiate the new RubySchema * @return the new RubySchema */ @JRubyMethod(meta = true, name={"b", "bag"}) public static RubySchema bag(ThreadContext context, IRubyObject self, IRubyObject arg1, IRubyObject arg2) { RubySchema rs = bag(context, self, arg2); rs.setNameIf(arg1); return rs; } /** * This is a ruby method which takes an array of arguments and constructs a Bag schema from them. The name * will be set automatically. * * @param context the context the method is being executed in * @param self the RubyClass for the Class object this was invoked on * @param arg a list of arguments to instantiate the new RubySchema * @return the new RubySchema */ @JRubyMethod(meta = true, name = {"b", "bag"}) public static RubySchema bag(ThreadContext context, IRubyObject self, IRubyObject arg) { Schema s = tuple(context, self, arg).getInternalSchema(); Ruby runtime = context.getRuntime(); try { return new RubySchema(runtime, runtime.getClass("Schema"), new Schema(new Schema.FieldSchema("bag_0", s, DataType.BAG))); } catch (FrontendException e) { throw new RuntimeException("Error making map", e); } } /** * This method will fix any name conflicts in a schema. It's important to note that * this will change the Schema object itself. It will deal with any collisions in things * named tuple_#, bag_#, map_#, or val_#, as these are generally names generated by * Util.getSchemaFromString. In the case of another name conflict, it will not be * changed, as that name conflict was created by the user. * * @param s a Schema object to fix in place */ private static void fixSchemaNames(Schema s) { if (s == null) return; // This regex detects names that could possibly collide that we should change Pattern p = Pattern.compile("(bag_|tuple_|map_|val_)(\\d+)", Pattern.CASE_INSENSITIVE); Set<String> names = new HashSet<String>(s.size(), 1.0f); for (Schema.FieldSchema fs : s.getFields()) { if (fs.alias == null) continue; Matcher m = p.matcher(fs.alias); if (m.matches() && names.contains(fs.alias)) { String prefix = m.group(1); int suffix = Integer.parseInt(m.group(2)); while (names.contains(prefix + suffix)) suffix++; fs.alias = prefix + suffix; } names.add(fs.alias); if (fs.schema != null) { if (fs.type == DataType.BAG) { try { fixSchemaNames(fs.schema.getField(0).schema); } catch (FrontendException e) { throw new RuntimeException("Error recursively fixing schema: " + s, e); } } else { fixSchemaNames(fs.schema); } } } } /** * This is just a convenience method which sets the name of the internalSchema to the argument that was given. * * @param arg a RubyString to set the name of the encapsulated Schema object */ private void setNameIf(IRubyObject arg) { if (arg instanceof RubyString) { setName(arg.toString()); } else { throw new RuntimeException("Bad name given"); } } /** * This method sets the name of a RubySchema to the name given. It's important to note that * if the RubySchema represents anything other than a tuple, databag, or map then an error * will be thrown. * * @param name a String to set the name of the encapsulated Schema object */ private void setName(String name) { Schema.FieldSchema fs; try { fs = internalSchema.getField(0); } catch (FrontendException e) { throw new RuntimeException("Error getting field from schema: " + internalSchema, e); } byte type = fs.type; if (type == DataType.TUPLE || type == DataType.BAG || type == DataType.MAP) { fs.alias = name; } else { throw new RuntimeException("setName cannot be set on Schema: " + internalSchema); } } /** * The toString method just leverages Schema's printing. * * @param context the context the method is being executed in * @return a String representation of the encapsulated Schema object */ @JRubyMethod(name = {"to_s", "inspect"}) public RubyString toString(ThreadContext context) { return RubyString.newString(context.getRuntime(), internalSchema.toString()); } /** * This is the ruby method which allows people to access elements of the RubySchema object. * It can be given either a single numeric index, or a Range object to specify a range of indices. * It's important to note that the Schema object returned from this references the Schema stored * internally, so if the user wants to make changes without affecting this object, it must be cloned. * * @param context the context the method is being executed in * @param arg a Fixnum index, Range object to specify a range of values to return, or * a String to look up by alias name * @return the RubySchema object encapsulated the found Schema */ @JRubyMethod(name = {"[]", "slice"}) public RubySchema get(ThreadContext context, IRubyObject arg) { Ruby runtime = context.getRuntime(); if (arg instanceof RubyFixnum) { int index = (int)((RubyFixnum)arg).getLongValue(); Schema s; try { s = new Schema(internalSchema.getField(index)); } catch (FrontendException e) { throw new RuntimeException("Invalid index given to get function: " + index, e); } return new RubySchema(runtime, runtime.getClass("Schema"), s, false); //returns the actual object itself } else if (arg instanceof RubyRange) { int min = (int)((RubyFixnum)((RubyRange)arg).min(context, Block.NULL_BLOCK)).getLongValue(); int max = (int)((RubyFixnum)((RubyRange)arg).max(context, Block.NULL_BLOCK)).getLongValue(); return new RubySchema(runtime, runtime.getClass("Schema"), new Schema(internalSchema.getFields().subList(min, max + 1)), false); } else if (arg instanceof RubyString) { try { return new RubySchema(runtime, runtime.getClass("Schema"), new Schema(internalSchema.getField(arg.toString())), false); } catch (FrontendException e) { throw new RuntimeException("Unable to find field " + arg.toString() + " in schema " + internalSchema, e); } } else { throw new RuntimeException("Invalid argument given to get function: " + arg.toString()); } } /** * This is a version of [] which allows the range to be specified as such: [1,2]. * * @param context the context the method is being executed in * @param arg1 a Fixnum start index * @param arg2 a Fixnum end index * @return the RubySchema object encapsulated the found Schema */ @JRubyMethod(name = {"[]", "slice"}) public RubySchema get(ThreadContext context, IRubyObject arg1, IRubyObject arg2) { if (arg1 instanceof RubyFixnum && arg2 instanceof RubyFixnum) { Ruby runtime = context.getRuntime(); int min = (int)((RubyFixnum)arg1).getLongValue(); int max = (int)((RubyFixnum)arg2).getLongValue() - 1; return new RubySchema(runtime, runtime.getClass("Schema"), new Schema(internalSchema.getFields().subList(min, max + 1)), false); } else { throw new RuntimeException("Bad arguments given to get function: ( " + arg1.toString() + " , " + arg2.toString()+ " )"); } } /** * This allows the users to set an index or a range of values to * a specified RubySchema. The first argument must be a Fixnum or Range, * and the second argument may optionally be a Fixnum. The given index * (or range of indices) will be replaced by a RubySchema instantiated * based on the remaining arguments. * * @param context the contextthe method is being executed in * @param args a varargs which has to be at least length two. * @return the RubySchema that was added */ @JRubyMethod(name = {"[]=", "set"}, required = 2, rest = true) public RubySchema set(ThreadContext context, IRubyObject[] args) { IRubyObject arg1 = args[0]; IRubyObject arg2 = args[1]; IRubyObject[] arg3 = Arrays.copyOfRange(args, 1, args.length); Schema s = internalSchema; Ruby runtime = context.getRuntime(); List<Schema.FieldSchema> lfs = s.getFields(); int min, max; if (arg1 instanceof RubyFixnum && arg2 instanceof RubyFixnum) { min = (int)((RubyFixnum)arg1).getLongValue(); max = (int)((RubyFixnum)arg2).getLongValue(); arg3 = Arrays.copyOfRange(args, 2, args.length); } else if (arg1 instanceof RubyFixnum) { min = (int)((RubyFixnum)arg1).getLongValue(); max = min + 1; } else if (arg1 instanceof RubyRange) { min = (int)((RubyFixnum)((RubyRange)arg1).min(context, Block.NULL_BLOCK)).getLongValue(); max = (int)((RubyFixnum)((RubyRange)arg1).max(context, Block.NULL_BLOCK)).getLongValue() + 1; } else { throw new RuntimeException("Bad arguments given to get function: ( " + arg1.toString() + " , " + arg2.toString()+ " )"); } for (int i = min; i < max; i++) lfs.remove(min); if (arg3 == null || arg3.length == 0) throw new RuntimeException("Must have schema argument for []="); RubySchema rs = new RubySchema(runtime, runtime.getClass("Schema")).initialize(arg3); for (Schema.FieldSchema fs : rs.getInternalSchema().getFields()) lfs.add(min++, fs); RubySchema.fixSchemaNames(internalSchema); return rs; } /** * This method provides addition semantics, without modifying the original Schema. * This method can be given any number of arguments, much as with the constructor. * * @param context the context the method is being executed in * @param args a varargs which can be any valid set of arguments that * can initialize a RubySchema * @return the Rresult of the addition */ @JRubyMethod(name = {"add", "+"}, rest = true) public RubySchema add(ThreadContext context, IRubyObject[] args) { RubySchema rsClone = clone(context); rsClone.addInPlace(context, args); return rsClone; } /** * This method provides addition semantics, modifying the original Schema in place. * This method can be given any number of arguments, much as with the constructor. * * @param context the context the method is being executed in * @param args a varargs which can be any valid set of arguments that * can initialize a RubySchema */ @JRubyMethod(name = "add!", rest = true) public void addInPlace(ThreadContext context, IRubyObject[] args) { Ruby runtime = context.getRuntime(); List<Schema.FieldSchema> lfs = internalSchema.getFields(); RubySchema rs = new RubySchema(runtime, runtime.getClass("Schema")).initialize(args); for (Schema.FieldSchema fs : rs.getInternalSchema().getFields()) lfs.add(fs); RubySchema.fixSchemaNames(internalSchema); } /** * @param context the context the method is being executed in * @return a RubySchema copy of the Schema */ @JRubyMethod public RubySchema clone(ThreadContext context) { Ruby runtime = context.getRuntime(); return new RubySchema(runtime, runtime.getClass("Schema"), internalSchema); } /** * Given a field name this string will search the RubySchema for a FieldSchema * with that name and return it encapsulated in a Schema. * * @param context the context the method is being executed in * @param arg a RubyString serving as an alias to look * for in the Schema * @return the found RubySchema */ @JRubyMethod public RubySchema find(ThreadContext context, IRubyObject arg) { if (arg instanceof RubyString) { Ruby runtime = context.getRuntime(); return new RubySchema(runtime, runtime.getClass("Schema"), RubySchema.find(internalSchema, arg.toString()), false); } else { throw new RuntimeException("Invalid arguement passed to find: " + arg); } } /** * This is a helper method which recursively searches for an alias in the Schema * encapsulated by RubySchema. This is necessary because findFieldSchema uses * canonicalName, not name. * * @param s the Schema to search through * @param alias * @return the found RubySchema */ private static Schema find(Schema s, String alias) { for (Schema.FieldSchema fs : s.getFields()) if (alias.equals(fs.alias)) return new Schema(fs); for (Schema.FieldSchema fs : s.getFields()) if (fs.schema != null) { Schema r = RubySchema.find(fs.schema, alias); if (r != null) return r; } return new Schema(); } /** * Given a field name, this will return the index of it in the schema. * * @param context the context the method is being executed in * @param arg a field name to look for * @return the index for that field name */ @JRubyMethod public RubyFixnum index(ThreadContext context, IRubyObject arg) { if (arg instanceof RubyString) { try { return new RubyFixnum(context.getRuntime(), internalSchema.getPosition(arg.toString())); } catch (FrontendException e) { throw new RuntimeException("Unable to find position for argument: " + arg); } } else { throw new RuntimeException("Invalid arguement passed to index: " + arg); } } /** * @param context the context the method is being executed in * @return the size of the encapsulated Schema */ @JRubyMethod(name = {"size", "length"}) public RubyFixnum size(ThreadContext context) { return new RubyFixnum(context.getRuntime(), internalSchema.size()); } /** * This is a helper method to pull out the native Java type from the ruby object. * * @return the encapsulated Schema */ public Schema getInternalSchema() { return internalSchema; } /** * This method allows access into the Schema nested in the encapsulated Schema. For example, * if the encapsulated Schema is a bag Schema, this allows the user to access the schema of * the interior Tuple. * * @param context the context the method is being executed in * @return a RubySchema encapsulating the nested Schema */ @JRubyMethod(name = {"get", "inner", "in"}) public RubySchema get(ThreadContext context) { if (internalSchema.size() != 1) throw new RuntimeException("Can only return nested schema if there is one schema to get"); Ruby runtime = context.getRuntime(); try { return new RubySchema(runtime, runtime.getClass("Schema"), internalSchema.getField(0).schema, false); } catch (FrontendException e) { throw new RuntimeException("Schema does not have a nested FieldScema", e); } } /** * This method allows the user to see the name of the alias of the FieldSchema of the encapsulated * Schema. This method only works if the Schema has one FieldSchema. * * @param context the context the method is being executed in * @return the name of the Schema */ @JRubyMethod(name = "name") public RubyString getName(ThreadContext context) { try { if (internalSchema.size() != 1) throw new RuntimeException("Can only get name if there is one schema present"); return RubyString.newString(context.getRuntime(), internalSchema.getField(0).alias); } catch (FrontendException e) { throw new RuntimeException("Unable to get field from Schema", e); } } /** * This method allows the user to set the name of the alias of the FieldSchema of the encapsulated * Schema. This method only works if the Schema has one FieldSchema. * * @param arg a RubyString to set the name to * @return the new name */ @JRubyMethod(name = "name=") public RubyString setName(IRubyObject arg) { if (arg instanceof RubyString) { if (internalSchema.size() != 1) throw new RuntimeException("Can only set name if there is one schema present"); try { internalSchema.getField(0).alias = arg.toString(); return (RubyString)arg; } catch (FrontendException e) { throw new RuntimeException("Unable to get field from Schema", e); } } else { throw new RuntimeException("Improper argument passed to 'name=':" + arg); } } }