/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pig.data;

import java.net.URL;
import java.net.URLClassLoader;
import java.util.List;

import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigTupleDefaultRawComparator;
import org.apache.pig.classification.InterfaceAudience;
import org.apache.pig.classification.InterfaceStability;

/**
 * A factory to construct tuples.  This class is abstract so that users can
 * override the tuple factory if they desire to provide their own that
 * returns their implementation of a tuple.
 * <p>
 * {@link #getInstance()} selects the implementation from system properties:
 * <ul>
 * <li>If <code>pig.data.tuple.factory.name</code> is set to a class name and
 * <code>pig.data.tuple.factory.jar</code> is set to a URL pointing to a jar
 * that contains the named class, an instance of the named class is created
 * using the indicated jar.</li>
 * <li>If only <code>pig.data.tuple.factory.name</code> is set, the named
 * class is loaded through the class loader of this class.</li>
 * <li>Otherwise an instance of {@link BinSedesTupleFactory} is created.</li>
 * </ul>
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public abstract class TupleFactory implements TupleMaker<Tuple> {

    /** System property naming the TupleFactory implementation class. */
    private static final String FACTORY_NAME_PROPERTY = "pig.data.tuple.factory.name";

    /** System property holding the URL of the jar containing that class. */
    private static final String FACTORY_JAR_PROPERTY = "pig.data.tuple.factory.jar";

    /** Lazily created singleton; set back to null by {@link #resetSelf()}. */
    private static TupleFactory gSelf = null;

    /**
     * Get a reference to the singleton factory.  The factory is created on
     * the first call (or the first call after {@link #resetSelf()}) and
     * cached afterwards.  The lazy initialization is not synchronized.
     *
     * @return The TupleFactory to use to construct tuples.
     * @throws RuntimeException if a factory class is configured but cannot
     * be loaded or instantiated, or does not extend TupleFactory.
     */
    public static TupleFactory getInstance() {
        if (gSelf == null) {
            String factoryName = System.getProperty(FACTORY_NAME_PROPERTY);
            String factoryJar = System.getProperty(FACTORY_JAR_PROPERTY);
            if (factoryName == null) {
                // The jar property alone is not enough to pick a class, so
                // it is ignored when no class name is configured.
                gSelf = new BinSedesTupleFactory();
            } else {
                gSelf = instantiateFactory(factoryName, factoryJar);
            }
        }
        return gSelf;
    }

    /**
     * Load and instantiate a user supplied factory class.
     *
     * @param factoryName Fully qualified name of the factory class.
     * @param factoryJar URL of a jar containing the class, or null to load
     * the class through the class loader of TupleFactory.
     * @return A new instance of the named factory.
     * @throws RuntimeException if the class cannot be loaded or
     * instantiated, or if it does not extend TupleFactory.
     */
    private static TupleFactory instantiateFactory(String factoryName,
                                                   String factoryJar) {
        try {
            ClassLoader loader = TupleFactory.class.getClassLoader();
            if (factoryJar != null) {
                // The loader is intentionally left open: the factory's
                // classes may continue to be resolved through it.
                loader = new URLClassLoader(
                        new URL[] { new URL(factoryJar) }, loader);
            }
            Class<?> c = Class.forName(factoryName, true, loader);
            // Class.newInstance() is kept on purpose: it propagates
            // exceptions thrown by the constructor without wrapping them,
            // which is the behavior existing callers observe.
            Object o = c.newInstance();
            if (!(o instanceof TupleFactory)) {
                throw new RuntimeException("Provided factory " +
                    factoryName + " does not extend TupleFactory!");
            }
            return (TupleFactory) o;
        } catch (RuntimeException e) {
            // Includes the type-mismatch error thrown just above; pass
            // runtime exceptions through unchanged.
            throw e;
        } catch (Exception e) {
            throw new RuntimeException("Unable to instantiate "
                + "tuple factory " + factoryName, e);
        }
    }

    /**
     * Create an empty tuple.  This should be used as infrequently as
     * possible, use newTuple(int) instead.
     * @return Empty new tuple.
     */
    public abstract Tuple newTuple();

    /**
     * Create a tuple with size fields.  Whenever possible this is preferred
     * over the null constructor, as the constructor can preallocate the
     * size of the container holding the fields.  Once this is called, it
     * is legal to call Tuple.set(x, object), where x &lt; size.
     * @param size Number of fields in the tuple.
     * @return Tuple with size fields
     */
    public abstract Tuple newTuple(int size);

    /**
     * Create a tuple from the provided list of objects.  The underlying list
     * will be copied.
     * @param c List of objects to use as the fields of the tuple.
     * @return A tuple with the list objects as its fields
     */
    public abstract Tuple newTuple(List c);

    /**
     * Create a tuple from a provided list of objects, keeping the provided
     * list.  The new tuple will take over ownership of the provided list.
     * @param list List of objects that will become the fields of the tuple.
     * @return A tuple with the list objects as its fields
     */
    public abstract Tuple newTupleNoCopy(List list);

    /**
     * Create a tuple with a single element.  This is useful because of
     * the fact that bags (currently) only take tuples, we often end up
     * sticking a single element in a tuple in order to put it in a bag.
     * @param datum Datum to put in the tuple.
     * @return A tuple with one field
     */
    public abstract Tuple newTuple(Object datum);

    /**
     * Return the actual class representing a tuple that the implementing
     * factory will be returning.  This is needed because Hadoop needs
     * to know the exact class we will be using for input and output.
     * @return Class that implements tuple.
     */
    public abstract Class<? extends Tuple> tupleClass();

    protected TupleFactory() {
    }

    /**
     * Provided for testing purposes only.  This function should never be
     * called by anybody but the unit tests.  It clears the cached singleton
     * so that the next call to {@link #getInstance()} creates a new factory.
     */
    public static void resetSelf() {
        gSelf = null;
    }

    /**
     * Return the actual class implementing the raw comparator for tuples
     * that the factory will be returning.  Override this to allow Hadoop to
     * speed up tuple sorting.  The actual returned class should know the
     * serialization details for the tuple.  The default implementation
     * (PigTupleDefaultRawComparator) will serialize the data before comparison.
     * @return Class that implements tuple raw comparator.
     */
    public Class<? extends TupleRawComparator> tupleRawComparatorClass() {
        return PigTupleDefaultRawComparator.class;
    }

    /**
     * This method is used to inspect whether the Tuples created by this factory
     * will be of a fixed size when they are created.  In practical terms, this means
     * whether they support append or not.
     * @return whether the Tuple is fixed size or not
     */
    public abstract boolean isFixedSize();
}