/** * Copyright 2010-2013 Scale Unlimited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.scaleunlimited.cascading; import java.io.IOException; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; import java.io.Serializable; import cascading.tuple.Fields; import cascading.tuple.Tuple; import cascading.tuple.TupleEntry; @SuppressWarnings("serial") public abstract class BaseDatum implements Serializable { protected TupleEntry _tupleEntry; public BaseDatum() { this(new Fields()); } /** * Create an empty datum with field names defined by <fields> * * @param fields Names of fields */ public BaseDatum(Fields fields) { this(new TupleEntry(fields, Tuple.size(fields.size()))); } /** * Create a new datum with field names defined by <fields>, and * field values contained in <tuple> * * WARNING - <tuple> will be kept as the data container, so don't call this * with a tuple provided by a Cascading operation/iterator, as those get reused. * * @param fields Names of fields * @param tuple Data for the datum */ public BaseDatum(Fields fields, Tuple tuple) { if (fields.size() != tuple.size()) { throw new IllegalArgumentException("Size of fields must be the same as the size of the tuple: " + fields + "/" + tuple); } _tupleEntry = new TupleEntry(fields, tuple); } public BaseDatum(TupleEntry tupleEntry) { _tupleEntry = tupleEntry; } /** * Set the data container to be <tupleEntry> * * @param tupleEntry Data for the datum. */ public void setTupleEntry(TupleEntry tupleEntry) { setTupleEntry(tupleEntry, true); } /** * Set the data container to be <tupleEntry> * * @param tupleEntry Data for the datum. */ protected void setTupleEntry(TupleEntry tupleEntry, boolean checkFields) { if (checkFields && !tupleEntry.getFields().equals(getFields())) { throw new IllegalArgumentException("Fields must be the same as for current value: " + tupleEntry.getFields() + "/" + _tupleEntry.getFields()); } _tupleEntry = tupleEntry; reset(); } public void setTuple(Tuple tuple) { if (getFields().size() != tuple.size()) { throw new IllegalArgumentException("Size of tuple doesn't match current fields"); } _tupleEntry.setTuple(tuple); reset(); } public Tuple getTuple() { return getTupleEntry().getTuple(); } public TupleEntry getTupleEntry() { commit(); return _tupleEntry; } public Fields getFields() { return _tupleEntry.getFields(); } protected void validateFields(Fields superFields, Fields myFields) { if (!superFields.contains(myFields)) { throw new IllegalArgumentException("Fields passed to constructor don't contain " + myFields); } } protected void validateFields(TupleEntry tupleEntry, Fields myFields) { if (!tupleEntry.getFields().contains(myFields)) { throw new IllegalArgumentException("Fields passed to constructor don't contain " + myFields); } } // Provide way for subclasses to fix up _tupleEntry with in-memory data. public void commit() {}; // Provide way for subclasses to fix up in-memory data when _tupleEntry changes. public void reset() {}; @Override public int hashCode() { final int prime = 31; int result = 1; result = prime * result + ((_tupleEntry == null) ? 0 : _tupleEntry.hashCode()); return result; } private void writeObject(ObjectOutputStream s) throws IOException { // Make sure anything in memory has been flushed to _tupleEntry commit(); TupleEntry te = getTupleEntry(); s.writeObject(te.getFields()); s.writeObject(te.getTuple()); } private void readObject(ObjectInputStream s) throws IOException, ClassNotFoundException { Fields fields = (Fields)s.readObject(); Tuple tuple = (Tuple)s.readObject(); // Don't check field consistency, as serialization has created an empty Datum (no fields // defined) so they won't match up. setTupleEntry(new TupleEntry(fields, tuple), false); } @Override public boolean equals(Object obj) { if (this == obj) return true; if (obj == null) return false; if (getClass() != obj.getClass()) return false; BaseDatum other = (BaseDatum) obj; if (_tupleEntry == null) { return other._tupleEntry == null; } else if (!_tupleEntry.getFields().equals(other._tupleEntry.getFields())) { return false; } else if (!_tupleEntry.getTuple().equals(other._tupleEntry.getTuple())) { return false; } return true; } @SuppressWarnings("unchecked") public static <T extends BaseDatum> T copy(T datum) { try { T result = (T)datum.getClass().newInstance(); result.setTupleEntry(datum.getTupleEntry()); return datum; } catch (Exception e) { throw new RuntimeException(e); } } /** * Create a unique field name * * Combines the class name and the user-defined field name. The format we use is * "fnXXX_YYY", where XXX is the class name and YYY is the field name. This gives us * names that are generally unique (so we avoid collisions in CoGroups), and are safe * to use with expressions compiled by Janino (for ExpressionFunction and ExpressionFilter) * * @param clazz * @param field * @return */ @SuppressWarnings("rawtypes") public static String fieldName(Class clazz, String field) { return String.format("fn%s_%s", clazz.getSimpleName(), field); } @SuppressWarnings("unchecked") public static Fields getSuperFields(Class<? extends BaseDatum> clazz) { try { Class<?> superClass = clazz.getSuperclass(); if (superClass.equals(BaseDatum.class)) { return new Fields(); } else { Class<? extends BaseDatum> superClazz = (Class<? extends BaseDatum>)clazz.getSuperclass(); BaseDatum datum = superClazz.newInstance(); return datum.getFields(); } } catch (Exception e) { throw new RuntimeException(e); } } }