/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.pig.test.pigmix.udf; import java.io.IOException; import java.math.BigDecimal; import java.math.BigInteger; import java.util.HashMap; import java.util.Map; import org.apache.pig.LoadCaster; import org.apache.pig.ResourceSchema.ResourceFieldSchema; import org.apache.pig.backend.executionengine.ExecException; import org.apache.pig.builtin.PigStorage; import org.apache.pig.builtin.Utf8StorageConverter; import org.apache.pig.data.BagFactory; import org.apache.pig.data.DataBag; import org.apache.pig.data.TupleFactory; import org.apache.pig.data.Tuple; import org.joda.time.DateTime; /** * A load function for the performance tests. 
* <p>
* Top-level fields are split by {@link PigStorage} using ^A (U+0001) as the
* delimiter. Values are converted by the nested {@link Caster}, which decodes
* bags and bag-nested maps from a compact byte format and delegates every
* other conversion to {@link Utf8StorageConverter}.
*/
public class PigPerformanceLoader extends PigStorage {

    /** Byte that ends each element of a serialized bag (^B). */
    private static final byte BAG_ELEMENT_END = 2;

    /** Byte that ends each entry of a serialized map (^C). */
    private static final byte MAP_ENTRY_END = 3;

    BagFactory bagFactory;
    TupleFactory tupleFactory;

    /**
     * Creates a loader whose field delimiter is ^A.
     */
    public PigPerformanceLoader() {
        // Assume ^A as a delimiter. The control character is written as an
        // explicit escape so it cannot be lost by editors or text tooling.
        super("\u0001");
        bagFactory = BagFactory.getInstance();
        tupleFactory = TupleFactory.getInstance();
    }

    /**
     * Returns a fresh {@link Caster} on every call.
     *
     * @return the caster used to convert raw field bytes into Pig types
     * @throws IOException declared by the overridden method; not thrown here
     */
    @Override
    public LoadCaster getLoadCaster() throws IOException {
        return new Caster();
    }

    /**
     * Copies the bytes of {@code b} in the range {@code (start, end)}, i.e.
     * everything after the leading byte at {@code start} up to but excluding
     * {@code end}.
     *
     * <p>The result is sized to exactly the copied bytes so that decoded
     * values do not carry a trailing zero byte. An empty array is returned
     * when the range holds no bytes beyond the leading one, which also covers
     * truncated input where {@code start} lies past the end of {@code b}.
     *
     * @param b     source buffer
     * @param start index of the leading byte to skip
     * @param end   index one past the last byte to copy
     * @return a new array holding the copied bytes, never {@code null}
     */
    private static byte[] payloadAfterTag(byte[] b, int start, int end) {
        int length = end - start - 1;
        if (length <= 0) {
            return new byte[0];
        }
        byte[] payload = new byte[length];
        System.arraycopy(b, start + 1, payload, 0, length);
        return payload;
    }

    /**
     * Caster that understands the byte layout used for bags and bag-nested
     * maps and hands all other conversions to {@link Utf8StorageConverter}.
     */
    class Caster implements LoadCaster {

        Utf8StorageConverter helper = new Utf8StorageConverter();

        public Caster() {
        }

        /**
         * Decodes a bag whose elements are each terminated by ^B (the final
         * terminator may be absent). The first byte of every element is a
         * type tag and the remaining bytes are the value; each decoded value
         * is wrapped in a single-field tuple.
         *
         * @param b  serialized bag, may be {@code null}
         * @param fs ignored by this implementation
         * @return the decoded bag, or {@code null} if {@code b} is {@code null}
         * @throws IOException      if a value cannot be converted or stored
         * @throws RuntimeException if an element starts with an unknown type tag
         */
        @Override
        public DataBag bytesToBag(byte[] b, ResourceFieldSchema fs) throws IOException {
            if (b == null) {
                return null;
            }

            DataBag bag = bagFactory.newDefaultBag();

            int pos = 0;
            while (pos < b.length) {
                Tuple t = tupleFactory.newTuple(1);

                // Figure out how long until the next element in the list.
                int start = pos;
                while (pos < b.length && b[pos] != BAG_ELEMENT_END) {
                    pos++;
                }

                byte[] payload = payloadAfterTag(b, start, pos);

                // The first byte will tell us what type the field is.
                try {
                    switch (b[start]) {
                        case 'i': // 105
                            t.set(0, bytesToInteger(payload));
                            break;
                        case 'l': // 108
                            t.set(0, bytesToLong(payload));
                            break;
                        case 'f': // 102
                            t.set(0, bytesToFloat(payload));
                            break;
                        case 'd': // 100
                            t.set(0, bytesToDouble(payload));
                            break;
                        case 's': // 115
                            t.set(0, bytesToCharArray(payload));
                            break;
                        case 'm': // 109
                            t.set(0, bytesToMap(payload));
                            break;
                        case 'b': // 98
                            t.set(0, bytesToBag(payload, null));
                            break;
                        default:
                            throw new RuntimeException("unknown type " + b[start]);
                    }
                } catch (ExecException ee) {
                    throw new IOException(ee);
                }

                pos++; // move past the separator
                bag.add(t);
            }

            return bag;
        }

        /**
         * Decodes a map whose entries are each terminated by ^C (the final
         * terminator may be absent). Every entry is laid out as: one key
         * byte, one byte that is skipped, one further byte that is skipped,
         * then the value bytes. Values are always decoded as char arrays.
         *
         * <p>NOTE(review): the two skipped bytes are presumably a key/value
         * separator and a value type tag; confirm against the data generator.
         *
         * @param b serialized map, may be {@code null}
         * @return the decoded map, or {@code null} if {@code b} is {@code null}
         * @throws IOException if a value cannot be converted
         */
        public Map<String, Object> bytesToMap(byte[] b) throws IOException {
            if (b == null) {
                return null;
            }

            Map<String, Object> m = new HashMap<String, Object>(26);

            int pos = 0;
            while (pos < b.length) {
                // The key is always one character at the moment. It is decoded
                // as UTF-8 so the result does not depend on the platform
                // default charset.
                String key = new String(b, pos, 1, "UTF-8");
                pos += 2;

                int start = pos;
                while (pos < b.length && b[pos] != MAP_ENTRY_END) {
                    pos++;
                }

                byte[] payload = payloadAfterTag(b, start, pos);
                m.put(key, bytesToCharArray(payload));

                pos++; // move past ^C
            }

            return m;
        }

        /** Delegates to {@link Utf8StorageConverter}. */
        @Override
        public String bytesToCharArray(byte[] arg0) throws IOException {
            return helper.bytesToCharArray(arg0);
        }

        /** Delegates to {@link Utf8StorageConverter}. */
        @Override
        public Double bytesToDouble(byte[] arg0) throws IOException {
            return helper.bytesToDouble(arg0);
        }

        /** Delegates to {@link Utf8StorageConverter}. */
        @Override
        public Float bytesToFloat(byte[] arg0) throws IOException {
            return helper.bytesToFloat(arg0);
        }

        /** Delegates to {@link Utf8StorageConverter}. */
        @Override
        public Integer bytesToInteger(byte[] arg0) throws IOException {
            return helper.bytesToInteger(arg0);
        }

        /** Delegates to {@link Utf8StorageConverter}. */
        @Override
        public Long bytesToLong(byte[] arg0) throws IOException {
            return helper.bytesToLong(arg0);
        }

        /** Delegates to {@link Utf8StorageConverter}. */
        @Override
        public Tuple bytesToTuple(byte[] arg0, ResourceFieldSchema fs) throws IOException {
            return helper.bytesToTuple(arg0, fs);
        }

        /** Delegates to {@link Utf8StorageConverter}. */
        @Override
        public Boolean bytesToBoolean(byte[] arg0) throws IOException {
            return helper.bytesToBoolean(arg0);
        }

        /** Delegates to {@link Utf8StorageConverter}. */
        @Override
        public DateTime bytesToDateTime(byte[] arg0) throws IOException {
            return helper.bytesToDateTime(arg0);
        }

        /**
         * Delegates to {@link Utf8StorageConverter}.
         *
         * <p>NOTE(review): unlike {@link #bytesToBag(byte[], ResourceFieldSchema)},
         * this overload does not use the ^C-terminated layout handled by
         * {@link #bytesToMap(byte[])}; confirm that top-level map fields are
         * meant to be parsed by the helper.
         */
        @Override
        public Map<String, Object> bytesToMap(byte[] arg0, ResourceFieldSchema fs) throws IOException {
            return helper.bytesToMap(arg0, fs);
        }

        /** Delegates to {@link Utf8StorageConverter}. */
        @Override
        public BigInteger bytesToBigInteger(byte[] arg0) throws IOException {
            return helper.bytesToBigInteger(arg0);
        }

        /** Delegates to {@link Utf8StorageConverter}. */
        @Override
        public BigDecimal bytesToBigDecimal(byte[] arg0) throws IOException {
            return helper.bytesToBigDecimal(arg0);
        }
    }
}