/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package parquet.pig.summary; import static org.junit.Assert.assertEquals; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.pig.ExecType; import org.apache.pig.PigServer; import org.apache.pig.builtin.mock.Storage; import org.apache.pig.builtin.mock.Storage.Data; import org.apache.pig.data.BagFactory; import org.apache.pig.data.DataBag; import org.apache.pig.data.Tuple; import org.apache.pig.data.TupleFactory; import org.codehaus.jackson.JsonParseException; import org.codehaus.jackson.map.JsonMappingException; import org.junit.Test; public class TestSummary { private static final TupleFactory tf = TupleFactory.getInstance(); private static final BagFactory bf = BagFactory.getInstance(); private static final DataBag TEST_BAG = b( t(b(t(1l), t(2l, m("foo", "bar")), t(3))), t(b(t(1l), t(1l), t(3, "blah"))), t(b(t(1l), t(2l), t(2, "bloh"))), t(b(t(1l), t(2, "bloh"))), t(b(t("foo"), t(2, "bloh"))), t(b(t(b(t("bar"))), t(2, "bloh"))), t(b(t(b(t("bar"))), t(1l, m("foo", "bar", "baz", "buz")), t(2, "bloh"))), t(), t(null, null) ); public static Tuple t(Object... objects) { return tf.newTuple(Arrays.asList(objects)); } public static DataBag b(Tuple... tuples) { return bf.newDefaultBag(Arrays.asList(tuples)); } public static Map<String, Object> m(Object... objects) { Map<String, Object> m = new HashMap<String, Object>(); for (int i = 0; i < objects.length; i += 2) { m.put((String)objects[i], objects[i + 1]); } return m; } @Test public void testEvalFunc() throws IOException { Summary summary = new Summary(); String result = summary.exec(t(TEST_BAG)); validate(result, 1); } @Test public void testAlgebraic() throws IOException { Summary.Initial initial = new Summary.Initial(); Summary.Intermediate intermediate1 = new Summary.Intermediate(); Summary.Intermediate intermediate2 = new Summary.Intermediate(); Summary.Final finall = new Summary.Final(); DataBag combinedRedIn = bf.newDefaultBag(); for (int r = 0; r < 5; r++) { DataBag combinedMapOut = bf.newDefaultBag(); for (int m = 0; m < 5; m++) { DataBag mapOut = bf.newDefaultBag(); for (Tuple t : TEST_BAG) { Tuple exec = initial.exec(t(b(t))); mapOut.add(exec); } Tuple exec = intermediate1.exec(t(mapOut)); validate((String)exec.get(0), 1); combinedMapOut.add(exec); } combinedRedIn.add(intermediate2.exec(t(combinedMapOut))); } String result = finall.exec(t(combinedRedIn)); validate(result, 5*5); } private void validate(String result, int factor) throws JsonParseException, JsonMappingException, IOException { TupleSummaryData s = SummaryData.fromJSON(result, TupleSummaryData.class); // System.out.println(SummaryData.toPrettyJSON(s)); assertEquals(9 * factor, s.getCount()); assertEquals(1 * factor, s.getFields().get(0).getNull().longValue()); assertEquals(7 * factor, s.getFields().get(0).getBag().getCount()); assertEquals(18 * factor, s.getFields().get(0).getBag().getContent().getTuple().getFields().get(0).getCount()); MapSummaryData map = s.getFields().get(0).getBag().getContent().getTuple().getFields().get(1).getMap(); assertEquals(2 * factor, map.getCount()); assertEquals(3 * factor, map.getKey().getCount()); } @Test public void testPigScript() throws Exception { PigServer pigServer = new PigServer(ExecType.LOCAL); Data data = Storage.resetData(pigServer); List<Tuple> list = new ArrayList<Tuple>(); for (int i = 0; i < 1002; i++) { list.add(t("a", "b" + i, 1l, b(t("a", m("foo", "bar"))))); } data.set("in", "a:chararray, a1:chararray, b:int, c:{t:(a2:chararray, b2:[])}", list); pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();"); pigServer.registerQuery("B = FOREACH (GROUP A ALL) GENERATE "+Summary.class.getName()+"(A);"); pigServer.registerQuery("STORE B INTO 'out' USING mock.Storage();"); System.out.println(data.get("out").get(0).get(0)); TupleSummaryData s = SummaryData.fromJSON((String)data.get("out").get(0).get(0), TupleSummaryData.class); System.out.println(s); } @Test public void testMaxIsZero() throws Exception { PigServer pigServer = new PigServer(ExecType.LOCAL); Data data = Storage.resetData(pigServer); List<Tuple> list = new ArrayList<Tuple>(); for (int i = 0; i < 10; i++) { list.add(t("a", i - 9)); } data.set("in", "a:chararray, b:int", list); pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();"); pigServer.registerQuery("B = FOREACH (GROUP A ALL) GENERATE " + Summary.class.getName() + "(A);"); pigServer.registerQuery("STORE B INTO 'out' USING mock.Storage();"); TupleSummaryData s = SummaryData.fromJSON((String) data.get("out").get(0).get(0), TupleSummaryData.class); System.out.println(s); assertEquals(0, s.getFields().get(1).getNumber().getValue().getMax(), 0); } }