/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.piggybank.evaluation;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.junit.Before;
import org.junit.Test;
public class TestOver {
/**
 * Checks the string form of the schema returned by {@code Over.outputSchema}
 * for the no-argument constructor and for the constructor arguments
 * "chararray", "Int" and "DOUBLE". The input schema is always a bag of
 * integers.
 */
@Test
public void testSchema() throws Exception {
    // No constructor argument: the inner field is reported as NULL.
    Over untyped = new Over();
    Schema untypedIn = Schema.generateNestedSchema(DataType.BAG, DataType.INTEGER);
    Schema untypedOut = untyped.outputSchema(untypedIn);
    assertEquals("{{NULL}}", untypedOut.toString());

    // "chararray" constructor argument.
    Over asChararray = new Over("chararray");
    Schema chararrayIn = Schema.generateNestedSchema(DataType.BAG, DataType.INTEGER);
    Schema chararrayOut = asChararray.outputSchema(chararrayIn);
    assertEquals("{{chararray}}", chararrayOut.toString());

    // "Int" is given in mixed case; the schema prints it in lower case.
    Over asInt = new Over("Int");
    Schema intIn = Schema.generateNestedSchema(DataType.BAG, DataType.INTEGER);
    Schema intOut = asInt.outputSchema(intIn);
    assertEquals("{{int}}", intOut.toString());

    // "DOUBLE" is given in upper case; the schema prints it in lower case.
    Over asDouble = new Over("DOUBLE");
    Schema doubleIn = Schema.generateNestedSchema(DataType.BAG, DataType.INTEGER);
    Schema doubleOut = asDouble.outputSchema(doubleIn);
    assertEquals("{{double}}", doubleOut.toString());
}
/**
 * Feeds {@code Over.exec} a series of malformed argument tuples and checks
 * the exact {@link ExecException} message for each one: a non-bag first
 * argument, a single-argument tuple, a non-string second argument, and
 * non-integer third and fourth arguments.
 */
@Test
public void testBadInput() throws Exception {
    // Case 1: arg 1 is a chararray instead of a bag.
    Over func = new Over();
    boolean rejected = false;
    Tuple notABag = TupleFactory.getInstance().newTuple();
    notABag.append("Mary had a little lamb");
    notABag.append("count");
    notABag.append(0);
    notABag.append(0);
    try {
        func.exec(notABag);
    } catch (ExecException e) {
        rejected = true;
        assertEquals("Over expected a bag for arg 1 but received chararray",
                e.getMessage());
    }
    assertTrue(rejected);

    // Case 2: a bag and nothing else.
    func = new Over();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    int added = 0;
    while (added < 10) {
        Tuple row = TupleFactory.getInstance().newTuple(1);
        row.set(0, 1);
        inbag.add(row);
        added++;
    }
    // The same argument tuple is grown and patched for the remaining cases.
    Tuple args = TupleFactory.getInstance().newTuple();
    args.append(inbag);
    rejected = false;
    try {
        func.exec(args);
    } catch (ExecException e) {
        rejected = true;
        assertEquals("Over expected 2 or more inputs but received 1",
                e.getMessage());
    }
    assertTrue(rejected);

    // Case 3: arg 2 is an int instead of a string.
    func = new Over();
    args.append(1);
    rejected = false;
    try {
        func.exec(args);
    } catch (ExecException e) {
        rejected = true;
        assertEquals("Over expected a string for arg 2 but received int",
                e.getMessage());
    }
    assertTrue(rejected);

    // Case 4: arg 2 fixed, arg 3 is a chararray instead of an integer.
    func = new Over();
    args.set(1, "count");
    args.append("fred");
    rejected = false;
    try {
        func.exec(args);
    } catch (ExecException e) {
        rejected = true;
        assertEquals("Over expected an integer for arg 3 but received chararray",
                e.getMessage());
    }
    assertTrue(rejected);

    // Case 5: arg 3 fixed, arg 4 is a chararray instead of an integer.
    func = new Over();
    args.set(2, -1);
    args.append("fred");
    rejected = false;
    try {
        func.exec(args);
    } catch (ExecException e) {
        rejected = true;
        assertEquals("Over expected an integer for arg 4 but received chararray",
                e.getMessage());
    }
    assertTrue(rejected);
}
/**
 * Passes the unknown function name "fred" as the second argument and
 * expects {@code Over.exec} to throw an {@link ExecException} whose
 * message is "Unknown aggregate fred".
 */
@Test
public void testBagFunc() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        row.set(0, 1);
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(2);
    args.set(0, inbag);
    args.set(1, "fred");

    boolean caught = false;
    try {
        // The result is irrelevant here; only the exception matters.
        func.exec(args);
    } catch (ExecException ee) {
        caught = true;
        assertEquals("Unknown aggregate fred", ee.getMessage());
    }
    assertTrue("exec should reject the unknown aggregate 'fred'", caught);
}
/**
 * Runs "count" with window arguments (-1, -1) over a bag of ten
 * single-field tuples and expects ten output rows, each holding the
 * Long value 10.
 */
@Test
public void testCountNoWindow() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        row.set(0, 1);
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(4);
    args.set(0, inbag);
    args.set(1, "count");
    args.set(2, -1);
    args.set(3, -1);

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    // Long.valueOf replaces the deprecated Long(long) constructor.
    Long expected = Long.valueOf(10L);
    for (Tuple row : outbag) {
        assertEquals(1, row.size());
        assertEquals(expected, row.get(0));
    }
}
/**
 * Runs "count" with no window arguments over a bag of ten single-field
 * tuples and expects the output rows to hold the Long values 1 through 10
 * in iteration order.
 */
@Test
public void testCountPrecedingUnboundedToCurrent() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        row.set(0, 1);
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(2);
    args.set(0, inbag);
    args.set(1, "count");

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    long expectedCount = 1L;
    for (Tuple row : outbag) {
        assertEquals(1, row.size());
        // Long.valueOf replaces the deprecated Long(long) constructor.
        assertEquals(Long.valueOf(expectedCount), row.get(0));
        expectedCount++;
    }
}
/**
 * Runs "count" with window arguments (0, -1) over a bag of ten
 * single-field tuples and expects the output rows to hold the Long values
 * 10 down to 1 in iteration order.
 */
@Test
public void testCountCurrentToUnboundedFollowing() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        row.set(0, 1);
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(4);
    args.set(0, inbag);
    args.set(1, "count");
    args.set(2, 0);
    args.set(3, -1);

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    long expectedCount = 10L;
    for (Tuple row : outbag) {
        assertEquals(1, row.size());
        // Long.valueOf replaces the deprecated Long(long) constructor.
        assertEquals(Long.valueOf(expectedCount), row.get(0));
        expectedCount--;
    }
}
/**
 * Runs "sum(int)" with window arguments (3, 3) over a bag of ten tuples
 * that each hold the int 1. The expected Long sums, in iteration order,
 * are 4, 5, 6, 7, 7, 7, 7, 6, 5, 4.
 */
@Test
public void testThreeBeforeAndAfter() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        row.set(0, 1);
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(4);
    args.set(0, inbag);
    args.set(1, "sum(int)");
    args.set(2, 3);
    args.set(3, 3);

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    // 1-based position of the row being checked (not a sum).
    int position = 1;
    for (Tuple row : outbag) {
        assertEquals(1, row.size());
        long expectedSum;
        switch (position++) {
        case 1:
        case 10:
            expectedSum = 4L;
            break;
        case 2:
        case 9:
            expectedSum = 5L;
            break;
        case 3:
        case 8:
            expectedSum = 6L;
            break;
        case 4:
        case 5:
        case 6:
        case 7:
            expectedSum = 7L;
            break;
        default:
            // More rows were iterated than the size check above allows.
            throw new RuntimeException("We shouldn't be here, sum is "
                    + position);
        }
        // Long.valueOf replaces the deprecated Long(long) constructor.
        assertEquals(Long.valueOf(expectedSum), row.get(0));
    }
}
/**
 * Runs "sum(double)" with window arguments (-1, -1) over ten tuples that
 * each hold the double 1.0 and expects every output row to hold the
 * Double value 10.0.
 */
@Test
public void testSumDouble() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        row.set(0, 1.0);
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(4);
    args.set(0, inbag);
    args.set(1, "sum(double)");
    args.set(2, -1);
    args.set(3, -1);

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    // Double.valueOf replaces the deprecated Double(double) constructor.
    Double expected = Double.valueOf(10.0);
    for (Tuple row : outbag) {
        assertEquals(1, row.size());
        assertEquals(expected, row.get(0));
    }
}
/**
 * Runs "sum(bytearray)" with window arguments (-1, -1) over ten tuples
 * that each hold a {@link DataByteArray} built from "1" and expects every
 * output row to hold the Double value 10.0.
 */
@Test
public void testSumByteArray() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        row.set(0, new DataByteArray("1"));
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(4);
    args.set(0, inbag);
    args.set(1, "sum(bytearray)");
    args.set(2, -1);
    args.set(3, -1);

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    // Double.valueOf replaces the deprecated Double(double) constructor.
    Double expected = Double.valueOf(10.0);
    for (Tuple row : outbag) {
        assertEquals(1, row.size());
        assertEquals(expected, row.get(0));
    }
}
/**
 * Runs "sum(float)" with window arguments (-1, -1) over ten tuples that
 * each hold the float 1.0f and expects every output row to hold the
 * Double value 10.0.
 */
@Test
public void testSumFloat() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        row.set(0, 1.0f);
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(4);
    args.set(0, inbag);
    args.set(1, "sum(float)");
    args.set(2, -1);
    args.set(3, -1);

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    // Double.valueOf replaces the deprecated Double(double) constructor.
    Double expected = Double.valueOf(10.0);
    for (Tuple row : outbag) {
        assertEquals(1, row.size());
        assertEquals(expected, row.get(0));
    }
}
/**
 * Runs "sum(int)" with window arguments (-1, -1) over ten tuples that
 * each hold the int 1 and expects every output row to hold the Long
 * value 10.
 */
@Test
public void testSumInt() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        row.set(0, 1);
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(4);
    args.set(0, inbag);
    args.set(1, "sum(int)");
    args.set(2, -1);
    args.set(3, -1);

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    // Long.valueOf replaces the deprecated Long(long) constructor.
    Long expected = Long.valueOf(10L);
    for (Tuple row : outbag) {
        assertEquals(1, row.size());
        assertEquals(expected, row.get(0));
    }
}
/**
 * Runs "sum(long)" with window arguments (-1, -1) over ten tuples that
 * each hold the long 1L and expects every output row to hold the Long
 * value 10.
 */
@Test
public void testSumLong() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        row.set(0, 1L);
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(4);
    args.set(0, inbag);
    args.set(1, "sum(long)");
    args.set(2, -1);
    args.set(3, -1);

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    // Long.valueOf replaces the deprecated Long(long) constructor.
    Long expected = Long.valueOf(10L);
    for (Tuple row : outbag) {
        assertEquals(1, row.size());
        assertEquals(expected, row.get(0));
    }
}
/**
 * Runs "avg(double)" with window arguments (-1, -1) over ten tuples
 * holding the doubles 0.0 through 9.0 and expects every output row to
 * hold the Double value 4.5.
 */
@Test
public void testAvgDouble() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        row.set(0, (double) i);
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(4);
    args.set(0, inbag);
    args.set(1, "avg(double)");
    args.set(2, -1);
    args.set(3, -1);

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    // Double.valueOf replaces the deprecated Double(double) constructor.
    Double expected = Double.valueOf(4.5);
    for (Tuple row : outbag) {
        assertEquals(1, row.size());
        assertEquals(expected, row.get(0));
    }
}
/**
 * Runs "avg(bytearray)" with window arguments (-1, -1) over ten tuples
 * that each hold a {@link DataByteArray} built from "1" and expects every
 * output row to hold the Double value 1.0.
 */
@Test
public void testAvgByteArray() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        row.set(0, new DataByteArray("1"));
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(4);
    args.set(0, inbag);
    args.set(1, "avg(bytearray)");
    args.set(2, -1);
    args.set(3, -1);

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    // Double.valueOf replaces the deprecated Double(double) constructor.
    Double expected = Double.valueOf(1.0);
    for (Tuple row : outbag) {
        assertEquals(1, row.size());
        assertEquals(expected, row.get(0));
    }
}
/**
 * Runs "avg(float)" with window arguments (-1, -1) over ten tuples
 * holding the floats 0.0f through 9.0f and expects every output row to
 * hold the Double value 4.5.
 */
@Test
public void testAvgFloat() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        row.set(0, (float) i);
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(4);
    args.set(0, inbag);
    args.set(1, "avg(float)");
    args.set(2, -1);
    args.set(3, -1);

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    // Double.valueOf replaces the deprecated Double(double) constructor.
    Double expected = Double.valueOf(4.5);
    for (Tuple row : outbag) {
        assertEquals(1, row.size());
        assertEquals(expected, row.get(0));
    }
}
/**
 * Runs "avg(int)" with window arguments (-1, -1) over ten tuples holding
 * the ints 0 through 9 and expects every output row to hold the Double
 * value 4.5.
 */
@Test
public void testAvgInt() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        row.set(0, i);
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(4);
    args.set(0, inbag);
    args.set(1, "avg(int)");
    args.set(2, -1);
    args.set(3, -1);

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    // Double.valueOf replaces the deprecated Double(double) constructor.
    Double expected = Double.valueOf(4.5);
    for (Tuple row : outbag) {
        assertEquals(1, row.size());
        assertEquals(expected, row.get(0));
    }
}
/**
 * Runs "avg(long)" with window arguments (-1, -1) over ten tuples holding
 * the longs 0 through 9 and expects every output row to hold the Double
 * value 4.5.
 */
@Test
public void testAvgLong() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        row.set(0, (long) i);
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(4);
    args.set(0, inbag);
    args.set(1, "avg(long)");
    args.set(2, -1);
    args.set(3, -1);

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    // Double.valueOf replaces the deprecated Double(double) constructor.
    Double expected = Double.valueOf(4.5);
    for (Tuple row : outbag) {
        assertEquals(1, row.size());
        assertEquals(expected, row.get(0));
    }
}
/**
 * Runs "min(double)" with no window arguments over ten tuples holding the
 * doubles 0.0 through 9.0 and expects every output row to hold the Double
 * value 0.0.
 */
@Test
public void testMinDouble() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        row.set(0, (double) i);
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(2);
    args.set(0, inbag);
    args.set(1, "min(double)");

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    // Double.valueOf replaces the deprecated Double(double) constructor.
    Double expected = Double.valueOf(0.0);
    for (Tuple row : outbag) {
        assertEquals(1, row.size());
        assertEquals(expected, row.get(0));
    }
}
/**
 * Runs "min(bytearray)" with no window arguments over ten tuples holding
 * {@link DataByteArray} values built from the strings "0" through "9" and
 * expects every output row to hold the Double value 0.0.
 */
@Test
public void testMinByteArray() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        // Integer.toString avoids the deprecated Integer(int) constructor.
        row.set(0, new DataByteArray(Integer.toString(i)));
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(2);
    args.set(0, inbag);
    args.set(1, "min(bytearray)");

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    // Double.valueOf replaces the deprecated Double(double) constructor.
    Double expected = Double.valueOf(0.0);
    for (Tuple row : outbag) {
        assertEquals(1, row.size());
        assertEquals(expected, row.get(0));
    }
}
/**
 * Runs "min(float)" with no window arguments over ten tuples holding the
 * floats 0.0f through 9.0f and expects every output row to hold the Float
 * value 0.0f.
 */
@Test
public void testMinFloat() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        row.set(0, (float) i);
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(2);
    args.set(0, inbag);
    args.set(1, "min(float)");

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    // Float.valueOf(float) replaces the deprecated Float(double) constructor;
    // the literal is written as a float because valueOf has no double overload.
    Float expected = Float.valueOf(0.0f);
    for (Tuple row : outbag) {
        assertEquals(1, row.size());
        assertEquals(expected, row.get(0));
    }
}
/**
 * Runs "min(int)" with no window arguments over ten tuples holding the
 * ints 0 through 9 and expects every output row to hold the Integer
 * value 0.
 */
@Test
public void testMinInt() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        row.set(0, i);
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(2);
    args.set(0, inbag);
    args.set(1, "min(int)");

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    // Integer.valueOf replaces the deprecated Integer(int) constructor.
    Integer expected = Integer.valueOf(0);
    for (Tuple row : outbag) {
        assertEquals(1, row.size());
        assertEquals(expected, row.get(0));
    }
}
/**
 * Runs "min(long)" with no window arguments over ten tuples holding the
 * longs 0 through 9 and expects every output row to hold the Long
 * value 0.
 */
@Test
public void testMinLong() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        row.set(0, (long) i);
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(2);
    args.set(0, inbag);
    args.set(1, "min(long)");

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    // Long.valueOf replaces the deprecated Long(long) constructor.
    Long expected = Long.valueOf(0L);
    for (Tuple row : outbag) {
        assertEquals(1, row.size());
        assertEquals(expected, row.get(0));
    }
}
/**
 * Runs "min(chararray)" with no window arguments over ten tuples holding
 * the strings "0" through "9" and expects every output row to hold "0".
 */
@Test
public void testMinString() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        // Integer.toString avoids the deprecated Integer(int) constructor.
        row.set(0, Integer.toString(i));
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(2);
    args.set(0, inbag);
    args.set(1, "min(chararray)");

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    for (Tuple row : outbag) {
        assertEquals(1, row.size());
        assertEquals("0", row.get(0));
    }
}
/**
 * Runs "max(double)" with no window arguments over ten tuples holding the
 * doubles 0.0 through 9.0 and expects the output rows to hold the Double
 * values 0.0, 1.0, ... 9.0 in iteration order.
 */
@Test
public void testMaxDouble() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        row.set(0, (double) i);
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(2);
    args.set(0, inbag);
    args.set(1, "max(double)");

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    int expectedMax = 0;
    for (Tuple row : outbag) {
        assertEquals(1, row.size());
        // Double.valueOf replaces the deprecated Double(double) constructor.
        assertEquals(Double.valueOf((double) expectedMax), row.get(0));
        expectedMax++;
    }
}
/**
 * Runs "max(bytearray)" with no window arguments over ten tuples holding
 * {@link DataByteArray} values built from the strings "0" through "9" and
 * expects the output rows to hold the Double values 0.0, 1.0, ... 9.0 in
 * iteration order.
 */
@Test
public void testMaxByteArray() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        // Integer.toString avoids the deprecated Integer(int) constructor.
        row.set(0, new DataByteArray(Integer.toString(i)));
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(2);
    args.set(0, inbag);
    args.set(1, "max(bytearray)");

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    int expectedMax = 0;
    for (Tuple row : outbag) {
        assertEquals(1, row.size());
        // Double.valueOf replaces the deprecated Double(double) constructor.
        assertEquals(Double.valueOf((double) expectedMax), row.get(0));
        expectedMax++;
    }
}
/**
 * Runs "max(float)" with no window arguments over ten tuples holding the
 * floats 0.0f through 9.0f and expects the output rows to hold the Float
 * values 0.0f, 1.0f, ... 9.0f in iteration order.
 */
@Test
public void testMaxFloat() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        row.set(0, (float) i);
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(2);
    args.set(0, inbag);
    args.set(1, "max(float)");

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    int expectedMax = 0;
    for (Tuple row : outbag) {
        assertEquals(1, row.size());
        // Float.valueOf replaces the deprecated Float(float) constructor.
        assertEquals(Float.valueOf((float) expectedMax), row.get(0));
        expectedMax++;
    }
}
/**
 * Runs "max(int)" with no window arguments over ten tuples holding the
 * ints 0 through 9 and expects the output rows to hold the Integer values
 * 0, 1, ... 9 in iteration order.
 */
@Test
public void testMaxInt() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        row.set(0, i);
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(2);
    args.set(0, inbag);
    args.set(1, "max(int)");

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    int expectedMax = 0;
    for (Tuple row : outbag) {
        assertEquals(1, row.size());
        // Integer.valueOf replaces the deprecated Integer(int) constructor.
        assertEquals(Integer.valueOf(expectedMax), row.get(0));
        expectedMax++;
    }
}
/**
 * Runs "max(long)" with no window arguments over ten tuples holding the
 * longs 0 through 9 and expects the output rows to hold the Long values
 * 0, 1, ... 9 in iteration order.
 */
@Test
public void testMaxLong() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        row.set(0, (long) i);
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(2);
    args.set(0, inbag);
    args.set(1, "max(long)");

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    long expectedMax = 0L;
    for (Tuple row : outbag) {
        assertEquals(1, row.size());
        // Long.valueOf replaces the deprecated Long(long) constructor.
        assertEquals(Long.valueOf(expectedMax), row.get(0));
        expectedMax++;
    }
}
/**
 * Runs "max(chararray)" with window arguments (-1, -1) over ten tuples
 * holding the strings "0" through "9" and expects every output row to
 * hold "9".
 */
@Test
public void testMaxString() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        // Integer.toString avoids the deprecated Integer(int) constructor.
        row.set(0, Integer.toString(i));
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(4);
    args.set(0, inbag);
    args.set(1, "max(chararray)");
    args.set(2, -1);
    args.set(3, -1);

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    for (Tuple row : outbag) {
        assertEquals(1, row.size());
        assertEquals("9", row.get(0));
    }
}
/**
 * Runs "row_number" with window arguments (-1, -1) over ten tuples and
 * expects the output rows to hold the Integer values 1 through 10 in
 * iteration order.
 */
@Test
public void testRowNumber() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        row.set(0, (double) i);
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(4);
    args.set(0, inbag);
    args.set(1, "row_number");
    args.set(2, -1);
    args.set(3, -1);

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    int expectedRowNumber = 1;
    for (Tuple row : outbag) {
        assertEquals(1, row.size());
        // Integer.valueOf replaces the deprecated Integer(int) constructor.
        assertEquals(Integer.valueOf(expectedRowNumber), row.get(0));
        expectedRowNumber++;
    }
}
/**
 * Runs "first_value" with no window arguments over ten tuples holding the
 * doubles 0.0 through 9.0 and expects every output row to hold the Double
 * value 0.0.
 */
@Test
public void testFirstValue() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        row.set(0, (double) i);
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(2);
    args.set(0, inbag);
    args.set(1, "first_value");

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    // Double.valueOf replaces the deprecated Double(double) constructor.
    Double expected = Double.valueOf(0.0);
    for (Tuple row : outbag) {
        assertEquals(1, row.size());
        assertEquals(expected, row.get(0));
    }
}
/**
 * Runs "last_value" with no window arguments over ten tuples holding the
 * ints 0 through 9 and expects the output rows to hold the Integer values
 * 0, 1, ... 9 in iteration order.
 */
@Test
public void testLastValue() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        row.set(0, i);
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(2);
    args.set(0, inbag);
    args.set(1, "last_value");

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    int expectedLast = 0;
    for (Tuple row : outbag) {
        assertEquals(1, row.size());
        // Integer.valueOf replaces the deprecated Integer(int) constructor.
        assertEquals(Integer.valueOf(expectedLast), row.get(0));
        expectedLast++;
    }
}
/**
 * Runs "lead" with window arguments (-1, -1) and no further arguments
 * over ten tuples holding the ints 0 through 9. The first nine output
 * rows are expected to hold the Integer values 1 through 9; the last row
 * is expected to hold null.
 */
@Test
public void testLeadDefaults() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        row.set(0, i);
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(4);
    args.set(0, inbag);
    args.set(1, "lead");
    args.set(2, -1);
    args.set(3, -1);

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    // Value expected in the current output row while it is still < 10.
    int expectedLead = 1;
    for (Tuple row : outbag) {
        assertEquals(1, row.size());
        if (expectedLead < 10) {
            // Integer.valueOf replaces the deprecated Integer(int) constructor.
            assertEquals(Integer.valueOf(expectedLead), row.get(0));
            expectedLead++;
        } else {
            assertNull(row.get(0));
        }
    }
}
/**
 * Runs "lead" with window arguments (-1, -1) and a fifth argument of 3
 * over ten tuples holding the ints 0 through 9. The first seven output
 * rows are expected to hold the Integer values 3 through 9; the remaining
 * rows are expected to hold null.
 */
@Test
public void testLeadWithRowsAheadNoDefault() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        row.set(0, i);
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(5);
    args.set(0, inbag);
    args.set(1, "lead");
    args.set(2, -1);
    args.set(3, -1);
    args.set(4, 3);

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    // Value expected in the current output row while it is still < 10.
    int expectedLead = 3;
    for (Tuple row : outbag) {
        assertEquals(1, row.size());
        if (expectedLead < 10) {
            // Integer.valueOf replaces the deprecated Integer(int) constructor.
            assertEquals(Integer.valueOf(expectedLead), row.get(0));
            expectedLead++;
        } else {
            assertNull(row.get(0));
        }
    }
}
/**
 * Runs "lead" with window arguments (-1, -1), a fifth argument of 3 and a
 * sixth argument of 99 over ten tuples holding the ints 0 through 9. The
 * first seven output rows are expected to hold the Integer values 3
 * through 9; the remaining rows are expected to hold the Integer 99.
 */
@Test
public void testLeadWithRowsAheadDefault() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        row.set(0, i);
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(6);
    args.set(0, inbag);
    args.set(1, "lead");
    args.set(2, -1);
    args.set(3, -1);
    args.set(4, 3);
    args.set(5, 99);

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    // Integer.valueOf replaces the deprecated Integer(int) constructor.
    Integer fallback = Integer.valueOf(99);
    // Value expected in the current output row while it is still < 10.
    int expectedLead = 3;
    for (Tuple row : outbag) {
        assertEquals(1, row.size());
        if (expectedLead < 10) {
            assertEquals(Integer.valueOf(expectedLead), row.get(0));
            expectedLead++;
        } else {
            assertEquals(fallback, row.get(0));
        }
    }
}
/**
 * Runs "lag" with window arguments (-1, -1) and no further arguments over
 * ten tuples holding the ints 0 through 9. The first output row is
 * expected to hold null; the following rows are expected to hold the
 * Integer values 0 through 8.
 */
@Test
public void testLagDefaults() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        row.set(0, i);
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(4);
    args.set(0, inbag);
    args.set(1, "lag");
    args.set(2, -1);
    args.set(3, -1);

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    // Negative while the current row has no lagged value to compare with.
    int expectedLag = -1;
    for (Tuple row : outbag) {
        assertEquals(1, row.size());
        if (expectedLag >= 0) {
            // Integer.valueOf replaces the deprecated Integer(int) constructor.
            assertEquals(Integer.valueOf(expectedLag), row.get(0));
        } else {
            assertNull(row.get(0));
        }
        // A failed assertion aborts the test, so no finally block is needed.
        expectedLag++;
    }
}
/**
 * Runs "lag" with window arguments (-1, -1) and a fifth argument of 3
 * over ten tuples holding the ints 0 through 9. The first three output
 * rows are expected to hold null; the following rows are expected to hold
 * the Integer values 0 through 6.
 */
@Test
public void testLagWithRowsBehindNoDefault() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        row.set(0, i);
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(5);
    args.set(0, inbag);
    args.set(1, "lag");
    args.set(2, -1);
    args.set(3, -1);
    args.set(4, 3);

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    // Negative while the current row has no lagged value to compare with.
    int expectedLag = -3;
    for (Tuple row : outbag) {
        assertEquals(1, row.size());
        if (expectedLag >= 0) {
            // Integer.valueOf replaces the deprecated Integer(int) constructor.
            assertEquals(Integer.valueOf(expectedLag), row.get(0));
        } else {
            assertNull(row.get(0));
        }
        // A failed assertion aborts the test, so no finally block is needed.
        expectedLag++;
    }
}
/**
 * Runs "lag" with window arguments (-1, -1), a fifth argument of 3 and a
 * sixth argument of 99 over ten tuples holding the ints 0 through 9. The
 * first three output rows are expected to hold the Integer 99; the
 * following rows are expected to hold the Integer values 0 through 6.
 */
@Test
public void testLagWithRowsBehindDefault() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        row.set(0, i);
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(6);
    args.set(0, inbag);
    args.set(1, "lag");
    args.set(2, -1);
    args.set(3, -1);
    args.set(4, 3);
    args.set(5, 99);

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    // Integer.valueOf replaces the deprecated Integer(int) constructor.
    Integer fallback = Integer.valueOf(99);
    // Negative while the current row is expected to hold the fallback value.
    int expectedLag = -3;
    for (Tuple row : outbag) {
        assertEquals(1, row.size());
        if (expectedLag >= 0) {
            assertEquals(Integer.valueOf(expectedLag), row.get(0));
        } else {
            assertEquals(fallback, row.get(0));
        }
        // A failed assertion aborts the test, so no finally block is needed.
        expectedLag++;
    }
}
/**
 * Calls "rank" with only the bag and the function name and expects an
 * {@link ExecException} whose message contains "Rank args must contain".
 */
@Test
public void testRankNoArgs() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        row.set(0, i);
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(2);
    args.set(0, inbag);
    args.set(1, "rank");

    boolean caught = false;
    try {
        // The result is irrelevant here; only the exception matters.
        func.exec(args);
    } catch (ExecException ee) {
        caught = true;
        assertTrue(ee.getMessage().contains("Rank args must contain"));
    }
    assertTrue("exec should reject rank without column arguments", caught);
}
/**
 * Calls "rank" with window arguments (-1, -1) and the string "fred" as
 * the fifth argument, and expects an {@link ExecException} whose message
 * contains "Rank expected column number".
 */
@Test
public void testRankBadArgs() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        row.set(0, i);
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(5);
    args.set(0, inbag);
    args.set(1, "rank");
    args.set(2, -1);
    args.set(3, -1);
    args.set(4, "fred");

    boolean caught = false;
    try {
        // The result is irrelevant here; only the exception matters.
        func.exec(args);
    } catch (ExecException ee) {
        caught = true;
        assertTrue(ee.getMessage().contains("Rank expected column number"));
    }
    assertTrue("exec should reject a non-numeric rank column argument", caught);
}
/**
 * Runs "rank" with window arguments (-1, -1) and a fifth argument of 0
 * over ten two-field tuples whose first field is 0 through 9 and whose
 * second field is a random int. The output rows are expected to hold
 * 1 through 10 in iteration order.
 */
@Test
public void testRankSimple() throws Exception {
    Over func = new Over();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    Random random = new Random();
    int key = 0;
    while (key < 10) {
        Tuple row = TupleFactory.getInstance().newTuple(2);
        row.set(0, key);
        row.set(1, random.nextInt(100));
        inbag.add(row);
        key++;
    }
    Tuple args = TupleFactory.getInstance().newTuple(5);
    args.set(0, inbag);
    args.set(1, "rank");
    args.set(2, -1);
    args.set(3, -1);
    args.set(4, 0);

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    int expectedRank = 1;
    Iterator<Tuple> rows = outbag.iterator();
    while (rows.hasNext()) {
        Tuple row = rows.next();
        assertEquals(1, row.size());
        assertEquals(expectedRank, row.get(0));
        expectedRank++;
    }
}
/**
 * Runs "rank" with window arguments (-1, -1) and a fifth argument of 0
 * over seven two-field tuples whose first fields are null, null, 2, 5, 5,
 * 5, 7 (the second field is a random int). The first seven output rows
 * are expected to hold 1, 1, 3, 4, 4, 4, 7.
 */
@Test
public void testRankWithRepeatValues() throws Exception {
    Over func = new Over();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    Random random = new Random();

    // First-field values, in insertion order; nulls are deliberate.
    Integer[] keys = {null, null, 2, 5, 5, 5, 7};
    for (Integer key : keys) {
        Tuple row = TupleFactory.getInstance().newTuple(2);
        row.set(0, key);
        row.set(1, random.nextInt(100));
        inbag.add(row);
    }

    Tuple args = TupleFactory.getInstance().newTuple(5);
    args.set(0, inbag);
    args.set(1, "rank");
    args.set(2, -1);
    args.set(3, -1);
    args.set(4, 0);

    DataBag outbag = func.exec(args);

    assertEquals(7, outbag.size());
    int[] expectedRanks = {1, 1, 3, 4, 4, 4, 7};
    Iterator<Tuple> rows = outbag.iterator();
    for (int expectedRank : expectedRanks) {
        assertEquals(expectedRank, rows.next().get(0));
    }
}
/**
 * Runs "rank" with window arguments (-1, -1) and column arguments 0 and 2
 * over seven three-field tuples. Fields 0 and 2 are (null, "a"),
 * (null, "b"), (2, "b"), (5, "b"), (5, "c"), (5, "c"), (7, "z"); field 1
 * is a random int. The first seven output rows are expected to hold
 * 1, 2, 3, 4, 5, 5, 7.
 */
@Test
public void testRankWithMultiKey() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    Random r = new Random();

    // Field 0 and field 2 of each input tuple, in insertion order.
    Integer[] firstKeys = {null, null, 2, 5, 5, 5, 7};
    String[] secondKeys = {"a", "b", "b", "b", "c", "c", "z"};
    for (int i = 0; i < firstKeys.length; i++) {
        Tuple row = tupleFactory.newTuple(3);
        row.set(0, firstKeys[i]);
        row.set(1, r.nextInt(100));
        row.set(2, secondKeys[i]);
        // Every field is populated before the tuple is handed to the bag, so
        // the test does not depend on the bag keeping a live reference.
        inbag.add(row);
    }

    Tuple args = tupleFactory.newTuple(6);
    args.set(0, inbag);
    args.set(1, "rank");
    args.set(2, -1);
    args.set(3, -1);
    args.set(4, 0);
    args.set(5, 2);

    DataBag outbag = func.exec(args);

    assertEquals(7, outbag.size());
    int[] expectedRanks = {1, 2, 3, 4, 5, 5, 7};
    Iterator<Tuple> iter = outbag.iterator();
    for (int expectedRank : expectedRanks) {
        assertEquals(expectedRank, iter.next().get(0));
    }
}
/**
 * Runs "dense_rank" with window arguments (-1, -1) and a fifth argument
 * of 0 over ten two-field tuples whose first field is 0 through 9 and
 * whose second field is a random int. The output rows are expected to
 * hold 1 through 10 in iteration order.
 */
@Test
public void testDenseRankSimple() throws Exception {
    Over func = new Over();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    Random random = new Random();
    int key = 0;
    while (key < 10) {
        Tuple row = TupleFactory.getInstance().newTuple(2);
        row.set(0, key);
        row.set(1, random.nextInt(100));
        inbag.add(row);
        key++;
    }
    Tuple args = TupleFactory.getInstance().newTuple(5);
    args.set(0, inbag);
    args.set(1, "dense_rank");
    args.set(2, -1);
    args.set(3, -1);
    args.set(4, 0);

    DataBag outbag = func.exec(args);

    assertEquals(10, outbag.size());
    int expectedRank = 1;
    Iterator<Tuple> rows = outbag.iterator();
    while (rows.hasNext()) {
        Tuple row = rows.next();
        assertEquals(1, row.size());
        assertEquals(expectedRank, row.get(0));
        expectedRank++;
    }
}
/**
 * Runs "dense_rank" with window arguments (-1, -1) and a fifth argument
 * of 0 over seven two-field tuples whose first fields are null, null, 2,
 * 5, 5, 5, 7 (the second field is a random int). The first seven output
 * rows are expected to hold 1, 1, 2, 3, 3, 3, 4.
 */
@Test
public void testDenseRankWithRepeatValues() throws Exception {
    Over func = new Over();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    Random random = new Random();

    // First-field values, in insertion order; nulls are deliberate.
    Integer[] keys = {null, null, 2, 5, 5, 5, 7};
    for (Integer key : keys) {
        Tuple row = TupleFactory.getInstance().newTuple(2);
        row.set(0, key);
        row.set(1, random.nextInt(100));
        inbag.add(row);
    }

    Tuple args = TupleFactory.getInstance().newTuple(5);
    args.set(0, inbag);
    args.set(1, "dense_rank");
    args.set(2, -1);
    args.set(3, -1);
    args.set(4, 0);

    DataBag outbag = func.exec(args);

    assertEquals(7, outbag.size());
    int[] expectedRanks = {1, 1, 2, 3, 3, 3, 4};
    Iterator<Tuple> rows = outbag.iterator();
    for (int expectedRank : expectedRanks) {
        assertEquals(expectedRank, rows.next().get(0));
    }
}
/**
 * Runs "dense_rank" with window arguments (-1, -1) and column arguments 0
 * and 2 over seven three-field tuples. Fields 0 and 2 are (null, "a"),
 * (null, "b"), (2, "b"), (5, "b"), (5, "c"), (5, "c"), (7, "z"); field 1
 * is a random int. The first seven output rows are expected to hold
 * 1, 2, 3, 4, 5, 5, 6.
 */
@Test
public void testDenseRankWithMultiKey() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    Random r = new Random();

    // Field 0 and field 2 of each input tuple, in insertion order.
    Integer[] firstKeys = {null, null, 2, 5, 5, 5, 7};
    String[] secondKeys = {"a", "b", "b", "b", "c", "c", "z"};
    for (int i = 0; i < firstKeys.length; i++) {
        Tuple row = tupleFactory.newTuple(3);
        row.set(0, firstKeys[i]);
        row.set(1, r.nextInt(100));
        row.set(2, secondKeys[i]);
        // Every field is populated before the tuple is handed to the bag, so
        // the test does not depend on the bag keeping a live reference.
        inbag.add(row);
    }

    Tuple args = tupleFactory.newTuple(6);
    args.set(0, inbag);
    args.set(1, "dense_rank");
    args.set(2, -1);
    args.set(3, -1);
    args.set(4, 0);
    args.set(5, 2);

    DataBag outbag = func.exec(args);

    assertEquals(7, outbag.size());
    int[] expectedRanks = {1, 2, 3, 4, 5, 5, 6};
    Iterator<Tuple> iter = outbag.iterator();
    for (int expectedRank : expectedRanks) {
        assertEquals(expectedRank, iter.next().get(0));
    }
}
/**
 * Calls "ntile" with only the bag and the function name and expects an
 * {@link ExecException} whose message contains "Ntile args must contain".
 */
@Test
public void testNtileNoArgs() throws Exception {
    Over func = new Over();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = tupleFactory.newTuple(1);
        row.set(0, i);
        inbag.add(row);
    }
    Tuple args = tupleFactory.newTuple(2);
    args.set(0, inbag);
    args.set(1, "ntile");

    boolean caught = false;
    try {
        // The result is irrelevant here; only the exception matters.
        func.exec(args);
    } catch (ExecException ee) {
        caught = true;
        assertTrue(ee.getMessage().contains("Ntile args must contain"));
    }
    assertTrue("exec should reject ntile without a bucket argument", caught);
}
/**
 * Calls "ntile" with the string "fred" in argument position 4 and expects
 * an ExecException whose message contains "Ntile expected integer".
 */
@Test
public void testNtileBadArgs() throws Exception {
    Over func = new Over();
    DataBag inbag = BagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple t = TupleFactory.getInstance().newTuple(1);
        t.set(0, i);
        inbag.add(t);
    }
    Tuple t = TupleFactory.getInstance().newTuple(5);
    t.set(0, inbag);
    t.set(1, "ntile");
    t.set(2, -1);
    t.set(3, -1);
    // A non-integer value where the other ntile tests in this file pass an int.
    t.set(4, "fred");
    boolean caught = false;
    try {
        // The result is irrelevant here; only the exception matters.
        func.exec(t);
    } catch (ExecException ioe) {
        caught = true;
        String message = ioe.getMessage();
        // Guard against a null message so a mismatch is reported as an
        // assertion failure rather than a NullPointerException.
        assertTrue("Unexpected exception message: " + message,
                message != null && message.contains("Ntile expected integer"));
    }
    assertTrue("Expected an ExecException for a non-integer ntile argument", caught);
}
/**
 * Runs "ntile" with 4 as the final argument over ten rows and checks the
 * value produced for each output row: rows 0-2 yield 1, rows 3-4 yield 2,
 * rows 5-7 yield 3 and rows 8-9 yield 4.
 */
@Test
public void testNtileFour() throws Exception {
    Over func = new Over();
    DataBag input = BagFactory.getInstance().newDefaultBag();
    Random rand = new Random();
    TupleFactory tuples = TupleFactory.getInstance();
    int row = 0;
    while (row < 10) {
        Tuple rec = tuples.newTuple(2);
        rec.set(0, row);
        rec.set(1, rand.nextInt(100));
        input.add(rec);
        row++;
    }

    Tuple args = tuples.newTuple(5);
    args.set(0, input);
    args.set(1, "ntile");
    args.set(2, -1);
    args.set(3, -1);
    args.set(4, 4);

    DataBag result = func.exec(args);
    assertEquals(10, result.size());

    Iterator<Tuple> it = result.iterator();
    for (int index = 0; it.hasNext(); index++) {
        Tuple out = it.next();
        assertEquals(1, out.size());
        // Expected value by output position.
        int expected;
        if (index >= 8) {
            expected = 4;
        } else if (index >= 5) {
            expected = 3;
        } else if (index >= 3) {
            expected = 2;
        } else {
            expected = 1;
        }
        assertEquals(expected, out.get(0));
    }
}
/**
 * Runs "ntile" with 10 as the final argument over ten rows and expects the
 * output rows to carry 1, 2, ..., 10 in iteration order.
 */
@Test
public void testNtileTen() throws Exception {
    Over func = new Over();
    DataBag input = BagFactory.getInstance().newDefaultBag();
    Random rand = new Random();
    TupleFactory tuples = TupleFactory.getInstance();
    int row = 0;
    while (row < 10) {
        Tuple rec = tuples.newTuple(2);
        rec.set(0, row);
        rec.set(1, rand.nextInt(100));
        input.add(rec);
        row++;
    }

    Tuple args = tuples.newTuple(5);
    args.set(0, input);
    args.set(1, "ntile");
    args.set(2, -1);
    args.set(3, -1);
    args.set(4, 10);

    DataBag result = func.exec(args);
    assertEquals(10, result.size());

    Iterator<Tuple> it = result.iterator();
    for (int expected = 1; it.hasNext(); expected++) {
        assertEquals(expected, it.next().get(0));
    }
}
/**
 * Runs "ntile" with 100 as the final argument over only ten rows and
 * expects the output rows to carry 1, 2, ..., 10 in iteration order.
 */
@Test
public void testNtileHundred() throws Exception {
    Over func = new Over();
    DataBag input = BagFactory.getInstance().newDefaultBag();
    Random rand = new Random();
    TupleFactory tuples = TupleFactory.getInstance();
    int row = 0;
    while (row < 10) {
        Tuple rec = tuples.newTuple(2);
        rec.set(0, row);
        rec.set(1, rand.nextInt(100));
        input.add(rec);
        row++;
    }

    Tuple args = tuples.newTuple(5);
    args.set(0, input);
    args.set(1, "ntile");
    args.set(2, -1);
    args.set(3, -1);
    args.set(4, 100);

    DataBag result = func.exec(args);
    assertEquals(10, result.size());

    Iterator<Tuple> it = result.iterator();
    for (int expected = 1; it.hasNext(); expected++) {
        assertEquals(expected, it.next().get(0));
    }
}
/**
 * Runs "percent_rank" over ten rows whose column 0 holds 0..9 and expects
 * single-field output rows carrying position / 9.0 for positions 0..9.
 */
@Test
public void testPercentRankSimple() throws Exception {
    Over func = new Over();
    DataBag input = BagFactory.getInstance().newDefaultBag();
    Random rand = new Random();
    TupleFactory tuples = TupleFactory.getInstance();
    int row = 0;
    while (row < 10) {
        Tuple rec = tuples.newTuple(2);
        rec.set(0, row);
        rec.set(1, rand.nextInt(100));
        input.add(rec);
        row++;
    }

    Tuple args = tuples.newTuple(5);
    args.set(0, input);
    args.set(1, "percent_rank");
    args.set(2, -1);
    args.set(3, -1);
    args.set(4, 0);

    DataBag result = func.exec(args);
    assertEquals(10, result.size());

    Iterator<Tuple> it = result.iterator();
    for (int position = 0; it.hasNext(); position++) {
        Tuple out = it.next();
        assertEquals(1, out.size());
        double expected = position / 9.0;
        assertEquals(expected, out.get(0));
    }
}
/**
 * Runs "percent_rank" over seven rows whose column 0 contains nulls and
 * repeated values, and checks the exact value produced for each row.
 * Rows with equal column 0 values are expected to share the same result.
 */
@Test
public void testPercentRankWithRepeatValues() throws Exception {
    Over func = new Over();
    DataBag input = BagFactory.getInstance().newDefaultBag();
    Random rand = new Random();
    TupleFactory tuples = TupleFactory.getInstance();

    // Column 0 of each input row, in insertion order; column 1 is random filler.
    Integer[] keys = {null, null, 2, 5, 5, 5, 7};
    for (Integer key : keys) {
        Tuple rec = tuples.newTuple(2);
        rec.set(0, key);
        rec.set(1, rand.nextInt(100));
        input.add(rec);
    }

    Tuple args = tuples.newTuple(5);
    args.set(0, input);
    args.set(1, "percent_rank");
    args.set(2, -1);
    args.set(3, -1);
    args.set(4, 0);

    DataBag result = func.exec(args);
    assertEquals(7, result.size());

    // Expected output, one entry per row in iteration order.
    double[] expected = {0.0, 0.0, 0.3333333333333333, 0.5, 0.5, 0.5, 1.0};
    Iterator<Tuple> it = result.iterator();
    for (double want : expected) {
        Tuple out = it.next();
        assertEquals(want, out.get(0));
    }
}
/**
 * Runs "cume_dist" over ten rows whose column 0 holds 0..9 and expects
 * single-field output rows carrying position / 10.0 for positions 1..10.
 */
@Test
public void testCumeDistSimple() throws Exception {
    Over func = new Over();
    DataBag input = BagFactory.getInstance().newDefaultBag();
    Random rand = new Random();
    TupleFactory tuples = TupleFactory.getInstance();
    int row = 0;
    while (row < 10) {
        Tuple rec = tuples.newTuple(2);
        rec.set(0, row);
        rec.set(1, rand.nextInt(100));
        input.add(rec);
        row++;
    }

    Tuple args = tuples.newTuple(5);
    args.set(0, input);
    args.set(1, "cume_dist");
    args.set(2, -1);
    args.set(3, -1);
    args.set(4, 0);

    DataBag result = func.exec(args);
    assertEquals(10, result.size());

    Iterator<Tuple> it = result.iterator();
    for (int position = 1; it.hasNext(); position++) {
        Tuple out = it.next();
        assertEquals(1, out.size());
        double expected = position / 10.0;
        assertEquals(expected, out.get(0));
    }
}
/**
 * Runs "cume_dist" over seven rows whose column 0 contains nulls and
 * repeated values, and expects single-field output rows carrying
 * position / 7.0 for positions 1..7.
 */
@Test
public void testCumeDistWithRepeatValues() throws Exception {
    Over func = new Over();
    DataBag input = BagFactory.getInstance().newDefaultBag();
    Random rand = new Random();
    TupleFactory tuples = TupleFactory.getInstance();

    // Column 0 of each input row, in insertion order; column 1 is random filler.
    Integer[] keys = {null, null, 2, 5, 5, 5, 7};
    for (Integer key : keys) {
        Tuple rec = tuples.newTuple(2);
        rec.set(0, key);
        rec.set(1, rand.nextInt(100));
        input.add(rec);
    }

    Tuple args = tuples.newTuple(5);
    args.set(0, input);
    args.set(1, "cume_dist");
    args.set(2, -1);
    args.set(3, -1);
    args.set(4, 0);

    DataBag result = func.exec(args);
    assertEquals(7, result.size());

    Iterator<Tuple> it = result.iterator();
    for (int position = 1; it.hasNext(); position++) {
        Tuple out = it.next();
        assertEquals(1, out.size());
        double expected = position / 7.0;
        assertEquals(expected, out.get(0));
    }
    // NOTE(review): a disabled alternative set of per-row expectations was
    // kept alongside this test, in which rows with equal column 0 values
    // share one result:
    //   0.14285714285714285, 0.14285714285714285, 0.42857142857142855,
    //   0.5714285714285714, 0.5714285714285714, 0.5714285714285714, 1.0
    // Confirm which behaviour is intended for repeated values.
}
}