/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package parquet.filter2.recordlevel;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.HashSet;
import org.junit.BeforeClass;
import org.junit.Test;
import parquet.example.data.Group;
import parquet.filter2.compat.FilterCompat;
import parquet.filter2.predicate.FilterPredicate;
import parquet.filter2.predicate.Operators.BinaryColumn;
import parquet.filter2.predicate.Operators.DoubleColumn;
import parquet.filter2.predicate.Operators.LongColumn;
import parquet.filter2.predicate.Statistics;
import parquet.filter2.predicate.UserDefinedPredicate;
import parquet.filter2.recordlevel.PhoneBookWriter.Location;
import parquet.filter2.recordlevel.PhoneBookWriter.PhoneNumber;
import parquet.filter2.recordlevel.PhoneBookWriter.User;
import parquet.io.api.Binary;
import static org.junit.Assert.assertEquals;
import static parquet.filter2.predicate.FilterApi.and;
import static parquet.filter2.predicate.FilterApi.binaryColumn;
import static parquet.filter2.predicate.FilterApi.doubleColumn;
import static parquet.filter2.predicate.FilterApi.longColumn;
import static parquet.filter2.predicate.FilterApi.eq;
import static parquet.filter2.predicate.FilterApi.gt;
import static parquet.filter2.predicate.FilterApi.not;
import static parquet.filter2.predicate.FilterApi.notEq;
import static parquet.filter2.predicate.FilterApi.or;
import static parquet.filter2.predicate.FilterApi.userDefined;
public class TestRecordLevelFilters {
public static List<User> makeUsers() {
List<User> users = new ArrayList<User>();
users.add(new User(17, null, null, null));
users.add(new User(18, "bob", null, null));
users.add(new User(19, "alice", new ArrayList<PhoneNumber>(), null));
users.add(new User(20, "thing1", Arrays.asList(new PhoneNumber(5555555555L, null)), null));
users.add(new User(27, "thing2", Arrays.asList(new PhoneNumber(1111111111L, "home")), null));
users.add(new User(28, "popular", Arrays.asList(
new PhoneNumber(1111111111L, "home"),
new PhoneNumber(2222222222L, null),
new PhoneNumber(3333333333L, "mobile")
), null));
users.add(new User(30, null, Arrays.asList(new PhoneNumber(1111111111L, "home")), null));
for (int i = 100; i < 200; i++) {
Location location = null;
if (i % 3 == 1) {
location = new Location((double) i, (double) i * 2);
}
if (i % 3 == 2) {
location = new Location((double) i, null);
}
users.add(new User(i, "p" + i, Arrays.asList(new PhoneNumber(i, "cell")), location));
}
return users;
}
private static File phonebookFile;
private static List<User> users;
@BeforeClass
public static void setup() throws IOException {
users = makeUsers();
phonebookFile = PhoneBookWriter.writeToFile(users);
}
private static interface UserFilter {
boolean keep(User u);
}
private static List<Group> getExpected(UserFilter f) {
List<Group> expected = new ArrayList<Group>();
for (User u : users) {
if (f.keep(u)) {
expected.add(PhoneBookWriter.groupFromUser(u));
}
}
return expected;
}
private static void assertFilter(List<Group> found, UserFilter f) {
List<Group> expected = getExpected(f);
assertEquals(expected.size(), found.size());
Iterator<Group> expectedIter = expected.iterator();
Iterator<Group> foundIter = found.iterator();
while (expectedIter.hasNext()) {
assertEquals(expectedIter.next().toString(), foundIter.next().toString());
}
}
@Test
public void testNoFilter() throws Exception {
List<Group> found = PhoneBookWriter.readFile(phonebookFile, FilterCompat.NOOP);
assertFilter(found, new UserFilter() {
public boolean keep(User u) {
return true;
}
});
}
@Test
public void testAllFilter() throws Exception {
BinaryColumn name = binaryColumn("name");
FilterPredicate pred = eq(name, Binary.fromString("no matches"));
List<Group> found = PhoneBookWriter.readFile(phonebookFile, FilterCompat.get(pred));
assertEquals(new ArrayList<Group>(), found);
}
@Test
public void testNameNotNull() throws Exception {
BinaryColumn name = binaryColumn("name");
FilterPredicate pred = notEq(name, null);
List<Group> found = PhoneBookWriter.readFile(phonebookFile, FilterCompat.get(pred));
assertFilter(found, new UserFilter() {
public boolean keep(User u) {
return u.getName() != null;
}
});
}
public static class StartWithP extends UserDefinedPredicate<Binary> {
@Override
public boolean keep(Binary value) {
if (value == null) {
return false;
}
return value.toStringUsingUTF8().startsWith("p");
}
@Override
public boolean canDrop(Statistics<Binary> statistics) {
return false;
}
@Override
public boolean inverseCanDrop(Statistics<Binary> statistics) {
return false;
}
}
public static class SetInFilter extends UserDefinedPredicate<Long> implements Serializable {
private HashSet<Long> hSet;
public SetInFilter(HashSet<Long> phSet) {
hSet = phSet;
}
@Override
public boolean keep(Long value) {
if (value == null) {
return false;
}
return hSet.contains(value);
}
@Override
public boolean canDrop(Statistics<Long> statistics) {
return false;
}
@Override
public boolean inverseCanDrop(Statistics<Long> statistics) {
return false;
}
}
@Test
public void testNameNotStartWithP() throws Exception {
BinaryColumn name = binaryColumn("name");
FilterPredicate pred = not(userDefined(name, StartWithP.class));
List<Group> found = PhoneBookWriter.readFile(phonebookFile, FilterCompat.get(pred));
assertFilter(found, new UserFilter() {
public boolean keep(User u) {
return u.getName() == null || !u.getName().startsWith("p");
}
});
}
@Test
public void testUserDefinedByInstance() throws Exception {
LongColumn name = longColumn("id");
final HashSet<Long> h = new HashSet<Long>();
h.add(20L);
h.add(27L);
h.add(28L);
FilterPredicate pred = userDefined(name, new SetInFilter(h));
List<Group> found = PhoneBookWriter.readFile(phonebookFile, FilterCompat.get(pred));
assertFilter(found, new UserFilter() {
public boolean keep(User u) {
return u != null && h.contains(u.getId());
}
});
}
@Test
public void testComplex() throws Exception {
BinaryColumn name = binaryColumn("name");
DoubleColumn lon = doubleColumn("location.lon");
DoubleColumn lat = doubleColumn("location.lat");
FilterPredicate pred = or(and(gt(lon, 150.0), notEq(lat, null)), eq(name, Binary.fromString("alice")));
List<Group> found = PhoneBookWriter.readFile(phonebookFile, FilterCompat.get(pred));
assertFilter(found, new UserFilter() {
public boolean keep(User u) {
String name = u.getName();
Double lat = null;
Double lon = null;
if (u.getLocation() != null) {
lat = u.getLocation().getLat();
lon = u.getLocation().getLon();
}
return (lon != null && lon > 150.0 && lat != null) || "alice".equals(name);
}
});
}
}