/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with this * work for additional information regarding copyright ownership. The ASF * licenses this file to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package org.apache.pig.backend.hadoop.accumulo; import java.io.File; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.concurrent.TimeUnit; import org.apache.accumulo.core.client.BatchWriter; import org.apache.accumulo.core.client.BatchWriterConfig; import org.apache.accumulo.core.client.Connector; import org.apache.accumulo.core.client.ZooKeeperInstance; import org.apache.accumulo.core.client.admin.TableOperations; import org.apache.accumulo.core.client.security.tokens.PasswordToken; import org.apache.accumulo.core.data.Mutation; import org.apache.accumulo.minicluster.MiniAccumuloCluster; import org.apache.accumulo.minicluster.MiniAccumuloConfig; import org.apache.commons.io.FileUtils; import org.apache.log4j.Logger; import org.apache.pig.PigServer; import org.apache.pig.data.Tuple; import org.apache.pig.test.Util; import org.junit.AfterClass; import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.io.Files; public class TestAccumuloPigCluster { @SuppressWarnings("unchecked") private static final List<ImmutableMap<String, String>> AIRPORTS = Lists .newArrayList( ImmutableMap.of("code", "SJC", "name", "San Jose"), ImmutableMap.of("code", "SFO", "name", "San Francisco"), ImmutableMap.of("code", "MDO", "name", "Orlando"), ImmutableMap.of("code", "MDW", "name", "Chicago-Midway"), ImmutableMap.of("code", "JFK", "name", "JFK International"), ImmutableMap.of("code", "BWI", "name", "Baltimore-Washington")); @SuppressWarnings("unchecked") private static final List<ImmutableMap<String, String>> flightData = Lists .newArrayList( ImmutableMap.of("origin", "BWI", "destination", "SFO"), ImmutableMap.of("origin", "BWI", "destination", "SJC"), ImmutableMap.of("origin", "MDW", "destination", "MDO"), ImmutableMap.of("origin", "MDO", "destination", "SJC"), ImmutableMap.of("origin", "SJC", "destination", "JFK"), ImmutableMap.of("origin", "JFK", "destination", "MDW")); private static final Logger log = Logger .getLogger(TestAccumuloPigCluster.class); private static final File tmpdir = Files.createTempDir(); private static MiniAccumuloCluster accumuloCluster; private PigServer pig; @BeforeClass public static void setupClusters() throws Exception { MiniAccumuloConfig macConf = new MiniAccumuloConfig(tmpdir, "password"); macConf.setNumTservers(1); accumuloCluster = new MiniAccumuloCluster(macConf); accumuloCluster.start(); } @Before public void beforeTest() throws Exception { pig = new PigServer(Util.getLocalTestMode()); } @AfterClass public static void stopClusters() throws Exception { accumuloCluster.stop(); Thread.sleep(5000); FileUtils.deleteDirectory(tmpdir); } private void loadTestData() throws Exception { ZooKeeperInstance inst = new ZooKeeperInstance( accumuloCluster.getInstanceName(), accumuloCluster.getZooKeepers()); Connector c = inst.getConnector("root", new PasswordToken("password")); TableOperations tops = c.tableOperations(); if (!tops.exists("airports")) { tops.create("airports"); } if (!tops.exists("flights")) { tops.create("flights"); } BatchWriterConfig config = new BatchWriterConfig(); config.setMaxWriteThreads(1); config.setMaxLatency(100000l, TimeUnit.MILLISECONDS); config.setMaxMemory(10000l); BatchWriter bw = c.createBatchWriter("airports", config); try { int i = 1; for (Map<String, String> record : AIRPORTS) { Mutation m = new Mutation(Integer.toString(i)); for (Entry<String, String> entry : record.entrySet()) { m.put(entry.getKey(), "", entry.getValue()); } bw.addMutation(m); i++; } } finally { if (null != bw) { bw.close(); } } bw = c.createBatchWriter("flights", config); try { int i = 1; for (Map<String, String> record : flightData) { Mutation m = new Mutation(Integer.toString(i)); for (Entry<String, String> entry : record.entrySet()) { m.put(entry.getKey(), "", entry.getValue()); } bw.addMutation(m); i++; } } finally { if (null != bw) { bw.close(); } } } @Test public void test() throws Exception { loadTestData(); final String loadFlights = "flights = LOAD 'accumulo://flights?instance=" + accumuloCluster.getInstanceName() + "&user=root&password=password&zookeepers=" + accumuloCluster.getZooKeepers() + "' using org.apache.pig.backend.hadoop.accumulo.AccumuloStorage()" + " as (rowKey:chararray, column_map:map[]);"; final String loadAirports = "airports = LOAD 'accumulo://airports?instance=" + accumuloCluster.getInstanceName() + "&user=root&password=password&zookeepers=" + accumuloCluster.getZooKeepers() + "' using org.apache.pig.backend.hadoop.accumulo.AccumuloStorage()" + " as (rowKey:chararray, column_map:map[]);"; final String joinQuery = "joined = JOIN flights BY column_map#'origin', airports BY column_map#'code';"; pig.registerQuery(loadFlights); pig.registerQuery(loadAirports); pig.registerQuery(joinQuery); Iterator<Tuple> it = pig.openIterator("joined"); int i = 0; while (it.hasNext()) { Tuple t = it.next(); // id and map for each dataset we joined Assert.assertEquals(4, t.size()); Object o = t.get(1); Map<String, String> airport = null, flight = null; Assert.assertTrue(Map.class.isAssignableFrom(o.getClass())); @SuppressWarnings("unchecked") Map<String, String> data1 = (Map<String, String>) o; Assert.assertTrue(!data1.isEmpty()); if (data1.containsKey("origin")) { flight = data1; } else if (data1.containsKey("code")) { airport = data1; } else { Assert.fail("Received map which did not contain an expected key"); } o = t.get(3); Assert.assertTrue(Map.class.isAssignableFrom(o.getClass())); @SuppressWarnings("unchecked") Map<String, String> data2 = (Map<String, String>) o; Assert.assertTrue(!data2.isEmpty()); if (null == flight && data2.containsKey("origin")) { flight = data2; } else if (null == airport && data2.containsKey("code")) { airport = data2; } else { Assert.fail("Received map which did not contain an expected key"); } Assert.assertTrue(null != airport && null != flight); Assert.assertEquals(airport.get("code"), flight.get("origin")); i++; } Assert.assertEquals(6, i); } }