/*
 * Copyright 2012 NGDATA nv
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.lilyproject.mapreduce.test;

import java.io.File;
import java.io.IOException;

import com.google.common.collect.Lists;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.lilyproject.hadooptestfw.TestHelper;
import org.lilyproject.lilyservertestfw.LilyProxy;
import org.lilyproject.mapreduce.LilyMapReduceUtil;
import org.lilyproject.mapreduce.testjobs.Test1Mapper;
import org.lilyproject.repository.api.FieldType;
import org.lilyproject.repository.api.IdGenerator;
import org.lilyproject.repository.api.LRepository;
import org.lilyproject.repository.api.LTable;
import org.lilyproject.repository.api.QName;
import org.lilyproject.repository.api.RecordScan;
import org.lilyproject.repository.api.RecordType;
import org.lilyproject.repository.api.RepositoryManager;
import org.lilyproject.repository.api.Scope;
import org.lilyproject.repository.api.TypeManager;
import org.lilyproject.repository.model.impl.RepositoryModelImpl;
import org.lilyproject.util.test.TestHomeUtil;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.lilyproject.repository.model.api.RepositoryDefinition.RepositoryLifecycleState;

public class MapReduceTest {
    private static LilyProxy lilyProxy;
    private static File tmpDir;

    @BeforeClass
    public static void setUpBeforeClass() throws Exception {
        TestHelper.setupLogging();
        lilyProxy = new LilyProxy();

        //
        // Make multiple record table splits, so that our MR job will have multiple map tasks
        //
        if (lilyProxy.getMode() == LilyProxy.Mode.CONNECT || lilyProxy.getMode() == LilyProxy.Mode.HADOOP_CONNECT) {
            // The record table will likely already exist and not be recreated, hence we won't be able to change
            // the number of regions. Therefore, drop the table.
            Configuration conf = HBaseConfiguration.create();
            conf.set("hbase.zookeeper.quorum", "localhost");
            HBaseAdmin hbaseAdmin = new HBaseAdmin(conf);
            if (hbaseAdmin.tableExists("record")) {
                hbaseAdmin.disableTable("record");
                hbaseAdmin.deleteTable("record");
            }
            HConnectionManager.deleteConnection(hbaseAdmin.getConfiguration(), true);
        }

        // Temp dir where we will create the conf dir
        tmpDir = TestHomeUtil.createTestHome("lily-mapreduce-test-");

        File customConfDir = setupConfDirectory(tmpDir);
        String oldCustomConfDir = setProperty("lily.conf.customdir", customConfDir.getAbsolutePath());
        String oldRestoreTemplate = setProperty("lily.lilyproxy.restoretemplatedir", "false");

        try {
            lilyProxy.start();
        } finally {
            // Make sure the properties won't be used by later-running tests
            setProperty("lily.conf.customdir", oldCustomConfDir);
            setProperty("lily.lilyproxy.restoretemplatedir", oldRestoreTemplate);
        }
    }

    /**
     * Sets a system property and returns its previous value; a null value removes the property.
     */
    private static String setProperty(String name, String value) {
        String oldValue = System.getProperty(name);
        if (value == null) {
            System.getProperties().remove(name);
        } else {
            System.setProperty(name, value);
        }
        return oldValue;
    }

    @AfterClass
    public static void tearDownAfterClass() throws Exception {
        if (lilyProxy != null) {
            lilyProxy.stop();
        }

        TestHomeUtil.cleanupTestHome(tmpDir);

        // Guard against lilyProxy being null in case setUpBeforeClass failed before assigning it
        if (lilyProxy != null && (lilyProxy.getMode() == LilyProxy.Mode.CONNECT
                || lilyProxy.getMode() == LilyProxy.Mode.HADOOP_CONNECT)) {
            // We're in connect mode, drop the record table again so that the remainder of the tests
            // don't have the overhead of the extra splits
            Configuration conf = HBaseConfiguration.create();
            conf.set("hbase.zookeeper.quorum", "localhost");
            HBaseAdmin hbaseAdmin = new HBaseAdmin(conf);
            if (hbaseAdmin.tableExists("record")) {
                hbaseAdmin.disableTable("record");
                hbaseAdmin.deleteTable("record");
            }
            HConnectionManager.deleteConnection(hbaseAdmin.getConfiguration(), true);
        }
    }

    private static File setupConfDirectory(File tmpDir) throws Exception {
        File confDir = new File(tmpDir, "conf");

        File generalConfDir = new File(confDir, "general");
        FileUtils.forceMkdir(generalConfDir);

        // Write configuration to pre-split the record table into 5 regions
        String tablesXml = "<tables xmlns:conf='http://lilyproject.org/configuration' conf:inherit='shallow'>" +
                "<table name='record'><splits><regionCount>5</regionCount>" +
                "<splitKeys>\\x00020,\\x00040,\\x00060,\\x00080</splitKeys></splits></table>" +
                "</tables>";

        FileUtils.writeStringToFile(new File(generalConfDir, "tables.xml"), tablesXml, "UTF-8");

        return confDir;
    }

    @Test
    public void testOne() throws Exception {
        RepositoryManager repoMgr = lilyProxy.getLilyServerProxy().getClient();

        //
        // Create some content in the default table of the default repository
        //
        LRepository repository = repoMgr.getDefaultRepository();
        TypeManager typeManager = repository.getTypeManager();
        IdGenerator idGenerator = repository.getIdGenerator();
        LTable table = repository.getDefaultTable();

        FieldType ft1 = typeManager.createFieldType("STRING", new QName("test", "field1"), Scope.NON_VERSIONED);
        RecordType rt1 = typeManager.recordTypeBuilder()
                .defaultNamespace("test")
                .name("rt1")
                .fieldEntry().use(ft1).add()
                .create();

        for (int i = 0; i < 100; i++) {
            table.recordBuilder()
                    .id(String.format("%1$03d", i))
                    .recordType(rt1.getName())
                    .field(ft1.getName(), "foo bar bar")
                    .create();
        }

        //
        // Also create some content in another repository with two tables
        //
        RepositoryModelImpl repositoryModel = new RepositoryModelImpl(lilyProxy.getLilyServerProxy().getZooKeeper());
        String repositoryName = "otherrepo";
        repositoryModel.create(repositoryName);
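        // Repository creation is asynchronous: wait (up to a minute) until the new repository is fully active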
        assertTrue(repositoryModel.waitUntilRepositoryInState(repositoryName, RepositoryLifecycleState.ACTIVE, 60000L));
        repositoryModel.close();

        LRepository repository2 = repoMgr.getRepository(repositoryName);
        repository2.getTableManager().createTable("foobar");
        LTable table2 = repository2.getTable("foobar");
        LTable table3 = repository2.getDefaultTable();

        for (int i = 0; i < 50; i++) {
            table2.recordBuilder()
                    .id(String.valueOf(i))
                    .recordType(rt1.getName())
                    .field(ft1.getName(), "foo bar bar")
                    .create();
        }

        for (int i = 0; i < 20; i++) {
            table3.recordBuilder()
                    .id(String.valueOf(i))
                    .recordType(rt1.getName())
                    .field(ft1.getName(), "foo bar bar")
                    .create();
        }

        //
        // Launch MapReduce job on default repository
        //
        {
            Configuration config = HBaseConfiguration.create();
            config.set("mapred.job.tracker", "localhost:8021");
            config.set("fs.defaultFS", "hdfs://localhost:8020");

            Job job = new Job(config, "Test1");
            job.setJarByClass(Test1Mapper.class);
            job.setMapperClass(Test1Mapper.class);
            job.setOutputFormatClass(NullOutputFormat.class);
            job.setNumReduceTasks(0);

            LilyMapReduceUtil.initMapperJob(null, "localhost", repository, job);

            boolean b = job.waitForCompletion(true);
            if (!b) {
                throw new IOException("error with job!");
            }

            // Verify some counters
            assertEquals("Number of launched map tasks", 5L, getTotalLaunchedMaps(job));
            assertEquals("Number of input records", 100L, getTotalInputRecords(job));
        }

        //
        // Launch a job with a custom scan on the default repository
        //
        {
            Configuration config = HBaseConfiguration.create();
            config.set("mapred.job.tracker", "localhost:8021");
            config.set("fs.defaultFS", "hdfs://localhost:8020");

            Job job = new Job(config, "Test1");
            job.setJarByClass(Test1Mapper.class);
            job.setMapperClass(Test1Mapper.class);
            job.setOutputFormatClass(NullOutputFormat.class);
            job.setNumReduceTasks(0);

            RecordScan scan = new RecordScan();
            scan.setStartRecordId(idGenerator.newRecordId(String.format("%1$03d", 15)));
            scan.setStopRecordId(idGenerator.newRecordId(String.format("%1$03d", 25)));

            LilyMapReduceUtil.initMapperJob(scan, "localhost", repository, job);

            boolean b = job.waitForCompletion(true);
            if (!b) {
                throw new IOException("error with job!");
            }

            // Expect 2 map tasks: our scan crossed the 020 border
            assertEquals("Number of launched map tasks", 2L, getTotalLaunchedMaps(job));
            assertEquals("Number of input records", 10L, getTotalInputRecords(job));

            /*
            for (CounterGroup cgroup : job.getCounters()) {
                for (Counter counter : cgroup) {
                    System.out.println(cgroup.getName() + " -> " + counter.getName() + " = " + counter.getValue());
                }
            }
            */
        }

        //
        // Launch MapReduce job on the custom repository - over all tables
        //
        {
            Configuration config = HBaseConfiguration.create();
            config.set("mapred.job.tracker", "localhost:8021");
            config.set("fs.defaultFS", "hdfs://localhost:8020");

            Job job = new Job(config, "Test1");
            job.setJarByClass(Test1Mapper.class);
            job.setMapperClass(Test1Mapper.class);
            job.setOutputFormatClass(NullOutputFormat.class);
            job.setNumReduceTasks(0);

            LilyMapReduceUtil.initMapperJob(null, "localhost", repository2, job);

            boolean b = job.waitForCompletion(true);
            if (!b) {
                throw new IOException("error with job!");
            }

            // Verify some counters
            assertEquals("Number of input records", 70L, getTotalInputRecords(job));
        }

        //
        // Launch MapReduce job on the custom repository - over one specific table
        //
        {
            Configuration config = HBaseConfiguration.create();
            config.set("mapred.job.tracker", "localhost:8021");
            config.set("fs.defaultFS", "hdfs://localhost:8020");

            Job job = new Job(config, "Test1");
            job.setJarByClass(Test1Mapper.class);
            job.setMapperClass(Test1Mapper.class);
            job.setOutputFormatClass(NullOutputFormat.class);
            job.setNumReduceTasks(0);

            LilyMapReduceUtil.initMapperJob(null, "localhost", repository2, job, Lists.newArrayList("foobar"));

            boolean b = job.waitForCompletion(true);
            if (!b) {
                throw new IOException("error with job!");
            }

            // Verify some counters
            assertEquals("Number of input records", 50L, getTotalInputRecords(job));
        }
    }

    private long getTotalLaunchedMaps(Job job) throws IOException {
        return job.getCounters().findCounter("org.apache.hadoop.mapreduce.JobCounter",
                "TOTAL_LAUNCHED_MAPS").getValue();
    }

    private long getTotalInputRecords(Job job) throws IOException {
        return job.getCounters().findCounter("org.apache.hadoop.mapreduce.TaskCounter",
                "MAP_INPUT_RECORDS").getValue();
    }
}