/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.aliyun.odps.mapred.bridge;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang.RandomStringUtils;
import org.junit.Before;
import org.junit.Test;

import com.aliyun.odps.data.Record;
import com.aliyun.odps.data.TableInfo;
import com.aliyun.odps.data.TableInfo.TableInfoBuilder;
import com.aliyun.odps.io.LongWritable;
import com.aliyun.odps.io.Text;
import com.aliyun.odps.mapred.MapperBase;
import com.aliyun.odps.mapred.conf.BridgeJobConf;
import com.aliyun.odps.mapred.example.WordCount;
import com.aliyun.odps.mapred.utils.InputUtils;
import com.aliyun.odps.mapred.utils.OutputUtils;
import com.aliyun.odps.mapred.utils.SchemaUtils;

/**
 * Unit tests for {@link LotMapperUDTF}, driven through {@link MockMapperUDTF}, a subclass that
 * serves input rows from an in-memory array and records every forwarded row in a list.
 */
public class LotMapperUDTFTest {

  /** Number of random rows pushed through the mapper by {@link #profile()}. */
  private static final int PROFILE_ROW_COUNT = 100000;

  private MockExecutionContext ctx;
  private BridgeJobConf conf;

  /** Two input rows holding six whitespace-separated words in total. */
  Object[][] testData = new Object[][]{new Object[]{new Text("to be ")},
                                       new Object[]{new Text("or not to be")}};

  /** Two input rows in which every one of the seven words is the same key, "1". */
  Object[][] testDataSame = new Object[][]{new Object[]{new Text("1 1 1")},
                                           new Object[]{new Text("1 1 1 1")}};

  /**
   * {@link LotMapperUDTF} subclass used as a test double: input rows come from an array and
   * forwarded rows are collected instead of being emitted.
   */
  class MockMapperUDTF extends LotMapperUDTF {

    /** Every row passed to {@link #forward(Object...)}, in call order. */
    List<Object[]> forwarded = new ArrayList<Object[]>();
    /** Rows handed out one at a time by {@link #getNextRowWapper()}. */
    Object[][] testData;
    /** Index of the next row of {@link #testData} to hand out. */
    int testDataIndex = 0;

    public MockMapperUDTF(BridgeJobConf conf, Object[][] testData) {
      this.conf = conf;
      this.testData = testData;
    }

    /** Returns the live list of rows forwarded so far. */
    public List<Object[]> getForwarded() {
      return forwarded;
    }

    /**
     * Records a copy of the forwarded row. The array is cloned so that later changes to the
     * caller's array do not alter what was recorded.
     */
    @Override
    public void forward(Object... o) {
      forwarded.add(o.clone());
    }

    /**
     * Returns the next unread input row, or {@code null} once all rows have been handed out.
     * NOTE(review): presumably this replaces the row source of the parent class; confirm against
     * LotMapperUDTF.
     */
    public Object[] getNextRowWapper() {
      if (testDataIndex < testData.length) {
        return testData[testDataIndex++];
      }
      return null;
    }

    /**
     * Replaces the input rows and restarts reading from the first row.
     *
     * <p>The read position is reset together with the data; otherwise a mock that had already
     * handed out rows would read the new array from a stale offset.
     */
    public void setTestData(Object[][] testData) {
      this.testData = testData;
      this.testDataIndex = 0;
    }
  }

  /** Builds a fresh job configuration with one input table and one labelled output table. */
  @Before
  public void setUp() {
    ctx = new MockExecutionContext();
    conf = new BridgeJobConf();
    TableInfo tblInfo = new TableInfo.TableInfoBuilder().projectName("prj").tableName("tbl")
        .cols(new String[]{"col1"}).build();
    TableInfo output = new TableInfo.TableInfoBuilder().projectName("prj").tableName("out")
        .label("foo").build();
    ctx.setTableInfo(tblInfo.getProjectName() + "." + tblInfo.getTableName());
    InputUtils.addTable(tblInfo, conf);
    OutputUtils.addTable(output, conf);
    conf.setOutputSchema(SchemaUtils.fromString("word:string,count:bigint"), "foo");
    conf.setInputSchema(tblInfo, SchemaUtils.fromString("word:string"));
  }

  /** Sets the map output key/value schemas shared by every test: (word:string, count:bigint). */
  private void useWordCountMapOutputSchemas() {
    conf.setMapOutputKeySchema(SchemaUtils.fromString("word:string"));
    conf.setMapOutputValueSchema(SchemaUtils.fromString("count:bigint"));
  }

  /** Creates a mock over {@code rows} using the current configuration and calls its setup. */
  private MockMapperUDTF newSetUpUdtf(Object[][] rows) throws Exception {
    MockMapperUDTF udtf = new MockMapperUDTF(conf, rows);
    udtf.setup(ctx);
    return udtf;
  }

  /** Runs and closes an already set-up mock, then returns the rows it forwarded. */
  private static List<Object[]> runAndClose(MockMapperUDTF udtf) throws Exception {
    udtf.run();
    udtf.close();
    return udtf.getForwarded();
  }

  /** Asserts that the forwarded row at {@code index} is the pair ({@code word}, {@code count}). */
  private static void assertWordCount(List<Object[]> rows, int index, String word, long count) {
    assertEquals(new Text(word), rows.get(index)[0]);
    assertEquals(new LongWritable(count), rows.get(index)[1]);
  }

  /**
   * Asserts that the exception message contains {@code expected}. A missing message is reported
   * as an assertion failure rather than surfacing as a NullPointerException.
   */
  private static void assertMessageContains(Exception e, String expected) {
    String message = e.getMessage();
    assertTrue("Unexpected exception message: " + message,
               message != null && message.contains(expected));
  }

  /** Without a combiner, each input word is forwarded in input order with a count of one. */
  @Test
  public void testProcess() throws Exception {
    conf.setMapperClass(WordCount.TokenizerMapper.class);
    useWordCountMapOutputSchemas();

    List<Object[]> forwarded = runAndClose(newSetUpUdtf(testData));

    assertEquals(6, forwarded.size());
    assertWordCount(forwarded, 0, "to", 1);
    assertWordCount(forwarded, 1, "be", 1);
    assertWordCount(forwarded, 2, "or", 1);
    assertWordCount(forwarded, 3, "not", 1);
    assertWordCount(forwarded, 4, "to", 1);
    assertWordCount(forwarded, 5, "be", 1);
  }

  /** With the sum combiner, one record per distinct word is expected, in the order asserted. */
  @Test
  public void testCombiner() throws Exception {
    conf.setMapperClass(WordCount.TokenizerMapper.class);
    conf.setCombinerClass(WordCount.SumCombiner.class);
    useWordCountMapOutputSchemas();

    List<Object[]> forwarded = runAndClose(newSetUpUdtf(testData));

    assertEquals(4, forwarded.size());
    assertWordCount(forwarded, 0, "be", 2);
    assertWordCount(forwarded, 1, "not", 1);
    assertWordCount(forwarded, 2, "or", 1);
    assertWordCount(forwarded, 3, "to", 2);
  }

  /**
   * With the combiner cache limited to two items, six records are expected in the key order
   * asserted below; only the keys are checked.
   */
  @Test
  public void testCombinerBuffer() throws Exception {
    conf.setCombinerCacheItems(2);
    conf.setMapperClass(WordCount.TokenizerMapper.class);
    conf.setCombinerClass(WordCount.SumCombiner.class);
    useWordCountMapOutputSchemas();

    List<Object[]> forwarded = runAndClose(newSetUpUdtf(testData));

    assertEquals(6, forwarded.size());
    String[] expectedKeys = {"be", "to", "not", "or", "be", "to"};
    for (int i = 0; i < expectedKeys.length; i++) {
      assertEquals(new Text(expectedKeys[i]), forwarded.get(i)[0]);
    }
  }

  /**
   * Same two-item cache limit, but all seven input words share one key: four records with
   * counts 2, 2, 2 and 1 are expected.
   */
  @Test
  public void testCombinerBufferSameKey() throws Exception {
    conf.setCombinerCacheItems(2);
    conf.setMapperClass(WordCount.TokenizerMapper.class);
    conf.setCombinerClass(WordCount.SumCombiner.class);
    useWordCountMapOutputSchemas();

    List<Object[]> forwarded = runAndClose(newSetUpUdtf(testDataSame));

    assertEquals(4, forwarded.size());
    long[] expectedCounts = {2, 2, 2, 1};
    for (int i = 0; i < expectedCounts.length; i++) {
      assertEquals(new LongWritable(expectedCounts[i]), forwarded.get(i)[1]);
    }
  }

  /** Closing a combiner-enabled mapper that was set up but never run must forward nothing. */
  @Test
  public void testEmptyInputWithCombiner() throws Exception {
    conf.setMapperClass(WordCount.TokenizerMapper.class);
    conf.setCombinerClass(WordCount.SumCombiner.class);
    useWordCountMapOutputSchemas();

    MockMapperUDTF udtf = newSetUpUdtf(testData);
    // run() is deliberately skipped, so no input row is ever read.
    udtf.close();

    assertEquals(0, udtf.getForwarded().size());
  }

  /** Closing a mapper that was set up but never run must forward nothing. */
  @Test
  public void testEmptyInput() throws Exception {
    conf.setMapperClass(WordCount.TokenizerMapper.class);
    useWordCountMapOutputSchemas();

    MockMapperUDTF udtf = newSetUpUdtf(testData);
    // run() is deliberately skipped, so no input row is ever read.
    udtf.close();

    assertEquals(0, udtf.getForwarded().size());
  }

  /**
   * Pushes {@link #PROFILE_ROW_COUNT} random five-letter words through the mapper and combiner
   * and checks that the forwarded counts add up to the number of input rows. Also invoked from
   * {@link #main(String[])}.
   */
  @Test
  public void profile() throws Exception {
    conf.setMapperClass(WordCount.TokenizerMapper.class);
    conf.setCombinerClass(WordCount.SumCombiner.class);
    useWordCountMapOutputSchemas();

    MockMapperUDTF udtf = newSetUpUdtf(testData);
    Object[][] randomRows = new Object[PROFILE_ROW_COUNT][1];
    for (int i = 0; i < PROFILE_ROW_COUNT; i++) {
      randomRows[i] = new Object[]{RandomStringUtils.randomAlphabetic(5)};
    }
    udtf.setTestData(randomRows);

    int sum = 0;
    for (Object[] item : runAndClose(udtf)) {
      sum += ((LongWritable) item[1]).get();
    }
    assertEquals(PROFILE_ROW_COUNT, sum);
  }

  /** Checks how an input description string is resolved to a {@link TableInfo}. */
  @Test
  public void testGetTableInfo() {
    MockMapperUDTF udtf = new MockMapperUDTF(conf, testData);
    TableInfoBuilder builder = TableInfo.builder().projectName("foo").tableName("bar");
    assertEquals(builder.build(), udtf.getTableInfo(InputUtils.getTables(conf), "foo.bar"));
    assertEquals(builder.build(), udtf.getTableInfo(InputUtils.getTables(conf),
        "foo.bar/ds=19970701/hr=19;foo.bar/ds=19970701/hr=20"));
    builder.partSpec("ds=19970701/hr=19");
    assertEquals(builder.build(),
        udtf.getTableInfo(InputUtils.getTables(conf), "foo.bar/ds=19970701/hr=19"));
  }

  /** Mapper whose {@code map} always throws, used by {@link #testThrowException()}. */
  public static class ExceptionMapper extends MapperBase {

    @Override
    public void map(long key, Record record, TaskContext context) throws IOException {
      throw new RuntimeException("By design.");
    }
  }

  /** An exception thrown by the mapper must reach the caller with its message preserved. */
  @Test
  public void testThrowException() throws Exception {
    conf.setMapperClass(ExceptionMapper.class);
    useWordCountMapOutputSchemas();

    // The mock is already constructed over testData, so no separate setTestData call is needed.
    MockMapperUDTF udtf = newSetUpUdtf(testData);
    try {
      udtf.run();
      udtf.close();
      // fail() throws an AssertionError, which the catch clause below does not intercept.
      fail("Not throwing exception.");
    } catch (Exception e) {
      assertMessageContains(e, "By design.");
    }
  }

  /** Mapper that writes every record to the label "nonexist". */
  public static class InvalidLabelMapper extends MapperBase {

    @Override
    public void map(long key, Record record, TaskContext context) throws IOException {
      context.write(record, "nonexist");
    }
  }

  /** Writing to a label that was never configured must fail with NO_SUCH_LABEL. */
  @Test
  public void testWriteToInvalidLabel() throws Exception {
    conf.setMapperClass(InvalidLabelMapper.class);
    conf.setNumReduceTasks(0);
    useWordCountMapOutputSchemas();
    InputUtils.addTable(TableInfo.builder().projectName("proj").tableName("tb2").label("tb2")
                            .build(), conf);

    // The mock is already constructed over testData, so no separate setTestData call is needed.
    MockMapperUDTF udtf = newSetUpUdtf(testData);
    try {
      udtf.run();
      udtf.close();
      // fail() throws an AssertionError, which the catch clause below does not intercept.
      fail("Not throwing exception.");
    } catch (Exception e) {
      assertMessageContains(e, ErrorCode.NO_SUCH_LABEL.toString());
    }
  }

  /**
   * Manual entry point around {@link #profile()}: blocks on standard input before and after the
   * run. NOTE(review): the pauses look intended for attaching a profiler; confirm.
   * {@link #setUp()} is not called here, exactly as before this change.
   */
  public static void main(String[] args) throws Exception {
    LotMapperUDTFTest test = new LotMapperUDTFTest();
    System.in.read();
    test.profile();
    System.out.println("Done");
    System.in.read();
  }
}