/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.aliyun.odps.mapred.unittest;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;

import org.junit.Assert;
import org.junit.Test;

import com.aliyun.odps.data.Record;
import com.aliyun.odps.data.TableInfo;
import com.aliyun.odps.mapred.MapperBase;
import com.aliyun.odps.mapred.conf.JobConf;
import com.aliyun.odps.mapred.utils.InputUtils;
import com.aliyun.odps.mapred.utils.OutputUtils;
import com.aliyun.odps.mapred.utils.SchemaUtils;

/**
 * Verifies that a mapper can read an archive resource (a .tar.gz
 * attached to the job) both as a whole and by a relative path
 * inside the archive.
 */
public class ReadArchiveResourceTest extends MRUnitTest {

  private final static String INPUT_SCHEMA = "a:string,b:string";
  private final static String OUTPUT_SCHEMA = "k:string,v:bigint";

  private static JobConf job = new JobConf();

  public static class ArchiveMapper extends MapperBase {

    /**
     * Reads the archive resource and writes one output record per
     * "key,value" line found. If {@code path} is "all", every file in
     * the archive is read; otherwise only the file at the given
     * relative path is read.
     */
    private void readCacheArchive(TaskContext context, String path) throws IOException {
      Record record = context.createOutputRecord();
      StringBuilder importdata = new StringBuilder();
      byte[] buffer = new byte[1024];
      int bytesRead = 0;
      String resourceName = context.getJobConf().get("test_archive");

      Iterable<BufferedInputStream> bufferedInputs =
          path.equals("all") ? context.readResourceArchiveAsStream(resourceName)
              : context.readResourceArchiveAsStream(resourceName, path);

      // Concatenate the content of all matched files in the archive.
      Iterator<BufferedInputStream> iter = bufferedInputs.iterator();
      while (iter.hasNext()) {
        BufferedInputStream bufInput = iter.next();
        try {
          while ((bytesRead = bufInput.read(buffer)) != -1) {
            importdata.append(new String(buffer, 0, bytesRead));
          }
        } finally {
          bufInput.close();
        }
      }

      // Each line is expected to be "<string key>,<long value>".
      String[] lines = importdata.toString().split("\n");
      for (String line : lines) {
        String[] ss = line.split(",");
        record.set(0, ss[0].trim());
        record.set(1, Long.parseLong(ss[1].trim()));
        context.write(record);
      }
    }

    @Override
    public void setup(TaskContext context) throws IOException {
      // test for readResourceArchiveAsStream(String resourceName)
      readCacheArchive(context, "all");
      // test for readResourceArchiveAsStream(String resourceName, String relativePath)
      readCacheArchive(context, "archive_test/test/2.txt");
    }
  }

  public ReadArchiveResourceTest() {
    job.setMapperClass(ArchiveMapper.class);
    job.set("test_archive", "archive_test.tar.gz");
    job.setNumReduceTasks(0);
    job.setMapOutputKeySchema(SchemaUtils.fromString("key:string"));
    job.setMapOutputValueSchema(SchemaUtils.fromString("value:bigint"));
    InputUtils.addTable(TableInfo.builder().tableName("mr_empty").build(), job);
    OutputUtils.addTable(TableInfo.builder().tableName("archive_out").build(), job);
  }

  @Test
  public void testMapReduce() throws IOException, ClassNotFoundException, InterruptedException {
    // prepare test data: attach the archive file as the job's archive resource
    MapUTContext mapContext = new MapUTContext();
    mapContext.setInputSchema(INPUT_SCHEMA);
    mapContext.setOutputSchema(OUTPUT_SCHEMA, job);
    File archiveFile = new File("src/test/resources/data/archive_test.tar.gz");
    mapContext.setArchiveResource("archive_test.tar.gz", archiveFile);

    // run mapper
    TaskOutput mapOutput = runMapper(job, mapContext);

    // verify results against the expected output file
    Assert.assertEquals(6, mapOutput.getTotalRecordCount());
    Assert.assertTrue(equalRecords(new File("src/test/resources/data/archive_out"),
        mapOutput.getOutputRecords(), false));
  }
}