/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.mapreduce.lib.input;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.io.File;
import java.io.IOException;
import java.net.URL;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
import org.junit.Test;

public class TestLineRecordReader {

  private void testSplitRecords(String testFileName, long firstSplitLength)
      throws IOException {
    URL testFileUrl = getClass().getClassLoader().getResource(testFileName);
    assertNotNull("Cannot find " + testFileName, testFileUrl);
    File testFile = new File(testFileUrl.getFile());
    long testFileSize = testFile.length();
    Path testFilePath = new Path(testFile.getAbsolutePath());
    Configuration conf = new Configuration();
    conf.setInt(org.apache.hadoop.mapreduce.lib.input.
        LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
    assertTrue("unexpected test data at " + testFile,
        testFileSize > firstSplitLength);

    TaskAttemptContext context = new TaskAttemptContextImpl(conf,
        new TaskAttemptID());

    // read the data without splitting to count the records
    FileSplit split = new FileSplit(testFilePath, 0, testFileSize,
        (String[])null);
    LineRecordReader reader = new LineRecordReader();
    reader.initialize(split, context);
    int numRecordsNoSplits = 0;
    while (reader.nextKeyValue()) {
      ++numRecordsNoSplits;
    }
    reader.close();

    // count the records in the first split
    split = new FileSplit(testFilePath, 0, firstSplitLength, (String[])null);
    reader = new LineRecordReader();
    reader.initialize(split, context);
    int numRecordsFirstSplit = 0;
    while (reader.nextKeyValue()) {
      ++numRecordsFirstSplit;
    }
    reader.close();

    // count the records in the second split
    split = new FileSplit(testFilePath, firstSplitLength,
        testFileSize - firstSplitLength, (String[])null);
    reader = new LineRecordReader();
    reader.initialize(split, context);
    int numRecordsRemainingSplits = 0;
    while (reader.nextKeyValue()) {
      ++numRecordsRemainingSplits;
    }
    reader.close();

    assertEquals("Unexpected number of records in bzip2 compressed split",
        numRecordsNoSplits,
        numRecordsFirstSplit + numRecordsRemainingSplits);
  }

  @Test
  public void testBzip2SplitEndsAtCR() throws IOException {
    // the test data contains a carriage-return at the end of the first
    // split which ends at compressed offset 136498 and the next
    // character is not a linefeed
    testSplitRecords("blockEndingInCR.txt.bz2", 136498);
  }

  @Test
  public void testBzip2SplitEndsAtCRThenLF() throws IOException {
    // the test data contains a carriage-return at the end of the first
    // split which ends at compressed offset 136498 and the next
    // character is a linefeed
    testSplitRecords("blockEndingInCRThenLF.txt.bz2", 136498);
  }
}