/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.util;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.Charset;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;
import org.junit.Test;

// NOTE(review): junit.framework.Assert is the legacy JUnit 3 assertion class;
// org.junit.Assert is its JUnit 4 counterpart. The import is kept as-is here.
import junit.framework.Assert;

/**
 * Tests {@link LineReader} when it is constructed with a custom,
 * multi-character record delimiter.
 */
public class TestLineReader {

  /**
   * Charset used to turn the test strings into bytes. It is spelled out
   * explicitly so the byte stream handed to the reader does not depend on
   * the platform default encoding.
   */
  private static final Charset UTF_8 = Charset.forName("UTF-8");

  /**
   * Buffer size the first scenario is laid out for. The reader is created
   * without an explicit buffer size, so this presumably has to match
   * LineReader's default buffer size -- TODO confirm against LineReader.
   */
  private static final int BUFFER_SIZE = 64 * 1024;

  /**
   * Runs both custom-delimiter scenarios, in order: a delimiter prefix that
   * ends the first buffer, and records whose content resembles the delimiter.
   *
   * @throws Exception if reading from the in-memory stream fails
   */
  @Test
  public void testCustomDelimiter() throws Exception {
    checkDelimiterPrefixAtBufferEnd();
    checkDelimiterPrefixInsideRecords();
  }

  /**
   * Scenario 1: the tail of the current buffer equals the leading characters
   * of the delimiter, but the head of the next buffer does not complete it.
   *
   * <ol>
   * <li>The delimiter is {@code "</entity>"}.</li>
   * <li>The input is prefixed with enough {@code 'a'} characters that the
   * first {@link #BUFFER_SIZE} characters end exactly on {@code "</"},
   * i.e. on the first two characters of the delimiter.</li>
   * <li>The following characters are {@code "id>"}, which do NOT match the
   * rest of the delimiter.</li>
   * </ol>
   *
   * Check: the second record must contain {@code "</"} taken from the end of
   * the first buffer followed by {@code "id>"} taken from the next one.
   */
  private static void checkDelimiterPrefixAtBufferEnd() throws IOException {
    final String delimiter = "</entity>";
    // Ends with "</", the first two characters of the delimiter.
    final String currentBufferTail = "</entity><entity><id>Gelesh</";
    // Starts with "id>", which does not continue the delimiter.
    final String nextBufferHead = "id><name>Omathil</name></entity>";
    final String boundaryInput = currentBufferTail + nextBufferHead;
    // The record spans both buffers; only the delimiters are dropped.
    final String expectedSecondRecord = boundaryInput.replace(delimiter, "");

    final int fillerLength = BUFFER_SIZE - currentBufferTail.length();
    final StringBuilder fillerBuilder = new StringBuilder(fillerLength);
    for (int i = 0; i < fillerLength; i++) {
      fillerBuilder.append('a');
    }
    final String filler = fillerBuilder.toString();

    final LineReader reader = newReader(filler + boundaryInput, delimiter);
    assertNextRecord(filler, reader);
    assertNextRecord(expectedSecondRecord, reader);
  }

  /**
   * Scenario 2: the characters preceding (or following) a delimiter equal
   * leading characters of the delimiter itself, and the input ends on a
   * partial delimiter match.
   */
  private static void checkDelimiterPrefixInsideRecords() throws IOException {
    final String delimiter = "record";
    final String lastRecord = "ecord" + "recor" + "core"; // input ends with "re"
    final String data = delimiter + "Kerala "
        + delimiter + "Bangalore"
        + delimiter + " North Korea"
        + delimiter + delimiter + "Guantanamo"
        + delimiter + lastRecord;

    final LineReader reader = newReader(data, delimiter);
    assertNextRecord("", reader);             // input starts with a delimiter
    assertNextRecord("Kerala ", reader);
    assertNextRecord("Bangalore", reader);
    assertNextRecord(" North Korea", reader);
    assertNextRecord("", reader);             // two delimiters back to back
    assertNextRecord("Guantanamo", reader);
    assertNextRecord(lastRecord, reader);
  }

  /**
   * Creates a reader over {@code data} that splits on {@code delimiter}.
   * Both strings are encoded with {@link #UTF_8}.
   */
  private static LineReader newReader(String data, String delimiter)
      throws IOException {
    return new LineReader(
        new ByteArrayInputStream(data.getBytes(UTF_8)),
        delimiter.getBytes(UTF_8));
  }

  /**
   * Reads the next record from {@code reader} and asserts that its text
   * equals {@code expected}.
   */
  private static void assertNextRecord(String expected, LineReader reader)
      throws IOException {
    final Text record = new Text();
    reader.readLine(record);
    Assert.assertEquals(expected, record.toString());
  }
}