/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.mapred.lib; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import junit.framework.TestCase; public class TestKeyFieldHelper extends TestCase { private static final Log LOG = LogFactory.getLog(TestKeyFieldHelper.class); /** * Test is key-field-helper's parse option. */ public void testparseOption() throws Exception { KeyFieldHelper helper = new KeyFieldHelper(); helper.setKeyFieldSeparator("\t"); String keySpecs = "-k1.2,3.4"; String eKeySpecs = keySpecs; helper.parseOption(keySpecs); String actKeySpecs = helper.keySpecs().get(0).toString(); assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); // test -k a.b keySpecs = "-k 1.2"; eKeySpecs = "-k1.2,0.0"; helper = new KeyFieldHelper(); helper.parseOption(keySpecs); actKeySpecs = helper.keySpecs().get(0).toString(); assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); keySpecs = "-nr -k1.2,3.4"; eKeySpecs = "-k1.2,3.4nr"; helper = new KeyFieldHelper(); helper.parseOption(keySpecs); actKeySpecs = helper.keySpecs().get(0).toString(); assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); keySpecs = "-nr -k1.2,3.4n"; eKeySpecs = "-k1.2,3.4n"; helper = new KeyFieldHelper(); helper.parseOption(keySpecs); actKeySpecs = helper.keySpecs().get(0).toString(); assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); keySpecs = "-nr -k1.2,3.4r"; eKeySpecs = "-k1.2,3.4r"; helper = new KeyFieldHelper(); helper.parseOption(keySpecs); actKeySpecs = helper.keySpecs().get(0).toString(); assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); keySpecs = "-nr -k1.2,3.4 -k5.6,7.8n -k9.10,11.12r -k13.14,15.16nr"; //1st eKeySpecs = "-k1.2,3.4nr"; helper = new KeyFieldHelper(); helper.parseOption(keySpecs); actKeySpecs = helper.keySpecs().get(0).toString(); assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); // 2nd eKeySpecs = "-k5.6,7.8n"; actKeySpecs = helper.keySpecs().get(1).toString(); assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); //3rd eKeySpecs = "-k9.10,11.12r"; actKeySpecs = helper.keySpecs().get(2).toString(); assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); //4th eKeySpecs = "-k13.14,15.16nr"; actKeySpecs = helper.keySpecs().get(3).toString(); assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); keySpecs = "-k1.2n,3.4"; eKeySpecs = "-k1.2,3.4n"; helper = new KeyFieldHelper(); helper.parseOption(keySpecs); actKeySpecs = helper.keySpecs().get(0).toString(); assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); keySpecs = "-k1.2r,3.4"; eKeySpecs = "-k1.2,3.4r"; helper = new KeyFieldHelper(); helper.parseOption(keySpecs); actKeySpecs = helper.keySpecs().get(0).toString(); assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); keySpecs = "-k1.2nr,3.4"; eKeySpecs = "-k1.2,3.4nr"; helper = new KeyFieldHelper(); helper.parseOption(keySpecs); actKeySpecs = helper.keySpecs().get(0).toString(); assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); keySpecs = "-k1.2,3.4n"; eKeySpecs = "-k1.2,3.4n"; helper = new KeyFieldHelper(); helper.parseOption(keySpecs); actKeySpecs = helper.keySpecs().get(0).toString(); assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); keySpecs = "-k1.2,3.4r"; eKeySpecs = "-k1.2,3.4r"; helper = new KeyFieldHelper(); helper.parseOption(keySpecs); actKeySpecs = helper.keySpecs().get(0).toString(); assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); keySpecs = "-k1.2,3.4nr"; eKeySpecs = "-k1.2,3.4nr"; helper = new KeyFieldHelper(); helper.parseOption(keySpecs); actKeySpecs = helper.keySpecs().get(0).toString(); assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); keySpecs = "-nr -k1.2,3.4 -k5.6,7.8"; eKeySpecs = "-k1.2,3.4nr"; helper = new KeyFieldHelper(); helper.parseOption(keySpecs); actKeySpecs = helper.keySpecs().get(0).toString(); assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); eKeySpecs = "-k5.6,7.8nr"; actKeySpecs = helper.keySpecs().get(1).toString(); assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); keySpecs = "-n -k1.2,3.4 -k5.6,7.8"; eKeySpecs = "-k1.2,3.4n"; helper = new KeyFieldHelper(); helper.parseOption(keySpecs); actKeySpecs = helper.keySpecs().get(0).toString(); assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); eKeySpecs = "-k5.6,7.8n"; actKeySpecs = helper.keySpecs().get(1).toString(); assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); keySpecs = "-r -k1.2,3.4 -k5.6,7.8"; eKeySpecs = "-k1.2,3.4r"; helper = new KeyFieldHelper(); helper.parseOption(keySpecs); actKeySpecs = helper.keySpecs().get(0).toString(); assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); eKeySpecs = "-k5.6,7.8r"; actKeySpecs = helper.keySpecs().get(1).toString(); assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); keySpecs = "-k1.2,3.4n -k5.6,7.8"; eKeySpecs = "-k1.2,3.4n"; helper = new KeyFieldHelper(); helper.parseOption(keySpecs); actKeySpecs = helper.keySpecs().get(0).toString(); assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); eKeySpecs = "-k5.6,7.8"; actKeySpecs = helper.keySpecs().get(1).toString(); assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); keySpecs = "-k1.2,3.4r -k5.6,7.8"; eKeySpecs = "-k1.2,3.4r"; helper = new KeyFieldHelper(); helper.parseOption(keySpecs); actKeySpecs = helper.keySpecs().get(0).toString(); assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); eKeySpecs = "-k5.6,7.8"; actKeySpecs = helper.keySpecs().get(1).toString(); assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); keySpecs = "-k1.2,3.4nr -k5.6,7.8"; eKeySpecs = "-k1.2,3.4nr"; helper = new KeyFieldHelper(); helper.parseOption(keySpecs); actKeySpecs = helper.keySpecs().get(0).toString(); assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); eKeySpecs = "-k5.6,7.8"; actKeySpecs = helper.keySpecs().get(1).toString(); assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); keySpecs = "-n"; eKeySpecs = "-k1.1,0.0n"; helper = new KeyFieldHelper(); helper.parseOption(keySpecs); actKeySpecs = helper.keySpecs().get(0).toString(); assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); keySpecs = "-r"; eKeySpecs = "-k1.1,0.0r"; helper = new KeyFieldHelper(); helper.parseOption(keySpecs); actKeySpecs = helper.keySpecs().get(0).toString(); assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); keySpecs = "-nr"; eKeySpecs = "-k1.1,0.0nr"; helper = new KeyFieldHelper(); helper.parseOption(keySpecs); actKeySpecs = helper.keySpecs().get(0).toString(); assertEquals("KeyFieldHelper's parsing is garbled", eKeySpecs, actKeySpecs); } /** * Test is key-field-helper's getWordLengths. */ public void testGetWordLengths() throws Exception { KeyFieldHelper helper = new KeyFieldHelper(); helper.setKeyFieldSeparator("\t"); // test getWordLengths with unspecified key-specifications String input = "hi"; int[] result = helper.getWordLengths(input.getBytes(), 0, 2); assertTrue(equals(result, new int[] {1})); // set the key specs helper.setKeyFieldSpec(1, 2); // test getWordLengths with 3 words input = "hi\thello there"; result = helper.getWordLengths(input.getBytes(), 0, input.length()); assertTrue(equals(result, new int[] {2, 2, 11})); // test getWordLengths with 4 words but with a different separator helper.setKeyFieldSeparator(" "); input = "hi hello\tthere you"; result = helper.getWordLengths(input.getBytes(), 0, input.length()); assertTrue(equals(result, new int[] {3, 2, 11, 3})); // test with non zero start index input = "hi hello there you where me there"; // ..................... result = helper.getWordLengths(input.getBytes(), 10, 33); assertTrue(equals(result, new int[] {5, 4, 3, 5, 2, 3})); input = "hi hello there you where me "; // .................. result = helper.getWordLengths(input.getBytes(), 10, input.length()); assertTrue(equals(result, new int[] {5, 4, 3, 5, 2, 0})); input = ""; result = helper.getWordLengths(input.getBytes(), 0, 0); assertTrue(equals(result, new int[] {1, 0})); input = " abc"; result = helper.getWordLengths(input.getBytes(), 0, 5); assertTrue(equals(result, new int[] {3, 0, 0, 3})); input = " abc"; result = helper.getWordLengths(input.getBytes(), 0, 2); assertTrue(equals(result, new int[] {3, 0, 0, 0})); input = " abc "; result = helper.getWordLengths(input.getBytes(), 0, 2); assertTrue(equals(result, new int[] {2, 0, 1})); helper.setKeyFieldSeparator("abcd"); input = "abc"; result = helper.getWordLengths(input.getBytes(), 0, 3); assertTrue(equals(result, new int[] {1, 3})); } /** * Test is key-field-helper's getStartOffset/getEndOffset. */ public void testgetStartEndOffset() throws Exception { KeyFieldHelper helper = new KeyFieldHelper(); helper.setKeyFieldSeparator("\t"); // test getStartOffset with -k1,2 helper.setKeyFieldSpec(1, 2); String input = "hi\thello"; String expectedOutput = input; testKeySpecs(input, expectedOutput, helper); // test getStartOffset with -k1.0,0 .. should result into start = -1 helper = new KeyFieldHelper(); helper.setKeyFieldSeparator("\t"); helper.parseOption("-k1.0,0"); testKeySpecs(input, null, helper); // test getStartOffset with -k1,0 helper = new KeyFieldHelper(); helper.setKeyFieldSeparator("\t"); helper.parseOption("-k1,0"); expectedOutput = input; testKeySpecs(input, expectedOutput, helper); // test getStartOffset with -k1.2,0 helper = new KeyFieldHelper(); helper.setKeyFieldSeparator("\t"); helper.parseOption("-k1.2,0"); expectedOutput = "i\thello"; testKeySpecs(input, expectedOutput, helper); // test getWordLengths with -k1.0,2.3 helper = new KeyFieldHelper(); helper.setKeyFieldSeparator("\t"); helper.parseOption("-k1.1,2.3"); expectedOutput = "hi\thel"; testKeySpecs(input, expectedOutput, helper); // test getWordLengths with -k1.2,2.3 helper = new KeyFieldHelper(); helper.setKeyFieldSeparator("\t"); helper.parseOption("-k1.2,2.3"); expectedOutput = "i\thel"; testKeySpecs(input, expectedOutput, helper); // test getStartOffset with -k1.2,3.0 helper = new KeyFieldHelper(); helper.setKeyFieldSeparator("\t"); helper.parseOption("-k1.2,3.0"); expectedOutput = "i\thello"; testKeySpecs(input, expectedOutput, helper); // test getStartOffset with -k2,2 helper = new KeyFieldHelper(); helper.setKeyFieldSeparator("\t"); helper.parseOption("-k2,2"); expectedOutput = "hello"; testKeySpecs(input, expectedOutput, helper); // test getStartOffset with -k3.0,4.0 helper = new KeyFieldHelper(); helper.setKeyFieldSeparator("\t"); helper.parseOption("-k3.1,4.0"); testKeySpecs(input, null, helper); // test getStartOffset with -k2.1 helper = new KeyFieldHelper(); input = "123123123123123hi\thello\thow"; helper.setKeyFieldSeparator("\t"); helper.parseOption("-k2.1"); expectedOutput = "hello\thow"; testKeySpecs(input, expectedOutput, helper, 15, input.length()); // test getStartOffset with -k2.1,4 with end ending on \t helper = new KeyFieldHelper(); input = "123123123123123hi\thello\t\thow\tare"; helper.setKeyFieldSeparator("\t"); helper.parseOption("-k2.1,3"); expectedOutput = "hello\t"; testKeySpecs(input, expectedOutput, helper, 17, input.length()); // test getStartOffset with -k2.1 with end ending on \t helper = new KeyFieldHelper(); input = "123123123123123hi\thello\thow\tare"; helper.setKeyFieldSeparator("\t"); helper.parseOption("-k2.1"); expectedOutput = "hello\thow\t"; testKeySpecs(input, expectedOutput, helper, 17, 28); // test getStartOffset with -k2.1,3 with smaller length helper = new KeyFieldHelper(); input = "123123123123123hi\thello\thow"; helper.setKeyFieldSeparator("\t"); helper.parseOption("-k2.1,3"); expectedOutput = "hello"; testKeySpecs(input, expectedOutput, helper, 15, 23); } private void testKeySpecs(String input, String expectedOutput, KeyFieldHelper helper) { testKeySpecs(input, expectedOutput, helper, 0, -1); } private void testKeySpecs(String input, String expectedOutput, KeyFieldHelper helper, int s1, int e1) { LOG.info("input : " + input); String keySpecs = helper.keySpecs().get(0).toString(); LOG.info("keyspecs : " + keySpecs); byte[] inputBytes = input.getBytes(); // get the input bytes if (e1 == -1) { e1 = inputBytes.length; } LOG.info("length : " + e1); // get the word lengths int[] indices = helper.getWordLengths(inputBytes, s1, e1); // get the start index int start = helper.getStartOffset(inputBytes, s1, e1, indices, helper.keySpecs().get(0)); LOG.info("start : " + start); if (expectedOutput == null) { assertEquals("Expected -1 when the start index is invalid", -1, start); return; } // get the end index int end = helper.getEndOffset(inputBytes, s1, e1, indices, helper.keySpecs().get(0)); LOG.info("end : " + end); //my fix end = (end >= inputBytes.length) ? inputBytes.length -1 : end; int length = end + 1 - start; LOG.info("length : " + length); byte[] outputBytes = new byte[length]; System.arraycopy(inputBytes, start, outputBytes, 0, length); String output = new String(outputBytes); LOG.info("output : " + output); LOG.info("expected-output : " + expectedOutput); assertEquals(keySpecs + " failed on input '" + input + "'", expectedOutput, output); } // check for equality of 2 int arrays private boolean equals(int[] test, int[] expected) { // check array length if (test[0] != expected[0]) { return false; } // if length is same then check the contents for (int i = 0; i < test[0] && i < expected[0]; ++i) { if (test[i] != expected[i]) { return false; } } return true; } }