package com.esri.json.hadoop; import java.io.IOException; import java.util.LinkedList; import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.TaskAttemptID; import org.apache.hadoop.mapreduce.lib.input.FileSplit; import org.junit.Assert; import org.junit.Ignore; import org.junit.Test; public class TestUnenclosedEsriJsonRecordReader { // MRv2 private TaskAttemptContext createTaskAttemptContext(Configuration conf, TaskAttemptID taid) throws Exception { //shim try { // Hadoop-1 return (TaskAttemptContext)TaskAttemptContext.class. getConstructor(Configuration.class, TaskAttemptID.class). newInstance(conf, taid); } catch (Exception e) { // Hadoop-2 Class<?> clazz = Class.forName("org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl"); return (TaskAttemptContext)clazz.getConstructor(Configuration.class, TaskAttemptID.class). newInstance(conf, taid); } } private UnenclosedEsriJsonRecordReader getReader() throws IOException { return new UnenclosedEsriJsonRecordReader(); } int [] getRecordIndexesInFile(String resource, int start, int end) throws Exception { return getRecordIndexesInFile(getReader(), resource, start, end); } int [] getRecordIndexesInFile(String resource, int start, int end, boolean flag) throws Exception { return getRecordIndexesInFile(getReader(), resource, start, end, flag); } int [] getRecordIndexesInFile(UnenclosedEsriJsonRecordReader reader, String resource, int start, int end) throws Exception { return getRecordIndexesInFile(reader, resource, start, end, false); } int [] getRecordIndexesInFile(UnenclosedEsriJsonRecordReader reader, String resource, int start, int end, boolean flag) throws Exception { Path path = new Path(this.getClass().getResource(resource).getFile()); FileSplit split = new FileSplit(path, start, end - start, new String[0]); try { TaskAttemptContext tac = createTaskAttemptContext(new Configuration(), new TaskAttemptID()); reader.initialize(split, tac); } catch (InterruptedException ie) { Thread.currentThread().interrupt(); } List<Integer> linesList = new LinkedList<Integer>(); LongWritable key = null; Text value = null; try { while (reader.nextKeyValue()) { key = reader.getCurrentKey(); value = reader.getCurrentValue(); int line = flag ? (int)(key.get()) : value.toString().charAt(23) - '0'; linesList.add(line); //System.out.println(key.get() + " - " + value.toString()); } } catch (InterruptedException ie) { Thread.currentThread().interrupt(); } int [] lines = new int[linesList.size()]; for (int i=0;i<linesList.size();i++) { lines[i] = linesList.get(i); } return lines; } @Test public void TestArbitrarySplitLocations() throws Exception { //int totalSize = 415; //int [] recordBreaks = new int[] { 0, 40, 80, 120, 160, 200, 240, 280, 320, 372 }; Assert.assertArrayEquals(new int[] { 0 }, getRecordIndexesInFile("unenclosed-json-simple.json", 0, 40)); Assert.assertArrayEquals(new int[] { 0, 1 }, getRecordIndexesInFile("unenclosed-json-simple.json", 0, 41)); Assert.assertArrayEquals(new int[] { 0, 1 }, getRecordIndexesInFile("unenclosed-json-simple.json", 0, 42)); Assert.assertArrayEquals(new int[] { 1, 2, 3 }, getRecordIndexesInFile("unenclosed-json-simple.json", 39, 123)); Assert.assertArrayEquals(new int[] { 1, 2, 3 }, getRecordIndexesInFile("unenclosed-json-simple.json", 20, 123)); Assert.assertArrayEquals(new int[] { 1, 2, 3 }, getRecordIndexesInFile("unenclosed-json-simple.json", 40, 123)); Assert.assertArrayEquals(new int[] { 2, 3 }, getRecordIndexesInFile("unenclosed-json-simple.json", 41, 123)); Assert.assertArrayEquals(new int[] { 6, 7, 8 }, getRecordIndexesInFile("unenclosed-json-simple.json", 240, 340)); Assert.assertArrayEquals(new int[] { 9 }, getRecordIndexesInFile("unenclosed-json-simple.json", 353, 415)); Assert.assertArrayEquals(new int[] { 9 }, getRecordIndexesInFile("unenclosed-json-simple.json", 354, 415)); Assert.assertArrayEquals(new int[] { 9 }, getRecordIndexesInFile("unenclosed-json-simple.json", 355, 415)); } @Test public void TestEachOnce() throws Exception { //Each record exactly once - see commit b8f6d6dfaf11cce7d8cba54e6011e8684ade0e85, issue #68 Assert.assertArrayEquals(new int[] { 0, 1 }, getRecordIndexesInFile("unenclosed-json-simple.json", 0, 63)); Assert.assertArrayEquals(new int[] { 2, 3 }, getRecordIndexesInFile("unenclosed-json-simple.json", 63, 121)); Assert.assertArrayEquals(new int[] { 4 }, getRecordIndexesInFile("unenclosed-json-simple.json", 121, 187)); Assert.assertArrayEquals(new int[] { 5, 6 }, getRecordIndexesInFile("unenclosed-json-simple.json", 187, 264)); Assert.assertArrayEquals(new int[] { 7, 8 }, getRecordIndexesInFile("unenclosed-json-simple.json", 264, 352)); Assert.assertArrayEquals(new int[] { 9 }, getRecordIndexesInFile("unenclosed-json-simple.json", 352, 412)); Assert.assertArrayEquals(new int[] { 0 }, getRecordIndexesInFile("unenclosed-json-simple.json", 0, 23)); Assert.assertArrayEquals(new int[] { 1 }, getRecordIndexesInFile("unenclosed-json-simple.json", 23, 41)); // Assert.assertArrayEquals(new int[] { 2, 3 }, getRecordIndexesInFile("unenclosed-json-simple.json", 41, 123)); } @Test public void TestWhitespace() throws Exception { //int [] recordBreaks = new int[] { 0, 57, 111, , }; int[] rslt = getRecordIndexesInFile("unenclosed-json-return.json", 0, 222, true); Assert.assertEquals(4, rslt.length); int[] before = null, after = null; before = getRecordIndexesInFile("unenclosed-json-return.json", 0, 56, true); after = getRecordIndexesInFile("unenclosed-json-return.json", 56, 222, true); Assert.assertEquals(4, before.length + after.length); before = getRecordIndexesInFile("unenclosed-json-return.json", 0, 57, true); after = getRecordIndexesInFile("unenclosed-json-return.json", 57, 222, true); Assert.assertEquals(4, before.length + after.length); before = getRecordIndexesInFile("unenclosed-json-return.json", 0, 58, true); after = getRecordIndexesInFile("unenclosed-json-return.json", 58, 222, true); Assert.assertEquals(4, before.length + after.length); } @Ignore // May not be guaranteed behavior public void TestComma() throws Exception { //int [] recordBreaks = new int[] { 0, 57, 111, , }; int[] rslt = getRecordIndexesInFile("unenclosed-json-comma.json", 0, 222, true); Assert.assertEquals(4, rslt.length); int[] before = null, after = null; before = getRecordIndexesInFile("unenclosed-json-comma.json", 0, 56, true); after = getRecordIndexesInFile("unenclosed-json-comma.json", 56, 222, true); Assert.assertEquals(4, before.length + after.length); before = getRecordIndexesInFile("unenclosed-json-comma.json", 0, 57, true); after = getRecordIndexesInFile("unenclosed-json-comma.json", 57, 222, true); Assert.assertEquals(4, before.length + after.length); before = getRecordIndexesInFile("unenclosed-json-comma.json", 0, 58, true); after = getRecordIndexesInFile("unenclosed-json-comma.json", 58, 222, true); Assert.assertEquals(4, before.length + after.length); } @Test public void TestAttrNamedAttributes() throws Exception { //int [] recordBreaks = new int[] { 0, 57, 111, , }; int[] rslt = getRecordIndexesInFile("unenclosed-json-attrs.json", 0, 225, true); Assert.assertEquals(5, rslt.length); int[] before = null, after = null; before = getRecordIndexesInFile("unenclosed-json-attrs.json", 0, 59, true); after = getRecordIndexesInFile("unenclosed-json-attrs.json", 59, 225, true); Assert.assertEquals(5, before.length + after.length); before = getRecordIndexesInFile("unenclosed-json-attrs.json", 0, 88, true); after = getRecordIndexesInFile("unenclosed-json-attrs.json", 88, 222, true); Assert.assertEquals(5, before.length + after.length); before = getRecordIndexesInFile("unenclosed-json-attrs.json", 0, 102, true); after = getRecordIndexesInFile("unenclosed-json-attrs.json", 102, 222, true); Assert.assertEquals(5, before.length + after.length); } @Test public void TestEscape() throws Exception { // Issue #68 //int [] recordBreaks = new int[] { 0, 44, 88, 137, 181, 229, 270, 311, 354 }; //length 395 Assert.assertArrayEquals(new int[] { 0 }, getRecordIndexesInFile("unenclosed-json-escape.json", 0, 44)); Assert.assertArrayEquals(new int[] {0, 1}, getRecordIndexesInFile("unenclosed-json-escape.json", 0, 45)); Assert.assertArrayEquals(new int[] {0, 1}, getRecordIndexesInFile("unenclosed-json-escape.json", 0, 46)); Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInFile("unenclosed-json-escape.json", 43, 140)); Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInFile("unenclosed-json-escape.json", 19, 140)); Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInFile("unenclosed-json-escape.json", 44, 140)); Assert.assertArrayEquals(new int[] {2, 3}, getRecordIndexesInFile("unenclosed-json-escape.json", 45, 140)); Assert.assertArrayEquals(new int[] {4,5,6}, getRecordIndexesInFile("unenclosed-json-escape.json", 181, 289)); Assert.assertArrayEquals(new int[] { 8 }, getRecordIndexesInFile("unenclosed-json-escape.json", 336, 400)); // 7|{}" Assert.assertArrayEquals(new int[] { 1 }, getRecordIndexesInFile("unenclosed-json-escape.json", 14, 45)); Assert.assertArrayEquals(new int[] { 1 }, getRecordIndexesInFile("unenclosed-json-escape.json", 22, 45)); Assert.assertArrayEquals(new int[] { 1 }, getRecordIndexesInFile("unenclosed-json-escape.json", 23, 45)); Assert.assertArrayEquals(new int[] { 1 }, getRecordIndexesInFile("unenclosed-json-escape.json", 24, 45)); Assert.assertArrayEquals(new int[] { 1 }, getRecordIndexesInFile("unenclosed-json-escape.json", 25, 45)); Assert.assertArrayEquals(new int[] { 1 }, getRecordIndexesInFile("unenclosed-json-escape.json", 44, 68)); Assert.assertArrayEquals(new int[] { 1 }, getRecordIndexesInFile("unenclosed-json-escape.json", 44, 69)); } @Test public void TestEscQuoteLast() throws Exception { //int [] recordBreaks = new int[] { 0, 75, 146, 218, 290, 362, , , }; Assert.assertArrayEquals(new int[] { 0 }, getRecordIndexesInFile("unenclosed-json-esc1.json", 0, 44)); Assert.assertArrayEquals(new int[] {0, 1}, getRecordIndexesInFile("unenclosed-json-esc1.json", 0, 45)); Assert.assertArrayEquals(new int[] {0, 1}, getRecordIndexesInFile("unenclosed-json-esc1.json", 0, 46)); Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInFile("unenclosed-json-esc1.json", 19, 140)); Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInFile("unenclosed-json-esc1.json", 25, 140)); Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInFile("unenclosed-json-esc1.json", 44, 140)); } @Test public void TestEscAposLast() throws Exception { //int [] recordBreaks = new int[] { 0, 75, 146, 218, 290, 362, , , }; Assert.assertArrayEquals(new int[] { 0 }, getRecordIndexesInFile("unenclosed-json-esc2.json", 0, 44)); Assert.assertArrayEquals(new int[] {0, 1}, getRecordIndexesInFile("unenclosed-json-esc2.json", 0, 45)); Assert.assertArrayEquals(new int[] {0, 1}, getRecordIndexesInFile("unenclosed-json-esc2.json", 0, 46)); Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInFile("unenclosed-json-esc2.json", 19, 140)); Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInFile("unenclosed-json-esc2.json", 26, 140)); Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInFile("unenclosed-json-esc2.json", 43, 140)); } @Test public void TestEscSlashLast() throws Exception { //int [] recordBreaks = new int[] { 0, 75, 146, 218, 290, 362, , , }; Assert.assertArrayEquals(new int[] { 0 }, getRecordIndexesInFile("unenclosed-json-esc3.json", 0, 44)); Assert.assertArrayEquals(new int[] {0, 1}, getRecordIndexesInFile("unenclosed-json-esc3.json", 0, 45)); Assert.assertArrayEquals(new int[] {0, 1}, getRecordIndexesInFile("unenclosed-json-esc3.json", 0, 46)); Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInFile("unenclosed-json-esc3.json", 19, 140)); Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInFile("unenclosed-json-esc3.json", 26, 140)); Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInFile("unenclosed-json-esc3.json", 44, 140)); } @Test public void TestEscCloseLast() throws Exception { //int [] recordBreaks = new int[] { 0, 75, 146, 218, 290, 362, , , }; Assert.assertArrayEquals(new int[] { 0 }, getRecordIndexesInFile("unenclosed-json-esc4.json", 0, 44)); Assert.assertArrayEquals(new int[] {0, 1}, getRecordIndexesInFile("unenclosed-json-esc4.json", 0, 45)); Assert.assertArrayEquals(new int[] {0, 1}, getRecordIndexesInFile("unenclosed-json-esc4.json", 0, 46)); Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInFile("unenclosed-json-esc4.json", 19, 140)); Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInFile("unenclosed-json-esc4.json", 25, 140)); Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInFile("unenclosed-json-esc4.json", 44, 140)); } @Test public void TestEscOpenLast() throws Exception { //int [] recordBreaks = new int[] { 0, 75, 146, 218, 290, 362, , , }; Assert.assertArrayEquals(new int[] { 0 }, getRecordIndexesInFile("unenclosed-json-esc5.json", 0, 44)); Assert.assertArrayEquals(new int[] {0, 1}, getRecordIndexesInFile("unenclosed-json-esc5.json", 0, 45)); Assert.assertArrayEquals(new int[] {0, 1}, getRecordIndexesInFile("unenclosed-json-esc5.json", 0, 46)); Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInFile("unenclosed-json-esc5.json", 19, 140)); Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInFile("unenclosed-json-esc5.json", 26, 140)); Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInFile("unenclosed-json-esc5.json", 44, 140)); Assert.assertArrayEquals(new int[] { 6 }, getRecordIndexesInFile("unenclosed-json-esc5.json", 268, 280)); } @Test public void TestEscPoints() throws Exception { //int [] recordBreaks = new int[] { 0, 75, 146, 218, 290, 362, , , }; Assert.assertArrayEquals(new int[] { 0 }, getRecordIndexesInFile("unenclosed-json-esc-points.json", 0, 74, true)); Assert.assertArrayEquals(new int[] {0, 75}, getRecordIndexesInFile("unenclosed-json-esc-points.json", 0, 76, true)); Assert.assertArrayEquals(new int[] {75, 146}, getRecordIndexesInFile("unenclosed-json-esc-points.json", 70, 148, true)); } // This tests some multi-byte characters in UTF-8. // If implementing a byte-based approach instead of character-based, // the test itself would probably have to be updated to byte-based offsets // See issue #75 @Ignore public void TestCharacters() throws Exception { //int[] recordBreaks = new int[] { 0, 42, 84, 126, 168, 210, ...}; // character-based offsets Assert.assertArrayEquals(new int[] { 0 }, getRecordIndexesInFile("unenclosed-json-chars.json", 0, 42, true)); Assert.assertArrayEquals(new int[] {0,42}, getRecordIndexesInFile("unenclosed-json-chars.json", 0, 43, true)); Assert.assertArrayEquals(new int[] { 42 }, getRecordIndexesInFile("unenclosed-json-chars.json", 38, 43, true)); Assert.assertArrayEquals(new int[] { 42 }, getRecordIndexesInFile("unenclosed-json-chars.json", 39, 43, true)); Assert.assertArrayEquals(new int[] {84,126,168}, getRecordIndexesInFile("unenclosed-json-chars.json", 43, 200, true)); Assert.assertArrayEquals(new int[] {210,252,294,336}, getRecordIndexesInFile("unenclosed-json-chars.json", 200, 400, true)); } @Test public void TestGeomFirst() throws Exception { Assert.assertArrayEquals(new int[] { 1 }, getRecordIndexesInFile("unenclosed-json-geom-first.json", 32, 54)); Assert.assertArrayEquals(new int[] { 1 }, getRecordIndexesInFile("unenclosed-json-geom-first.json", 48, 54)); Assert.assertArrayEquals(new int[] { 1 }, getRecordIndexesInFile("unenclosed-json-geom-first.json", 49, 54)); Assert.assertArrayEquals(new int[] { 0 }, getRecordIndexesInFile("unenclosed-json-geom-first.json", 0, 52, true)); } /** * @deprecated superseded by UnenclosedEsriJsonRecordReader */ @Deprecated @Test public void TestLegacyName() throws Exception { UnenclosedEsriJsonRecordReader uejrr = new UnenclosedJsonRecordReader(); Assert.assertArrayEquals(new int[] { 0, 1 }, getRecordIndexesInFile(uejrr, "unenclosed-json-simple.json", 0, 63)); Assert.assertArrayEquals(new int[] { 2, 3 }, getRecordIndexesInFile(uejrr, "unenclosed-json-simple.json", 63, 121)); Assert.assertArrayEquals(new int[] { 4 }, getRecordIndexesInFile(uejrr, "unenclosed-json-simple.json", 121, 187)); Assert.assertArrayEquals(new int[] { 5, 6 }, getRecordIndexesInFile(uejrr, "unenclosed-json-simple.json", 187, 264)); Assert.assertArrayEquals(new int[] { 7, 8 }, getRecordIndexesInFile(uejrr, "unenclosed-json-simple.json", 264, 352)); Assert.assertArrayEquals(new int[] { 9 }, getRecordIndexesInFile(uejrr, "unenclosed-json-simple.json", 352, 412)); Assert.assertArrayEquals(new int[] { 0 }, getRecordIndexesInFile(uejrr, "unenclosed-json-simple.json", 0, 23)); Assert.assertArrayEquals(new int[] { 1 }, getRecordIndexesInFile(uejrr, "unenclosed-json-simple.json", 23, 41)); } }