package com.esri.json.hadoop;
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.JobConf;
import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Test;
/**
 * Split-boundary tests for {@link UnenclosedEsriJsonRecordReader}, driven through the
 * {@code org.apache.hadoop.mapred} API ({@code createKey}/{@code createValue}/{@code next}).
 *
 * <p>Each assertion opens a reader over a byte range {@code [start, end)} of a classpath
 * fixture and checks which records that reader yields. Records are identified either by the
 * reader's key or by a digit embedded in the record text; see
 * {@link #getRecordIndexesInReader(UnenclosedEsriJsonRecordReader, boolean)}.
 */
public class TestUnenclosedEsriJsonRecordMrv1 {

    /**
     * Builds the {@link FileSplit} covering {@code [start, end)} of a classpath fixture.
     *
     * @param resource fixture name, resolved relative to this class
     * @param start    offset of the first position of the split
     * @param end      offset one past the last position of the split
     * @return a split of length {@code end - start} with no preferred hosts
     */
    private FileSplit splitFor(String resource, int start, int end) {
        java.net.URL url = this.getClass().getResource(resource);
        // Fail with the fixture name rather than a bare NullPointerException.
        Assert.assertNotNull("test resource not found on classpath: " + resource, url);
        Path path = new Path(url.getFile());
        return new FileSplit(path, start, end - start, new String[0]);
    }

    /**
     * Opens an {@link UnenclosedEsriJsonRecordReader} over {@code [start, end)} of a fixture.
     *
     * @param resource fixture name, resolved relative to this class
     * @param start    split start offset
     * @param end      split end offset (exclusive)
     * @return a new reader; {@code getRecordIndexesInReader} closes it after draining
     * @throws IOException if the reader cannot be opened
     */
    private UnenclosedEsriJsonRecordReader getReaderFor(String resource, int start, int end) throws IOException {
        FileSplit split = splitFor(resource, start, end);
        JobConf conf = new JobConf();
        return new UnenclosedEsriJsonRecordReader(split, conf);
    }

    /**
     * Drains {@code reader} and identifies each record by the digit embedded in its text.
     * Equivalent to {@code getRecordIndexesInReader(reader, false)}.
     *
     * @param reader reader to drain; it is closed before this method returns
     * @return one identifier per record, in read order
     * @throws IOException if reading or closing fails
     */
    int [] getRecordIndexesInReader(UnenclosedEsriJsonRecordReader reader) throws IOException {
        return getRecordIndexesInReader(reader, false);
    }

    /**
     * Drains {@code reader} and returns one identifier per record, in read order.
     *
     * <p>When {@code flag} is {@code true} the identifier is the record's key narrowed to
     * {@code int}. When {@code false} it is the character at index 23 of the record text
     * minus {@code '0'}; presumably the fixtures place a single-digit record index at that
     * position (TODO confirm against the fixture files), so this mode cannot distinguish
     * more than ten records.
     *
     * @param reader reader to drain; it is closed before this method returns
     * @param flag   {@code true} to report keys, {@code false} to report the embedded digit
     * @return the collected identifiers
     * @throws IOException if reading or closing fails
     */
    int [] getRecordIndexesInReader(UnenclosedEsriJsonRecordReader reader, boolean flag) throws IOException {
        List<Integer> linesList = new LinkedList<Integer>();
        try {
            LongWritable key = reader.createKey();
            Text value = reader.createValue();
            while (reader.next(key, value)) {
                int line = flag ? (int)(key.get()) : value.toString().charAt(23) - '0';
                linesList.add(line);
            }
        } finally {
            // Release the underlying stream even when an assertion-bound read fails.
            reader.close();
        }
        int [] lines = new int[linesList.size()];
        int i = 0;
        // Single pass; indexed get() on a LinkedList would be quadratic.
        for (int line : linesList) {
            lines[i++] = line;
        }
        return lines;
    }

    /** Splits that start and end at, just before, and just after record boundaries. */
    @Test
    public void TestArbitrarySplitLocations() throws IOException {
        //int totalSize = 415;
        //int [] recordBreaks = new int[] { 0, 40, 80, 120, 160, 200, 240, 280, 320, 372 };
        Assert.assertArrayEquals(new int[] { 0 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-simple.json", 0, 40)));
        Assert.assertArrayEquals(new int[] { 0, 1 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-simple.json", 0, 41)));
        Assert.assertArrayEquals(new int[] { 0, 1 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-simple.json", 0, 42)));
        Assert.assertArrayEquals(new int[] { 1, 2, 3 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-simple.json", 39, 123)));
        Assert.assertArrayEquals(new int[] { 1, 2, 3 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-simple.json", 20, 123)));
        Assert.assertArrayEquals(new int[] { 1, 2, 3 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-simple.json", 40, 123)));
        Assert.assertArrayEquals(new int[] { 2, 3 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-simple.json", 41, 123)));
        Assert.assertArrayEquals(new int[] { 6, 7, 8 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-simple.json", 240, 340)));
        Assert.assertArrayEquals(new int[] { 9 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-simple.json", 353, 415)));
        Assert.assertArrayEquals(new int[] { 9 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-simple.json", 354, 415)));
        Assert.assertArrayEquals(new int[] { 9 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-simple.json", 355, 415)));
    }

    /** Adjacent splits must between them yield every record exactly once. */
    @Test
    public void TestEachOnce() throws IOException {
        //Each record exactly once - see commit b8f6d6dfaf11cce7d8cba54e6011e8684ade0e85, issue #68
        Assert.assertArrayEquals(new int[] { 0, 1 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-simple.json", 0, 63)));
        Assert.assertArrayEquals(new int[] { 2, 3 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-simple.json", 63, 121)));
        Assert.assertArrayEquals(new int[] { 4 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-simple.json", 121, 187)));
        Assert.assertArrayEquals(new int[] { 5, 6 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-simple.json", 187, 264)));
        Assert.assertArrayEquals(new int[] { 7, 8 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-simple.json", 264, 352)));
        Assert.assertArrayEquals(new int[] { 9 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-simple.json", 352, 412)));
        Assert.assertArrayEquals(new int[] { 0 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-simple.json", 0, 23)));
        Assert.assertArrayEquals(new int[] { 1 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-simple.json", 23, 41)));
        // Disabled in the original; the same range is asserted in TestArbitrarySplitLocations.
        // Assert.assertArrayEquals(new int[] { 2, 3 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-simple.json", 41, 123)));
    }

    /** Splitting around position 57 of the "return" fixture must not lose or duplicate records. */
    @Test
    public void TestWhitespace() throws IOException {
        //int [] recordBreaks = new int[] { 0, 57, 111, , };
        int[] rslt = getRecordIndexesInReader(getReaderFor("unenclosed-json-return.json", 0, 222), true);
        Assert.assertEquals(4, rslt.length);
        int[] before = null, after = null;
        before = getRecordIndexesInReader(getReaderFor("unenclosed-json-return.json", 0, 56), true);
        after = getRecordIndexesInReader(getReaderFor("unenclosed-json-return.json", 56, 222), true);
        Assert.assertEquals(4, before.length + after.length);
        before = getRecordIndexesInReader(getReaderFor("unenclosed-json-return.json", 0, 57), true);
        after = getRecordIndexesInReader(getReaderFor("unenclosed-json-return.json", 57, 222), true);
        Assert.assertEquals(4, before.length + after.length);
        before = getRecordIndexesInReader(getReaderFor("unenclosed-json-return.json", 0, 58), true);
        after = getRecordIndexesInReader(getReaderFor("unenclosed-json-return.json", 58, 222), true);
        Assert.assertEquals(4, before.length + after.length);
    }

    /**
     * Same shape as {@link #TestWhitespace()} but over the "comma" fixture.
     * Carries {@code @Test} so that JUnit reports it as ignored instead of not seeing it at all.
     */
    @Ignore // May not be guaranteed behavior
    @Test
    public void TestComma() throws IOException {
        //int [] recordBreaks = new int[] { 0, 57, 111, , };
        int[] rslt = getRecordIndexesInReader(getReaderFor("unenclosed-json-comma.json", 0, 222), true);
        Assert.assertEquals(4, rslt.length);
        int[] before = null, after = null;
        before = getRecordIndexesInReader(getReaderFor("unenclosed-json-comma.json", 0, 56), true);
        after = getRecordIndexesInReader(getReaderFor("unenclosed-json-comma.json", 56, 222), true);
        Assert.assertEquals(4, before.length + after.length);
        before = getRecordIndexesInReader(getReaderFor("unenclosed-json-comma.json", 0, 57), true);
        after = getRecordIndexesInReader(getReaderFor("unenclosed-json-comma.json", 57, 222), true);
        Assert.assertEquals(4, before.length + after.length);
        before = getRecordIndexesInReader(getReaderFor("unenclosed-json-comma.json", 0, 58), true);
        after = getRecordIndexesInReader(getReaderFor("unenclosed-json-comma.json", 58, 222), true);
        Assert.assertEquals(4, before.length + after.length);
    }

    /** Split positions over the "escape" fixture, including splits that start inside record 0. */
    @Test
    public void TestEscape() throws IOException {  // Issue #68
        //int [] recordBreaks = new int[] { 0, 44, 88, 137, 181, 229, 270, 311, 354 };  //length 395
        Assert.assertArrayEquals(new int[] { 0 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-escape.json", 0, 44)));
        Assert.assertArrayEquals(new int[] {0, 1}, getRecordIndexesInReader(getReaderFor("unenclosed-json-escape.json", 0, 45)));
        Assert.assertArrayEquals(new int[] {0, 1}, getRecordIndexesInReader(getReaderFor("unenclosed-json-escape.json", 0, 46)));
        Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInReader(getReaderFor("unenclosed-json-escape.json", 43, 140)));
        Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInReader(getReaderFor("unenclosed-json-escape.json", 19, 140)));
        Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInReader(getReaderFor("unenclosed-json-escape.json", 44, 140)));
        Assert.assertArrayEquals(new int[] {2, 3}, getRecordIndexesInReader(getReaderFor("unenclosed-json-escape.json", 45, 140)));
        Assert.assertArrayEquals(new int[] {4,5,6}, getRecordIndexesInReader(getReaderFor("unenclosed-json-escape.json", 181, 289)));
        Assert.assertArrayEquals(new int[] { 8 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-escape.json", 336, 400)));  // 7|{}"
        Assert.assertArrayEquals(new int[] { 1 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-escape.json", 14, 45)));
        Assert.assertArrayEquals(new int[] { 1 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-escape.json", 22, 45)));
        Assert.assertArrayEquals(new int[] { 1 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-escape.json", 23, 45)));
        Assert.assertArrayEquals(new int[] { 1 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-escape.json", 24, 45)));
        Assert.assertArrayEquals(new int[] { 1 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-escape.json", 25, 45)));
        Assert.assertArrayEquals(new int[] { 1 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-escape.json", 44, 68)));
        Assert.assertArrayEquals(new int[] { 1 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-escape.json", 44, 69)));
    }

    /** Split positions over the "esc1" fixture. */
    @Test
    public void TestEscQuoteLast() throws IOException {
        //int [] recordBreaks = new int[] { 0, 75, 146, 218, 290, 362, , , };
        Assert.assertArrayEquals(new int[] { 0 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-esc1.json", 0, 44)));
        Assert.assertArrayEquals(new int[] {0, 1}, getRecordIndexesInReader(getReaderFor("unenclosed-json-esc1.json", 0, 45)));
        Assert.assertArrayEquals(new int[] {0, 1}, getRecordIndexesInReader(getReaderFor("unenclosed-json-esc1.json", 0, 46)));
        Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInReader(getReaderFor("unenclosed-json-esc1.json", 19, 140)));
        Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInReader(getReaderFor("unenclosed-json-esc1.json", 25, 140)));
        Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInReader(getReaderFor("unenclosed-json-esc1.json", 44, 140)));
    }

    /** Split positions over the "esc2" fixture. */
    @Test
    public void TestEscAposLast() throws IOException {
        //int [] recordBreaks = new int[] { 0, 75, 146, 218, 290, 362, , , };
        Assert.assertArrayEquals(new int[] { 0 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-esc2.json", 0, 44)));
        Assert.assertArrayEquals(new int[] {0, 1}, getRecordIndexesInReader(getReaderFor("unenclosed-json-esc2.json", 0, 45)));
        Assert.assertArrayEquals(new int[] {0, 1}, getRecordIndexesInReader(getReaderFor("unenclosed-json-esc2.json", 0, 46)));
        Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInReader(getReaderFor("unenclosed-json-esc2.json", 19, 140)));
        Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInReader(getReaderFor("unenclosed-json-esc2.json", 26, 140)));
        Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInReader(getReaderFor("unenclosed-json-esc2.json", 43, 140)));
    }

    /** Split positions over the "esc3" fixture. */
    @Test
    public void TestEscSlashLast() throws IOException {
        //int [] recordBreaks = new int[] { 0, 75, 146, 218, 290, 362, , , };
        Assert.assertArrayEquals(new int[] { 0 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-esc3.json", 0, 44)));
        Assert.assertArrayEquals(new int[] {0, 1}, getRecordIndexesInReader(getReaderFor("unenclosed-json-esc3.json", 0, 45)));
        Assert.assertArrayEquals(new int[] {0, 1}, getRecordIndexesInReader(getReaderFor("unenclosed-json-esc3.json", 0, 46)));
        Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInReader(getReaderFor("unenclosed-json-esc3.json", 19, 140)));
        Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInReader(getReaderFor("unenclosed-json-esc3.json", 26, 140)));
        Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInReader(getReaderFor("unenclosed-json-esc3.json", 44, 140)));
    }

    /** Split positions over the "esc4" fixture. */
    @Test
    public void TestEscCloseLast() throws IOException {
        //int [] recordBreaks = new int[] { 0, 75, 146, 218, 290, 362, , , };
        Assert.assertArrayEquals(new int[] { 0 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-esc4.json", 0, 44)));
        Assert.assertArrayEquals(new int[] {0, 1}, getRecordIndexesInReader(getReaderFor("unenclosed-json-esc4.json", 0, 45)));
        Assert.assertArrayEquals(new int[] {0, 1}, getRecordIndexesInReader(getReaderFor("unenclosed-json-esc4.json", 0, 46)));
        Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInReader(getReaderFor("unenclosed-json-esc4.json", 19, 140)));
        Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInReader(getReaderFor("unenclosed-json-esc4.json", 25, 140)));
        Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInReader(getReaderFor("unenclosed-json-esc4.json", 44, 140)));
    }

    /** Split positions over the "esc5" fixture, plus one short split yielding only record 6. */
    @Test
    public void TestEscOpenLast() throws IOException {
        //int [] recordBreaks = new int[] { 0, 75, 146, 218, 290, 362, , , };
        Assert.assertArrayEquals(new int[] { 0 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-esc5.json", 0, 44)));
        Assert.assertArrayEquals(new int[] {0, 1}, getRecordIndexesInReader(getReaderFor("unenclosed-json-esc5.json", 0, 45)));
        Assert.assertArrayEquals(new int[] {0, 1}, getRecordIndexesInReader(getReaderFor("unenclosed-json-esc5.json", 0, 46)));
        Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInReader(getReaderFor("unenclosed-json-esc5.json", 19, 140)));
        Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInReader(getReaderFor("unenclosed-json-esc5.json", 26, 140)));
        Assert.assertArrayEquals(new int[] {1,2,3}, getRecordIndexesInReader(getReaderFor("unenclosed-json-esc5.json", 44, 140)));
        Assert.assertArrayEquals(new int[] { 6 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-esc5.json", 268, 280)));
    }

    /** Key-based checks over the "esc-points" fixture; expected values are reader keys. */
    @Test
    public void TestEscPoints() throws IOException {
        //int [] recordBreaks = new int[] { 0, 75, 146, 218, 290, 362, , , };
        Assert.assertArrayEquals(new int[] { 0 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-esc-points.json", 0, 74), true));
        Assert.assertArrayEquals(new int[] {0, 75}, getRecordIndexesInReader(getReaderFor("unenclosed-json-esc-points.json", 0, 76), true));
        Assert.assertArrayEquals(new int[] {75, 146}, getRecordIndexesInReader(getReaderFor("unenclosed-json-esc-points.json", 70, 148), true));
    }

    // This tests some multi-byte characters in UTF-8.
    // If implementing a byte-based approach instead of character-based,
    // the test itself would probably have to be updated to byte-based offsets
    @Test
    public void TestCharacters() throws IOException {
        //int[] recordBreaks = new int[] { 0, 42, 84, 126, 168, 210, ...};  // character-based offsets
        Assert.assertArrayEquals(new int[] { 0 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-chars.json", 0, 42), true));
        Assert.assertArrayEquals(new int[] {0,42}, getRecordIndexesInReader(getReaderFor("unenclosed-json-chars.json", 0, 43), true));
        Assert.assertArrayEquals(new int[] { 42 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-chars.json", 38, 43), true));
        Assert.assertArrayEquals(new int[] { 42 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-chars.json", 39, 43), true));
        Assert.assertArrayEquals(new int[] {84,126,168}, getRecordIndexesInReader(getReaderFor("unenclosed-json-chars.json", 43, 200), true));
        Assert.assertArrayEquals(new int[] {210,252,294,336}, getRecordIndexesInReader(getReaderFor("unenclosed-json-chars.json", 200, 400), true));
    }

    /** Split positions over the "geom-first" fixture; the last check is key-based. */
    @Test
    public void TestGeomFirst() throws IOException {
        Assert.assertArrayEquals(new int[] { 1 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-geom-first.json", 32, 54)));
        Assert.assertArrayEquals(new int[] { 1 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-geom-first.json", 48, 54)));
        Assert.assertArrayEquals(new int[] { 1 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-geom-first.json", 49, 54)));
        Assert.assertArrayEquals(new int[] { 0 }, getRecordIndexesInReader(getReaderFor("unenclosed-json-geom-first.json", 0, 52), true));
    }

    /**
     * Repeats the {@link #TestEachOnce()} split ranges through a reader constructed under the
     * legacy class name {@code UnenclosedJsonRecordReader}.
     *
     * @deprecated exercises the legacy {@code UnenclosedJsonRecordReader} name only; new tests
     *             should construct {@link UnenclosedEsriJsonRecordReader} directly.
     */
    @Deprecated
    @Test
    public void TestLegacyName() throws Exception {
        Assert.assertArrayEquals(new int[] { 0, 1 }, getRecordIndexesInReader(getLegacyReader("unenclosed-json-simple.json", 0, 63)));
        Assert.assertArrayEquals(new int[] { 2, 3 }, getRecordIndexesInReader(getLegacyReader("unenclosed-json-simple.json", 63, 121)));
        Assert.assertArrayEquals(new int[] { 4 }, getRecordIndexesInReader(getLegacyReader("unenclosed-json-simple.json", 121, 187)));
        Assert.assertArrayEquals(new int[] { 5, 6 }, getRecordIndexesInReader(getLegacyReader("unenclosed-json-simple.json", 187, 264)));
        Assert.assertArrayEquals(new int[] { 7, 8 }, getRecordIndexesInReader(getLegacyReader("unenclosed-json-simple.json", 264, 352)));
        Assert.assertArrayEquals(new int[] { 9 }, getRecordIndexesInReader(getLegacyReader("unenclosed-json-simple.json", 352, 412)));
    }

    /**
     * Opens a reader over {@code [start, end)} of a fixture using the legacy class name
     * {@code UnenclosedJsonRecordReader}, returned as {@link UnenclosedEsriJsonRecordReader}.
     *
     * @param resource fixture name, resolved relative to this class
     * @param start    split start offset
     * @param end      split end offset (exclusive)
     * @return a new legacy-named reader; {@code getRecordIndexesInReader} closes it after draining
     * @throws IOException if the reader cannot be opened
     */
    @Deprecated
    private UnenclosedEsriJsonRecordReader getLegacyReader(String resource, int start, int end) throws IOException {
        FileSplit split = splitFor(resource, start, end);
        JobConf conf = new JobConf();
        return new UnenclosedJsonRecordReader(split, conf);
    }
}