/**
* (c) Copyright 2013 WibiData, Inc.
*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kiji.mapreduce.input.impl;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.kiji.mapreduce.input.impl.XMLInputFormat.XMLRecordReader;
import org.kiji.schema.KijiClientTest;
/**
 * Unit tests for the record-boundary scanning done by {@link XMLRecordReader}.
 *
 * <p>Each test feeds an in-memory string through a {@link BufferedReader} and calls
 * {@code findRecordStart} and/or {@code findRecordEnd} directly, checking the boolean
 * result, the key that was set, and the text accumulated in the supplied buffers.</p>
 */
public class TestXMLInputFormat extends KijiClientTest {
  private static final Logger LOG = LoggerFactory.getLogger(TestXMLInputFormat.class);

  /**
   * A record start is found once; searching again on the same reader finds nothing.
   *
   * @throws IOException if reading from the in-memory reader fails.
   */
  @Test
  public void testReadTwice() throws IOException {
    XMLRecordReader reader = new XMLRecordReader();
    BufferedReader bReader = new BufferedReader(new StringReader("12345<user>"));
    LongWritable key = new LongWritable();
    StringBuilder sb = new StringBuilder();
    assertTrue(reader.findRecordStart(
        "<user".toCharArray(),
        0L, // Start offset
        100L, // End offset
        bReader,
        key,
        sb));
    // The record tag begins after the five leading characters "12345".
    assertEquals(5, key.get());
    assertEquals("<user>", sb.toString());

    // Second search uses the same reader, which has already been read past the tag.
    assertFalse(reader.findRecordStart(
        "<user".toCharArray(),
        0L, // Start offset
        100L, // End offset
        bReader,
        new LongWritable(),
        new StringBuilder()));
  }

  /**
   * Input containing no record tags yields neither a record start nor a record end.
   *
   * @throws IOException if reading from the in-memory reader fails.
   */
  @Test
  public void testNoRecord() throws IOException {
    XMLRecordReader reader = new XMLRecordReader();
    BufferedReader bReader = new BufferedReader(new StringReader("There's no record in here."));
    LongWritable key = new LongWritable();
    StringBuilder sb = new StringBuilder();
    Text record = new Text();
    // Mark the beginning so the same input can be replayed for the end-of-record search.
    bReader.mark(1000);
    assertFalse(reader.findRecordStart(
        "<user".toCharArray(),
        0L, // Start offset
        100L, // End offset
        bReader,
        key,
        sb));
    assertEquals(0, key.get());
    assertEquals("", sb.toString());

    bReader.reset();
    assertFalse(reader.findRecordEnd(
        "</user>".toCharArray(),
        bReader,
        100L, // End offset
        100L, // Overrun allowance
        sb,
        record));
    // Everything read while searching is accumulated in sb, but no record is emitted.
    assertEquals("There's no record in here.", sb.toString());
    assertEquals("", record.toString());
  }

  /**
   * A single complete record is found from its opening tag through its closing tag.
   *
   * @throws IOException if reading from the in-memory reader fails.
   */
  @Test
  public void testRegularRecord() throws IOException {
    XMLRecordReader reader = new XMLRecordReader();
    BufferedReader bReader = new BufferedReader(new StringReader("<user></user>"));
    LongWritable key = new LongWritable();
    StringBuilder sb = new StringBuilder();
    Text record = new Text();
    assertTrue(reader.findRecordStart(
        "<user".toCharArray(),
        0L, // Start offset
        100L, // End offset
        bReader,
        key,
        sb));
    assertEquals(0, key.get());
    assertEquals("<user>", sb.toString());

    assertTrue(reader.findRecordEnd(
        "</user>".toCharArray(),
        bReader,
        100L, // End offset
        100L, // Overrun allowance
        sb,
        record));
    assertEquals("<user></user>", record.toString());
  }

  /**
   * A record whose opening tag begins past the end offset is not reported.
   *
   * @throws IOException if reading from the in-memory reader fails.
   */
  @Test
  public void testRecordBeginsAfterSplit() throws IOException {
    XMLRecordReader reader = new XMLRecordReader();
    BufferedReader bReader = new BufferedReader(new StringReader("123456<user></user>"));
    LongWritable key = new LongWritable();
    StringBuilder sb = new StringBuilder();
    assertFalse(reader.findRecordStart(
        "<user".toCharArray(),
        0L, // Start offset
        3L, // End offset
        bReader,
        key,
        sb));
    assertEquals(0, key.get());
    assertEquals("", sb.toString());
  }

  /**
   * An opening tag that begins before the end offset but extends past it is still found.
   *
   * @throws IOException if reading from the in-memory reader fails.
   */
  @Test
  public void testRecordStartCrossesSplit() throws IOException {
    XMLRecordReader reader = new XMLRecordReader();
    BufferedReader bReader = new BufferedReader(new StringReader("<user></user>"));
    LongWritable key = new LongWritable();
    StringBuilder sb = new StringBuilder();
    assertTrue(reader.findRecordStart(
        "<user".toCharArray(),
        0L, // Start offset
        3L, // End offset
        bReader,
        key,
        sb));
    assertEquals(0, key.get());
    assertEquals("<user>", sb.toString());
  }

  /**
   * The opening tag is found when the end offset falls inside the record's closing tag.
   *
   * <p>NOTE(review): despite the name, only {@code findRecordStart} is exercised here;
   * the closing tag is never searched for.</p>
   *
   * @throws IOException if reading from the in-memory reader fails.
   */
  @Test
  public void testRecordEndsAfterSplit() throws IOException {
    XMLRecordReader reader = new XMLRecordReader();
    BufferedReader bReader = new BufferedReader(new StringReader("<user></user>"));
    LongWritable key = new LongWritable();
    StringBuilder sb = new StringBuilder();
    assertTrue(reader.findRecordStart(
        "<user".toCharArray(),
        0L, // Start offset
        7L, // End offset
        bReader,
        key,
        sb));
    assertEquals(0, key.get());
    assertEquals("<user>", sb.toString());
  }

  /**
   * The overrun allowance bounds how far past the end offset the closing-tag search reads.
   *
   * @throws IOException if reading from the in-memory reader fails.
   */
  @Test
  public void testTooLongRecord() throws IOException {
    XMLRecordReader reader = new XMLRecordReader();
    BufferedReader bReader = new BufferedReader(new StringReader("<user></user>"));
    StringBuilder sb = new StringBuilder();
    Text record = new Text();
    // Mark the beginning so the same input can be replayed with a larger allowance.
    bReader.mark(1000);

    // Small overrun allowance will break before finding the end of the record.
    assertFalse(reader.findRecordEnd(
        "</user>".toCharArray(),
        bReader,
        8L, // End offset
        1L, // Overrun allowance
        sb,
        record));
    assertEquals("<user></us", sb.toString());
    assertEquals("", record.toString());

    // Start over with a fresh record reader and buffer on the rewound input.
    reader = new XMLRecordReader();
    sb = new StringBuilder();
    bReader.reset();

    // Large overrun allowance will find the end of the record.
    assertTrue(reader.findRecordEnd(
        "</user>".toCharArray(),
        bReader,
        8L, // End offset
        10L, // Overrun allowance
        sb,
        record));
    assertEquals("<user></user>", sb.toString());
    assertEquals("<user></user>", record.toString());
  }

  /**
   * Tags that merely resemble the record tag ({@code <use>}, {@code <users>}) are not matched.
   *
   * @throws IOException if reading from the in-memory reader fails.
   */
  @Test
  public void testCloseMatches() throws IOException {
    XMLRecordReader reader = new XMLRecordReader();
    BufferedReader bReader = new BufferedReader(new StringReader("<use></use> <users></users>"));
    LongWritable key = new LongWritable();
    StringBuilder sb = new StringBuilder();
    Text record = new Text();
    // Mark the beginning so the same input can be replayed for the end-of-record search.
    bReader.mark(1000);
    assertFalse(reader.findRecordStart(
        "<user".toCharArray(),
        0L, // Start offset
        100L, // End offset
        bReader,
        key,
        sb));
    assertEquals(0, key.get());
    assertEquals("", sb.toString());

    bReader.reset();
    assertFalse(reader.findRecordEnd(
        "</user>".toCharArray(),
        bReader,
        100L, // End offset
        100L, // Overrun allowance
        sb,
        record));
    // Everything read while searching is accumulated in sb, but no record is emitted.
    assertEquals("<use></use> <users></users>", sb.toString());
    assertEquals("", record.toString());
  }

  /**
   * A record with nested content and a leading junk character is extracted intact.
   *
   * @throws IOException if reading from the in-memory reader fails.
   */
  @Test
  public void testCompleteRecord() throws IOException {
    XMLRecordReader reader = new XMLRecordReader();
    BufferedReader bReader =
        new BufferedReader(new StringReader("1<user><name>Bob</name></user>"));
    LongWritable key = new LongWritable();
    StringBuilder sb = new StringBuilder();
    Text record = new Text();
    assertTrue(reader.findRecordStart(
        "<user".toCharArray(),
        0L, // Start offset
        100L, // End offset
        bReader,
        key,
        sb));
    assertTrue(reader.findRecordEnd(
        "</user>".toCharArray(),
        bReader,
        100L, // End offset
        100L, // Overrun allowance
        sb,
        record));
    // The record tag begins after the single leading character "1".
    assertEquals(1, key.get());
    assertEquals("<user><name>Bob</name></user>", record.toString());
  }

  /**
   * An opening tag followed by whitespace (space or newline) is recognized as a record start.
   *
   * @throws IOException if reading from the in-memory reader fails.
   */
  @Test
  public void testWhitespace() throws IOException {
    // Tag name followed by a space and an attribute.
    XMLRecordReader reader = new XMLRecordReader();
    BufferedReader bReader = new BufferedReader(new StringReader("<user id=\"1\">"));
    LongWritable key = new LongWritable();
    StringBuilder sb = new StringBuilder();
    assertTrue(reader.findRecordStart(
        "<user".toCharArray(),
        0L, // Start offset
        100L, // End offset
        bReader,
        key,
        sb));
    assertEquals(0, key.get());
    assertEquals("<user ", sb.toString());

    // Tag name followed by a newline and an attribute.
    reader = new XMLRecordReader();
    bReader = new BufferedReader(new StringReader("<user\nid=\"1\">"));
    key = new LongWritable();
    sb = new StringBuilder();
    assertTrue(reader.findRecordStart(
        "<user".toCharArray(),
        0L, // Start offset
        100L, // End offset
        bReader,
        key,
        sb));
    assertEquals(0, key.get());
    assertEquals("<user\n", sb.toString());
  }

  /**
   * Two back-to-back records are read in sequence from the same reader.
   *
   * @throws IOException if reading from the in-memory reader fails.
   */
  @Test
  public void testTwoRecords() throws IOException {
    XMLRecordReader reader = new XMLRecordReader();
    BufferedReader bReader = new BufferedReader(new StringReader("<user>1</user><user>2</user>"));
    LongWritable key = new LongWritable();
    StringBuilder sb = new StringBuilder();
    Text record = new Text();

    // Find the first record.
    assertTrue(reader.findRecordStart(
        "<user".toCharArray(),
        0L, // Start offset
        100L, // End offset
        bReader,
        key,
        sb));
    assertEquals(0, key.get());
    assertEquals("<user>", sb.toString());
    assertTrue(reader.findRecordEnd(
        "</user>".toCharArray(),
        bReader,
        100L, // End offset
        100L, // Overrun allowance
        sb,
        record));
    assertEquals("<user>1</user>", record.toString());

    // Find the second record.
    sb = new StringBuilder();
    record = new Text();
    assertTrue(reader.findRecordStart(
        "<user".toCharArray(),
        0L, // Start offset
        100L, // End offset
        bReader,
        key,
        sb));
    assertEquals("<user>", sb.toString());
    assertTrue(reader.findRecordEnd(
        "</user>".toCharArray(),
        bReader,
        100L, // End offset
        100L, // Overrun allowance
        sb,
        record));
    assertEquals("<user>2</user>", record.toString());
  }

  /**
   * Placeholder for a test of a second record that sits on a split boundary.
   *
   * <p>NOTE(review): this test has no body and therefore passes vacuously. It should
   * either be implemented or removed.</p>
   *
   * @throws IOException declared for parity with the other tests; nothing is thrown today.
   */
  @Test
  public void testSecondRecordOnSplitBoundary() throws IOException {
    // TODO: Not yet implemented; no behavior is verified here.
  }
}