/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.stream.io.util;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.mock;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import org.apache.nifi.stream.io.util.TextLineDemarcator.OffsetInfo;
import org.junit.Test;
/**
 * Unit tests for {@link TextLineDemarcator}.
 * <p>
 * Each test feeds a small in-memory stream to the demarcator and checks the
 * {@link OffsetInfo} values returned by successive {@code nextOffsetInfo} calls:
 * start offset, total length, length of the trailing CR/LF sequence and the
 * "starts with" flag. The second constructor argument used by several tests is
 * the buffer size; small values are chosen on purpose so that line terminators
 * straddle buffer boundaries.
 */
public class TextLineDemarcatorTest {

    /** A {@code null} stream must be rejected by the constructor. */
    @Test(expected = IllegalArgumentException.class)
    @SuppressWarnings("resource") // the constructor is expected to throw, so there is nothing to close
    public void nullStream() {
        new TextLineDemarcator(null);
    }

    /** A negative buffer size must be rejected by the constructor. */
    @Test(expected = IllegalArgumentException.class)
    @SuppressWarnings("resource") // the constructor is expected to throw, so there is nothing to close
    public void illegalBufferSize() {
        new TextLineDemarcator(mock(InputStream.class), -234);
    }

    /** An empty stream yields no offset info when no "starts with" filter is supplied. */
    @Test
    public void emptyStreamNoStartWithFilter() throws IOException {
        try (InputStream is = stringToIs("");
                TextLineDemarcator demarcator = new TextLineDemarcator(is)) {
            assertNull(demarcator.nextOffsetInfo());
        }
    }

    /** An empty stream yields no offset info when a "starts with" filter is supplied. */
    @Test
    public void emptyStreamAndStartWithFilter() throws IOException {
        try (InputStream is = stringToIs("");
                TextLineDemarcator demarcator = new TextLineDemarcator(is)) {
            assertNull(demarcator.nextOffsetInfo(utf8("hello")));
        }
    }

    // This test has no assertions. Its success criterion is the absence of a
    // failure while reading the stream to the end (see NIFI-3278).
    @Test
    public void endsWithCRWithBufferLengthEqualStringLengthA() throws Exception {
        String str = "\r";
        try (InputStream is = stringToIs(str);
                TextLineDemarcator demarcator = new TextLineDemarcator(is, str.length())) {
            readToEnd(demarcator);
        }
    }

    // Same as above, with text preceding the trailing CR; passes if reading to
    // the end of the stream does not fail.
    @Test
    public void endsWithCRWithBufferLengthEqualStringLengthB() throws Exception {
        String str = "abc\r";
        try (InputStream is = stringToIs(str);
                TextLineDemarcator demarcator = new TextLineDemarcator(is, str.length())) {
            readToEnd(demarcator);
        }
    }

    /** A lone CR is reported as a one-byte line whose terminator is one byte long. */
    @Test
    public void singleCR() throws IOException {
        try (InputStream is = stringToIs("\r");
                TextLineDemarcator demarcator = new TextLineDemarcator(is)) {
            assertLine(demarcator.nextOffsetInfo(), 0, 1, 1, true);
        }
    }

    /** A lone LF is reported as a one-byte line whose terminator is one byte long. */
    @Test
    public void singleLF() throws IOException {
        try (InputStream is = stringToIs("\n");
                TextLineDemarcator demarcator = new TextLineDemarcator(is)) {
            assertLine(demarcator.nextOffsetInfo(), 0, 1, 1, true);
        }
    }

    @Test
    // essentially validates the internal 'isEol()' operation to ensure it will perform read-ahead
    public void crlfWhereLFdoesNotFitInInitialBuffer() throws Exception {
        try (InputStream is = stringToIs("oleg\r\njoe");
                TextLineDemarcator demarcator = new TextLineDemarcator(is, 5)) {
            assertLine(demarcator.nextOffsetInfo(), 0, 6, 2, true);
            assertLine(demarcator.nextOffsetInfo(), 6, 3, 0, true);
        }
    }

    /** Regression test referenced as NIFI-3495: CR-only terminators mixed with a CRLF, read with a 10-byte buffer. */
    @Test
    public void validateNiFi_3495() throws IOException {
        String str = "he\ra-to-a\rb-to-b\rc-to-c\r\nd-to-d";
        try (InputStream is = stringToIs(str);
                TextLineDemarcator demarcator = new TextLineDemarcator(is, 10)) {
            assertLine(demarcator.nextOffsetInfo(), 0, 3, 1);
            assertLine(demarcator.nextOffsetInfo(), 3, 7, 1);
            assertLine(demarcator.nextOffsetInfo(), 10, 7, 1);
            assertLine(demarcator.nextOffsetInfo(), 17, 8, 2);
            assertLine(demarcator.nextOffsetInfo(), 25, 6, 0);
        }
    }

    /** CR, LF and CRLF terminators in one stream, read with a buffer smaller than any line. */
    @Test
    public void mixedCRLF() throws Exception {
        try (InputStream is = stringToIs("oleg\rjoe\njack\r\nstacymike\r\n");
                TextLineDemarcator demarcator = new TextLineDemarcator(is, 4)) {
            assertLine(demarcator.nextOffsetInfo(), 0, 5, 1, true);
            assertLine(demarcator.nextOffsetInfo(), 5, 4, 1, true);
            assertLine(demarcator.nextOffsetInfo(), 9, 6, 2, true);
            assertLine(demarcator.nextOffsetInfo(), 15, 11, 2, true);
        }
    }

    /** Back-to-back terminators: each CR, LF or CRLF that follows another terminator is its own (empty) line. */
    @Test
    public void consecutiveAndMixedCRLF() throws Exception {
        try (InputStream is = stringToIs("oleg\r\r\njoe\n\n\rjack\n\r\nstacymike\r\n\n\n\r");
                TextLineDemarcator demarcator = new TextLineDemarcator(is, 4)) {
            assertLengths(demarcator.nextOffsetInfo(), 5, 1); // oleg\r
            assertLengths(demarcator.nextOffsetInfo(), 2, 2); // \r\n
            assertLengths(demarcator.nextOffsetInfo(), 4, 1); // joe\n
            assertLengths(demarcator.nextOffsetInfo(), 1, 1); // \n
            assertLengths(demarcator.nextOffsetInfo(), 1, 1); // \r
            assertLengths(demarcator.nextOffsetInfo(), 5, 1); // jack\n
            assertLengths(demarcator.nextOffsetInfo(), 2, 2); // \r\n
            assertLengths(demarcator.nextOffsetInfo(), 11, 2); // stacymike\r\n
            assertLengths(demarcator.nextOffsetInfo(), 1, 1); // \n
            assertLengths(demarcator.nextOffsetInfo(), 1, 1); // \n
            assertLengths(demarcator.nextOffsetInfo(), 1, 1); // \r
        }
    }

    /** None of the supplied prefixes matches its line, so the "starts with" flag is always false. */
    @Test
    public void startWithNoMatchOnWholeStream() throws Exception {
        try (InputStream is = stringToIs("oleg\rjoe\njack\r\nstacymike\r\n");
                TextLineDemarcator demarcator = new TextLineDemarcator(is, 4)) {
            assertLine(demarcator.nextOffsetInfo(utf8("foojhkj")), 0, 5, 1, false);
            assertLine(demarcator.nextOffsetInfo(utf8("foo")), 5, 4, 1, false);
            assertLine(demarcator.nextOffsetInfo(utf8("joe")), 9, 6, 2, false);
            assertLine(demarcator.nextOffsetInfo(utf8("stasy")), 15, 11, 2, false);
        }
    }

    /** Only the prefixes that really begin the current line set the "starts with" flag. */
    @Test
    public void startWithSomeMatches() throws Exception {
        try (InputStream is = stringToIs("oleg\rjoe\njack\r\nstacymike\r\n");
                TextLineDemarcator demarcator = new TextLineDemarcator(is, 7)) {
            assertLine(demarcator.nextOffsetInfo(utf8("foojhkj")), 0, 5, 1, false);
            assertLine(demarcator.nextOffsetInfo(utf8("jo")), 5, 4, 1, true);
            assertLine(demarcator.nextOffsetInfo(utf8("joe")), 9, 6, 2, false);
            assertLine(demarcator.nextOffsetInfo(utf8("stacy")), 15, 11, 2, true);
        }
    }

    /** The last line has no terminator and the LF of the first line falls just past the 3-byte buffer. */
    @Test
    public void testOnBufferSplitNoTrailingDelimiter() throws IOException {
        final byte[] inputData = "Yes\nNo".getBytes(StandardCharsets.UTF_8);
        try (final InputStream is = new ByteArrayInputStream(inputData);
                final TextLineDemarcator demarcator = new TextLineDemarcator(is, 3)) {
            // Read everything before asserting, exactly as the lines are consumed in sequence.
            final OffsetInfo first = demarcator.nextOffsetInfo();
            final OffsetInfo second = demarcator.nextOffsetInfo();
            final OffsetInfo third = demarcator.nextOffsetInfo();

            assertNotNull(first);
            assertNotNull(second);
            assertNull(third);

            assertLine(first, 0, 4, 1);
            assertLine(second, 4, 2, 0);
        }
    }

    /** A prefix longer than the final, unterminated line must simply not match. */
    @Test
    public void validateStartsWithLongerThanLastToken() throws IOException {
        final byte[] inputData = "This is going to be a spectacular test\nThis is".getBytes(StandardCharsets.UTF_8);
        final byte[] startsWith = "This is going to be".getBytes(StandardCharsets.UTF_8);
        try (final InputStream is = new ByteArrayInputStream(inputData);
                final TextLineDemarcator demarcator = new TextLineDemarcator(is)) {
            assertLine(demarcator.nextOffsetInfo(startsWith), 0, 39, 1, true);
            assertLine(demarcator.nextOffsetInfo(startsWith), 39, 7, 0, false);
        }
    }

    /** Consumes the demarcator until it reports end of stream; the returned values are ignored. */
    private static void readToEnd(TextLineDemarcator demarcator) throws IOException {
        while (demarcator.nextOffsetInfo() != null) {
            // intentionally empty: only the absence of an exception matters
        }
    }

    /** Asserts that {@code info} is present and has the given total length and terminator length. */
    private static void assertLengths(OffsetInfo info, long length, long crlfLength) {
        assertNotNull("expected another line but reached end of stream", info);
        assertEquals("length", length, info.getLength());
        assertEquals("crlfLength", crlfLength, info.getCrlfLength());
    }

    /** Asserts that {@code info} is present and has the given start offset, total length and terminator length. */
    private static void assertLine(OffsetInfo info, long startOffset, long length, long crlfLength) {
        assertNotNull("expected another line but reached end of stream", info);
        assertEquals("startOffset", startOffset, info.getStartOffset());
        assertLengths(info, length, crlfLength);
    }

    /** Same as the four-argument variant, and additionally checks the "starts with" flag. */
    private static void assertLine(OffsetInfo info, long startOffset, long length, long crlfLength, boolean startsWithMatch) {
        assertLine(info, startOffset, length, crlfLength);
        if (startsWithMatch) {
            assertTrue("startsWithMatch", info.isStartsWithMatch());
        } else {
            assertFalse("startsWithMatch", info.isStartsWithMatch());
        }
    }

    /** Encodes {@code text} as UTF-8 so the tests do not depend on the platform default charset. */
    private static byte[] utf8(String text) {
        return text.getBytes(StandardCharsets.UTF_8);
    }

    /** Wraps the UTF-8 bytes of {@code data} in an in-memory stream. */
    private static InputStream stringToIs(String data) {
        return new ByteArrayInputStream(data.getBytes(StandardCharsets.UTF_8));
    }
}