/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nifi.processors.standard;

import static org.junit.Assert.assertEquals;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;

import org.apache.nifi.flowfile.attributes.CoreAttributes;
import org.apache.nifi.util.MockFlowFile;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.Test;

/**
 * JUnit tests that run the SplitText processor through a TestRunner and assert how many
 * FlowFiles reach each relationship, together with the content and attributes of the splits.
 */
public class TestSplitText {

    /** File name of the shared input fixture; also the value expected in the original-filename attribute. */
    final String originalFilename = "original.txt";

    /** Directory holding the input fixture and the expected-output files used by these tests. */
    final Path dataPath = Paths.get("src/test/resources/TestSplitText");

    /** Path of the shared input fixture inside {@link #dataPath}. */
    final Path file = dataPath.resolve(originalFilename);

    /** Inline input mixing text lines and blank lines, followed by a long run of newlines and a final "LastLine". */
    final static String TEST_INPUT_DATA = "HeaderLine1\nLine2SpacesAtEnd \nLine3\nLine4\n\n\nLine8\nLine9\n\n\n13\n14\n15 EndofLine15\n16\n"
            + "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nLastLine\n";

    /**
     * Enqueues four lines, the last of which is longer than the 20 B Maximum Fragment Size,
     * with no header and a Line Split Count of 2, and expects three splits and no failure.
     */
    @Test
    public void testLastLineExceedsSizeLimit() {
        final TestRunner runner = TestRunners.newTestRunner(new SplitText());
        runner.setProperty(SplitText.HEADER_LINE_COUNT, "0");
        runner.setProperty(SplitText.LINE_SPLIT_COUNT, "2");
        runner.setProperty(SplitText.FRAGMENT_MAX_SIZE, "20 B");

        // The final line has no trailing newline and is longer than 20 bytes.
        runner.enqueue("Line #1\nLine #2\nLine #3\nLong line exceeding limit");
        runner.run();
runner.assertTransferCount(SplitText.REL_FAILURE, 0);
        runner.assertTransferCount(SplitText.REL_ORIGINAL, 1);

        // The original FlowFile must carry the fragment count and a fragment identifier.
        final MockFlowFile originalFlowFile = runner.getFlowFilesForRelationship(SplitText.REL_ORIGINAL).get(0);
        originalFlowFile.assertAttributeEquals(SplitText.FRAGMENT_COUNT, "3");
        originalFlowFile.assertAttributeExists(SplitText.FRAGMENT_ID);
        runner.assertTransferCount(SplitText.REL_SPLITS, 3);
    }

    /**
     * Configures two header lines but enqueues a single line, and expects the FlowFile to be
     * routed to failure with nothing sent to original or splits.
     */
    @Test
    public void testIncompleteHeader() {
        final TestRunner runner = TestRunners.newTestRunner(new SplitText());
        runner.setProperty(SplitText.HEADER_LINE_COUNT, "2");
        runner.setProperty(SplitText.LINE_SPLIT_COUNT, "2");
        runner.setProperty(SplitText.FRAGMENT_MAX_SIZE, "50 B");

        // Only one line is supplied although two header lines are configured.
        runner.enqueue("Header Line #1");
        runner.run();

        runner.assertTransferCount(SplitText.REL_FAILURE, 1);
        runner.assertTransferCount(SplitText.REL_ORIGINAL, 0);
        runner.assertTransferCount(SplitText.REL_SPLITS, 0);
    }

    /**
     * Splits the fixture file using the single-character Header Marker "H" and a Line Split
     * Count of 7 while retaining trailing newlines, and expects two splits with the asserted
     * line counts, fragment sizes and fragment attributes.
     *
     * @throws IOException if the fixture file cannot be read
     */
    @Test
    public void testSingleCharacterHeaderMarker() throws IOException {
        final TestRunner runner = TestRunners.newTestRunner(new SplitText());
        runner.setProperty(SplitText.LINE_SPLIT_COUNT, "7");
        runner.setProperty(SplitText.HEADER_MARKER, "H");
        runner.setProperty(SplitText.REMOVE_TRAILING_NEWLINES, "false");

        runner.enqueue(file);
        runner.run();

        runner.assertTransferCount(SplitText.REL_FAILURE, 0);
        runner.assertTransferCount(SplitText.REL_ORIGINAL, 1);
        runner.getFlowFilesForRelationship(SplitText.REL_ORIGINAL).get(0).assertAttributeEquals("fragment.count", "2");
        runner.assertTransferCount(SplitText.REL_SPLITS, 2);

        // Per-split expectations: line count and fragment size attributes.
        final List<MockFlowFile> splits = runner.getFlowFilesForRelationship(SplitText.REL_SPLITS);
        splits.get(0).assertAttributeEquals(SplitText.SPLIT_LINE_COUNT, "7");
        splits.get(0).assertAttributeEquals(SplitText.FRAGMENT_SIZE, "86");
        splits.get(1).assertAttributeEquals(SplitText.SPLIT_LINE_COUNT, "3");
        splits.get(1).assertAttributeEquals(SplitText.FRAGMENT_SIZE, "54");

        // Every split is compared against the fragment identifier of the first split.
        final String fragmentUUID = splits.get(0).getAttribute(SplitText.FRAGMENT_ID);
        for (int i = 0; i < splits.size(); i++)
{
            // Fragment index is 1-based; identifier, count and original filename are shared by all splits.
            final MockFlowFile split = splits.get(i);
            split.assertAttributeEquals(SplitText.FRAGMENT_INDEX, String.valueOf(i+1));
            split.assertAttributeEquals(SplitText.FRAGMENT_ID, fragmentUUID);
            split.assertAttributeEquals(SplitText.FRAGMENT_COUNT, String.valueOf(splits.size()));
            split.assertAttributeEquals(SplitText.SEGMENT_ORIGINAL_FILENAME, file.getFileName().toString());
        }
    }

    /**
     * Splits the fixture file with both a Header Line Count of 1 and the Header Marker "Head"
     * configured, a Line Split Count of 5 and trailing newlines retained, and expects three
     * splits with the asserted line counts, fragment sizes and fragment attributes.
     *
     * @throws IOException if the fixture file cannot be read
     */
    @Test
    public void testMultipleHeaderIndicators() throws IOException {
        final TestRunner runner = TestRunners.newTestRunner(new SplitText());
        runner.setProperty(SplitText.HEADER_LINE_COUNT, "1");
        runner.setProperty(SplitText.HEADER_MARKER, "Head");
        runner.setProperty(SplitText.LINE_SPLIT_COUNT, "5");
        runner.setProperty(SplitText.REMOVE_TRAILING_NEWLINES, "false");

        runner.enqueue(file);
        runner.run();

        runner.assertTransferCount(SplitText.REL_FAILURE, 0);
        runner.assertTransferCount(SplitText.REL_ORIGINAL, 1);
        runner.getFlowFilesForRelationship(SplitText.REL_ORIGINAL).get(0).assertAttributeEquals("fragment.count", "3");
        runner.assertTransferCount(SplitText.REL_SPLITS, 3);

        // Per-split expectations: line count and fragment size attributes.
        final List<MockFlowFile> splits = runner.getFlowFilesForRelationship(SplitText.REL_SPLITS);
        splits.get(0).assertAttributeEquals(SplitText.SPLIT_LINE_COUNT, "5");
        splits.get(0).assertAttributeEquals(SplitText.FRAGMENT_SIZE, "62");
        splits.get(1).assertAttributeEquals(SplitText.SPLIT_LINE_COUNT, "5");
        splits.get(1).assertAttributeEquals(SplitText.FRAGMENT_SIZE, "55");
        splits.get(2).assertAttributeEquals(SplitText.SPLIT_LINE_COUNT, "1");
        splits.get(2).assertAttributeEquals(SplitText.FRAGMENT_SIZE, "23");

        // Every split is compared against the fragment identifier of the first split.
        final String fragmentUUID = splits.get(0).getAttribute(SplitText.FRAGMENT_ID);
        for (int i = 0; i < splits.size(); i++) {
            final MockFlowFile split = splits.get(i);
            split.assertAttributeEquals(SplitText.FRAGMENT_INDEX, String.valueOf(i + 1));
            split.assertAttributeEquals(SplitText.FRAGMENT_ID, fragmentUUID);
            split.assertAttributeEquals(SplitText.FRAGMENT_COUNT, String.valueOf(splits.size()));
split.assertAttributeEquals(SplitText.SEGMENT_ORIGINAL_FILENAME, file.getFileName().toString());
        }
    }

    /**
     * Sets only a Line Split Count of 0 and expects the processor configuration to be invalid.
     */
    @Test
    public void testZeroLinesNoMaxSize() {
        final TestRunner runner = TestRunners.newTestRunner(new SplitText());
        runner.setProperty(SplitText.LINE_SPLIT_COUNT, "0");
        runner.assertNotValid();
    }

    /**
     * Configures both a Line Split Count (2) and a Maximum Fragment Size (50 B) together with
     * two header lines and retained trailing newlines, and expects six splits of the asserted sizes.
     */
    @Test
    public void testMultipleSplitDirectives() {
        final TestRunner runner = TestRunners.newTestRunner(new SplitText());
        runner.setProperty(SplitText.HEADER_LINE_COUNT, "2");
        runner.setProperty(SplitText.LINE_SPLIT_COUNT, "2");
        runner.setProperty(SplitText.FRAGMENT_MAX_SIZE, "50 B");
        runner.setProperty(SplitText.REMOVE_TRAILING_NEWLINES, "false");

        // Two header lines, then ten data lines; "Line #3" is deliberately very long.
        runner.enqueue("Header Line #1\nHeader Line #2\nLine #1\nLine #2\n"
                + "Line #3 This line has additional text added so that it exceeds the maximum fragment size\n"
                + "Line #4\nLine #5\nLine #6\nLine #7\nLine #8\nLine #9\nLine #10\n");
        runner.run();

        runner.assertTransferCount(SplitText.REL_FAILURE, 0);
        runner.assertTransferCount(SplitText.REL_ORIGINAL, 1);
        runner.getFlowFilesForRelationship(SplitText.REL_ORIGINAL).get(0).assertAttributeEquals("fragment.count", "6");
        runner.assertTransferCount(SplitText.REL_SPLITS, 6);

        // Expected byte size of each split.
        // NOTE(review): the 119-byte split is presumably the over-long "Line #3" plus the header - confirm.
        final List<MockFlowFile> splits = runner.getFlowFilesForRelationship(SplitText.REL_SPLITS);
        assertEquals(46, splits.get(0).getSize());
        assertEquals(119, splits.get(1).getSize());
        assertEquals(46, splits.get(2).getSize());
        assertEquals(46, splits.get(3).getSize());
        assertEquals(46, splits.get(4).getSize());
        assertEquals(39, splits.get(5).getSize());
    }

    /**
     * Runs three scenarios in which the FlowFile content consists only of header lines and
     * expects, each time, one original FlowFile, one split and no failure.
     */
    @Test
    public void testFlowFileIsOnlyHeader() {
        final TestRunner runner = TestRunners.newTestRunner(new SplitText());

        // First scenario: the header is identified by the marker "Head", with Header Line Count 0.
        runner.setProperty(SplitText.HEADER_LINE_COUNT, "0");
        runner.setProperty(SplitText.LINE_SPLIT_COUNT, "2");
        runner.setProperty(SplitText.FRAGMENT_MAX_SIZE, "50 B");
        runner.setProperty(SplitText.HEADER_MARKER, "Head");

        runner.enqueue("Header Line #1\nHeaderLine#2\n");
        runner.run();

        runner.assertTransferCount(SplitText.REL_FAILURE, 0);
runner.assertTransferCount(SplitText.REL_ORIGINAL, 1); runner.getFlowFilesForRelationship(SplitText.REL_ORIGINAL).get(0).assertAttributeEquals("fragment.count", "1"); runner.assertTransferCount(SplitText.REL_SPLITS, 1); // repeat with header cou8nt versus header marker runner.clearTransferState(); runner.setProperty(SplitText.HEADER_LINE_COUNT, "2"); runner.setProperty(SplitText.LINE_SPLIT_COUNT, "2"); runner.setProperty(SplitText.FRAGMENT_MAX_SIZE, "50 B"); runner.enqueue("Header Line #1\nHeaderLine #2\n"); runner.run(); runner.assertTransferCount(SplitText.REL_FAILURE, 0); runner.assertTransferCount(SplitText.REL_ORIGINAL, 1); runner.getFlowFilesForRelationship(SplitText.REL_ORIGINAL).get(0).assertAttributeEquals("fragment.count", "1"); runner.assertTransferCount(SplitText.REL_SPLITS, 1); // repeat single header line with no newline characters runner.clearTransferState(); runner.setProperty(SplitText.HEADER_LINE_COUNT, "1"); runner.setProperty(SplitText.LINE_SPLIT_COUNT, "2"); runner.setProperty(SplitText.FRAGMENT_MAX_SIZE, "50 B"); runner.enqueue("Header Line #1"); runner.run(); runner.assertTransferCount(SplitText.REL_FAILURE, 0); runner.assertTransferCount(SplitText.REL_ORIGINAL, 1); runner.getFlowFilesForRelationship(SplitText.REL_ORIGINAL).get(0).assertAttributeEquals("fragment.count", "1"); runner.assertTransferCount(SplitText.REL_SPLITS, 1); } @Test public void testMaxSizeExceeded() throws IOException { final TestRunner runner = TestRunners.newTestRunner(new SplitText()); runner.setProperty(SplitText.HEADER_LINE_COUNT, "2"); runner.setProperty(SplitText.LINE_SPLIT_COUNT, "0"); runner.setProperty(SplitText.FRAGMENT_MAX_SIZE, "71 B"); runner.setProperty(SplitText.REMOVE_TRAILING_NEWLINES, "false"); runner.enqueue(file); runner.run(); runner.assertTransferCount(SplitText.REL_FAILURE, 0); runner.assertTransferCount(SplitText.REL_ORIGINAL, 1); runner.getFlowFilesForRelationship(SplitText.REL_ORIGINAL).get(0).assertAttributeEquals(SplitText.FRAGMENT_COUNT, "2"); 
runner.assertTransferCount(SplitText.REL_SPLITS, 2);

        // With the two-line header, both splits are expected to have the same line count and size.
        List<MockFlowFile> splits = runner.getFlowFilesForRelationship(SplitText.REL_SPLITS);
        String fragmentUUID = splits.get(0).getAttribute(SplitText.FRAGMENT_ID);
        for (int i = 0; i < splits.size(); i++) {
            final MockFlowFile split = splits.get(i);
            split.assertAttributeEquals(SplitText.SPLIT_LINE_COUNT, "5");
            split.assertAttributeEquals(SplitText.FRAGMENT_SIZE, "70");
            split.assertAttributeEquals(SplitText.FRAGMENT_INDEX, String.valueOf(i + 1));
            split.assertAttributeEquals(SplitText.FRAGMENT_ID, fragmentUUID);
            split.assertAttributeEquals(SplitText.FRAGMENT_COUNT, String.valueOf(splits.size()));
            split.assertAttributeEquals(SplitText.SEGMENT_ORIGINAL_FILENAME, file.getFileName().toString());
        }

        // Repeat test without header
        // (kept on its own line so the end-of-line comment cannot swallow the statements after it)
        runner.clearTransferState();
        runner.setProperty(SplitText.HEADER_LINE_COUNT, "0");
        runner.setProperty(SplitText.LINE_SPLIT_COUNT, "0");
        runner.setProperty(SplitText.FRAGMENT_MAX_SIZE, "71 B");
        runner.setProperty(SplitText.REMOVE_TRAILING_NEWLINES, "false");

        runner.enqueue(file);
        runner.run();

        runner.assertTransferCount(SplitText.REL_FAILURE, 0);
        runner.assertTransferCount(SplitText.REL_ORIGINAL, 1);
        runner.getFlowFilesForRelationship(SplitText.REL_ORIGINAL).get(0).assertAttributeEquals(SplitText.FRAGMENT_COUNT, "2");
        runner.assertTransferCount(SplitText.REL_SPLITS, 2);

        // Without a header the two "Header Line" rows are ordinary content of the first split.
        splits = runner.getFlowFilesForRelationship(SplitText.REL_SPLITS);
        fragmentUUID = splits.get(0).getAttribute(SplitText.FRAGMENT_ID);
        splits.get(0).assertContentEquals("Header Line #1\nHeader Line #2\nLine #1\nLine #2\nLine #3\nLine #4\nLine #5\n");
        splits.get(1).assertContentEquals("Line #6\nLine #7\nLine #8\nLine #9\nLine #10");

        splits.get(0).assertAttributeEquals(SplitText.SPLIT_LINE_COUNT, "7");
        splits.get(0).assertAttributeEquals(SplitText.FRAGMENT_SIZE, "70");
        splits.get(0).assertAttributeEquals(SplitText.FRAGMENT_INDEX, "1");
        splits.get(0).assertAttributeEquals(SplitText.FRAGMENT_ID, fragmentUUID);
splits.get(0).assertAttributeEquals(SplitText.FRAGMENT_COUNT, "2");
        splits.get(0).assertAttributeEquals(SplitText.SEGMENT_ORIGINAL_FILENAME, file.getFileName().toString());

        // Attribute expectations for the second split.
        splits.get(1).assertAttributeEquals(SplitText.SPLIT_LINE_COUNT, "5");
        splits.get(1).assertAttributeEquals(SplitText.FRAGMENT_SIZE, "40");
        splits.get(1).assertAttributeEquals(SplitText.FRAGMENT_INDEX, "2");
        splits.get(1).assertAttributeEquals(SplitText.FRAGMENT_ID, fragmentUUID);
        splits.get(1).assertAttributeEquals(SplitText.FRAGMENT_COUNT, "2");
        splits.get(1).assertAttributeEquals(SplitText.SEGMENT_ORIGINAL_FILENAME, file.getFileName().toString());
    }

    /**
     * Splits input whose lines are terminated only by carriage returns, first with a two-line
     * header and then without a header, and checks the number and content of the splits.
     */
    @Test
    public void testSplitWithOnlyCarriageReturn() {
        final TestRunner runner = TestRunners.newTestRunner(new SplitText());
        runner.setProperty(SplitText.HEADER_LINE_COUNT, "2");
        runner.setProperty(SplitText.LINE_SPLIT_COUNT, "3");

        // Two header lines, three data lines, a run of empty lines, then three more data lines.
        runner.enqueue("H1\rH2\r1\r2\r3\r\r\r\r\r\r\r10\r11\r12\r");
        runner.run();

        runner.assertTransferCount(SplitText.REL_FAILURE, 0);
        runner.assertTransferCount(SplitText.REL_ORIGINAL, 1);
        runner.getFlowFilesForRelationship(SplitText.REL_ORIGINAL).get(0).assertAttributeEquals(SplitText.FRAGMENT_COUNT, "4");
        runner.assertTransferCount(SplitText.REL_SPLITS, 4);

        // Splits made up only of empty lines are expected to contain just the header.
        final List<MockFlowFile> splits = runner.getFlowFilesForRelationship(SplitText.REL_SPLITS);
        splits.get(0).assertContentEquals("H1\rH2\r1\r2\r3");
        splits.get(1).assertContentEquals("H1\rH2");
        splits.get(2).assertContentEquals("H1\rH2");
        splits.get(3).assertContentEquals("H1\rH2\r10\r11\r12");

        // Second scenario: same data without header lines; only two splits are expected.
        runner.clearTransferState();
        runner.setProperty(SplitText.HEADER_LINE_COUNT, "0");
        runner.setProperty(SplitText.LINE_SPLIT_COUNT, "3");

        runner.enqueue("1\r2\r3\r\r\r\r\r\r\r10\r11\r12\r");
        runner.run();

        runner.assertTransferCount(SplitText.REL_FAILURE, 0);
        runner.assertTransferCount(SplitText.REL_ORIGINAL, 1);
        runner.getFlowFilesForRelationship(SplitText.REL_ORIGINAL).get(0).assertAttributeEquals(SplitText.FRAGMENT_COUNT, "2");
        runner.assertTransferCount(SplitText.REL_SPLITS, 2);

        final
List<MockFlowFile> splitsWithNoHeader = runner.getFlowFilesForRelationship(SplitText.REL_SPLITS);
        // Only the two groups of non-empty lines are expected as splits.
        splitsWithNoHeader.get(0).assertContentEquals("1\r2\r3");
        splitsWithNoHeader.get(1).assertContentEquals("10\r11\r12");
    }

    /**
     * Splits input whose lines are terminated by carriage return plus newline, first with a
     * two-line header and then without a header, and checks the number and content of the splits.
     */
    @Test
    public void testSplitWithCarriageReturnAndNewLines() {
        final TestRunner runner = TestRunners.newTestRunner(new SplitText());
        runner.setProperty(SplitText.HEADER_LINE_COUNT, "2");
        runner.setProperty(SplitText.LINE_SPLIT_COUNT, "3");

        // Two header lines, three data lines, a run of empty lines, then three more data lines.
        runner.enqueue("H1\r\nH2\r\n1\r\n2\r\n3\r\n\r\n\r\n\r\n\r\n\r\n\r\n10\r\n11\r\n12\r\n");
        runner.run();

        runner.assertTransferCount(SplitText.REL_FAILURE, 0);
        runner.assertTransferCount(SplitText.REL_ORIGINAL, 1);
        runner.getFlowFilesForRelationship(SplitText.REL_ORIGINAL).get(0).assertAttributeEquals(SplitText.FRAGMENT_COUNT, "4");
        runner.assertTransferCount(SplitText.REL_SPLITS, 4);

        // Splits made up only of empty lines are expected to contain just the header.
        final List<MockFlowFile> splits =runner.getFlowFilesForRelationship(SplitText.REL_SPLITS);
        splits.get(0).assertContentEquals("H1\r\nH2\r\n1\r\n2\r\n3");
        splits.get(1).assertContentEquals("H1\r\nH2");
        splits.get(2).assertContentEquals("H1\r\nH2");
        splits.get(3).assertContentEquals("H1\r\nH2\r\n10\r\n11\r\n12");

        // Second scenario: same data without header lines; only two splits are expected.
        runner.clearTransferState();
        runner.setProperty(SplitText.HEADER_LINE_COUNT, "0");
        runner.setProperty(SplitText.LINE_SPLIT_COUNT, "3");

        runner.enqueue("1\r\n2\r\n3\r\n\r\n\r\n\r\n\r\n\r\n\r\n10\r\n11\r\n12\r\n");
        runner.run();

        runner.assertTransferCount(SplitText.REL_FAILURE, 0);
        runner.assertTransferCount(SplitText.REL_ORIGINAL, 1);
        runner.getFlowFilesForRelationship(SplitText.REL_ORIGINAL).get(0).assertAttributeEquals(SplitText.FRAGMENT_COUNT, "2");
        runner.assertTransferCount(SplitText.REL_SPLITS, 2);

        final List<MockFlowFile> splitsWithNoHeader =runner.getFlowFilesForRelationship(SplitText.REL_SPLITS);
        splitsWithNoHeader.get(0).assertContentEquals("1\r\n2\r\n3");
        splitsWithNoHeader.get(1).assertContentEquals("10\r\n11\r\n12");
    }

    /**
     * Requests 100 header lines for the fixture file and expects the single FlowFile to be
     * routed to failure.
     *
     * @throws IOException if the fixture file cannot be read
     */
    @Test
    public void testRoutesToFailureIfHeaderLinesNotAllPresent() throws IOException {
        final TestRunner
runner = TestRunners.newTestRunner(new SplitText());
        runner.setProperty(SplitText.HEADER_LINE_COUNT, "100");
        runner.setProperty(SplitText.LINE_SPLIT_COUNT, "3");

        runner.enqueue(file);
        runner.run();

        // Everything transferred must have gone to failure, and exactly one FlowFile.
        runner.assertAllFlowFilesTransferred(SplitText.REL_FAILURE, 1);
    }

    /**
     * Splits the fixture file three lines at a time without a header and expects four splits.
     *
     * @throws IOException if the fixture file cannot be read
     */
    @Test
    public void testZeroByteOutput() throws IOException {
        final TestRunner runner = TestRunners.newTestRunner(new SplitText());
        runner.setProperty(SplitText.HEADER_LINE_COUNT, "0");
        runner.setProperty(SplitText.LINE_SPLIT_COUNT, "3");

        runner.enqueue(file);
        runner.run();

        runner.assertTransferCount(SplitText.REL_SPLITS, 4);
    }

    /**
     * Enqueues an empty FlowFile with no header configured and expects no splits, one original
     * FlowFile and no failure.
     *
     * @throws IOException declared by the test signature
     */
    @Test
    public void testZeroByteInputWithoutHeader() throws IOException {
        final TestRunner runner = TestRunners.newTestRunner(new SplitText());
        runner.setProperty(SplitText.HEADER_LINE_COUNT, "0");
        runner.setProperty(SplitText.LINE_SPLIT_COUNT, "1");

        // Zero-length content.
        runner.enqueue("".getBytes());
        runner.run();

        runner.assertTransferCount(SplitText.REL_SPLITS, 0);
        runner.assertTransferCount(SplitText.REL_ORIGINAL, 1);
        runner.assertTransferCount(SplitText.REL_FAILURE, 0);
    }

    /**
     * Enqueues an empty FlowFile with a Header Line Count of 1 and expects no splits, one
     * original FlowFile and no failure.
     *
     * @throws IOException declared by the test signature
     */
    @Test
    public void testZeroByteInput() throws IOException {
        final TestRunner runner = TestRunners.newTestRunner(new SplitText());
        runner.setProperty(SplitText.HEADER_LINE_COUNT, "1");
        runner.setProperty(SplitText.LINE_SPLIT_COUNT, "1");

        // Zero-length content.
        runner.enqueue("".getBytes());
        runner.run();

        runner.assertTransferCount(SplitText.REL_SPLITS, 0);
        runner.assertTransferCount(SplitText.REL_ORIGINAL, 1);
        runner.assertTransferCount(SplitText.REL_FAILURE, 0);
    }

    /**
     * Splits the fixture file three lines at a time with no header and checks the content of
     * each of the four expected splits.
     *
     * @throws IOException if the fixture file cannot be read
     */
    @Test
    public void testSplitWithoutHeader() throws IOException {
        final TestRunner runner = TestRunners.newTestRunner(new SplitText());
        runner.setProperty(SplitText.HEADER_LINE_COUNT, "0");
        runner.setProperty(SplitText.LINE_SPLIT_COUNT, "3");

        runner.enqueue(file);
        runner.run();

        runner.assertTransferCount(SplitText.REL_FAILURE, 0);
        runner.assertTransferCount(SplitText.REL_ORIGINAL, 1);
        runner.getFlowFilesForRelationship(SplitText.REL_ORIGINAL).get(0).assertAttributeEquals(SplitText.FRAGMENT_COUNT,
                "4");
        runner.assertTransferCount(SplitText.REL_SPLITS, 4);

        final List<MockFlowFile> splits = runner.getFlowFilesForRelationship(SplitText.REL_SPLITS);

        // With no header configured, the "Header Line" rows are treated as ordinary content.
        final String expected0 = "Header Line #1\nHeader Line #2\nLine #1";
        final String expected1 = "Line #2\nLine #3\nLine #4";
        final String expected2 = "Line #5\nLine #6\nLine #7";
        final String expected3 = "Line #8\nLine #9\nLine #10";

        splits.get(0).assertContentEquals(expected0);
        splits.get(1).assertContentEquals(expected1);
        splits.get(2).assertContentEquals(expected2);
        splits.get(3).assertContentEquals(expected3);
    }

    /**
     * Splits TEST_INPUT_DATA one line at a time with no header and expects eleven splits, one
     * per non-empty line of the input.
     *
     * @throws IOException declared by the test signature
     */
    @Test
    public void testOneLineSplitWithoutHeader() throws IOException {
        final TestRunner runner = TestRunners.newTestRunner(new SplitText());
        runner.setProperty(SplitText.HEADER_LINE_COUNT, "0");
        runner.setProperty(SplitText.LINE_SPLIT_COUNT, "1");

        runner.enqueue(TEST_INPUT_DATA);
        runner.run();

        runner.assertTransferCount(SplitText.REL_FAILURE, 0);
        runner.assertTransferCount(SplitText.REL_ORIGINAL, 1);
        runner.getFlowFilesForRelationship(SplitText.REL_ORIGINAL).get(0).assertAttributeEquals(SplitText.FRAGMENT_COUNT, "11");
        runner.assertTransferCount(SplitText.REL_SPLITS, 11);

        // Each expected split is a single non-empty input line without its trailing newline.
        final List<MockFlowFile> splits = runner.getFlowFilesForRelationship(SplitText.REL_SPLITS);
        splits.get(0).assertContentEquals("HeaderLine1");
        splits.get(1).assertContentEquals("Line2SpacesAtEnd ");
        splits.get(2).assertContentEquals("Line3");
        splits.get(3).assertContentEquals("Line4");
        splits.get(4).assertContentEquals("Line8");
        splits.get(5).assertContentEquals("Line9");
        splits.get(6).assertContentEquals("13");
        splits.get(7).assertContentEquals("14");
        splits.get(8).assertContentEquals("15 EndofLine15");
        splits.get(9).assertContentEquals("16");
        splits.get(10).assertContentEquals("LastLine");
    }

    /**
     * Splits TEST_INPUT_DATA five lines at a time with no header and checks the content of the
     * four expected splits.
     *
     * @throws IOException declared by the test signature
     */
    @Test
    public void testFiveLineSplitWithoutHeader() throws IOException {
        final TestRunner runner = TestRunners.newTestRunner(new SplitText());
        runner.setProperty(SplitText.HEADER_LINE_COUNT, "0");
        runner.setProperty(SplitText.LINE_SPLIT_COUNT, "5");
runner.enqueue(TEST_INPUT_DATA);
        runner.run();

        runner.assertTransferCount(SplitText.REL_FAILURE, 0);
        runner.assertTransferCount(SplitText.REL_ORIGINAL, 1);
        runner.assertTransferCount(SplitText.REL_SPLITS, 4);

        // Expected splits keep leading blank lines but have no trailing newlines.
        final List<MockFlowFile> splits = runner.getFlowFilesForRelationship(SplitText.REL_SPLITS);
        splits.get(0).assertContentEquals("HeaderLine1\nLine2SpacesAtEnd \nLine3\nLine4");
        splits.get(1).assertContentEquals("\nLine8\nLine9");
        splits.get(2).assertContentEquals("13\n14\n15 EndofLine15\n16");
        splits.get(3).assertContentEquals("\n\nLastLine");
    }

    /**
     * Splits TEST_INPUT_DATA five lines at a time with no header while retaining trailing
     * newlines, and expects ten splits, including splits that consist only of newlines.
     *
     * @throws IOException declared by the test signature
     */
    @Test
    public void testFiveLineSplitWithoutHeaderRetainNewline() throws IOException {
        final TestRunner runner = TestRunners.newTestRunner(new SplitText());
        runner.setProperty(SplitText.HEADER_LINE_COUNT, "0");
        runner.setProperty(SplitText.LINE_SPLIT_COUNT, "5");
        runner.setProperty(SplitText.REMOVE_TRAILING_NEWLINES, "false");

        runner.enqueue(TEST_INPUT_DATA);
        runner.run();

        runner.assertTransferCount(SplitText.REL_FAILURE, 0);
        runner.assertTransferCount(SplitText.REL_ORIGINAL, 1);
        runner.assertTransferCount(SplitText.REL_SPLITS, 10);

        final List<MockFlowFile> splits = runner.getFlowFilesForRelationship(SplitText.REL_SPLITS);
        splits.get(0).assertContentEquals("HeaderLine1\nLine2SpacesAtEnd \nLine3\nLine4\n\n");
        splits.get(1).assertContentEquals("\nLine8\nLine9\n\n\n");
        splits.get(2).assertContentEquals("13\n14\n15 EndofLine15\n16\n\n");
        // Splits 3 through 8 are expected to hold five newline characters each.
        splits.get(3).assertContentEquals("\n\n\n\n\n");
        splits.get(4).assertContentEquals("\n\n\n\n\n");
        splits.get(5).assertContentEquals("\n\n\n\n\n");
        splits.get(6).assertContentEquals("\n\n\n\n\n");
        splits.get(7).assertContentEquals("\n\n\n\n\n");
        splits.get(8).assertContentEquals("\n\n\n\n\n");
        splits.get(9).assertContentEquals("\n\nLastLine\n");
    }

    /**
     * Splits TEST_INPUT_DATA five lines at a time with a one-line header while retaining
     * trailing newlines, and expects ten splits that each begin with the header line.
     *
     * @throws IOException declared by the test signature
     */
    @Test
    public void testFiveLineSplitWithHeaderRetainNewline() throws IOException {
        final TestRunner runner = TestRunners.newTestRunner(new SplitText());
        runner.setProperty(SplitText.HEADER_LINE_COUNT, "1");
runner.setProperty(SplitText.LINE_SPLIT_COUNT, "5");
        runner.setProperty(SplitText.REMOVE_TRAILING_NEWLINES, "false");

        runner.enqueue(TEST_INPUT_DATA);
        runner.run();

        runner.assertTransferCount(SplitText.REL_FAILURE, 0);
        runner.assertTransferCount(SplitText.REL_ORIGINAL, 1);
        runner.assertTransferCount(SplitText.REL_SPLITS, 10);

        // Every expected split starts with the header line followed by five input lines
        // (fewer in the final split), with newlines retained.
        final List<MockFlowFile> splits = runner.getFlowFilesForRelationship(SplitText.REL_SPLITS);
        splits.get(0).assertContentEquals("HeaderLine1\nLine2SpacesAtEnd \nLine3\nLine4\n\n\n");
        splits.get(1).assertContentEquals("HeaderLine1\nLine8\nLine9\n\n\n13\n");
        splits.get(2).assertContentEquals("HeaderLine1\n14\n15 EndofLine15\n16\n\n\n");
        splits.get(3).assertContentEquals("HeaderLine1\n\n\n\n\n\n");
        splits.get(4).assertContentEquals("HeaderLine1\n\n\n\n\n\n");
        splits.get(5).assertContentEquals("HeaderLine1\n\n\n\n\n\n");
        splits.get(6).assertContentEquals("HeaderLine1\n\n\n\n\n\n");
        splits.get(7).assertContentEquals("HeaderLine1\n\n\n\n\n\n");
        splits.get(8).assertContentEquals("HeaderLine1\n\n\n\n\n\n");
        splits.get(9).assertContentEquals("HeaderLine1\n\nLastLine\n");
    }

    /**
     * Splits TEST_INPUT_DATA five lines at a time with a one-line header while removing
     * trailing newlines, and expects ten splits; splits of only blank lines are expected to
     * contain just the header.
     *
     * @throws IOException declared by the test signature
     */
    @Test
    public void testFiveLineSplitWithHeaderNotRetainNewline() throws IOException {
        final TestRunner runner = TestRunners.newTestRunner(new SplitText());
        runner.setProperty(SplitText.HEADER_LINE_COUNT, "1");
        runner.setProperty(SplitText.LINE_SPLIT_COUNT, "5");
        runner.setProperty(SplitText.REMOVE_TRAILING_NEWLINES, "true");

        runner.enqueue(TEST_INPUT_DATA);
        runner.run();

        runner.assertTransferCount(SplitText.REL_FAILURE, 0);
        runner.assertTransferCount(SplitText.REL_ORIGINAL, 1);
        runner.assertTransferCount(SplitText.REL_SPLITS, 10);

        // Interior blank lines are kept; only the trailing newlines of each split are dropped.
        final List<MockFlowFile> splits = runner.getFlowFilesForRelationship(SplitText.REL_SPLITS);
        splits.get(0).assertContentEquals("HeaderLine1\nLine2SpacesAtEnd \nLine3\nLine4");
        splits.get(1).assertContentEquals("HeaderLine1\nLine8\nLine9\n\n\n13");
        splits.get(2).assertContentEquals("HeaderLine1\n14\n15 EndofLine15\n16");
splits.get(3).assertContentEquals("HeaderLine1");
        splits.get(4).assertContentEquals("HeaderLine1");
        splits.get(5).assertContentEquals("HeaderLine1");
        splits.get(6).assertContentEquals("HeaderLine1");
        splits.get(7).assertContentEquals("HeaderLine1");
        splits.get(8).assertContentEquals("HeaderLine1");
        splits.get(9).assertContentEquals("HeaderLine1\n\nLastLine");
    }

    /**
     * Splits TEST_INPUT_DATA one line at a time with a one-line header and expects 47 splits;
     * splits for blank input lines are expected to contain only the header.
     *
     * @throws IOException declared by the test signature
     */
    @Test
    public void testOneLineSplitWithHeader() throws IOException {
        final TestRunner runner = TestRunners.newTestRunner(new SplitText());
        runner.setProperty(SplitText.HEADER_LINE_COUNT, "1");
        runner.setProperty(SplitText.LINE_SPLIT_COUNT, "1");

        runner.enqueue(TEST_INPUT_DATA);
        runner.run();

        runner.assertTransferCount(SplitText.REL_FAILURE, 0);
        runner.assertTransferCount(SplitText.REL_ORIGINAL, 1);
        runner.assertTransferCount(SplitText.REL_SPLITS, 47);

        final List<MockFlowFile> splits = runner.getFlowFilesForRelationship(SplitText.REL_SPLITS);
        splits.get(0).assertContentEquals("HeaderLine1\nLine2SpacesAtEnd ");
        splits.get(1).assertContentEquals("HeaderLine1\nLine3");
        splits.get(2).assertContentEquals("HeaderLine1\nLine4");
        splits.get(3).assertContentEquals("HeaderLine1");
        splits.get(4).assertContentEquals("HeaderLine1");
        splits.get(5).assertContentEquals("HeaderLine1\nLine8");
        splits.get(6).assertContentEquals("HeaderLine1\nLine9");
        splits.get(7).assertContentEquals("HeaderLine1");
        splits.get(8).assertContentEquals("HeaderLine1");
        splits.get(9).assertContentEquals("HeaderLine1\n13");
        splits.get(10).assertContentEquals("HeaderLine1\n14");
        splits.get(11).assertContentEquals("HeaderLine1\n15 EndofLine15");
        splits.get(12).assertContentEquals("HeaderLine1\n16");

        // Splits 13 through 45 are expected to be header-only.
        for (int i = 13; i < 46; i++) {
            splits.get(i).assertContentEquals("HeaderLine1");
        }

        splits.get(46).assertContentEquals("HeaderLine1\nLastLine");
    }

    /**
     * Splits the fixture file three lines at a time with a two-line header and compares each
     * split with the numbered expected-output file next to the fixture.
     *
     * @throws IOException if a fixture file cannot be read
     */
    @Test
    public void testSplitWithTwoLineHeader() throws IOException {
        final TestRunner runner = TestRunners.newTestRunner(new SplitText());
        runner.setProperty(SplitText.HEADER_LINE_COUNT, "2");
runner.setProperty(SplitText.LINE_SPLIT_COUNT, "3");

        runner.enqueue(file);
        runner.run();

        runner.assertTransferCount(SplitText.REL_FAILURE, 0);
        runner.assertTransferCount(SplitText.REL_ORIGINAL, 1);
        runner.assertTransferCount(SplitText.REL_SPLITS, 4);

        // Split i is compared with "<i + 1>.txt" in the fixture directory; fragment index is 1-based.
        final List<MockFlowFile> splits = runner.getFlowFilesForRelationship(SplitText.REL_SPLITS);
        for (int i = 0; i < splits.size(); i++) {
            final MockFlowFile split = splits.get(i);
            split.assertContentEquals(file.getParent().resolve((i + 1) + ".txt"));
            split.assertAttributeEquals(SplitText.FRAGMENT_INDEX, String.valueOf(i + 1));
        }
    }

    /**
     * Splits the fixture file five lines at a time with a two-line header and compares the two
     * expected splits with the files "5.txt" and "6.txt" next to the fixture.
     *
     * @throws IOException if a fixture file cannot be read
     */
    @Test
    public void testSplitWithTwoLineHeaderAndEvenMultipleOfLines() throws IOException {
        final TestRunner runner = TestRunners.newTestRunner(new SplitText());
        runner.setProperty(SplitText.HEADER_LINE_COUNT, "2");
        runner.setProperty(SplitText.LINE_SPLIT_COUNT, "5");

        runner.enqueue(file);
        runner.run();

        runner.assertTransferCount(SplitText.REL_FAILURE, 0);
        runner.assertTransferCount(SplitText.REL_ORIGINAL, 1);
        runner.assertTransferCount(SplitText.REL_SPLITS, 2);

        final List<MockFlowFile> splits = runner.getFlowFilesForRelationship(SplitText.REL_SPLITS);
        splits.get(0).assertContentEquals(file.getParent().resolve("5.txt"));
        splits.get(0).assertAttributeEquals(SplitText.FRAGMENT_INDEX, String.valueOf(1));
        splits.get(1).assertContentEquals(file.getParent().resolve("6.txt"));
        splits.get(1).assertAttributeEquals(SplitText.FRAGMENT_INDEX, String.valueOf(2));
    }

    /**
     * Splits the fixture file with trailing newlines retained, feeds the splits to MergeContent
     * using the defragment strategy, and expects the merged FlowFile to match the fixture.
     *
     * @throws IOException if the fixture file cannot be read
     */
    @Test
    public void testSplitThenMerge() throws IOException {
        final TestRunner splitRunner = TestRunners.newTestRunner(new SplitText());
        splitRunner.setProperty(SplitText.LINE_SPLIT_COUNT, "3");
        splitRunner.setProperty(SplitText.REMOVE_TRAILING_NEWLINES, "false");

        splitRunner.enqueue(file);
        splitRunner.run();

        splitRunner.assertTransferCount(SplitText.REL_SPLITS, 4);
        splitRunner.assertTransferCount(SplitText.REL_ORIGINAL, 1);
        splitRunner.assertTransferCount(SplitText.REL_FAILURE, 0);

        final List<MockFlowFile> splits =
splitRunner.getFlowFilesForRelationship(SplitText.REL_SPLITS);
        // Each split must name the original file and report the total fragment count.
        for (final MockFlowFile flowFile : splits) {
            flowFile.assertAttributeEquals(SplitText.SEGMENT_ORIGINAL_FILENAME, originalFilename);
            flowFile.assertAttributeEquals(SplitText.FRAGMENT_COUNT, "4");
        }

        // Feed the splits into MergeContent, configured to concatenate using the defragment strategy.
        final TestRunner mergeRunner = TestRunners.newTestRunner(new MergeContent());
        mergeRunner.setProperty(MergeContent.MERGE_FORMAT, MergeContent.MERGE_FORMAT_CONCAT);
        mergeRunner.setProperty(MergeContent.MERGE_STRATEGY, MergeContent.MERGE_STRATEGY_DEFRAGMENT);
        mergeRunner.enqueue(splits.toArray(new MockFlowFile[0]));
        mergeRunner.run();

        mergeRunner.assertTransferCount(MergeContent.REL_MERGED, 1);
        mergeRunner.assertTransferCount(MergeContent.REL_ORIGINAL, 4);
        mergeRunner.assertTransferCount(MergeContent.REL_FAILURE, 0);

        // The merged FlowFile must have the original filename, size and content.
        final List<MockFlowFile> packed = mergeRunner.getFlowFilesForRelationship(MergeContent.REL_MERGED);
        MockFlowFile flowFile = packed.get(0);
        flowFile.assertAttributeEquals(CoreAttributes.FILENAME.key(), originalFilename);
        assertEquals(Files.size(dataPath.resolve(originalFilename)), flowFile.getSize());
        flowFile.assertContentEquals(file);
    }

    /*
     * If an input FlowFile has a number of blank lines greater than the Line Split Count property,
     * ensure that the remainder of the FlowFile will be processed, resulting in no data loss.
*/
    @Test
    public void testSplitWithOnlyNewLines() {
        // First scenario: two header lines, Line Split Count 3, trailing newlines removed.
        final TestRunner splitRunner = TestRunners.newTestRunner(new SplitText());
        splitRunner.setProperty(SplitText.HEADER_LINE_COUNT, "2");
        splitRunner.setProperty(SplitText.LINE_SPLIT_COUNT, "3");
        splitRunner.setProperty(SplitText.REMOVE_TRAILING_NEWLINES, "true");

        // The run of blank lines in the middle is longer than the Line Split Count.
        splitRunner.enqueue("H1\nH2\n1\n2\n3\n\n\n\n\n\n\n10\n11\n12\n");
        splitRunner.run();

        splitRunner.assertTransferCount(SplitText.REL_SPLITS, 4);
        splitRunner.assertTransferCount(SplitText.REL_ORIGINAL, 1);
        splitRunner.assertTransferCount(SplitText.REL_FAILURE, 0);

        // Splits made up only of blank lines are expected to contain just the header.
        final List<MockFlowFile> splits = splitRunner.getFlowFilesForRelationship(SplitText.REL_SPLITS);
        splits.get(0).assertContentEquals("H1\nH2\n1\n2\n3");
        splits.get(1).assertContentEquals("H1\nH2");
        splits.get(2).assertContentEquals("H1\nH2");
        splits.get(3).assertContentEquals("H1\nH2\n10\n11\n12");

        // Second scenario: same data without a header; only the two non-blank groups are expected.
        splitRunner.clearTransferState();
        splitRunner.setProperty(SplitText.HEADER_LINE_COUNT, "0");
        splitRunner.setProperty(SplitText.LINE_SPLIT_COUNT, "3");
        splitRunner.setProperty(SplitText.REMOVE_TRAILING_NEWLINES, "true");

        splitRunner.enqueue("1\n2\n3\n\n\n\n\n\n\n10\n11\n12\n");
        splitRunner.run();

        splitRunner.assertTransferCount(SplitText.REL_SPLITS, 2);
        splitRunner.assertTransferCount(SplitText.REL_ORIGINAL, 1);
        splitRunner.assertTransferCount(SplitText.REL_FAILURE, 0);

        final List<MockFlowFile> splitsWithNoHeader = splitRunner.getFlowFilesForRelationship(SplitText.REL_SPLITS);
        splitsWithNoHeader.get(0).assertContentEquals("1\n2\n3");
        splitsWithNoHeader.get(1).assertContentEquals("10\n11\n12");
    }

    /*
     * If an input FlowFile has X blank lines at the end of a file and Line Split Count is
     * greater than X, verify that newlines are removed.
*/ @Test public void testWithLotsOfBlankLinesAtEnd() { // verify with header lines final TestRunner splitRunner = TestRunners.newTestRunner(new SplitText()); splitRunner.setProperty(SplitText.HEADER_LINE_COUNT, "2"); splitRunner.setProperty(SplitText.LINE_SPLIT_COUNT, "10"); splitRunner.setProperty(SplitText.REMOVE_TRAILING_NEWLINES, "true"); splitRunner.enqueue("H1\nH2\n1\n\n\n"); splitRunner.run(); splitRunner.assertTransferCount(SplitText.REL_SPLITS, 1); splitRunner.assertTransferCount(SplitText.REL_ORIGINAL, 1); splitRunner.assertTransferCount(SplitText.REL_FAILURE, 0); final List<MockFlowFile> splits = splitRunner.getFlowFilesForRelationship(SplitText.REL_SPLITS); splits.get(0).assertContentEquals("H1\nH2\n1"); // verify without headers splitRunner.clearTransferState(); splitRunner.setProperty(SplitText.HEADER_LINE_COUNT, "0"); splitRunner.enqueue("1\n2\n\n\n\n"); splitRunner.run(); splitRunner.assertTransferCount(SplitText.REL_SPLITS, 1); splitRunner.assertTransferCount(SplitText.REL_ORIGINAL, 1); splitRunner.assertTransferCount(SplitText.REL_FAILURE, 0); final List<MockFlowFile> splitsWithNoHeader = splitRunner.getFlowFilesForRelationship(SplitText.REL_SPLITS); splitsWithNoHeader.get(0).assertContentEquals("1\n2"); } /* * If an input FlowFile has X blank lines at the end of a file and Header Line Count = 0, * ensure all newlines removed from end of file. Previous behavior was: In the case where X is greater than * Line Split Count, there will be split files consisting of nothing but blank lines, * specifically one fewer lines than Line Split Count (i.e. only the final newline character is removed). * * Ensure that the above behavior is no longer reflected by the Processor. 
*/
    @Test
    public void testAllNewLinesTrimmed() {
        final TestRunner runner = TestRunners.newTestRunner(new SplitText());
        runner.setProperty(SplitText.HEADER_LINE_COUNT, "0");
        runner.setProperty(SplitText.LINE_SPLIT_COUNT, "3");
        runner.setProperty(SplitText.REMOVE_TRAILING_NEWLINES, "true");

        // Two content lines followed by a run of blank lines longer than Line Split Count.
        final String input = "1\n2\n\n\n\n\n\n\n\n";
        runner.enqueue(input);
        runner.run();

        // Exactly one split is expected: no split made up solely of blank lines.
        runner.assertTransferCount(SplitText.REL_SPLITS, 1);
        runner.assertTransferCount(SplitText.REL_ORIGINAL, 1);
        runner.assertTransferCount(SplitText.REL_FAILURE, 0);

        final List<MockFlowFile> produced = runner.getFlowFilesForRelationship(SplitText.REL_SPLITS);
        final MockFlowFile onlySplit = produced.get(0);
        onlySplit.assertContentEquals("1\n2");
    }

    /*
     * Previous behavior that was exhibited:
     * If an input FlowFile has X blank lines at the end of a file and
     * Header Line Count = 1 (or any non-zero value), the blank lines
     * are removed and no split file of just blanks is created. However,
     * the final line does contain a newline character. In other split
     * files, the final line has the newline character removed.
     *
     * Ensure that this behavior has been addressed. The split file that
     * does contain content should not have the trailing newline. The
     * remaining split files should still be generated, containing only
     * the header line.
*/
    @Test
    public void testConsistentTrailingOfNewLines() {
        final TestRunner runner = TestRunners.newTestRunner(new SplitText());
        runner.setProperty(SplitText.HEADER_LINE_COUNT, "1");
        runner.setProperty(SplitText.LINE_SPLIT_COUNT, "3");
        runner.setProperty(SplitText.REMOVE_TRAILING_NEWLINES, "true");

        final String input = "H1\n1\n\n\n\n\n\n\n\n";
        runner.enqueue(input);
        runner.run();

        runner.assertTransferCount(SplitText.REL_SPLITS, 3);
        runner.assertTransferCount(SplitText.REL_ORIGINAL, 1);
        runner.assertTransferCount(SplitText.REL_FAILURE, 0);

        // First split: header plus the one content line, no trailing newline.
        // Remaining splits: header only.
        final String[] expected = {"H1\n1", "H1", "H1"};
        final List<MockFlowFile> produced = runner.getFlowFilesForRelationship(SplitText.REL_SPLITS);
        for (int i = 0; i < expected.length; i++) {
            produced.get(i).assertContentEquals(expected[i]);
        }
    }

    /*
     * Verifies the expected output when a split begins with blank lines: the
     * assertions expect those leading blank lines to be kept in the split,
     * both with one header line and with no header.
     */
    @Test
    public void testWithSplitThatStartsWithNewLine() {
        // with a single header line
        final TestRunner runner = TestRunners.newTestRunner(new SplitText());
        runner.setProperty(SplitText.HEADER_LINE_COUNT, "1");
        runner.setProperty(SplitText.LINE_SPLIT_COUNT, "3");
        runner.setProperty(SplitText.REMOVE_TRAILING_NEWLINES, "true");

        final String inputWithHeader = "H1\n1\n2\n3\n\n\n4\n";
        runner.enqueue(inputWithHeader);
        runner.run();

        runner.assertTransferCount(SplitText.REL_SPLITS, 2);
        runner.assertTransferCount(SplitText.REL_ORIGINAL, 1);
        runner.assertTransferCount(SplitText.REL_FAILURE, 0);

        final String[] expectedWithHeader = {"H1\n1\n2\n3", "H1\n\n\n4"};
        final List<MockFlowFile> headerSplits = runner.getFlowFilesForRelationship(SplitText.REL_SPLITS);
        for (int i = 0; i < expectedWithHeader.length; i++) {
            headerSplits.get(i).assertContentEquals(expectedWithHeader[i]);
        }

        // same data without a header line
        runner.clearTransferState();
        runner.setProperty(SplitText.HEADER_LINE_COUNT, "0");

        final String inputWithoutHeader = "1\n2\n3\n\n\n4\n";
        runner.enqueue(inputWithoutHeader);
        runner.run();

        runner.assertTransferCount(SplitText.REL_SPLITS, 2);
        runner.assertTransferCount(SplitText.REL_ORIGINAL, 1);
        runner.assertTransferCount(SplitText.REL_FAILURE, 0);

        final String[] expectedWithoutHeader = {"1\n2\n3", "\n\n4"};
        final List<MockFlowFile> plainSplits = runner.getFlowFilesForRelationship(SplitText.REL_SPLITS);
        for (int i = 0; i < expectedWithoutHeader.length; i++) {
            plainSplits.get(i).assertContentEquals(expectedWithoutHeader[i]);
        }
    }

    /*
     * Verifies the expected output when the two header lines are themselves
     * empty: two splits, with contents as asserted below.
     */
    @Test
    public void testWithEmptyHeaderLines() {
        final TestRunner runner = TestRunners.newTestRunner(new SplitText());
        runner.setProperty(SplitText.HEADER_LINE_COUNT, "2");
        runner.setProperty(SplitText.LINE_SPLIT_COUNT, "3");
        runner.setProperty(SplitText.REMOVE_TRAILING_NEWLINES, "true");

        final String input = "\n\n1\n\n\n\n\n";
        runner.enqueue(input);
        runner.run();

        runner.assertTransferCount(SplitText.REL_SPLITS, 2);
        runner.assertTransferCount(SplitText.REL_ORIGINAL, 1);
        runner.assertTransferCount(SplitText.REL_FAILURE, 0);

        final String[] expected = {"\n\n1", "\n"};
        final List<MockFlowFile> produced = runner.getFlowFilesForRelationship(SplitText.REL_SPLITS);
        for (int i = 0; i < expected.length; i++) {
            produced.get(i).assertContentEquals(expected[i]);
        }
    }
}