package com.thinkbiganalytics.ingest;
/*-
* #%L
* thinkbig-nifi-core-processors
* %%
* Copyright (C) 2017 ThinkBig Analytics
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
/**
 * Measures how many bytes the leading header rows of an input stream occupy.
 * Rows may be terminated by LF, CRLF or a lone CR.
 */
public class StripHeaderSupport {

    /**
     * Identifies the byte boundary of the end of the header.
     *
     * <p>The supplied stream is wrapped in a {@link BufferedInputStream} that is closed before this method returns,
     * which also closes {@code is}.
     *
     * @param headerRows the number of header rows
     * @param is         the input stream
     * @return the number of bytes occupied by the first {@code headerRows} rows (including their line terminators),
     * {@code 0} when {@code headerRows} is not positive, or {@code -1} when the stream contains fewer rows than requested
     * @throws IOException if reading from the stream fails
     */
    public long findHeaderBoundary(int headerRows, InputStream is) throws IOException {
        int rows = 0;
        long bytes = 0;
        boolean eof = false;
        try (BufferedInputStream bis = new BufferedInputStream(is)) {
            while (rows < headerRows && !eof) {
                Boundary boundary = nextLineBoundary(bis);
                eof = boundary.isEOF;
                bytes += boundary.bytes;
                // Hitting end-of-stream without reading a single byte means there was no further row at all;
                // counting it would let a stream that is one row short pass the check below.
                if (!boundary.isEOF || boundary.bytes > 0) {
                    rows++;
                }
            }
        }
        if (rows < headerRows) {
            return -1L;
        }
        return bytes;
    }

    /**
     * Consumes the next row from the stream and reports its length in bytes.
     *
     * <p>A row ends at LF (which also covers CRLF), at a CR that is followed by any byte other than LF, or at
     * end-of-stream. In the lone-CR case the byte following the CR is pushed back so it starts the next row.
     *
     * @param bis the buffered stream to read from
     * @return the row length in bytes, including its line terminator, with {@code isEOF} set when the end of the stream
     * was reached before a line terminator was recognized
     * @throws IOException if reading from the stream fails
     */
    protected Boundary nextLineBoundary(BufferedInputStream bis) throws IOException {
        int lastByte = -1;
        long numBytesRead = 0L;
        Boundary boundary = new Boundary();
        while (true) {
            // Mark before every read so a single byte of look-ahead can be undone below.
            bis.mark(1);
            final int thisByte = bis.read();
            if (thisByte == -1) {
                boundary.isEOF = true;
                boundary.bytes = numBytesRead;
                return boundary;
            }
            numBytesRead++;
            if (thisByte == '\n') {
                boundary.bytes = numBytesRead;
                return boundary;
            } else if (lastByte == '\r') {
                // The previous CR terminated the row on its own; un-read this byte and leave it for the next row.
                bis.reset();
                numBytesRead--;
                boundary.bytes = numBytesRead;
                return boundary;
            }
            lastByte = thisByte;
        }
    }

    /**
     * Result of scanning a single row.
     */
    static class Boundary {

        /** Number of bytes consumed for the row, including any line terminator. */
        long bytes;

        /** True when the scan stopped because the end of the stream was reached. */
        boolean isEOF;
    }
}