/*
* Copyright 2014 Splunk, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"): you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package com.splunk;
import java.io.*;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.concurrent.Callable;
/**
 * Takes an InputStream containing a UTF-8 encoded XML document containing one or more
 * root 'results' elements, and wraps a 'doc' element around all of them so normal XML
 * parsers can handle the stream. Multiple root 'results' elements occur when getting
 * results from an export search.
 *
 * It works by finding the first instance of '&lt;results' and inserting the string
 * '&lt;doc&gt;' before it, and then returning '&lt;/doc&gt;' after the end of the stream
 * it is filtering. If '&lt;results' never occurs, the stream is passed through unmodified.
 *
 * Note that the search for '&lt;results' happens eagerly in the constructor, and every
 * byte read during that search is held in memory until it has been replayed to the
 * caller. A stream that never contains '&lt;results' is therefore read completely
 * before the constructor returns.
 */
class InsertRootElementFilterInputStream extends FilterInputStream {
    /** The closing tag, handed out once the wrapped stream is exhausted. */
    private final ByteArrayInputStream suffix = new ByteArrayInputStream("</doc>".getBytes("UTF-8"));

    /** True if '&lt;doc&gt;' was inserted, meaning '&lt;/doc&gt;' must be appended at the end. */
    private boolean wrotePrefix;

    /** Scratch buffer used by {@link #read()} to delegate to the bulk read. */
    private final byte[] oneByte = new byte[1];

    /**
     * Scans {@code in} for the first '&lt;results' and arranges for '&lt;doc&gt;' to be
     * returned immediately before it.
     *
     * @param in the stream to filter
     * @throws IOException if reading from {@code in} fails
     */
    InsertRootElementFilterInputStream(InputStream in) throws IOException {
        super(in);

        byte[] tagName = "results".getBytes("UTF-8");

        // While scanning, the only bytes that ever have to be pushed back are those of a
        // failed partial match after a '<', which is at most tagName.length bytes.
        PushbackInputStream scanner = new PushbackInputStream(in, tagName.length);

        // Everything consumed from the stream so far, in order, plus the inserted '<doc>'.
        ByteArrayOutputStream consumed = new ByteArrayOutputStream();

        boolean found = false;
        int ch;
        while ((ch = scanner.read()) != -1) {
            if (ch == '<' && consumeTagName(scanner, tagName)) {
                // Found '<results': insert '<doc>' in front of it and stop scanning.
                consumed.write("<doc>".getBytes("UTF-8"));
                consumed.write('<');
                consumed.write(tagName);
                found = true;
                break;
            }
            // Either an ordinary byte, or a '<' that did not start '<results' (in which
            // case the bytes after it were pushed back and will be rescanned).
            consumed.write(ch);
        }
        wrotePrefix = found;

        // Replay what was consumed through a pushback buffer sized to fit it exactly, so
        // an arbitrarily long preamble cannot overflow a fixed-size buffer. The scanner's
        // own pushback buffer is empty at this point (a successful match consumes its
        // bytes without unreading, and end of stream is only reported once the buffer is
        // drained), so the replay stream can sit directly on top of the original stream.
        PushbackInputStream replay = new PushbackInputStream(in, Math.max(1, consumed.size()));
        replay.unread(consumed.toByteArray());
        this.in = replay;
    }

    /**
     * Tries to read exactly {@code tagName} from {@code scanner}. On success the bytes
     * stay consumed; on a mismatch or end of stream, every byte that was actually read
     * is pushed back so the caller can rescan it.
     *
     * @param scanner the stream to read from; needs room to unread {@code tagName.length} bytes
     * @param tagName the bytes expected next in the stream
     * @return true if the next bytes of the stream were exactly {@code tagName}
     * @throws IOException if reading or unreading fails
     */
    private static boolean consumeTagName(PushbackInputStream scanner, byte[] tagName) throws IOException {
        byte[] seen = new byte[tagName.length];
        for (int i = 0; i < tagName.length; i++) {
            int next = scanner.read();
            if (next == -1) {
                // End of stream in the middle of a candidate: the end-of-stream marker
                // is not data, so only the real bytes go back.
                scanner.unread(seen, 0, i);
                return false;
            }
            seen[i] = (byte) next;
            if (seen[i] != tagName[i]) {
                scanner.unread(seen, 0, i + 1);
                return false;
            }
        }
        return true;
    }

    /**
     * Reads from the wrapped stream; once that is exhausted and '&lt;doc&gt;' was
     * inserted earlier, reads from the trailing '&lt;/doc&gt;' instead.
     *
     * @return the number of bytes read, or -1 once both the wrapped stream and the
     *         suffix (if any) are exhausted
     */
    @Override
    public int read(byte[] buffer, int offset, int length) throws IOException {
        int result = in.read(buffer, offset, length);
        if (result == -1 && wrotePrefix) {
            // No more bytes to read from in, and we have written '<doc>' earlier in the stream
            return suffix.read(buffer, offset, length);
        }
        // in still has data to return (or there is no suffix to add), so we return its result.
        return result;
    }

    /**
     * Reads a single byte by dispatching to {@link #read(byte[], int, int)}. The third
     * overload of read in FilterInputStream already dispatches to that method as well.
     *
     * @return the next byte as a value in the range 0-255, or -1 at end of stream
     */
    @Override
    public int read() throws IOException {
        if (read(oneByte, 0, 1) == -1) {
            return -1;
        }
        // Mask to an unsigned value: returning the raw (signed) byte would yield negative
        // numbers for bytes >= 0x80 and make 0xFF indistinguishable from end of stream.
        return oneByte[0] & 0xFF;
    }
}