package au.gov.ga.earthsci.common.util; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.util.Map; import java.util.Map.Entry; /** * Helper class that reads bytes from an {@link InputStream}, piping them to an * {@link OutputStream} until a pattern is found. If a pattern is found, a * callback is notified, and the pattern is not written to the output. * * @author Michael de Hoog (michael.dehoog@ga.gov.au) */ public class PatternPipe { /** * Callback interface for the {@link PatternPipe#copy} method */ public static interface Callback { void patternFound(String patternId, byte[] pattern, OutputStream out); } /** * Copy the data from the input to the output, searching for patterns along * the way. If a pattern is found, the callback is notified, and the pattern * bytes are not written to the output. * * @param in * Input to read from * @param out * Output to write to * @param patterns * Patterns to search for ({@link Map} key is the pattern id * passed to the callback, the actual byte[] pattern is stored in * the Map value) * @param callback * Callback to notify of pattern matches * @throws IOException */ public static void copy(InputStream in, OutputStream out, Map<String, byte[]> patterns, Callback callback) throws IOException { byte[][] patternsArray = new byte[patterns.size()][]; String[] patternIds = new String[patterns.size()]; int p = 0; int longestPatternLength = 0; for (Entry<String, byte[]> pattern : patterns.entrySet()) { patternIds[p] = pattern.getKey(); patternsArray[p] = pattern.getValue(); longestPatternLength = Math.max(longestPatternLength, patternsArray[p].length); p++; } //buffer the streams, because we read a byte at a time below @SuppressWarnings("resource") InputStream input = in instanceof BufferedInputStream ? in : new BufferedInputStream(in); @SuppressWarnings("resource") OutputStream output = out instanceof BufferedOutputStream ? out : new BufferedOutputStream(out); //we read into a buffer that is the length of the longest pattern, //so we can find the pattern within int bufferLength = Math.max(1, longestPatternLength); byte[] buffer = new byte[bufferLength]; int position = 0, count = 0; int skip = bufferLength - 1; boolean filled = false, closed = false; while (true) { int b = closed ? -1 : input.read(); if (b >= 0) { buffer[position] = (byte) b; count = Math.min(count + 1, bufferLength); filled = filled || count == bufferLength; } else { closed = true; if (!filled) { // special case where there weren't enough input characters skip = 0; position = -1; filled = true; } else { count--; } if (count <= 0) { break; } } position = (position + 1) % bufferLength; for (int i = 0; i < patternsArray.length; i++) { if (count >= patternsArray[i].length && arrayContainsPattern(patternsArray[i], buffer, position)) { skip = Math.max(skip, patternsArray[i].length); if (callback != null) { output.flush(); callback.patternFound(patternIds[i], patternsArray[i], out); } } } if (skip > 0) { skip--; } else { output.write(buffer[position]); } } output.flush(); } /** * Does <code>array</code> contain <code>pattern</code> at position * <code>start</code>? Searching begins at <code>start</code> and wraps * around if the position goes passed the end of the array. * * @param pattern * Pattern to search for * @param array * Array to search in * @param start * Position in <code>array</code> to start searching from * @return True if <code>array</code> contains <code>pattern</code> */ protected static boolean arrayContainsPattern(byte[] pattern, byte[] array, int start) { for (int i = 0; i < pattern.length; i++) { if (array[(start + i) % array.length] != pattern[i]) { return false; } } return true; } }