/* * DBeaver - Universal Database Manager * Copyright (C) 2010-2017 Serge Rider (serge@jkiss.org) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.jkiss.dbeaver.ui.editors.binary; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * Find helper class to find binary and string literals in files. * Given a literal, finds its position in the file. It is possible to get subsequent finds. * The search is either binary or text based. Text based search uses standard java unicode (all of big * and little endian, odd and even address) plus ascii when the literal falls within ascii char limits. * * @author Jordi */ public class BinaryTextFinder { public static final int MAP_SIZE = 64 * 1024; public static final int MAX_SEQUENCE_SIZE = 2 * 1024; // a search string of 2K should be enough private long bufferPosition = -1L; private ByteBuffer byteBuffer = null; private int currentPartFound = -1; // relative positions private boolean currentPartFoundIsUnicode = false; private long currentPosition = 0L; // absolute value, start of forward finds, end(exclusive) of backward finds private byte[] byteFindSequence = null; private boolean caseSensitive = true; private BinaryContent content = null; private boolean directionForward = true; private CharSequence literal = null; private int literalByteLength = -1; private Pattern pattern = null; private boolean stopSearching = false; /** * Create a finder object for a sequence of characters; uses unicode and ascii traversing * * @param literal the char sequence to find * @param aContent provider to be traversed */ public BinaryTextFinder(CharSequence literal, BinaryContent aContent) { this.literal = literal; initSearchUnicodeAscii(); content = aContent; bufferPosition = 0L; currentPosition = 0L; } /** * Create a finder object for a raw sequence of bytes * * @param sequence the byte sequence to find * @param aContent provider to be traversed */ public BinaryTextFinder(byte[] sequence, BinaryContent aContent) { initSearchHex(sequence); content = aContent; bufferPosition = 0L; currentPosition = 0L; } void findAllMatches() throws IOException { currentPartFound = findHexAsciiMatchInPart(); int currentPartFoundUnicode = findUnicodeMatchInPart(); currentPartFoundIsUnicode = false; if (currentPartFoundUnicode >= 0 && (currentPartFound < 0 || directionForward && currentPartFound > currentPartFoundUnicode || !directionForward && currentPartFound < currentPartFoundUnicode)) { currentPartFound = currentPartFoundUnicode; currentPartFoundIsUnicode = true; } } private int findHexAsciiMatchInPart() throws IOException { if (byteFindSequence == null) return -1; int start = 0; int inclusiveEnd = byteBuffer.limit() - byteFindSequence.length; if (!directionForward) { start = inclusiveEnd; inclusiveEnd = 0; } for (int i = start; directionForward && i <= inclusiveEnd || !directionForward && i >= inclusiveEnd; i += directionForward ? 1 : -1) { boolean matchesSoFar = true; for (int j = 0; j < byteFindSequence.length && matchesSoFar; ++j) { byte existing = byteBuffer.get(i + j); byte matcher = byteFindSequence[j]; if (existing != matcher) { if (caseSensitive || existing < 'A' || existing > 'z' || matcher < 'A' || matcher > 'z' || existing - matcher != 32 && matcher - existing != 32) matchesSoFar = false; } } if (matchesSoFar) { return i; } } return -1; } private int findUnicodeMatchInPart() throws IOException { if (pattern == null) return -1; int result = Integer.MAX_VALUE; if (!directionForward) { result = -1; } byteBuffer.order(ByteOrder.LITTLE_ENDIAN); Matcher matcher = pattern.matcher(byteBuffer.asCharBuffer()); for (int encoding = 0; encoding < 4; ++encoding) { while (matcher.find()) { int index = matcher.start() * 2 + (encoding >= 2 ? 1 : 0); if (directionForward && result > index || !directionForward && result < index) { result = index; } if (directionForward) { break; } } if (encoding == 0) { byteBuffer.order(ByteOrder.BIG_ENDIAN); } else if (encoding == 1 && byteBuffer.limit() > 0) { byteBuffer.position(1); } else if (encoding == 2) { byteBuffer.order(ByteOrder.LITTLE_ENDIAN); } matcher.reset(byteBuffer.asCharBuffer()); } if (result == Integer.MAX_VALUE) { result = -1; } return result; } long getContentLength() { if (content == null) { return 0L; } return content.length(); } /** * Get the next position and length of a matching literal * * @return an array with 2 elements, the first one a Long (position in the file), * and the second one an Integer (byte length of the matching literal), * or null if there are no matches */ public Number[] getNextMatch() throws IOException { stopSearching = false; populatePart(); findAllMatches(); while (currentPartFound < 0) { // end of part if (nextPart() == null || stopSearching) { stopSearching = false; return null; // end of file } findAllMatches(); } long resultPosition = bufferPosition + currentPartFound; int length = currentPartFoundIsUnicode ? literalByteLength : byteFindSequence.length; setNewStart(resultPosition + (directionForward ? 1 : length - 1)); return new Number[]{resultPosition, length}; } void initSearchHex(byte[] sequence) { byteFindSequence = sequence; if (sequence.length > MAX_SEQUENCE_SIZE) { byteFindSequence = new byte[MAX_SEQUENCE_SIZE]; System.arraycopy(sequence, 0, byteFindSequence, 0, MAX_SEQUENCE_SIZE); } literalByteLength = byteFindSequence.length; } /** * Get the current location being searched in the content. Approximate value. * * @return position in the content */ public long getSearchPosition() { return bufferPosition; } void initSearchUnicodeAscii() { StringBuilder regex = new StringBuilder("\\Q"); // everything-quoted regular expression if (literal.length() * 2 > MAX_SEQUENCE_SIZE) // 16 bit Unicode chars literal = literal.subSequence(0, MAX_SEQUENCE_SIZE / 2); literalByteLength = literal.length() * 2; boolean isAsciiCompatible = true; byte[] tmpBytes = new byte[literal.length()]; char previous = '\0'; for (int i = 0; i < literal.length(); ++i) { char aChar = literal.charAt(i); regex.append(aChar); if (previous == '\\' && aChar == 'E') regex.append("\\\\E\\Q"); previous = aChar; tmpBytes[i] = (byte) aChar; if (aChar > 255) isAsciiCompatible = false; } regex.append("\\E"); int ignoreCaseFlags = 0; if (!caseSensitive) ignoreCaseFlags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; pattern = Pattern.compile(regex.toString(), ignoreCaseFlags); if (isAsciiCompatible) byteFindSequence = tmpBytes; } ByteBuffer nextPart() throws IOException { long newPos = bufferPosition + byteBuffer.limit() - literalByteLength + 1L; if (!directionForward) newPos = bufferPosition - MAP_SIZE + literalByteLength - 1L; if (newPos < 0L) newPos = 0L; int size = (int) Math.min(MAP_SIZE, getContentLength() - newPos); if (!directionForward) size = (int) (bufferPosition + literalByteLength - 1L - newPos); if (size < literalByteLength) return null; bufferPosition = newPos; populatePart(size); return byteBuffer; } void populatePart() throws IOException { int size = MAP_SIZE; if (!directionForward) { size = (int) Math.min(MAP_SIZE, currentPosition); } populatePart(size); } void populatePart(int size) throws IOException { if (content == null) return; byteBuffer = null; // multiple FileChannel.read(byteBuffer) leak memory, so don't reuse buffer byteBuffer = ByteBuffer.allocate(MAP_SIZE); // if (byteBuffer == null) // byteBuffer = ByteBuffer.allocate(MAP_SIZE); byteBuffer.limit(size); byteBuffer.position(0); // try { content.get(byteBuffer, bufferPosition); // } catch (OutOfMemoryError e) { // byteBuffer = null; // byteBuffer = ByteBuffer.allocate(MAP_SIZE); // byteBuffer.limit(size); //byteBuffer.position(0); //content.get(byteBuffer, myCurrentPosition); //} byteBuffer.limit(byteBuffer.position()); byteBuffer.position(0); } /** * Sets the case sensitiveness. The default is always case sensitive (not ignore case) * * @param beSensitive set to true will not match 'a' with 'A' */ public void setCaseSensitive(boolean beSensitive) { if (caseSensitive == beSensitive) return; caseSensitive = beSensitive; if (literal != null) initSearchUnicodeAscii(); } /** * Sets the search direction. The default search direction is always forward * * @param goForward set to true for forward search */ public void setDirectionForward(boolean goForward) { directionForward = goForward; } /** * Sets new search start point in the file. Inclusive in forward finds, exclusive in backward ones. * * @param startPoint next match search will start from this point */ public void setNewStart(long startPoint) { if (startPoint < 0L || startPoint > getContentLength()) return; currentPosition = startPoint; bufferPosition = startPoint; if (!directionForward) { bufferPosition = startPoint - MAP_SIZE; } if (bufferPosition < 0L) bufferPosition = 0L; } /** * Stop searching. Long running searches can be stopped from another thread. */ public void stopSearching() { stopSearching = true; } }