BinaryTextFinder.java example

Explorer
dbeaver-master
- modules
  - org.jkiss.utils
    - src
      - org
        jkiss
        code
        NotNull.java
        Nullable.java
        utils
        ArgumentTokenizer.java
        ArrayUtils.java
        Base64.java
        BeanUtils.java
        CommonUtils.java
        IOUtils.java
        IntKeyMap.java
        LongKeyMap.java
        MimeType.java
        Pair.java
        SecurityUtils.java
        StandardConstants.java
        time
        ExtendedDateFormat.java
        xml
        SAXListener.java
        SAXReader.java
        XMLBuilder.java
        XMLConstants.java
        XMLException.java
        XMLUtils.java
  - org.jkiss.wmi
    - src
      - java
        org
        jkiss
        wmi
        service
        WMIConstants.java
        WMIDataType.java
        WMIException.java
        WMIObject.java
        WMIObjectAttribute.java
        WMIObjectElement.java
        WMIObjectMethod.java
        WMIObjectSink.java
        WMIObjectSinkStatus.java
        WMIQualifiedObject.java
        WMIQualifier.java
        WMIService.java
        WMISinkStatus.java
        test
        TestService.java
- plugins
/*
 * DBeaver - Universal Database Manager
 * Copyright (C) 2010-2017 Serge Rider (serge@jkiss.org)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.jkiss.dbeaver.ui.editors.binary;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


/**
 * Find helper class to locate binary and string literals in a {@link BinaryContent}.
 * Given a literal, finds its position in the content; subsequent calls return subsequent matches.
 * <p>
 * The search is either binary or text based. A text based search looks for the literal encoded as
 * 16 bit characters (big and little endian, starting at even and at odd byte offsets) and, when every
 * character of the literal fits in a single byte, also as a plain one-byte-per-character sequence.
 * <p>
 * The content is scanned in parts of at most {@link #MAP_SIZE} bytes. Only {@link #stopSearching()} is
 * meant to be called from a thread other than the one that runs the search.
 *
 * @author Jordi
 */
public class BinaryTextFinder {


    public static final int MAP_SIZE = 64 * 1024;
    public static final int MAX_SEQUENCE_SIZE = 2 * 1024;  // a search string of 2K should be enough

    // The two byte orders tried for every 16 bit character scan
    private static final ByteOrder[] UNICODE_BYTE_ORDERS = {ByteOrder.LITTLE_ENDIAN, ByteOrder.BIG_ENDIAN};

    private long bufferPosition = -1L;  // absolute position in the content of the first byte of byteBuffer
    private ByteBuffer byteBuffer = null;  // the part of the content currently being searched
    private int currentPartFound = -1;  // relative positions
    private boolean currentPartFoundIsUnicode = false;
    private long currentPosition = 0L;  // absolute value, start of forward finds, end(exclusive) of backward finds
    private byte[] byteFindSequence = null;  // one-byte-per-character (or raw) sequence, null when not applicable
    private boolean caseSensitive = true;
    private BinaryContent content = null;
    private boolean directionForward = true;
    private CharSequence literal = null;  // null for raw byte searches
    private int literalByteLength = -1;  // length in bytes of the longest encoding of the searched literal
    private Pattern pattern = null;  // quoted literal used for the 16 bit character scans, null for raw byte searches
    private volatile boolean stopSearching = false;  // volatile: written by stopSearching() from another thread


    /**
     * Create a finder object for a sequence of characters; uses unicode and ascii traversing
     *
     * @param literal the char sequence to find; truncated to {@code MAX_SEQUENCE_SIZE / 2} characters
     * @param aContent provider to be traversed
     */
    public BinaryTextFinder(CharSequence literal, BinaryContent aContent)
    {
        this.literal = literal;
        initSearchUnicodeAscii();
        content = aContent;
        bufferPosition = 0L;
        currentPosition = 0L;
    }


    /**
     * Create a finder object for a raw sequence of bytes
     *
     * @param sequence the byte sequence to find; truncated to {@code MAX_SEQUENCE_SIZE} bytes
     * @param aContent  provider to be traversed
     */
    public BinaryTextFinder(byte[] sequence, BinaryContent aContent)
    {
        initSearchHex(sequence);
        content = aContent;
        bufferPosition = 0L;
        currentPosition = 0L;
    }


    /**
     * Searches the current part with both strategies and keeps the match that comes first in the
     * search direction (lowest offset going forward, highest offset going backward). On equal offsets
     * the single-byte match wins. The outcome is stored in {@code currentPartFound} (-1 when nothing
     * was found) and {@code currentPartFoundIsUnicode}.
     */
    void findAllMatches()
        throws IOException
    {
        if (byteBuffer == null) {  // nothing has been loaded (no content): nothing can match
            currentPartFound = -1;
            currentPartFoundIsUnicode = false;
            return;
        }

        int byteMatch = findHexAsciiMatchInPart();
        int unicodeMatch = findUnicodeMatchInPart();

        boolean preferUnicode = unicodeMatch >= 0 && (byteMatch < 0
            || (directionForward ? unicodeMatch < byteMatch : unicodeMatch > byteMatch));

        currentPartFound = preferUnicode ? unicodeMatch : byteMatch;
        currentPartFoundIsUnicode = preferUnicode;
    }


    /**
     * Scans the current part for {@code byteFindSequence}, testing every candidate offset in the
     * search direction.
     *
     * @return offset of the match relative to the start of the part, or -1 if there is none
     */
    private int findHexAsciiMatchInPart()
    {
        if (byteFindSequence == null) {
            return -1;
        }

        // Negative when the part is shorter than the sequence; both loops are then skipped
        int lastStart = byteBuffer.limit() - byteFindSequence.length;
        if (directionForward) {
            for (int offset = 0; offset <= lastStart; ++offset) {
                if (sequenceMatchesAt(offset)) {
                    return offset;
                }
            }
        } else {
            for (int offset = lastStart; offset >= 0; --offset) {
                if (sequenceMatchesAt(offset)) {
                    return offset;
                }
            }
        }

        return -1;
    }


    /**
     * Tells whether {@code byteFindSequence} occurs in the current part at the given offset,
     * honouring the case sensitivity setting.
     */
    private boolean sequenceMatchesAt(int offset)
    {
        for (int j = 0; j < byteFindSequence.length; ++j) {
            byte existing = byteBuffer.get(offset + j);  // absolute get: buffer position is not moved
            byte wanted = byteFindSequence[j];
            if (existing != wanted && (caseSensitive || !differOnlyInAsciiCase(existing, wanted))) {
                return false;
            }
        }
        return true;
    }


    /**
     * Tells whether both bytes lie in 'A'..'z' and are exactly 32 apart, which within that range
     * only holds for an ASCII letter and its other-case counterpart.
     */
    private static boolean differOnlyInAsciiCase(byte first, byte second)
    {
        return first >= 'A' && first <= 'z' && second >= 'A' && second <= 'z'
            && (first - second == 32 || second - first == 32);
    }


    /**
     * Scans the current part for the literal encoded as 16 bit characters, trying little and big
     * endian byte order at both even and odd byte offsets.
     *
     * @return byte offset of the best match relative to the start of the part (lowest going forward,
     *         highest going backward), or -1 if there is none
     */
    private int findUnicodeMatchInPart()
    {
        if (pattern == null) {
            return -1;
        }

        int best = -1;
        // Work on a duplicate so that the position and byte order of the shared buffer stay untouched
        ByteBuffer view = byteBuffer.duplicate();
        for (int byteOffset = 0; byteOffset <= 1; ++byteOffset) {
            if (byteOffset > view.limit()) {
                break;  // empty part: there is no odd alignment to look at
            }
            view.position(byteOffset);
            for (ByteOrder order : UNICODE_BYTE_ORDERS) {
                view.order(order);
                CharSequence chars = view.asCharBuffer();  // starts at the view position, uses the view order
                Matcher matcher = pattern.matcher(chars);
                if (directionForward) {
                    if (matcher.find()) {
                        int index = matcher.start() * 2 + byteOffset;
                        if (best < 0 || index < best) {
                            best = index;
                        }
                    }
                } else {
                    // Restart one char after each match start so that overlapping occurrences are
                    // seen too; otherwise the true last occurrence could be skipped
                    int from = 0;
                    while (from <= chars.length() && matcher.find(from)) {
                        int index = matcher.start() * 2 + byteOffset;
                        if (index > best) {
                            best = index;
                        }
                        from = matcher.start() + 1;
                    }
                }
            }
        }

        return best;
    }


    /**
     * @return length of the content, or 0 when there is no content
     */
    long getContentLength()
    {
        if (content == null) {
            return 0L;
        }

        return content.length();
    }


    /**
     * Get the next position and length of a matching literal
     *
     * @return an array with 2 elements, the first one a Long (position in the file),
     *         and the second one an Integer (byte length of the matching literal),
     *         or null if there are no matches, if there is no content, if the searched
     *         literal is empty or if the search was stopped
     */
    public Number[] getNextMatch()
        throws IOException
    {
        stopSearching = false;
        // An empty literal would produce zero-length matches that never advance at the end of the content
        if (content == null || literalByteLength <= 0) {
            return null;
        }
        populatePart();
        findAllMatches();

        while (currentPartFound < 0) { // end of part
            if (nextPart() == null || stopSearching) {
                stopSearching = false;
                return null;  // end of file
            }
            findAllMatches();
        }

        long resultPosition = bufferPosition + currentPartFound;
        int length = currentPartFoundIsUnicode ? literalByteLength : byteFindSequence.length;
        // Move the start by a single byte only, so that overlapping matches are found by the next call
        setNewStart(resultPosition + (directionForward ? 1 : length - 1));

        return new Number[]{resultPosition, length};
    }


    /**
     * Prepares a raw byte search. The sequence is copied, so later changes to the caller's array do
     * not affect the search, and truncated to {@link #MAX_SEQUENCE_SIZE} bytes.
     *
     * @param sequence the byte sequence to find
     */
    void initSearchHex(byte[] sequence)
    {
        int length = Math.min(sequence.length, MAX_SEQUENCE_SIZE);
        byteFindSequence = new byte[length];
        System.arraycopy(sequence, 0, byteFindSequence, 0, length);

        literalByteLength = byteFindSequence.length;
    }


    /**
     * Get the current location being searched in the content. Approximate value.
     *
     * @return position in the content
     */
    public long getSearchPosition()
    {
        return bufferPosition;
    }


    /**
     * Prepares a text search for {@code literal}: truncates it to {@code MAX_SEQUENCE_SIZE / 2}
     * characters, compiles the quoted pattern used for the 16 bit character scans and, when every
     * character is at most 255, also builds the one-byte-per-character sequence.
     */
    void initSearchUnicodeAscii()
    {
        if (literal.length() > MAX_SEQUENCE_SIZE / 2) {  // 16 bit Unicode chars
            literal = literal.subSequence(0, MAX_SEQUENCE_SIZE / 2);
        }
        literalByteLength = literal.length() * 2;

        boolean isAsciiCompatible = true;
        byte[] tmpBytes = new byte[literal.length()];
        for (int i = 0; i < literal.length(); ++i) {
            char aChar = literal.charAt(i);
            tmpBytes[i] = (byte) aChar;
            if (aChar > 255) {
                isAsciiCompatible = false;
            }
        }

        int ignoreCaseFlags = caseSensitive ? 0 : Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
        // Pattern.quote turns the literal into an everything-quoted expression, "\E" inside it included
        pattern = Pattern.compile(Pattern.quote(literal.toString()), ignoreCaseFlags);

        byteFindSequence = isAsciiCompatible ? tmpBytes : null;
    }


    /**
     * Loads the part that follows the current one in the search direction. Consecutive parts overlap
     * by {@code literalByteLength - 1} bytes so that a match lying across a part boundary is not missed.
     *
     * @return the newly loaded buffer, or null when the remaining content is too short to hold the literal
     */
    ByteBuffer nextPart()
        throws IOException
    {
        if (byteBuffer == null) {  // no part was ever loaded, so there is nothing to continue from
            return null;
        }

        long newPos;
        int size;
        if (directionForward) {
            newPos = Math.max(0L, bufferPosition + byteBuffer.limit() - literalByteLength + 1L);
            size = (int) Math.min(MAP_SIZE, getContentLength() - newPos);
        } else {
            newPos = Math.max(0L, bufferPosition - MAP_SIZE + literalByteLength - 1L);
            size = (int) (bufferPosition + literalByteLength - 1L - newPos);
        }

        if (size < literalByteLength) {
            return null;
        }
        bufferPosition = newPos;
        populatePart(size);

        return byteBuffer;
    }


    /**
     * Loads the first part of a search at {@code bufferPosition}: up to {@link #MAP_SIZE} bytes going
     * forward, and no more than {@code currentPosition} bytes going backward.
     */
    void populatePart()
        throws IOException
    {
        int size = MAP_SIZE;
        if (!directionForward) {
            size = (int) Math.min(MAP_SIZE, currentPosition);
        }
        populatePart(size);
    }


    /**
     * Loads up to {@code size} bytes of the content, starting at {@code bufferPosition}, into a new
     * buffer. Does nothing when there is no content.
     *
     * @param size maximum number of bytes to load, at most {@link #MAP_SIZE}
     */
    void populatePart(int size)
        throws IOException
    {
        if (content == null) {
            return;
        }

        // multiple FileChannel.read(byteBuffer) leak memory, so don't reuse buffer
        byteBuffer = ByteBuffer.allocate(MAP_SIZE);
        byteBuffer.limit(size);
        content.get(byteBuffer, bufferPosition);
        // Relies on content.get() advancing the buffer position by the number of bytes it stored:
        // flip() then makes exactly those bytes readable from position 0
        byteBuffer.flip();
    }


    /**
     * Sets the case sensitiveness. The default is always case sensitive (not ignore case)
     *
     * @param beSensitive set to true will not match 'a' with 'A'
     */
    public void setCaseSensitive(boolean beSensitive)
    {
        if (caseSensitive == beSensitive) {
            return;
        }

        caseSensitive = beSensitive;
        if (literal != null) {
            initSearchUnicodeAscii();  // the pattern has to be recompiled with the new flags
        }
    }


    /**
     * Sets the search direction. The default search direction is always forward.
     * When the direction actually changes, the part to load next is recomputed from the current
     * start point, because that point is inclusive going forward and exclusive going backward.
     *
     * @param goForward set to true for forward search
     */
    public void setDirectionForward(boolean goForward)
    {
        if (directionForward == goForward) {
            return;
        }

        directionForward = goForward;
        resetBufferPosition();
    }


    /**
     * Sets new search start point in the file. Inclusive in forward finds, exclusive in backward ones.
     * Values outside the content are ignored.
     *
     * @param startPoint next match search will start from this point
     */
    public void setNewStart(long startPoint)
    {
        if (startPoint < 0L || startPoint > getContentLength()) {
            return;
        }

        currentPosition = startPoint;
        resetBufferPosition();
    }


    /**
     * Derives the position of the first part to load from {@code currentPosition}: the start point
     * itself going forward, up to {@link #MAP_SIZE} bytes before it going backward.
     */
    private void resetBufferPosition()
    {
        bufferPosition = directionForward ? currentPosition : Math.max(0L, currentPosition - MAP_SIZE);
    }


    /**
     * Stop searching. Long running searches can be stopped from another thread.
     */
    public void stopSearching()
    {
        stopSearching = true;
    }
}