Offsets.java example

Explorer
lux-master
- src
package lux.xml;

import java.util.Arrays;

import lux.index.analysis.XmlTextTokenStream;

/**
 * Stores the offsets of text nodes and character entities in serialized XML. It is
 * populated by {@link SaxonDocBuilder} and consumed by {@link XmlTextTokenStream}.
 *
 * <p>Two independent, append-only sequences are kept:
 * <ul>
 *   <li>text offsets, one per text node, added with {@link #addOffset(int)};</li>
 *   <li>deltas, one per character reference or entity, added with
 *       {@link #addDelta(int, short)} as a (location, delta) pair.</li>
 * </ul>
 *
 * <p>An instance can be reused by calling {@link #reset()}, which rewinds both sequences
 * without releasing the backing arrays. This class performs no synchronization.
 */
public final class Offsets {

    /** Initial size of each backing array, and the minimum amount by which one grows. */
    private static final int INITIAL_CAPACITY = 1024;

    /** Number of text offsets stored since construction or the last {@link #reset()}. */
    private int offsetCount;

    /** Number of deltas stored since construction or the last {@link #reset()}. */
    private int deltaCount;

    private int[] textOffsets;

    /** Parallel to {@link #deltas}: both arrays always have the same length. */
    private int[] deltaLocations;
    private short[] deltas;

    /** Creates an empty instance with room for 1024 text offsets and 1024 deltas. */
    public Offsets () {
        textOffsets = new int[INITIAL_CAPACITY];
        deltas = new short[INITIAL_CAPACITY];
        deltaLocations = new int[INITIAL_CAPACITY];
        reset ();
    }

    /**
     * Discards all stored offsets and deltas by rewinding both counters to zero. The backing
     * arrays are kept (and not cleared) so the instance can be refilled without reallocating.
     */
    public void reset () {
        offsetCount = 0;
        deltaCount = 0;
    }

    /**
     * Records the character offset of a character reference or entity in the document text,
     * along with the difference between the length of the entity reference and the length of
     * its replacement text. E.g., for an {@code &amp;} appearing at position 100, we would
     * store (100, 4), since len('{@code &amp;}') = 5 and len('{@code &}') = 1.
     *
     * @param deltaLocation the character location of the reference in the input
     * @param delta the length of the reference minus the length of its replacement text
     */
    public void addDelta(int deltaLocation, short delta) {
        if (deltaCount >= deltas.length) {
            // deltas and deltaLocations are kept the same length, so grow them together.
            final int capacity = grownCapacity(deltas.length);
            deltas = Arrays.copyOf(deltas, capacity);
            deltaLocations = Arrays.copyOf(deltaLocations, capacity);
        }
        deltaLocations[deltaCount] = deltaLocation;
        deltas[deltaCount] = delta;
        deltaCount++;
    }

    /**
     * Records the offset of the next text node in the document. According to the StAX
     * javadocs, these offsets will be either bytes or characters depending on whether the
     * parser was fed a byte stream or a character stream; in practice, however, character
     * offsets seem to be reported in both cases.
     *
     * @param characterOffset the offset of the text node in the input
     */
    public void addOffset(int characterOffset) {
        if (offsetCount >= textOffsets.length) {
            textOffsets = Arrays.copyOf(textOffsets, grownCapacity(textOffsets.length));
        }
        textOffsets[offsetCount++] = characterOffset;
    }

    /**
     * Returns the location of the i'th text node. The index is not checked against the number
     * of offsets added; a slot that has not been written since the last {@link #reset()}
     * holds zero or a value left over from earlier use.
     *
     * @param i the index of the text node
     * @return the character location in the input character stream of i'th text node.
     */
    public int getTextLocation (int i) {
        return textOffsets[i];
    }

    /**
     * A delta is stored whenever the number of characters in the output token is not the same
     * as the number in the input character stream. The index is not checked against
     * {@link #getDeltaCount()}.
     *
     * @param i the index of the delta
     * @return the character location in the input character stream of the i'th delta.
     */
    public int getDeltaLocation (int i) {
        return deltaLocations[i];
    }

    /**
     * @return the number of deltas found in the input stream
     */
    public int getDeltaCount() {
        return deltaCount;
    }

    /**
     * Returns the i'th delta, widened from its {@code short} storage. The index is not
     * checked against {@link #getDeltaCount()}.
     *
     * @param i the index of the delta
     * @return the value of the i'th delta
     */
    public int getDelta(int i) {
        return deltas[i];
    }

    /**
     * Computes the next capacity for a full backing array. Capacity doubles so that the total
     * copying work stays proportional to the number of entries added, and always increases by
     * at least {@link #INITIAL_CAPACITY}.
     *
     * @param current the current array length
     * @return the length to reallocate to
     */
    private static int grownCapacity(int current) {
        // If doubling overflows int the shifted value is negative, and max() falls back to
        // the linear step.
        return Math.max(current + INITIAL_CAPACITY, current << 1);
    }

}

/*
 * This Source Code Form is subject to the terms of the Mozilla Public License,
 * v. 2.0. If a copy of the MPL was not distributed with this file, You can
 * obtain one at http://mozilla.org/MPL/2.0/.
 */