/*
  RleSegmenter.java

  (c) 2012 Edward Swartz

  All rights reserved. This program and the accompanying materials
  are made available under the terms of the Eclipse Public License v1.0
  which accompanies this distribution, and is available at
  http://www.eclipse.org/legal/epl-v10.html
 */
package ejs.base.utils;

import java.util.Iterator;
import java.util.NoSuchElementException;

/**
 * Splits a range of a byte array into consecutive segments suitable for
 * run-length encoding. Each segment is either a <em>repeat</em> (a run of one
 * byte value that is at least {@code threshold} bytes long) or a
 * <em>non-repeat</em> stretch (everything else, which may still contain runs
 * shorter than the threshold).
 * <p>
 * Segments are produced in order and are contiguous: each one starts where the
 * previous one ended, and together they cover {@code [offset, offset + length)}.
 *
 * @author ejs
 */
public class RleSegmenter implements Iterable<RleSegmenter.Segment> {

    /** A contiguous slice of the input, flagged as a repeat run or not. */
    public static class Segment {
        private final boolean repeat;
        private final int offset;
        private final int length;

        /**
         * Creates a non-repeat segment.
         *
         * @param offset index of the first byte of the segment in the source array
         * @param length number of bytes in the segment
         */
        public Segment(int offset, int length) {
            this.offset = offset;
            this.length = length;
            this.repeat = false;
        }

        /**
         * Creates a repeat segment.
         *
         * @param offset index of the first byte of the run in the source array
         * @param length number of bytes in the run
         * @param repeatByte the repeated value; it is not stored here, callers
         *        can read it from the source array at {@code offset}
         */
        public Segment(int offset, int length, byte repeatByte) {
            this.offset = offset;
            this.length = length;
            this.repeat = true;
        }

        /** @return {@code true} if this segment is a run of one repeated byte */
        public boolean isRepeat() {
            return repeat;
        }

        /** @return index of the segment's first byte in the source array */
        public int getOffset() {
            return offset;
        }

        /** @return number of bytes covered by the segment */
        public int getLength() {
            return length;
        }
    }

    private final byte[] data;
    private final int offset;
    private final int end;
    private final int threshold;

    /**
     * Creates a segmenter over {@code data[offset .. offset + length)}. The
     * array is referenced, not copied, and the arguments are not validated.
     *
     * @param threshold minimum length, in bytes, for a run of equal bytes to be
     *        reported as a repeat segment
     * @param data the bytes to segment
     * @param offset index of the first byte to examine
     * @param length number of bytes to examine
     */
    public RleSegmenter(int threshold, byte[] data, int offset, int length) {
        this.threshold = threshold;
        this.data = data;
        this.offset = offset;
        this.end = offset + length;
    }

    /**
     * Returns an iterator that walks the range from its start; every call
     * yields a fresh, independent iterator. The iterator does not support
     * {@code remove()}.
     *
     * @see java.lang.Iterable#iterator()
     */
    @Override
    public Iterator<Segment> iterator() {
        return new Iterator<RleSegmenter.Segment>() {
            /** Index of the first byte not yet handed out in a segment. */
            int idx = offset;

            @Override
            public boolean hasNext() {
                return idx < end;
            }

            /**
             * @return the next segment
             * @throws NoSuchElementException if the range is exhausted
             */
            @Override
            public Segment next() {
                if (idx >= end) {
                    // honour the Iterator contract instead of returning an
                    // empty segment
                    throw new NoSuchElementException();
                }

                int origIdx = idx;
                boolean isRepeat = false;

                if (idx + 1 < end && data[idx] == data[idx + 1]) {
                    // possible repeat: consume the whole run of equal bytes
                    idx++;
                    while (idx < end && data[origIdx] == data[idx]) {
                        idx++;
                    }
                    // a run shorter than the threshold is folded into the
                    // non-repeat segment built below
                    isRepeat = idx - origIdx >= threshold;
                }

                if (!isRepeat) {
                    // look for non-repeating data: extend until the range ends
                    // or a run long enough to be a repeat begins
                    int repeatIdx = -1; // start of the run being tracked, or -1
                    while (idx < end) {
                        boolean sameAsNext = idx + 1 < end && data[idx] == data[idx + 1];
                        if (sameAsNext) {
                            if (repeatIdx < 0) {
                                repeatIdx = idx;
                            }
                        } else if (repeatIdx >= 0) {
                            // idx is the last byte of the tracked run
                            if (idx + 1 - repeatIdx >= threshold) {
                                // long enough: stop before it so the next
                                // call returns it as a repeat segment
                                idx = repeatIdx;
                                break;
                            }
                            repeatIdx = -1;
                        }
                        idx++;
                    }
                    return new Segment(origIdx, idx - origIdx);
                }

                return new Segment(origIdx, idx - origIdx, data[origIdx]);
            }

            @Override
            public void remove() {
                throw new UnsupportedOperationException();
            }
        };
    }
}