package org.rascalmpl.unicode;
import java.io.FilterReader;
import java.io.IOException;
import java.io.Reader;
/**
* Wraps a reader counting first a number (offset) of unicode characters before it starts reading
* and then only reads a number of unicode characters (length) before it returns end-of-stream.
* When length is -1, the reader reads until the end of the stream.
*
* Counting unicode characters is
* interesting because of the utf16 encoding Java has in memory for unicode.
*/
public class UnicodeOffsetLengthReader extends FilterReader {
private int charsRead;
private int offset;
private int length;
public UnicodeOffsetLengthReader(Reader in, int offset, int len) {
super(in);
this.offset = offset;
this.length = len;
}
private void offset() throws IOException {
if (offset > 0) {
char[] buf = new char[8096];
while (offset > 0) {
int res = in.read(buf, 0, Math.min(offset, buf.length));
if (res == -1) {
offset = 0;
return;
}
offset -= res; // may be not enough due to surrogate pairs
for (int i = 0; i < res; i++) {
if (Character.isHighSurrogate(buf[i])) {
offset++; // correct for earlier subtraction
}
}
}
}
}
@Override
public int read() throws IOException {
offset();
if (length != -1 && charsRead >= length) {
return -1;
}
int res = super.read();
if (res != -1 && !Character.isHighSurrogate((char) res)) {
charsRead++;
}
return res;
}
@Override
public int read(char[] cbuf, int off, int len) throws IOException {
offset();
if (length != -1 && charsRead >= length) {
// we are at the end already
return -1;
}
// just get what we can, we will cut the result below
int res = super.read(cbuf, off, len);
if (res == 0) {
return res; // unlikely corner case
}
if (res != -1) {
// now cut off the result
int count = 0;
for (int i = 0; i < res; i++) {
count++;
if (!Character.isHighSurrogate(cbuf[i])) {
charsRead++;
if (length != -1 && charsRead >= length) {
// done
return count;
}
}
}
return count;
}
return res;
}
}