/* * Copyright 2007 Sascha Weinreuter * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.intellij.plugins.relaxNG.compact.lexer; import gnu.trove.TIntArrayList; import gnu.trove.TIntIntHashMap; import org.kohsuke.rngom.util.Utf16; import java.io.FilterReader; import java.io.IOException; import java.io.Reader; /** * A reader that deals with escape sequences in RNC files (\x{xx}) and keeps track of their positions to build correct * token ranges in the lexer. * <p/> * Created by IntelliJ IDEA. * User: sweinreuter * Date: 05.08.2007 */ class EscapePreprocessor extends FilterReader { private final TIntArrayList myQueuedChars; private final TIntIntHashMap myLengthMap; private int myOffset; public EscapePreprocessor(Reader reader, int startOffset, TIntIntHashMap map) { super(reader); myOffset = startOffset; myQueuedChars = new TIntArrayList(); myLengthMap = map; } @Override public int read(char cbuf[], int off, int len) throws IOException { final int i = read(); if (i == -1) { return -1; } cbuf[off] = (char)i; // not really efficient, but acceptable since we're usually not having to deal with megabytes of RNC files... return 1; } @Override public int read() throws IOException { if (myQueuedChars.size() > 0) { return consume(); } final int i = super.read(); if (i == -1) { return -1; } myOffset++; switch (i) { case '\r': assert false : "Unexpected newline character"; // IDEA document's are supposed to newlines normalized to \n if (peek() == '\n') { consume(); myLengthMap.put(myOffset - 1, 2); } case '\n': return '\u0000'; case '\\': int n = 0; int x; while ((x = peek()) == 'x') { n++; } if (n > 0 && x == '{') { n++; } else { return i; } int value = 0; while (isHexChar((char)(x = peek()))) { n++; value <<= 4; value |= Character.digit(x, 16); } if (x == '}') { n++; } consume(n); myLengthMap.put(myOffset - 1, n); myOffset += n; if (value <= 0xFFFF) { return value; } myQueuedChars.add(Utf16.surrogate2(value)); return Utf16.surrogate1(value); } return i; } private static boolean isHexChar(char i) { if (Character.isDigit(i)) return true; final char c = Character.toLowerCase(i); return c >= 'a' && c <= 'f'; } private int consume() { if (myQueuedChars.size() > 0) { myOffset++; return myQueuedChars.remove(0); } return -1; } private void consume(int n) { myQueuedChars.remove(0, n); } private int peek() throws IOException { final int i = super.read(); if (i == -1) { return -1; } myQueuedChars.add(i); return i; } }