EscapePreprocessor.java example

Explorer
intellij-community-master
/*
 * Copyright 2007 Sascha Weinreuter
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.intellij.plugins.relaxNG.compact.lexer;

import gnu.trove.TIntArrayList;
import gnu.trove.TIntIntHashMap;
import org.kohsuke.rngom.util.Utf16;

import java.io.FilterReader;
import java.io.IOException;
import java.io.Reader;

/**
 * A reader that deals with escape sequences in RNC files (\x{xx}) and keeps track of their positions to build correct
 * token ranges in the lexer.
 * <p>
 * Every character handed out by {@link #read()} is either a plain character of the underlying reader, the NUL
 * character standing in for a line break, or the decoded value of a {@code \x{...}} escape. For each escape, the
 * number of source characters that follow the backslash is recorded in the length map passed to the constructor,
 * keyed by the offset of the backslash.
 * <p>
 * Created by IntelliJ IDEA.
 * User: sweinreuter
 * Date: 05.08.2007
 */
class EscapePreprocessor extends FilterReader {
  /** Returned for escapes whose numeric value is not a valid Unicode code point (greater than U+10FFFF). */
  private static final char REPLACEMENT_CHAR = '\uFFFD';

  /** Characters already pulled from the underlying reader (look-ahead) or synthesized, waiting to be returned. */
  private final TIntArrayList myQueuedChars;
  /** Maps the offset of an escape's backslash to the number of source characters following that backslash. */
  private final TIntIntHashMap myLengthMap;

  /** Running offset, starting at the {@code startOffset} given to the constructor. */
  private int myOffset;

  /**
   * @param reader      the underlying source of characters
   * @param startOffset the offset assigned to the first character read from {@code reader}
   * @param map         receives one entry per decoded escape sequence (and per {@code \r\n} pair)
   */
  public EscapePreprocessor(Reader reader, int startOffset, TIntIntHashMap map) {
    super(reader);
    myOffset = startOffset;
    myQueuedChars = new TIntArrayList();
    myLengthMap = map;
  }

  /**
   * Reads at most one preprocessed character into {@code cbuf[off]}.
   *
   * @return 0 if {@code len} is zero, -1 at end of stream, otherwise 1
   * @throws IndexOutOfBoundsException if {@code off} or {@code len} is negative, or {@code len} is greater than
   *                                   {@code cbuf.length - off}
   */
  @Override
  public int read(char[] cbuf, int off, int len) throws IOException {
    // Validate before touching the stream so that a bad call does not silently swallow a character.
    if (off < 0 || len < 0 || len > cbuf.length - off) {
      throw new IndexOutOfBoundsException();
    }
    if (len == 0) {
      return 0;
    }
    final int c = read();
    if (c == -1) {
      return -1;
    }
    cbuf[off] = (char)c; // not really efficient, but acceptable since we're usually not having to deal with megabytes of RNC files...
    return 1;
  }

  /**
   * Returns the next preprocessed character.
   *
   * @return a value in the range 0..0xFFFF, or -1 at end of stream
   */
  @Override
  public int read() throws IOException {
    if (myQueuedChars.size() > 0) {
      return consume();
    }
    final int c = super.read();
    if (c == -1) {
      return -1;
    }
    myOffset++;

    switch (c) {
      case '\r':
        assert false : "Unexpected newline character";  // IDEA documents are supposed to have newlines normalized to \n
        if (peek() == '\n') {
          consume();
          myLengthMap.put(myOffset - 1, 2);
        }
        // intentional fall-through: a (possibly CR-prefixed) line break is reported like a plain '\n'
      case '\n':
        // NOTE(review): line breaks are reported as NUL; presumably the token manager expects this - confirm
        return '\u0000';

      case '\\':
        return readEscape();

      default:
        return c;
    }
  }

  /**
   * Decodes what follows a backslash that has just been read. If the following characters do not start an escape
   * ({@code x}+ followed by <code>{</code>), the backslash itself is returned and the characters looked at stay queued
   * for subsequent reads. A missing closing brace is tolerated.
   *
   * @return the backslash, the decoded character, the high surrogate of a supplementary code point (its low
   *         surrogate is queued to be returned next), or U+FFFD if the value exceeds U+10FFFF
   */
  private int readEscape() throws IOException {
    int n = 0; // number of source characters after the backslash that belong to the escape
    int x;
    while ((x = peek()) == 'x') {
      n++;
    }
    if (n == 0 || x != '{') {
      return '\\';
    }
    n++;

    int value = 0;
    while (isHexChar(x = peek())) {
      n++;
      // Saturate once the value has left the Unicode range: further shifting could overflow into a negative
      // number, and returning -1 from read() would be mistaken for end of stream.
      if (value <= Character.MAX_CODE_POINT) {
        value = (value << 4) | Character.digit(x, 16);
      }
    }
    if (x == '}') {
      n++;
    }
    consume(n);

    myLengthMap.put(myOffset - 1, n);
    myOffset += n;

    if (value > Character.MAX_CODE_POINT) {
      return REPLACEMENT_CHAR;
    }
    if (value <= 0xFFFF) {
      return value;
    }

    // The low surrogate must directly follow the high one, so it goes to the FRONT of the queue: when the closing
    // brace is missing, the character that terminated the hex digits is still queued and has to come after it.
    myQueuedChars.insert(0, Utf16.surrogate2(value));
    return Utf16.surrogate1(value);
  }

  /** Tells whether {@code c} is an ASCII hexadecimal digit; -1 (end of stream) and non-ASCII digits are rejected. */
  private static boolean isHexChar(int c) {
    return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
  }

  /**
   * Removes and returns the first queued character, advancing the offset by one.
   * NOTE(review): the offset is also advanced for a synthesized low surrogate, which has no source character of its
   * own - confirm that this matches what the lexer expects.
   *
   * @return the queued character, or -1 if nothing is queued
   */
  private int consume() {
    if (myQueuedChars.size() > 0) {
      myOffset++;
      return myQueuedChars.remove(0);
    }
    return -1;
  }

  /** Drops the first {@code n} queued characters without touching the offset. */
  private void consume(int n) {
    myQueuedChars.remove(0, n);
  }

  /**
   * Reads one character from the underlying reader and appends it to the queue so that it is not lost.
   *
   * @return the character read, or -1 at end of stream (in which case nothing is queued)
   */
  private int peek() throws IOException {
    final int c = super.read();
    if (c == -1) {
      return -1;
    }
    myQueuedChars.add(c);
    return c;
  }
}