/*******************************************************************************
* Copyright (c) 2007, 2008 IBM Corporation.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Robert Fuhrer (rfuhrer@watson.ibm.com) - initial API and implementation
* Stan Sutton (suttons@us.ibm.com) - maintenance of iterator
*******************************************************************************/
package org.eclipse.imp.parser;
import java.util.Iterator;
import lpg.runtime.IPrsStream;
import lpg.runtime.IToken;
import lpg.runtime.Monitor;
import org.eclipse.core.runtime.IProgressMonitor;
import org.eclipse.imp.core.ErrorHandler;
import org.eclipse.imp.services.IAnnotationTypeInfo;
import org.eclipse.jface.text.IRegion;
/**
* Base class for an IParseController implementation that encapsulates a simple
* LPG-based scanner and parser.
*
* @author rfuhrer@watson.ibm.com
* @author Stan Sutton (suttons@us.ibm.com): rewrote token iterator
*/
public abstract class SimpleLPGParseController extends ParseControllerBase {
    // fIsKeyword[kind] is true iff token kind 'kind' is a keyword.
    // Built lazily by cacheKeywordsOnce() after the first successful parse.
    private boolean[] fIsKeyword;

    protected IParser fParser;

    protected ILexer fLexer;

    private ISourcePositionLocator fSourcePositionLocator;

    private final SimpleAnnotationTypeInfo fSimpleAnnotationTypeInfo= new SimpleAnnotationTypeInfo();

    /**
     * An adapter from an Eclipse IProgressMonitor to an LPG Monitor.
     * Cancellation is latched: once {@link #isCancelled()} has observed a
     * cancelled monitor it keeps returning true, even if a different monitor
     * is later installed via {@link #setMonitor(IProgressMonitor)}.
     */
    protected class PMMonitor implements Monitor {
        private IProgressMonitor monitor;
        private boolean wasCancelled= false;

        public PMMonitor(IProgressMonitor monitor) {
            this.monitor= monitor;
        }

        public boolean isCancelled() {
            if (!wasCancelled)
                wasCancelled= monitor.isCanceled();
            return wasCancelled;
        }

        public void setMonitor(IProgressMonitor monitor) {
            this.monitor= monitor;
        }
    }

    /**
     * Note: the derived class constructor should instantiate the lexer and parser
     * and store them in fLexer/fParser.
     *
     * @param languageID the IMP language ID handled by this controller
     */
    public SimpleLPGParseController(String languageID) {
        super(languageID);
    }

    public IParser getParser() {
        return fParser;
    }

    public ILexer getLexer() {
        return fLexer;
    }

    public ISourcePositionLocator getSourcePositionLocator() {
        // Lazily created; the locator needs a fully-constructed controller.
        if (fSourcePositionLocator == null) {
            fSourcePositionLocator= new LPGSourcePositionLocator(this);
        }
        return fSourcePositionLocator;
    }

    /**
     * Lexes and parses the given text, caching and returning the resulting AST.
     * If the monitor reports cancellation after lexing, the previously cached
     * AST is returned (which may then be inconsistent with the new lex stream).
     *
     * @param contents the source text to parse
     * @param monitor  progress monitor checked for cancellation during lexing/parsing
     * @return the new AST, or the stale cached AST if the operation was cancelled
     */
    public Object parse(String contents, IProgressMonitor monitor) {
        PMMonitor my_monitor = new PMMonitor(monitor);
        char[] contentsArray = contents.toCharArray();

        // fFilePath may be null when parsing text that has no backing file.
        fLexer.reset(contentsArray, (fFilePath != null ? fFilePath.toPortableString() : null));
        fParser.reset(fLexer.getILexStream());
        fParser.getIPrsStream().setMessageHandler(new MessageHandlerAdapter(handler));

        // RMF 1 Mar 2010: Don't do any resource-related operations, like clearing markers: what we're parsing may not come from a resource.
        fLexer.lexer(my_monitor, fParser.getIPrsStream()); // Lex the stream to produce the token stream
        if (my_monitor.isCancelled())
            return fCurrentAst; // TODO currentAst might (probably will) be inconsistent wrt the lex stream now

        fCurrentAst = fParser.parser(my_monitor, 0);

        cacheKeywordsOnce(); // better place/time to do this?

        return fCurrentAst;
    }

    /**
     * Returns an iterator over the tokens (regular and adjunct) that overlap
     * the given region, in textual order. Interleaves each token's preceding
     * adjuncts (e.g. comments) before it, and the last token's following
     * adjuncts after it.
     *
     * @param region the source region of interest
     * @return an iterator over the tokens in the region; remove() is unsupported
     */
    public Iterator<IToken> getTokenIterator(final IRegion region) {
        final int regionOffset= region.getOffset();
        final int regionLength= region.getLength();
        final int regionEnd= regionOffset + regionLength - 1;

        return new Iterator<IToken>() {
            final IPrsStream stream= SimpleLPGParseController.this.getParser().getIPrsStream();

            // Index of the first token overlapping the region.
            final int firstTokIdx= getTokenIndexAtCharacter(regionOffset);
            // Index of the last token overlapping the region (initialized below).
            final int lastTokIdx;
            {
                int endIdx= getTokenIndexAtCharacter(regionEnd);
                char[] streamChars= stream.getInputChars();
                int streamLen= streamChars.length;
                try {
                    if (regionEnd >= 1 && regionEnd < streamLen
                            && streamChars[regionEnd] == IToken.EOF) {
                        // skip EOF token (assume LPG puts one at end of input
                        // character stream, since it does)
                        endIdx--;
                    }
                } catch (ArrayIndexOutOfBoundsException e) {
                    ErrorHandler.logError("SimpleLPGParseController.getTokenIterator(IRegion): error initializing lastTokIdx",
                            e);
                }
                lastTokIdx= endIdx;
            }

            int curTokIdx= Math.max(1, firstTokIdx); // skip bogus initial token

            /**
             * Maps a character offset to a token index, compensating for the
             * stream's convention of answering a negative value when the offset
             * falls between tokens, and clamping to the last valid token index.
             */
            private int getTokenIndexAtCharacter(int offset) {
                int result= stream.getTokenIndexAtCharacter(offset);
                // getTokenIndexAtCharacter() answers the negative of the index of the
                // preceding token if the given offset is not actually within a token.
                if (result < 0) {
                    result= -result + 1;
                }
                // The above may leave result set to a value that is one more than the
                // last token index, so return the last token index if that's the case
                // (This can happen if the end of the file contains some text that
                // does not correspond to a token--e.g., if the text represents an adjunct
                // or something unrecognized)
                if (result >= stream.getTokens().size())
                    result= stream.getTokens().size() - 1;
                return result;
            }

            // The following declarations cover the whole input stream, which
            // may be a proper superset of the range of the given region.
            // For now, that's a simple way to collect the information, and
            // most often the given region corresponds to the whole input anyway.
            // In any case, iteration is based on the range of the given region.

            // The preceding adjuncts for each token
            IToken[][] precedingAdjuncts= new IToken[lastTokIdx + 1][];
            {
                stream.setStreamLength();
                for(int i= 0; i < precedingAdjuncts.length; i++) {
                    precedingAdjuncts[i]= stream.getPrecedingAdjuncts(i);
                }
            }

            // The current indices for each array of preceding adjuncts
            // (-1 means the corresponding token has no preceding adjuncts)
            int[] nextPrecedingAdjunct= new int[lastTokIdx + 1];
            {
                for(int i= 0; i < nextPrecedingAdjunct.length; i++) {
                    if (precedingAdjuncts[i].length == 0)
                        nextPrecedingAdjunct[i]= -1;
                    else
                        nextPrecedingAdjunct[i]= 0;
                }
            }

            // The following adjuncts (for the last token only)
            IToken[] followingAdjuncts;
            {
                if (lastTokIdx <= 0)
                    followingAdjuncts= new IToken[0];
                else
                    followingAdjuncts= stream.getFollowingAdjuncts(lastTokIdx);
            }

            // The current index for the array of following adjuncts
            // (-1 means there are none)
            int nextFollowingAdjunct;
            {
                if (followingAdjuncts.length == 0)
                    nextFollowingAdjunct= -1;
                else
                    nextFollowingAdjunct= 0;
            }

            // To support hasNext(); initial values may be reset if appropriate
            private boolean finalTokenReturned= regionEnd < 1 || lastTokIdx <= 0;
            private boolean finalAdjunctsReturned= !(followingAdjuncts.length > 0);

            /**
             * Tests whether the iterator has any unreturned tokens. These may
             * include "regular" tokens and "adjunct" tokens (e.g., representing
             * comments).
             *
             * @return True if there is another token available, false otherwise
             */
            public boolean hasNext() {
                return !(finalTokenReturned && finalAdjunctsReturned);
            }

            /**
             * Returns the next available token in the iterator (or null if
             * there is none)
             *
             * Will return a valid token under conditions that would cause
             * hasNext() to return true; conversely, will return null under
             * conditions that would cause hasNext() to return false.
             *
             * NOTE(review): returning null instead of throwing
             * NoSuchElementException deviates from the java.util.Iterator
             * contract; preserved here because existing callers may rely on it.
             *
             * As a side effect, updates the flags that are used to compute the
             * value returned by hasNext().
             *
             * The returned token may be a "regular" token (which will have a
             * corresponding AST node) or an "adjunct" token (which will
             * represent a comment). The tokens are returned in the order in
             * which they occur in the text, regardless of their kind.
             */
            public IToken next() {
                int next= -1; // for convenience

                // If we're not all the way through the tokens
                if (curTokIdx <= lastTokIdx) {
                    // First check for any remaining preceding adjuncts
                    // of the current token
                    next= nextPrecedingAdjunct[curTokIdx];
                    // If the current token has any unreturned preceding
                    // adjuncts
                    if (next >= 0 && next < precedingAdjuncts[curTokIdx].length) {
                        // Return the next preceding adjunct, incrementing the
                        // adjunct index afterwards
                        return precedingAdjuncts[curTokIdx][nextPrecedingAdjunct[curTokIdx]++];
                    }
                    // Flag whether the current token is the last one
                    finalTokenReturned= curTokIdx >= lastTokIdx;
                    // Return the current token, incrementing the token index
                    // afterwards
                    return stream.getIToken(curTokIdx++);
                }

                // If there are any adjuncts following the last token
                if (nextFollowingAdjunct >= 0 && nextFollowingAdjunct < followingAdjuncts.length) {
                    // Flag whether the current adjunct is the last one
                    finalAdjunctsReturned= (nextFollowingAdjunct + 1) >= followingAdjuncts.length;
                    // Return the current adjunct, incrementing the adjunct
                    // index afterwards
                    return followingAdjuncts[nextFollowingAdjunct++];
                }

                return null;
            }

            /**
             * Removal is not supported by this iterator.
             *
             * @throws UnsupportedOperationException always, per the
             *         java.util.Iterator contract for unsupported remove()
             */
            public void remove() {
                // Was IllegalArgumentException, which is wrong: no argument is
                // involved; the Iterator contract prescribes this exception type.
                throw new UnsupportedOperationException("remove() is not supported");
            }
        };
    }

    public IAnnotationTypeInfo getAnnotationTypeInfo() {
        return fSimpleAnnotationTypeInfo;
    }

    /**
     * @param kind a token kind (index into the parser's terminal symbols)
     * @return true iff the given kind is a keyword; false for out-of-range kinds
     */
    public boolean isKeyword(int kind) {
        return kind >= 0 && kind < fIsKeyword.length && fIsKeyword[kind];
    }

    /**
     * Builds the keyword-kind lookup table on first call; subsequent calls are
     * no-ops. Requires fParser/fLexer to be initialized and a parse to have
     * populated the parse stream (called from parse()).
     */
    protected void cacheKeywordsOnce() {
        if (fIsKeyword == null) {
            IParser parser= getParser();
            String[] tokenKindNames= parser.orderedTerminalSymbols();

            fIsKeyword= new boolean[tokenKindNames.length];

            int[] keywordKinds= getLexer().getKeywordKinds();
            // Index 0 of getKeywordKinds() is skipped (LPG reserves it).
            for(int i= 1; i < keywordKinds.length; i++) {
                int index= parser.getIPrsStream().mapKind(keywordKinds[i]);
                fIsKeyword[index]= true;
            }
        }
    }
}