/** * Copyright 2004-2016 Riccardo Solmi. All rights reserved. * This file is part of the Whole Platform. * * The Whole Platform is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * The Whole Platform is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with the Whole Platform. If not, see <http://www.gnu.org/licenses/>. */ package org.whole.lang.parsers; import java.util.Iterator; import java.util.NoSuchElementException; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.whole.lang.util.StringUtils; /** * @author Riccardo Solmi */ public class Lexer implements Iterator<String> { protected CharSequence source; protected int position; protected int horizonPosition = -1; protected Matcher matcher; protected Pattern tokenPattern; protected Pattern delimPattern; protected boolean delimited = false; protected String lookahead; protected int lookaheadPosition = -1; public static Lexer createLexer(CharSequence source) { return new Lexer(source); } public Lexer(CharSequence source) { tokenPattern = Pattern.compile("(?s).*"); matcher = tokenPattern.matcher(this.source = source); } public Lexer withPattern(Pattern pattern) { matcher.usePattern(tokenPattern = pattern); lookahead = null; lookaheadPosition = -1; return this; } public Lexer withDelimiter(Pattern pattern) { delimPattern = pattern; lookahead = null; lookaheadPosition = -1; return this; } public boolean hitEnd() { int endPosition = position; if (delimited) { matcher.usePattern(delimPattern); if (!matcher.region(position, getEndPosition()).lookingAt()) return false; else endPosition = matcher.end(); } return endPosition == getEndPosition(); } public static class Memento { public final int position; public final int horizonPosition; public Memento(int position, int horizonPosition) { this.position = position; this.horizonPosition = horizonPosition; } @Override public boolean equals(Object obj) { if (!(obj instanceof Memento)) return false; Memento other = (Memento) obj; return position == other.position && horizonPosition == other.horizonPosition; } } public boolean isSame(Memento memento) { return position == memento.position && horizonPosition == memento.horizonPosition; } public Memento mark() { return new Memento(position, horizonPosition); } public void reset(Memento memento) { position = memento.position; horizonPosition = memento.horizonPosition; lookahead = null; lookaheadPosition = -1; } // public boolean find() { // return matcher.region(position, getEndPosition()).find(); // } public String lookahead() { if (lookahead != null) return lookahead; try { if (delimPattern != null) { matcher.usePattern(delimPattern); if (!matcher.region(position, getEndPosition()).lookingAt()) return null; int tokenStart = matcher.end(); matcher.region(tokenStart, getEndPosition()); boolean foundNextDelim = matcher.find(); if (!foundNextDelim) return null; if (matcher.end() == tokenStart) { if (!matcher.usePattern(tokenPattern).lookingAt()) return null; } else { int tokenEnd = matcher.start(); if (!matcher.usePattern(tokenPattern).region(tokenStart, tokenEnd) .matches()) return null; } delimited = true; } else if (!matcher.usePattern(tokenPattern).region(position, getEndPosition()).lookingAt()) return null; } catch (Exception e) { return null; } lookaheadPosition = matcher.end(); for (int g=1, groupCount = matcher.groupCount(); g<=groupCount; g++) { String group = matcher.group(g); if (group != null) return group; } return lookahead = matcher.group(); } public boolean hasNext() { return lookahead() != null; } public String next() { String token = lookahead(); if (token == null) throw new NoSuchElementException(); position = lookaheadPosition; lookahead = null; lookaheadPosition = -1; return token; } protected int findHorizonPosition(int startPosition, Pattern pattern) { return findHorizonPosition(startPosition, horizonPosition ,pattern); } protected int findHorizonPosition(int startPosition, int endPosition ,Pattern pattern) { matcher.usePattern(pattern); if (!matcher.region(startPosition, getEndPosition(endPosition)).find()) throw new ParseException(this, "Unable to find the new horizon position"); final int groupCount = matcher.groupCount(); for (int g=1; g<=groupCount; g++) { String group = matcher.group(g); if (group != null) return matcher.start(g); } return matcher.start(); } public int getHorizonPosition() { return horizonPosition; } public int setHorizonPosition(int position, boolean force) { if (!force && position > getEndPosition()) throw new ParseException(this, "The new horizon position is over the current one"); int oldHorizonPosition = horizonPosition; horizonPosition = position; lookahead = null; lookaheadPosition = -1; return oldHorizonPosition; } public int setHorizonBySize(int length) { return setHorizonPosition(position + length, false); } public int setHorizonByDelimiter(Pattern pattern) { return setHorizonPosition(findHorizonPosition(position, pattern), false); } public int moveHorizonByDelimiter(Pattern pattern, int endPosition) { return setHorizonPosition(findHorizonPosition(horizonPosition+1, endPosition, pattern), false); } public int setHorizonByLines(int lines) { if (lines <= 0) throw new ParseException(this, "bad horizon lines number"); int position = this.position; for (int i=0; i<lines; i++) position = findHorizonPosition(position, StringUtils.EOL_PATTERN); return setHorizonPosition(position, false); } public void unsetHorizon() { horizonPosition = -1; lookahead = null; lookaheadPosition = -1; } // public int getPosition() { // return position; // } protected int getEndPosition() { return getEndPosition(horizonPosition); } protected int getEndPosition(int endPosition) { return endPosition == -1 ? source.length() : endPosition; } private static final int CHARS_TO_PRINT = 30; public String toString() { int start = Math.max(0, position-CHARS_TO_PRINT); int end = Math.min(source.length(), position+CHARS_TO_PRINT); StringBuilder sb = new StringBuilder(); if (start < end) { sb.append("after input '[...]"); sb.append(StringUtils.escapeString( source.subSequence(start, position).toString(), false, true)); sb.append("' got, at offset "); sb.append(position); sb.append(", '"); sb.append(StringUtils.escapeString(source.subSequence(position, end).toString(), false, true)); sb.append("[...]'"); } else { sb.append("got '' at offset "); sb.append(position); } return sb.toString(); } }