/*
* Copyright (c) 2012, the Dart project authors.
*
* Licensed under the Eclipse Public License v1.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.eclipse.org/legal/epl-v10.html
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.dart.tools.ui.web.xml;
import com.google.dart.tools.ui.web.utils.AnyWordRule;
import com.google.dart.tools.ui.web.utils.Token;
import com.google.dart.tools.ui.web.utils.WhitespaceDetector;
import org.eclipse.jface.text.BadLocationException;
import org.eclipse.jface.text.Document;
import org.eclipse.jface.text.IDocument;
import org.eclipse.jface.text.rules.IRule;
import org.eclipse.jface.text.rules.IToken;
import org.eclipse.jface.text.rules.MultiLineRule;
import org.eclipse.jface.text.rules.RuleBasedScanner;
import org.eclipse.jface.text.rules.SingleLineRule;
import org.eclipse.jface.text.rules.WhitespaceRule;
import java.io.CharArrayWriter;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
/**
* Convert xml-ish content into a stream of tokens.
*/
public class Tokenizer implements Iterator<Token> {
private static IDocument createDocument(Reader in) throws IOException {
String content = readContent(in);
return new Document(content);
}
private static String readContent(Reader in) throws IOException {
CharArrayWriter out = new CharArrayWriter();
char[] buffer = new char[4096];
int count = in.read(buffer);
while (count != -1) {
out.write(buffer, 0, count);
count = in.read(buffer);
}
in.close();
return out.toString();
}
private RuleBasedScanner scanner;
private IDocument document;
private Token currentToken;
public Tokenizer(IDocument document, String[] comments) {
this.document = document;
setupScanner(document, comments);
}
public Tokenizer(Reader reader, String[] comments) throws IOException {
this(createDocument(reader), comments);
}
@Override
public boolean hasNext() {
if (currentToken != null) {
return true;
}
if (scanner == null) {
return false;
}
advance();
return currentToken != null;
}
@Override
public Token next() {
if (currentToken != null) {
Token temp = currentToken;
currentToken = null;
return temp;
}
advance();
if (currentToken == null) {
throw new NoSuchElementException();
} else {
return next();
}
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
private void advance() {
if (scanner != null && currentToken == null) {
IToken t = scanner.nextToken();
if (t.isEOF()) {
scanner = null;
} else if (t.isWhitespace()) {
advance();
} else {
try {
currentToken = new Token(
document.get(scanner.getTokenOffset(), scanner.getTokenLength()),
scanner.getTokenOffset());
} catch (BadLocationException e) {
throw new RuntimeException(e);
}
}
}
}
private void setupScanner(IDocument document, String[] comments) {
IToken commentToken = new org.eclipse.jface.text.rules.Token(XmlPartitionScanner.XML_COMMENT);
IToken stringToken = new org.eclipse.jface.text.rules.Token("string");
scanner = new RuleBasedScanner();
List<IRule> rules = new ArrayList<IRule>();
rules.add(new MultiLineRule(comments[0], comments[1], commentToken));
rules.add(new MultiLineRule("<?", "?>", commentToken));
rules.add(new SingleLineRule("\"", "\"", stringToken, '\\'));
rules.add(new SingleLineRule("'", "'", stringToken, '\\'));
rules.add(new AnyWordRule(new XmlWordDetector()));
rules.add(new WhitespaceRule(new WhitespaceDetector()));
scanner.setRules(rules.toArray(new IRule[rules.size()]));
scanner.setRange(document, 0, document.getLength());
}
}