/*
* Tokenizer.java
*
* Copyright (C) 2009-12 by RStudio, Inc.
*
* Unless you have received this program directly from RStudio pursuant
* to the terms of a commercial license agreement with RStudio, then
* this program is licensed to you under the terms of version 3 of the
* GNU Affero General Public License. This program is distributed WITHOUT
* ANY EXPRESS OR IMPLIED WARRANTY, INCLUDING THOSE OF NON-INFRINGEMENT,
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Please refer to the
* AGPL (http://www.gnu.org/licenses/agpl-3.0.txt) for more details.
*
*/
package org.rstudio.studio.client.workbench.views.source.editors.text.ace;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import com.google.gwt.core.client.JavaScriptObject;
/**
 * GWT overlay type for a JavaScript tokenizer object.
 *
 * <p>Instances are created on the JavaScript side (see
 * {@link #createRTokenizer()}); the wrapped object is expected to expose a
 * {@code getLineTokens(line, state)} function returning an object with
 * {@code tokens} and {@code state} properties, which is all this class
 * relies on.
 */
public class Tokenizer extends JavaScriptObject
{
   /**
    * Overlay types are never instantiated from Java; the constructor exists
    * only because the overlay-type contract requires it.
    */
   protected Tokenizer()
   {
   }

   /**
    * Creates a tokenizer configured with the R highlight rules.
    *
    * <p>Loads the {@code ace/tokenizer} and {@code mode/r_highlight_rules}
    * modules through the host page's {@code require} function and builds a
    * tokenizer from the rules returned by {@code RHighlightRules.getRules()}.
    *
    * @return a new tokenizer using the R highlight rules
    */
   public static final native Tokenizer createRTokenizer() /*-{
      var Tokenizer = $wnd.require("ace/tokenizer").Tokenizer;
      var RHighlightRules = $wnd.require("mode/r_highlight_rules").RHighlightRules;
      return new Tokenizer(new RHighlightRules().getRules());
   }-*/;

   /**
    * Tokenizes text that may span several lines into one flat token array.
    *
    * <p>The text is split on {@code "\n"} and each piece is passed to the
    * wrapped tokenizer's {@code getLineTokens}, threading the returned state
    * into the next call (the first call uses the state {@code "start"}).
    * Every line break is folded back into the token stream as follows:
    * <ul>
    *   <li>if the last token collected so far has type {@code "text"}, the
    *       newline is appended to it, and a leading {@code "text"} token of
    *       the next line is merged into it as well;</li>
    *   <li>otherwise, if the next line starts with a {@code "text"} token,
    *       the newline is prepended to that token;</li>
    *   <li>otherwise a standalone {@code "text"} token holding the newline
    *       is inserted between the two lines.</li>
    * </ul>
    * Nothing here is specific to R; only {@code getLineTokens} and the
    * {@code type} / {@code value} properties of its tokens are used. Token
    * objects returned by {@code getLineTokens} are modified in place when a
    * newline is merged into them.
    *
    * @param line the text to tokenize; may contain newlines
    * @return the flattened tokens, always containing at least one token
    */
   private final native Token[] doTokenize(String line) /*-{

      // Builds a fresh whitespace token that holds a single line break.
      function newlineToken()
      {
         return { type: "text", value: "\n" };
      }

      var lines = line.split("\n");

      // Tokenize the first line; its token array doubles as the accumulator
      // for everything that follows.
      var result = this.getLineTokens(lines[0], "start");
      var tokens = result.tokens;
      var state = result.state;

      // Guarantee a non-empty accumulator so that 'lastToken' is always
      // defined inside the loop below.
      // NOTE(review): when the first line is empty and more lines follow,
      // the loop appends another "\n" to this seeded token for the actual
      // line break, so the output carries one more newline than the input.
      // Behaviour is kept as-is; confirm whether callers rely on it.
      if (tokens.length === 0)
         tokens.push(newlineToken());

      // Invariant: 'lastToken' is always the final element of 'tokens'.
      var lastToken = tokens[tokens.length - 1];

      for (var i = 1; i < lines.length; i++)
      {
         result = this.getLineTokens(lines[i], state);
         state = result.state;

         var lineTokens = result.tokens;
         var first = lineTokens.length > 0 ? lineTokens[0] : null;
         var firstIsText = first !== null && first.type === "text";

         // Index of the first token of this line that still has to be
         // copied over; becomes 1 when the leading token gets merged.
         var start = 0;

         if (lastToken.type === "text")
         {
            // The previous line ended in a text token: the line break
            // always goes there, together with any leading text token of
            // this line.
            lastToken.value += "\n";
            if (firstIsText)
            {
               lastToken.value += first.value;
               start = 1;
            }
         }
         else if (firstIsText)
         {
            // Only this line starts with text: fold the line break into
            // the front of that token.
            first.value = "\n" + first.value;
         }
         else
         {
            // Neither side is text (or this line is empty): separate the
            // two lines with a standalone newline token.
            tokens.push(newlineToken());
         }

         // Copy over whatever was not merged into the previous token.
         for (var j = start; j < lineTokens.length; j++)
            tokens.push(lineTokens[j]);

         lastToken = tokens[tokens.length - 1];
      }

      return tokens;
   }-*/;

   /**
    * Tokenizes the given text, which may contain newlines, into a flat list
    * of tokens. See {@link #doTokenize(String)} for how line breaks are
    * represented in the result.
    *
    * @param line the text to tokenize
    * @return a new, independently modifiable list of the resulting tokens
    */
   public final List<Token> tokenize(String line)
   {
      // Arrays.asList is a fixed-size view over the array; copy it so the
      // caller receives a list it can freely add to or remove from.
      Token[] flattened = doTokenize(line);
      return new ArrayList<Token>(Arrays.asList(flattened));
   }
}