package gutenberg.pygments;
import com.google.common.base.Optional;
import org.python.core.PyObject;
import org.python.core.PyString;
import org.python.util.PythonInterpreter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Tokenizes source code by delegating to the Python Pygments library through an embedded
 * Python interpreter obtained from a {@link PyGateway}.
 *
 * @author <a href="http://twitter.com/aloyer">@aloyer</a>
 */
public class Pygments {

    private static final Logger log = LoggerFactory.getLogger(Pygments.class);

    /**
     * Python glue executed for every tokenization. It expects the interpreter variables
     * {@code code}, {@code lexer} and {@code out} to be bound beforehand, and forwards each
     * {@code (ttype, value)} pair it receives to {@code out.write}.
     * <p>
     * The leading spaces are significant: Python relies on them to nest {@code for} inside
     * {@code def} and {@code def} inside {@code class}.
     */
    private static final String FORWARD_SCRIPT = ""
            + "from pygments import highlight\n"
            + "from pygments.formatter import Formatter\n"
            + "\n"
            + "class ForwardFormatter(Formatter):\n"
            + "    def format(self, tokensource, outfile):\n"
            + "        for ttype, value in tokensource:\n"
            + "            out.write(ttype, value)\n"
            + "\n"
            + "result = highlight(code, lexer, ForwardFormatter())";

    private final PyGateway gateway;
    private final Lexers lexers;

    /**
     * Creates an instance backed by {@code PyGateway.getInstance()} and
     * {@code Lexers.getInstance()}.
     */
    public Pygments() {
        this(PyGateway.getInstance(), Lexers.getInstance());
    }

    /**
     * Creates an instance using the given collaborators.
     *
     * @param gateway provides the Python interpreter and is handed to the lexer lookup
     * @param lexers  resolves a language name to a Pygments lexer
     */
    public Pygments(PyGateway gateway, Lexers lexers) {
        this.lexers = lexers;
        this.gateway = gateway;
    }

    /**
     * Splits {@code code} into tokens using the lexer registered for {@code language}.
     *
     * @param language name used to look up the lexer
     * @param code     source text to tokenize
     * @return the tokens reported by Pygments, or a single {@code Token.Text} token holding
     *         the whole {@code code} when no lexer is found for {@code language}
     */
    public Tokens tokenize(String language, String code) {
        Optional<Object> lexer = lexers.lookupLexer(gateway, language);
        if (!lexer.isPresent()) {
            // No lexer available: hand the input back untouched as plain text.
            return new Tokens().append(Token.Text, code);
        }
        return process(lexer.get(), code);
    }

    /**
     * Runs the Pygments highlighter on {@code code} with the given lexer and collects every
     * emitted token.
     *
     * @param lexer lexer object as returned by the lexer lookup
     * @param code  source text to tokenize
     * @return the tokens collected while the script ran
     */
    private Tokens process(Object lexer, String code) {
        PythonInterpreter interpreter = gateway.getInterpreter();
        Tokens tokens = new Tokens();

        // The script reads these three names as interpreter-level variables.
        interpreter.set("code", code);
        interpreter.set("lexer", lexer);
        interpreter.set("out", new RFormatter(tokens));

        // Simply use Pygments as you would in Python.
        interpreter.exec(FORWARD_SCRIPT);
        return tokens;
    }

    /**
     * Sink handed to the Python side as {@code out}; each {@code write} call appends one
     * token to the wrapped {@link Tokens}.
     * <p>
     * Kept as a (non-static) inner class so that existing code instantiating it through an
     * enclosing {@code Pygments} instance keeps compiling.
     */
    public class RFormatter extends PyObject {
        private final Tokens tokens;

        /**
         * @param tokens collection that receives every written token
         */
        public RFormatter(Tokens tokens) {
            this.tokens = tokens;
        }

        /**
         * Records one token.
         *
         * @param ttype Pygments token type; mapped to a {@link Token} through its
         *              {@code repr} with the dots removed, falling back to
         *              {@code Token.Text} (with a warning) when no match is found
         * @param value text covered by the token
         */
        public void write(PyObject ttype, PyString value) {
            // TODO find a better way to get the token name than relying on the 'toString'...
            String ttypeStr = ttype.__repr__().getString();
            String stringRepr = ttypeStr.replace(".", "");
            Optional<Token> tokenOpt = Token.findTokenByRepr(stringRepr);
            Token token = tokenOpt.or(Token.Text);
            if (!tokenOpt.isPresent()) {
                log.warn("Unrecognized Pygments token {}, fallbacking to Text one", ttypeStr);
            }

            String valueString = value.getString();
            // This runs once per token: skip the newline escaping unless it will be logged.
            if (log.isDebugEnabled()) {
                log.debug("Token {} ({}): '{}'", token, ttypeStr, valueString.replace("\n", "\\n"));
            }
            tokens.append(token, valueString);
        }
    }
}