/* * $Id: Java2Html.java 1238 2008-11-03 12:37:53Z amandel $ * * Copyright 2006, The jCoderZ.org Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * Neither the name of the jCoderZ.org Project nor the names of * its contributors may be used to endorse or promote products * derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/
package org.jcoderz.phoenix.report;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.io.Reader;
import java.io.StringReader;
import java.nio.charset.Charset;
import java.util.logging.Logger;

import javax.swing.text.Segment;

import org.gjt.sp.jedit.Mode;
import org.gjt.sp.jedit.syntax.DefaultTokenHandler;
import org.gjt.sp.jedit.syntax.ModeProvider;
import org.gjt.sp.jedit.syntax.ParserRuleSet;
import org.gjt.sp.jedit.syntax.Token;
import org.gjt.sp.jedit.syntax.TokenMarker;
import org.gjt.sp.jedit.syntax.TokenMarker.LineContext;
import org.jcoderz.commons.util.Assert;
import org.jcoderz.commons.util.IoUtil;

/**
 * Splits an input file into several tokens suitable for syntax
 * highlighting.
 * This class encapsulates the access to the jEdit syntax
 * highlighter package. No jEdit related classes should be
 * passed by interfaces of this class.
 *
 * <p>The complete file is read into memory by the constructor. Tokens
 * are then produced one at a time by {@link #nextToken()}; the getters
 * describe the token returned by the most recent call.</p>
 *
 * @author Andreas Mandel
 */
public class Syntax
{
    /** Factor applied to the illegal character count in {@link #isBinary}. */
    private static final int MAX_RATIO_ILLEGAL_CHARACTERS = 10;
    /** Assumed maximum average line length used by {@link #isBinary}. */
    private static final int MAX_AVERAGE_LINE_LENGTH = 200;
    /** Maximum number of characters inspected by {@link #isBinary}. */
    private static final int BINARY_TEST_PROBE_CHARACTERS = 1024;

    private static final String CLASSNAME = Syntax.class.getName();
    private static final Logger LOGGER = Logger.getLogger(CLASSNAME);

    private final Charset mSourceCharset;
    // NOTE(review): stored by the constructor but not read anywhere in
    // this class.
    private final int mTabWidth;
    /** Complete content of the source file. */
    private final char[] mFileContent;
    /** Index into mFileContent where the next line starts. */
    private int mFileContentPos;
    // CHECKME: are tabs in the token counted to the length?
    private Token mToken = emptyToken();
    private int mCurrentLineNumber;
    private int mCurrentLinePos;
    /** Current line without its line terminator, null before first use. */
    private Segment mCurrentLine;
    private int mNumberOfLines;
    private final TokenMarker mTokenMarker;
    private final DefaultTokenHandler mTokenHandler
        = new DefaultTokenHandler();
    /** Parser state handed from one line to the next. */
    private LineContext mLineContext = null;
    /** First line of the file. */
    private String mFirstLine;

    static
    {
        SyntaxModeCatalogHandler.loadModes();
    }

    /**
     * Reads the given source file and selects the syntax mode used to
     * tokenize it.
     *
     * <p>The mode is looked up by file name and first line. If no mode is
     * found and the content does not look binary, a plain "text" rule
     * set is used instead.</p>
     *
     * @param in the source file to read.
     * @param charSet the char set to use when reading the source file.
     *      If null the platform default char set will be used.
     * @param tabWidth the tab width to use when calculating the cursor
     *      position.
     * @throws IOException if an error occurs while reading the source file.
     * @throws RuntimeException if no syntax mode is found for the file
     *      and its content is classified as binary.
     */
    public Syntax (File in, Charset charSet, int tabWidth)
        throws IOException
    {
        Assert.notNull(in, "in");
        mSourceCharset = charSet == null ? Charset.defaultCharset() : charSet;
        mTabWidth = tabWidth;
        // readFile also fills mNumberOfLines and mFirstLine.
        mFileContent = readFile(in).toCharArray();
        mFileContentPos = 0;
        mCurrentLineNumber = 0;
        mCurrentLine = null;
        final Mode mode
            = ModeProvider.instance.getModeForFile(in.getName(), mFirstLine);
        if (mode == null)
        {
            if (isBinary(in.getAbsolutePath(), mFileContent))
            {
                throw new RuntimeException("No html view for binary file '"
                    + in.getAbsolutePath() + "'.");
            }
            LOGGER.fine("Could not find mode file for '" + in.getName()
                + "'. Is the jedit-syntax.jar on the classpath?");
            mTokenMarker = new TokenMarker();
            mTokenMarker.addRuleSet(new ParserRuleSet("text", "MAIN"));
        }
        else
        {
            mTokenMarker = mode.getTokenMarker();
        }
    }

    /**
     * Returns the number of lines of the parsed file.
     * The value is taken from the line counter of the reader used to
     * load the file and is available after creation of the class.
     * @return the number of lines of the parsed file.
     */
    public int getNumberOfLines ()
    {
        return mNumberOfLines;
    }

    /**
     * The line number of the currently parsed token.
     * Counting starts with line 1. Nevertheless prior to the first call
     * to {@link #nextToken()} 0 is returned.
     * @return the line number of the currently parsed token.
     */
    public int getCurrentLineNumber ()
    {
        return mCurrentLineNumber;
    }

    /**
     * Returns the cursor position of the start of the current token.
     * The position is reset to 1 at the start of each line and advanced
     * by the length of each token that has been passed.
     * @return the cursor position of the start of the current token.
     */
    public int getCurrentLinePos ()
    {
        return mCurrentLinePos;
    }

    /**
     * Returns the current token type as string.
     * To be used as symbolic identifier of the token. Possible
     * return values can be found in {@link Token#tokenToString(byte)}.
     * For the {@link Token#END} null is returned.
     * @return the current token type as string, or null for the end
     *      token.
     */
    public String getCurrentTokenType ()
    {
        final String result;
        if (mToken.id == Token.END)
        {
            result = null;
        }
        else
        {
            result = Token.tokenToString(mToken.id);
        }
        return result;
    }

    /**
     * Returns the length of the currently reported token.
     * @return the length of the currently reported token.
     */
    public int getCurrentTokenLength ()
    {
        return mToken.length;
    }

    /**
     * Parses the next token and returns its textual content as string.
     * When the previous token was the end token of a line (or no line
     * has been read yet) the next line is started first. For an empty
     * line, or when no line is available, the empty string is returned
     * and the current token is an end token.
     * @return the textual content of the new token, never null.
     */
    public String nextToken ()
    {
        if (mCurrentLine == null || mToken.id == Token.END)
        {
            nextLine();
        }
        else
        {
            mCurrentLinePos += mToken.length;
            mToken = mToken.next;
        }

        final String result;
        // nextLine() may leave mCurrentLine null on its end of file path,
        // so a missing line is handled like an empty one instead of being
        // dereferenced.
        if (mCurrentLine == null || mCurrentLine.count == 0)
        {
            mToken = emptyToken();
            result = "";
        }
        else if (mToken != null)
        {
            // Token offsets are relative to the start of the line segment.
            result = new String(mFileContent,
                mCurrentLine.offset + mToken.offset, mToken.length);
        }
        else
        {
            result = "";
            mToken = emptyToken();
        }
        return result;
    }

    /**
     * Forward to next line. Takes care of different line ending styles
     * (a single '\n' or '\r', or a pair of two different line ending
     * characters). Parsing for the next line is started.
     */
    private void nextLine ()
    {
        // NOTE(review): mFileContentPos is only ever set to values that
        // are <= mFileContent.length, so this branch does not appear to
        // be reachable; once the content is consumed, further calls yield
        // empty lines with increasing line numbers. Left unchanged as
        // callers may rely on it -- confirm before tightening to '>='.
        if (mFileContentPos > mFileContent.length)
        {
            mCurrentLine = null; // END OF FILE
            mCurrentLineNumber = mNumberOfLines + 1;
        }
        else
        {
            // Find the end of the line content.
            int pos = mFileContentPos;
            while (pos < mFileContent.length
                && mFileContent[pos] != '\n' && mFileContent[pos] != '\r')
            {
                pos++;
            }
            final int currentLineEnd = pos;

            // Skip the first line ending character...
            if (pos < mFileContent.length
                && (mFileContent[pos] == '\n' || mFileContent[pos] == '\r'))
            {
                pos++;
            }
            // ...and a second one, but only if it differs from the first,
            // so that two equal characters count as two line endings.
            if (pos < mFileContent.length
                && mFileContent[pos - 1] != mFileContent[pos]
                && (mFileContent[pos] == '\n' || mFileContent[pos] == '\r'))
            {
                pos++;
            }

            mCurrentLine = new Segment(mFileContent, mFileContentPos,
                currentLineEnd - mFileContentPos);
            mCurrentLineNumber++;
            mFileContentPos = pos;
            mCurrentLinePos = 1;
            if (mCurrentLine.count > 0)
            {
                mTokenHandler.init();
                mLineContext = mTokenMarker.markTokens(
                    mLineContext, mTokenHandler, mCurrentLine);
                mToken = mTokenHandler.getTokens();
            }
            else
            {
                // Empty lines are not passed to the token marker.
                mToken = emptyToken();
            }
        }
    }

    /**
     * Reads the complete file using the configured source char set.
     * As a side effect the number of lines and the first line of the
     * file are stored in the corresponding members.
     * @param in the file to read.
     * @return the content of the file.
     * @throws IOException if reading the file fails.
     */
    private String readFile (File in)
        throws IOException
    {
        String result = "";
        final FileInputStream fis = new FileInputStream(in);
        Reader reader = null;
        LineNumberReader lnr = null;
        try
        {
            reader = new InputStreamReader(fis, mSourceCharset);
            lnr = new LineNumberReader(reader);
            result = IoUtil.readFully(lnr);
            mNumberOfLines = lnr.getLineNumber();
            mFirstLine
                = new BufferedReader(new StringReader(result)).readLine();
        }
        finally
        {
            IoUtil.close(lnr);
            IoUtil.close(reader);
            IoUtil.close(fis);
        }
        return result;
    }

    /**
     * Creates a token of type {@link Token#END} with offset and
     * length 0.
     * @return a new end token.
     */
    private static Token emptyToken ()
    {
        return new Token(Token.END, 0, 0, null);
    }

    /**
     * Heuristic check whether the given content looks like binary data.
     * At most the first {@link #BINARY_TEST_PROBE_CHARACTERS} characters
     * are inspected. The content is rated binary if there is less than
     * one line break per {@link #MAX_AVERAGE_LINE_LENGTH} inspected
     * characters, or if the number of illegal characters (non whitespace
     * ISO control characters and undefined characters) multiplied by
     * {@link #MAX_RATIO_ILLEGAL_CHARACTERS} exceeds the number of legal
     * characters.
     * @param name the name of the file, only used in the log output.
     * @param fileContent the content to check.
     * @return true if the content is rated as binary.
     */
    static boolean isBinary (String name, char[] fileContent)
    {
        int newLines = 0;
        int chars = 0;
        int illegal = 0;
        int i;
        for (i = 0;
            i < fileContent.length && i < BINARY_TEST_PROBE_CHARACTERS; i++)
        {
            final char c = fileContent[i];
            if (c == '\n' || c == '\r')
            {
                newLines++;
            }
            else if (Character.isWhitespace(c))
            {
                // Checked before isISOControl so that whitespace such as
                // tab counts as legal.
                chars++;
            }
            else if (Character.isISOControl(c))
            {
                illegal++;
            }
            else if (Character.isDefined(c))
            {
                chars++;
            }
            else
            {
                illegal++;
            }
        }

        boolean result = false; // assume a text file
        // less than a new line per 200 characters
        if (((newLines + 1) * MAX_AVERAGE_LINE_LENGTH) < i)
        {
            result = true;
        }
        // too many 'illegal' chars
        else if (illegal * MAX_RATIO_ILLEGAL_CHARACTERS > chars)
        {
            result = true;
        }
        LOGGER.finest("For file " + name + " tested " + i + " chars with "
            + newLines + " newlines, " + chars + " legal chars, "
            + illegal + " illegal chars. -> "
            + (result ? "isBinary" : "isNotBinary"));
        return result;
    }
}