/*
 * Copyright (c) 1998-2011 Caucho Technology -- all rights reserved
 *
 * Caucho Technology permits redistribution, modification and use
 * of this file in source and binary form ("the Software") under the
 * Caucho Developer Source License ("the License"). The following
 * conditions must be met:
 *
 * 1. Each copy or derived work of the Software must preserve the copyright
 * notice and this notice unmodified.
 *
 * 2. Redistributions of the Software in source or binary form must include
 * an unmodified copy of the License, normally in a plain ASCII text
 *
 * 3. The names "Resin" or "Caucho" are trademarks of Caucho Technology and
 * may not be used to endorse products derived from this software.
 * "Resin" or "Caucho" may not appear in the names of products derived
 * from this software.
 *
 * This Software is provided "AS IS," without a warranty of any kind.
 * ALL EXPRESS OR IMPLIED REPRESENTATIONS AND WARRANTIES, INCLUDING ANY
 * IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE
 * OR NON-INFRINGEMENT, ARE HEREBY EXCLUDED.
 *
 * CAUCHO TECHNOLOGY AND ITS LICENSORS SHALL NOT BE LIABLE FOR ANY DAMAGES
 * SUFFERED BY LICENSEE OR ANY THIRD PARTY AS A RESULT OF USING OR
 * DISTRIBUTING SOFTWARE. IN NO EVENT WILL CAUCHO OR ITS LICENSORS BE LIABLE
 * FOR ANY LOST REVENUE, PROFIT OR DATA, OR FOR DIRECT, INDIRECT, SPECIAL,
 * CONSEQUENTIAL, INCIDENTAL OR PUNITIVE DAMAGES, HOWEVER CAUSED AND
 * REGARDLESS OF THE THEORY OF LIABILITY, ARISING OUT OF THE USE OF OR
 * INABILITY TO USE SOFTWARE, EVEN IF HE HAS BEEN ADVISED OF THE POSSIBILITY
 * OF SUCH DAMAGES.
 *
 * @author Sam
 */

package com.caucho.doc.javadoc;

import com.caucho.log.Log;
import com.caucho.util.CharBuffer;
import com.caucho.util.L10N;
import com.caucho.vfs.IOExceptionWrapper;
import com.caucho.vfs.ReadStream;

import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Parse a javadoc generated html index file.
 *
 * <p>The parser scans the stream for <code>&lt;DL&gt;</code> lists, splits
 * each list into <code>&lt;DT&gt;</code> entries and reports every entry to
 * the supplied {@link Callback}.  For each entry it looks for, in order:
 * the first double-quoted string (taken as the href), the text between
 * <code>&lt;B&gt;</code> and the next <code>&lt;</code> (the name), a
 * <code>-</code>, and then a description that starts with a type phrase
 * such as "class " or "Static method".
 */
public class IndexParser {
  static protected final Logger log = Log.open(IndexParser.class);
  static final L10N L = new L10N(IndexParser.class);

  /** Returned by the character-level parsers when the stream is exhausted. */
  static private final int EOF = -1;
  /** Returned by the character-level parsers when a {@code </DL>} was seen. */
  static private final int EODL = -2;

  static public final int TYPE_PACKAGE     = 0x01;
  static public final int TYPE_CLASS       = 0x02;
  static public final int TYPE_INTERFACE   = 0x04;
  static public final int TYPE_ENUM        = 0x08;
  static public final int TYPE_ANNOTATION  = 0x10;
  static public final int TYPE_EXCEPTION   = 0x20;
  static public final int TYPE_ERROR       = 0x40;
  static public final int TYPE_CONSTRUCTOR = 0x80;
  static public final int TYPE_METHOD      = 0x100;
  static public final int TYPE_VARIABLE    = 0x200;

  static public final int MODIFIER_STATIC  = 0x1000;

  private final ReadStream _rs;
  private final Callback _callback;

  /** Number of line terminators consumed so far through this parser. */
  private int _currLine = 0;
  /** Value of {@link #_currLine} when the current DT entry started. */
  private int _lastDTLine = -1;

  /**
   * @param rs the stream to read the index html from
   * @param callback receives one call per successfully parsed entry
   */
  public IndexParser(ReadStream rs, Callback callback)
  {
    _rs = rs;
    _callback = callback;
  }

  /**
   * Reads the stream to its end, handing every {@code <DL>} list that is
   * found to the list parser.
   *
   * @throws IOException on a read failure or when an entry cannot be parsed
   */
  public void parse()
    throws IOException
  {
    // find <DL>
    int ch = 0;

    while (ch != EOF) {
      ch = readChar();

      if (ch == '<') {
        ch = readChar();

        if (ch == 'D' || ch == 'd') {
          ch = readChar();

          if (ch == 'L' || ch == 'l') {
            readChar(); // eat '>'

            // EODL simply resumes the search for the next <DL>
            ch = parseDL();
          }
        }
      }
    }
  }

  /**
   * Receiver for the entries found in the index.
   */
  public interface Callback {
    /**
     * Called once for each parsed entry.
     *
     * @param path the href with leading "../" segments and any "#anchor"
     *        removed
     * @param anchor the part of the href after '#', or null if there is none
     * @param name the text found inside the entry's {@code <B>} element
     * @param fullname dotted name derived from the href
     * @param typ one of the TYPE_* constants
     * @param modifier 0 or {@link IndexParser#MODIFIER_STATIC}
     * @param description the cleaned description with its leading type
     *        sentence removed
     */
    public void item(String path, String anchor, String name, String fullname,
                     int typ, int modifier, String description);
  }

  /**
   * A String describing the read source and the current line of parsing.
   */
  public String getLineInfo()
  {
    CharBuffer cb = CharBuffer.allocate();

    cb.append(_rs.getPath().toString());
    cb.append(':');
    cb.append(_currLine);

    return cb.close();
  }

  /**
   * Reads one line from the stream into {@code cb}, counting it when the
   * underlying read reports success.
   */
  protected boolean readLine(CharBuffer cb)
    throws IOException
  {
    boolean n = _rs.readLine(cb);

    if (n)
      _currLine++;

    return n;
  }

  /**
   * Reads one character, folding "\r\n" into a single '\n' and keeping the
   * line counter up to date.  A '\r' that is not followed by '\n' is
   * returned as is and is not counted as a line.
   */
  protected int readChar()
    throws IOException
  {
    int n = _rs.readChar();

    if (n == '\n') {
      _currLine++;
    }
    else if (n == '\r') {
      if (_rs.readChar() != '\n') {
        _rs.unread();
      }
      else {
        _currLine++;
        n = '\n';
      }
    }

    return n;
  }

  /**
   * Parses the DT entries of one list.
   *
   * @return EOF or EODL
   */
  private int parseDL()
    throws IOException
  {
    // ignore the first, it's just junk until the first <DT>
    boolean ignore = true;

    int ch = 0;

    while (ch != EOF && ch != EODL) {
      // one character is consumed and discarded before every entry; after
      // a "<DT" match this is the character that follows the 'T'
      readChar();

      _lastDTLine = _currLine;
      ch = parseDT(ignore);
      ignore = false;
    }

    return ch;
  }

  /**
   * Collects characters until the next {@code <DT} or {@code </DL} and,
   * unless {@code ignore} is set, hands the collected text to
   * {@link #readDT}.
   *
   * @param ignore just skip the text, used to get to the first DT
   * @return EOF or EODL or last char read
   */
  private int parseDT(boolean ignore)
    throws IOException
  {
    // fill dt until there is another <DT>
    // or there is a </DL>
    // once it's full, call readDT()
    CharBuffer dt = ignore ? null : CharBuffer.allocate();
    // holds a possible tag prefix until it is known not to be <DT or </DL
    CharBuffer cbb = CharBuffer.allocate();

    try {
      int ch;

      while ((ch = readChar()) != EOF) {
        if (ch == '<') {
          cbb.append((char) ch);
          ch = readChar();

          if (ch == 'D' || ch == 'd') {
            cbb.append((char) ch);
            ch = readChar();

            if (ch == 'T' || ch == 't') {
              cbb.clear();

              if (!ignore)
                readDT(dt);

              break;
            }
          }
          else if (ch == '/') {
            cbb.append((char) ch);
            ch = readChar();

            if (ch == 'D' || ch == 'd') {
              cbb.append((char) ch);
              ch = readChar();

              if (ch == 'L' || ch == 'l') {
                if (!ignore)
                  readDT(dt);

                ch = EODL;
                break;
              }
            }
          }

          // not a terminator: the prefix is ordinary entry text
          if (dt != null)
            dt.append(cbb);

          cbb.clear();
        }

        if (dt != null)
          dt.append((char) ch);
      }

      return ch;
    } finally {
      // release the buffers even when readDT() fails
      cbb.free();

      if (dt != null)
        dt.free();
    }
  }

  /**
   * Parses the text of a single DT entry and reports it to the callback.
   * The contents of {@code cb} are rewritten in the process.
   *
   * @param cb the raw text of the entry; remains owned by the caller
   * @throws IOException wrapping the IndexOutOfBoundsException raised when
   *         an expected marker or a known type phrase is missing; the
   *         message names the parsing step and the current line
   */
  private void readDT(CharBuffer cb)
    throws IOException
  {
    String parseDescr = ""; // the step being performed

    CharBuffer t = CharBuffer.allocate();

    try {
      if (log.isLoggable(Level.FINEST))
        log.finest(L.l("<DT> entry from line {0} is [[{1}]]",String.valueOf(_lastDTLine),cb.toString()));

      String path;
      String anchor = null;
      String fullname;
      String name;
      int typ;
      int modifier = 0;
      String description;

      int i = 0;

      parseDescr = "parsing href, looking for first \"";
      i = readToAndEat(cb,i,'\"',null);

      parseDescr = "parsing href, looking for next \"";
      i = readToAndEat(cb,i,'\"',t);

      while (t.startsWith("../"))
        t.delete(0,3);

      int ai = t.indexOf('#');

      if (ai > -1) {
        path = t.substring(0,ai);
        anchor = t.substring(ai + 1);
      }
      else {
        path = t.toString();
      }

      t.clear();

      if (log.isLoggable(Level.FINEST)) {
        log.finest(L.l("path: [{0}]",path));
        log.finest(L.l("anchor: [{0}]",anchor));
      }

      parseDescr = "using href to determine fullName";

      t.append(path);
      t.setLength(t.length() - 5); // drop .html

      for (int ti = t.length() - 1; ti >= 0; ti--) {
        if (t.charAt(ti) == '/')
          t.setCharAt(ti,'.');
      }

      if (anchor != null) {
        t.append('.');
        t.append(anchor);
      }

      // 16 == ".package-summary".length()
      if (t.endsWith(".package-summary"))
        t.setLength(t.length() - 16);

      fullname = t.toString();
      t.clear();

      if (log.isLoggable(Level.FINEST)) {
        log.finest(L.l("fullname: [{0}]",fullname));
      }

      parseDescr = "parsing name, looking for opening <B>";
      i = readToAndEat(cb,i,"<B>",null);

      parseDescr = "parsing name, looking for closing </B>";
      i = readToAndEat(cb,i,"<",t);

      name = t.toString();
      t.clear();

      if (log.isLoggable(Level.FINEST)) {
        log.finest(L.l("name: [{0}]",name));
      }

      parseDescr = "parsing description, `-' marks beginning";
      i = readToAndEat(cb,i,'-',null);

      parseDescr = "parsing description, removing markup";
      clean(cb,i);

      parseDescr = "parsing description";

      // < 1.4 has "package ", 1.5 has "Package "
      if (cb.startsWith("package ") || cb.startsWith("Package ")) {
        typ = TYPE_PACKAGE;
      }
      else if (cb.startsWith("class ") || cb.startsWith("Class ")) {
        typ = TYPE_CLASS;
      }
      else if (cb.startsWith("enum ") || cb.startsWith("Enum ")) {
        typ = TYPE_ENUM;
      }
      else if (cb.startsWith("annotation ") || cb.startsWith("Annotation ")) {
        typ = TYPE_ANNOTATION;
      }
      else if (cb.startsWith("interface ") || cb.startsWith("Interface ")) {
        typ = TYPE_INTERFACE;
      }
      else if (cb.startsWith("exception ") || cb.startsWith("Exception ")) {
        typ = TYPE_EXCEPTION;
      }
      else if (cb.startsWith("error ") || cb.startsWith("Error ")) {
        typ = TYPE_ERROR;
      }
      else if (cb.startsWith("Constructor")) {
        typ = TYPE_CONSTRUCTOR;
      }
      else if (cb.startsWith("Method")) {
        typ = TYPE_METHOD;
      }
      else if (cb.startsWith("Static method")) {
        typ = TYPE_METHOD;
        modifier = MODIFIER_STATIC;
      }
      else if (cb.startsWith("Variable")) {
        typ = TYPE_VARIABLE;
      }
      else if (cb.startsWith("Static variable")) {
        typ = TYPE_VARIABLE;
        modifier = MODIFIER_STATIC;
      }
      else {
        // toString(), not close(): the buffer belongs to the caller and is
        // read again by the catch block below
        throw new IndexOutOfBoundsException(L.l("cannot determine type from `{0}'",cb.toString()));
      }

      if (log.isLoggable(Level.FINEST)) {
        log.finest(L.l("type: [{0}]",typ));
      }

      parseDescr = "parsing description, remove first sentence";
      eatSentence(cb);

      description = cb.toString();

      if (log.isLoggable(Level.FINEST))
        log.finest(L.l("description: [{0}]",description));

      // do the callback
      _callback.item(path,anchor,name,fullname,typ,modifier,description);
    }
    catch (IndexOutOfBoundsException ex) {
      String msg = L.l("parsing error {0}: {1}, {2}",parseDescr, ex.getMessage(),getLineInfo());

      if (log.isLoggable(Level.FINE)) {
        log.fine(msg);
        log.fine(L.l("buffer was [[{0}]]",cb.toString()));
      }

      throw new IOExceptionWrapper(msg,ex);
    }
    finally {
      t.free();
    }
  }

  /**
   * Advances from {@code i} to the first occurrence of {@code after},
   * copying the characters passed over into {@code out}.
   *
   * @param in the text to scan
   * @param i index to start at
   * @param after the character to stop at
   * @param out receives the skipped characters, may be null
   * @return the index just past {@code after}
   * @throws IndexOutOfBoundsException if {@code after} does not occur at or
   *         after {@code i}
   */
  private int readToAndEat(CharBuffer in, int i, char after, CharBuffer out)
  {
    int l = in.length();

    for (;;) {
      // checked before the read so that a start index at the end of the
      // buffer produces the descriptive error as well
      if (i >= l)
        throw new IndexOutOfBoundsException(L.l("error looking for `{0}'",Character.valueOf(after)));

      char ch = in.charAt(i);

      if (ch == after)
        return i + 1;

      if (out != null)
        out.append(ch);

      i++;
    }
  }

  /**
   * Advances from {@code i} to the first occurrence of the string
   * {@code after}, copying the characters passed over into {@code out}.
   *
   * @param in the text to scan
   * @param i index to start at
   * @param after the string to stop at
   * @param out receives the skipped characters, may be null
   * @return the index just past {@code after}
   * @throws IndexOutOfBoundsException if {@code after} does not occur at or
   *         after {@code i}
   */
  private int readToAndEat(CharBuffer in, int i, String after, CharBuffer out)
  {
    int al = after.length();

    for (;;) {
      if (in.regionMatches(i,after,0,al))
        return i + al;

      if (i >= in.length())
        throw new IndexOutOfBoundsException(L.l("error looking for `{0}'",after));

      if (out != null)
        out.append(in.charAt(i));

      i++;
    }
  }

  /**
   * Removes the leading sentence (the generated type phrase) from the
   * cleaned description, together with any whitespace or '.' that follows
   * it.
   */
  private void eatSentence(CharBuffer cb)
  {
    if (log.isLoggable(Level.FINEST))
      log.finest("eat sentence [[" + cb.toString() + "]]");

    int cbl = cb.length();
    int i = 0;

    // both spellings are accepted as TYPE_PACKAGE by readDT()
    if (cb.startsWith("package ") || cb.startsWith("Package ")) {
      // second " " marks end of first sentence
      i = cb.indexOf(' ') + 1;

      if (i < cbl)
        i = cb.indexOf(' ',i) + 1;

      // no second space: the whole text is the first sentence
      if (i <= 0)
        i = cbl;
    }
    else {
      // ". " marks end of first sentence
      // NOTE(review): when a '.' is found that is not followed by
      // whitespace and no later '.' exists, i is left just past that '.'
      // and the text before it is removed as well -- confirm this is
      // intended.
      do {
        int d = cb.indexOf('.',i);

        if (d < 0)
          break;

        i = d + 1;

        if (i >= cbl || Character.isWhitespace(cb.charAt(i)))
          break;

        i++;
      } while (i < cbl);
    }

    // strip whitespace from beginning
    while (i < cbl
           && (Character.isWhitespace(cb.charAt(i)) || cb.charAt(i) == '.')) {
      i++;
    }

    if (i >= cbl) {
      cb.clear();
    }
    else {
      cb.delete(0,i);
    }
  }

  /**
   * remove whitespace or '.' at beginning and whitespace at end, fix first
   * sentence (add .), strip out equivalent of regexp match "&lt;.*&gt;",
   * replace the {@code &nbsp;} entity with space, replace newlines with
   * space, and merge multiple spaces into a single space;
   *
   * @param cb the entry text; replaced by the cleaned text of everything
   *        from index {@code i} on
   * @param i index in {@code cb} at which the description starts
   * @throws IndexOutOfBoundsException if a tag is opened but never closed
   */
  private void clean(CharBuffer cb, int i)
  {
    CharBuffer r = CharBuffer.allocate();

    try {
      for (;;) {
        i = eatWhitespace(cb,i);

        if (i < cb.length() && cb.charAt(i) == '.')
          i++;
        else
          break;
      }

      boolean lastws = false; // reduce multiple ws to a single space

      while (i < cb.length()) {
        char ch = cb.charAt(i);

        if (ch == '\n' || ch == '\r')
          ch = ' ';

        if (lastws && Character.isWhitespace(ch)) {
          i++;
          continue;
        }

        if (ch == '<') {
          // a closing </A...> ends the generated first sentence
          if (i + 2 < cb.length()
              && cb.charAt(i + 1) == '/' && cb.charAt(i + 2) == 'A')
            r.append(". ");

          // have to watch for stray < that are not really markup
          // only something that matches "</?[A-Za-z]>" counts as markup
          int cn = (i + 1 >= cb.length()) ? -1 : cb.charAt(i + 1);

          if (cn == '/')
            cn = (i + 2 >= cb.length()) ? cn : cb.charAt(i + 2);

          if ((cn >= 'a' && cn <= 'z') || (cn >= 'A' && cn <= 'Z')) {
            i = eatUntil(cb,++i,'>');

            // tags starting with D (e.g. <DD>) separate words
            if (cn == 'D' || cn == 'd')
              r.append(' ');

            i++;
            continue;
          }
        }

        if (cb.regionMatches(i,"&nbsp;",0,6)) {
          r.append(' ');
          i += 5; // the sixth character is skipped by the i++ below
          lastws = true;
        }
        else {
          r.append(ch);
          lastws = Character.isWhitespace(ch);
        }

        i++;
      }

      int l = r.length() - 1;

      while (l > 0 && Character.isWhitespace(r.charAt(l))) {
        r.setLength(l--);
      }

      cb.clear();
      cb.append(r);
    } finally {
      r.free();
    }
  }

  /**
   * @return the index of the first non-whitespace character at or after
   *         {@code i}, or the length of {@code cb} if there is none
   */
  private int eatWhitespace(CharBuffer cb, int i)
  {
    while (i < cb.length() && Character.isWhitespace(cb.charAt(i))) {
      i++;
    }

    return i;
  }

  /**
   * @return the index of the first occurrence of {@code until} at or after
   *         {@code i}
   * @throws IndexOutOfBoundsException if {@code until} does not occur
   */
  private int eatUntil(CharBuffer cb, int i, char until)
  {
    int l = cb.length();

    for (;;) {
      if (i >= l)
        throw new IndexOutOfBoundsException(L.l("error looking for `{0}'",Character.valueOf(until)));

      if (cb.charAt(i) == until)
        return i;

      i++;
    }
  }
}