/**
 * Copyright 2011-2012 Akiban Technologies, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.CharBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Scans the index page(s) produced by the standard Javadoc doclet and builds
 * two sorted lookup tables that map fully qualified class names and method
 * signatures to the URL of their API documentation.
 * <p>
 * Only anchors whose {@code href} begins with {@code ./com/persistit} are
 * considered (see {@link #PATTERN}). Instances hold mutable state and are not
 * synchronized.
 */
public class AsciiDocIndex {

    /** Line breaks and inline markup stripped from anchor text. */
    private final static String NOISY_STRINGS[] = { "\r", "\n", "<B>", "</B>", "<b>", "</b>", "<CODE>", "</CODE>",
            "<code>", "</code>", "<TT>", "</TT>", "<tt>", "</tt>", "<FONT>", "</FONT>", "<font>", "</font>" };

    /** Base URL used by {@link #index(String)}. */
    private final static String DEFAULT_BASE_URL = "http://www.akiban.com/documentation/apidocs";

    /** Leading path segments of hrefs that are indexed as classes or members. */
    private final static String[] INDEXED_ROOTS = { "com/", "java/", "javax/", "org/", "COM/", "ORG/" };

    /**
     * Charset of the Javadoc HTML files. Only the Charset is shared; a
     * CharsetDecoder is stateful, so a new one is created for every file.
     */
    private final static Charset CHARSET = Charset.forName("ISO-8859-15");

    /**
     * Pulls attributes out of the anchor tags in Javadoc index-NN.html files.
     * Group 1 is the whole tag, group 2 the href, group 4 the optional title
     * attribute value (null when absent) and group 5 the anchor text.
     */
    private final static Pattern PATTERN = Pattern.compile(
            "(<a href=\"(\\./com/persistit.*?)\" *(title=\"(.*?)\")?.*?>(.*?)</a>)", Pattern.CASE_INSENSITIVE);

    /** Number of terms handed to {@link #saveTerm} by this instance. */
    private int _count;

    final SortedMap<String, String> classMap = new TreeMap<String, String>();

    final SortedMap<String, String> methodMap = new TreeMap<String, String>();

    /**
     * Builds the index from the specified Javadoc file or directory. If the
     * path names a file, that one file is indexed. If it names a directory
     * that contains {@code index-all.html}, or failing that an
     * {@code index-files} sub-directory, that file or sub-directory is indexed
     * instead; otherwise every regular file in the directory is indexed.
     *
     * @param pathName
     *            path of a Javadoc index file or of a directory containing
     *            Javadoc index files
     * @param base
     *            prefix prepended (followed by "/") to every href to form its
     *            URL; if <code>null</code>, the parent path of the indexed
     *            file or directory is used, or "." when it has none
     *
     * @return the number of terms (packages, classes, interfaces, fields,
     *         constants, methods and properties) encountered so far by this
     *         instance
     *
     * @throws IOException
     *             if an index file cannot be read
     * @throws IllegalArgumentException
     *             if <code>pathName</code> does not exist
     */
    public int buildIndex(final String pathName, String base) throws IOException {
        File file = new File(pathName);
        //
        // The index generated by the standard Javadoc Doclet is either
        // at the root of the api tree, in a file called index-all.html, or
        // in a subdirectory called index-files. This code tries each case.
        //
        if (file.isDirectory() && !file.getPath().endsWith("index-files")) {
            final File indexAll = new File(file, "index-all.html");
            final File indexDir = new File(file, "index-files");
            if (indexAll.exists() && !indexAll.isDirectory()) {
                file = indexAll;
            } else if (indexDir.isDirectory()) {
                file = indexDir;
            }
        }

        if (!file.exists()) {
            throw new IllegalArgumentException("Requires the name of a Javadoc API index file, "
                    + "or of a directory containing Javadoc API index files.");
        }

        if (base == null) {
            // A relative path without a directory part has no parent; fall
            // back to "." rather than letting "null" leak into every URL.
            final String parent = file.getParent();
            base = parent != null ? parent : ".";
        }

        if (file.isDirectory()) {
            indexOneDirectory(file, base);
        } else {
            indexOneFile(file, base);
        }
        return _count;
    }

    /**
     * Indexes every regular file found directly in the supplied directory.
     * Sub-directories are skipped.
     *
     * @param indexDir
     *            directory containing Javadoc index files
     * @param base
     *            URL prefix for the generated links
     *
     * @throws IOException
     *             if the directory cannot be listed or a file cannot be read
     */
    public void indexOneDirectory(final File indexDir, final String base) throws IOException {
        final File[] indexFiles = indexDir.listFiles();
        if (indexFiles == null) {
            // listFiles() returns null for a non-directory or on an I/O error
            throw new IOException("Unable to list the files in " + indexDir);
        }
        for (final File indexFile : indexFiles) {
            if (indexFile.isFile()) {
                indexOneFile(indexFile, base);
            }
        }
    }

    /**
     * Indexes every matching anchor tag in one Javadoc index file.
     *
     * @param file
     *            the index file to read
     * @param base
     *            URL prefix for the generated links
     *
     * @throws IOException
     *             if the file cannot be opened, mapped or decoded
     */
    public void indexOneFile(final File file, final String base) throws IOException {
        System.out.println("Indexing " + file);
        final FileInputStream in = new FileInputStream(file);
        try {
            final FileChannel fc = in.getChannel();
            final MappedByteBuffer bb = fc.map(FileChannel.MapMode.READ_ONLY, 0, fc.size());
            final CharBuffer cb = CHARSET.newDecoder().decode(bb);
            final Matcher matcher = PATTERN.matcher(cb);
            while (matcher.find()) {
                final String wholeTag = matcher.group(1);
                final String href = matcher.group(2);
                final String url = base + "/" + fixDotSlash(href);
                final String title = matcher.group(4);
                final String text = matcher.group(5);
                indexOneTerm(wholeTag, href, url, title, text);
            }
        } finally {
            // Closing the stream also closes the channel obtained from it.
            in.close();
        }
    }

    /**
     * Classifies one anchor by the shape of its href and records the
     * resulting term(s).
     *
     * @param wholeTag
     *            the complete anchor tag, used only in diagnostics
     * @param href
     *            the href attribute value, with or without a leading "./"
     * @param url
     *            the URL to associate with the term(s)
     * @param title
     *            the title attribute value, or <code>null</code> if the
     *            anchor has none
     * @param text
     *            the anchor text
     */
    private void indexOneTerm(final String wholeTag, String href, final String url, final String title, String text) {
        // The display text is normalized here but is not stored in either
        // map at present.
        text = cleanupNoise(text);
        href = fixDotSlash(href);

        final int pHtml = href.lastIndexOf(".html");
        if (pHtml == -1) {
            return;
        }

        final int pPackageSummary = href.indexOf("/package-summary");
        if (pPackageSummary > 0) {
            indexPackageSegments(href.substring(0, pPackageSummary).replace('/', '.'), url);
            return;
        }

        if (!hasIndexedRoot(href)) {
            return;
        }

        final String className = href.substring(0, pHtml).replace('/', '.');
        final int pHash = href.indexOf('#');
        if (pHash == -1) {
            // No fragment: this is a class or interface. The title attribute
            // is optional in the pattern, so it may be null.
            final boolean isInterface = title != null && title.startsWith("interface");
            saveTerm(isInterface ? "Interface" : "Class", className, url);
            return;
        }

        final String name = href.substring(pHash + 1);
        final int pLeftParen = name.indexOf('(');
        if (pLeftParen == -1) {
            //
            // This is a field or a constant. We'll call it a constant if
            // it is spelled in upper case.
            //
            final String category = name.equals(name.toUpperCase()) ? "Constant" : "Field";
            saveTerm(category, name, url);
            return;
        }

        //
        // This is a method name. We will index it as a method, and then if
        // it conforms to the pattern for property set/get methods, we'll
        // also index the property name.
        //
        final int pRightParen = name.indexOf(')', pLeftParen);
        if (pRightParen == -1) {
            System.out.println("Missing right paren");
            System.out.println(wholeTag);
            return;
        }
        // Fully qualified class name followed by "#signature"
        final String methodTerm = (href.substring(0, pHtml) + href.substring(pHtml + 5)).replace('/', '.');
        saveTerm("Method", methodTerm, url);

        final String paramList = name.substring(pLeftParen + 1, pRightParen).trim();
        final boolean noArgs = paramList.length() == 0;
        final boolean oneArg = !noArgs && paramList.indexOf(',') == -1;
        int prefixLength = 0;
        if (name.startsWith("is") && noArgs) {
            prefixLength = 2;
        } else if (name.startsWith("get") && noArgs || name.startsWith("set") && oneArg) {
            prefixLength = 3;
        }
        if (prefixLength > 0) {
            // The property name is the method name minus its is/get/set prefix.
            final String property = name.substring(prefixLength, pLeftParen);
            if (property.length() > 0) {
                saveTerm("Property", property, url);
            }
        }
    }

    /**
     * Records each dot-separated segment of a package name as a "Package"
     * term.
     */
    private void indexPackageSegments(final String packageName, final String url) {
        int end = -1;
        while (end < packageName.length()) {
            final int start = end + 1;
            end = packageName.indexOf('.', start);
            if (end < 0) {
                end = packageName.length();
            }
            saveTerm("Package", packageName.substring(start, end), url);
        }
    }

    /**
     * Indicates whether the href starts with one of the indexed top-level
     * package directories.
     */
    private boolean hasIndexedRoot(final String href) {
        for (final String root : INDEXED_ROOTS) {
            if (href.startsWith(root)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Counts the term and, for methods, classes and interfaces, stores it in
     * the corresponding map. Terms of any other type are counted only.
     */
    private void saveTerm(final String type, final String term, final String url) {
        _count++;
        if ("Method".equals(type)) {
            methodMap.put(term, url);
        } else if ("Class".equals(type) || "Interface".equals(type)) {
            classMap.put(term, url);
        }
    }

    /** Removes a leading "./" from the supplied URL, if present. */
    private String fixDotSlash(final String url) {
        return url.startsWith("./") ? url.substring(2) : url;
    }

    /**
     * Removes every occurrence of each of the {@link #NOISY_STRINGS} from the
     * supplied term.
     */
    private String cleanupNoise(final String term) {
        final StringBuilder sb = new StringBuilder(term);
        for (final String tag : NOISY_STRINGS) {
            for (int p; (p = sb.indexOf(tag)) >= 0;) {
                sb.delete(p, p + tag.length());
            }
        }
        return sb.length() == term.length() ? term : sb.toString();
    }

    /**
     * Indexes the Javadoc at the supplied path, generating links that point
     * to the published Akiban API documentation.
     *
     * @param javaDocPathname
     *            path of a Javadoc index file or directory, as accepted by
     *            {@link #buildIndex(String, String)}
     *
     * @throws Exception
     *             if the index cannot be read
     */
    public void index(final String javaDocPathname) throws Exception {
        buildIndex(javaDocPathname, DEFAULT_BASE_URL);
    }

    /**
     * @return the live map from fully qualified class or interface name to
     *         documentation URL
     */
    public SortedMap<String, String> getClassMap() {
        return classMap;
    }

    /**
     * @return the live map from fully qualified method signature (class name,
     *         "#", method name and parameter list) to documentation URL
     */
    public SortedMap<String, String> getMethodMap() {
        return methodMap;
    }
}