/**
* Copyright 2011-2012 Akiban Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.CharBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.persistit.exception.PersistitException;
/**
 * Builds an in-memory index of class/interface names and method signatures
 * from the HTML index pages produced by the standard Javadoc doclet
 * (<code>index-all.html</code> or the files in <code>index-files</code>).
 * Only anchors whose <code>href</code> starts with
 * <code>./com/persistit</code> are recognized (see {@link #PATTERN}).
 * <p>
 * Each recognized term is mapped to the URL of its documentation page. The
 * results accumulate in {@link #getClassMap()} and {@link #getMethodMap()}
 * across successive calls on the same instance. Instances hold mutable state
 * and perform no synchronization.
 */
public class AsciiDocIndex {

    // Markup and line breaks stripped from the anchor text of an index entry.
    private final static String[] NOISY_STRINGS = { "\r", "\n", "<B>", "</B>", "<b>", "</b>", "<CODE>", "</CODE>",
            "<code>", "</code>", "<TT>", "</TT>", "<tt>", "</tt>", "<FONT>", "<FONT>".replace("<", "</"), "<font>",
            "</font>" };

    // Charset used to decode the Javadoc HTML files. A decoder is created per
    // file (CharsetDecoder instances are stateful), so none is shared here.
    private final static Charset CHARSET = Charset.forName("ISO-8859-15");

    //
    // Regex Pattern to pull various attributes and fields out of the anchor
    // tags in Javadoc index-NN.html files. Groups: 1 = whole tag, 2 = href,
    // 4 = title attribute value (null when the attribute is absent),
    // 5 = anchor text.
    //
    private final static Pattern PATTERN = Pattern.compile(
            "(<a href=\"(\\./com/persistit.*?)\" *(title=\"(.*?)\")?.*?>(.*?)</a>)", Pattern.CASE_INSENSITIVE);

    // Number of distinct terms stored in classMap and methodMap so far.
    private int _count;

    // Fully-qualified class/interface name -> documentation URL.
    final SortedMap<String, String> classMap = new TreeMap<String, String>();

    // Fully-qualified method signature (Class#method(params)) -> documentation URL.
    final SortedMap<String, String> methodMap = new TreeMap<String, String>();

    /**
     * Builds the index from the specified Javadoc file or directory. If
     * <code>pathName</code> names a file, that one file is read and indexed.
     * If it names a directory, the method looks for
     * <code>index-all.html</code> and then for an <code>index-files</code>
     * sub-directory inside it; failing both, the files in the directory
     * itself are indexed.
     *
     * @param pathName
     *            path of a Javadoc index file, or of a directory containing
     *            Javadoc index files
     * @param base
     *            prefix prepended (followed by "/") to each relative href to
     *            form the stored URL. If <code>null</code>, the parent
     *            directory of the index file or directory is used, or "." if
     *            it has none.
     *
     * @return the number of distinct class, interface and method terms this
     *         instance has indexed so far, including those from earlier calls
     *
     * @throws IOException
     *             if an index file or directory cannot be read
     * @throws IllegalArgumentException
     *             if <code>pathName</code> does not exist
     */
    public int buildIndex(final String pathName, String base) throws IOException {
        File file = new File(pathName);
        // The index generated by the standard Javadoc Doclet is either
        // at the root of the api tree, in a file called index-all.html, or
        // in a subdirectory called index-files. This code tries each case.
        //
        if (file.exists() && file.isDirectory() && !file.getPath().endsWith("index-files")) {
            final File indexAll = new File(file, "index-all.html");
            final File indexDir = new File(file, "index-files");
            if (indexAll.exists() && !indexAll.isDirectory()) {
                file = indexAll;
            } else if (indexDir.exists() && indexDir.isDirectory()) {
                file = indexDir;
            }
        }
        if (!file.exists()) {
            throw new IllegalArgumentException("Requires the name of a Javadoc API index file, "
                    + "or of a directory containing Javadoc API index files.");
        }
        if (base == null) {
            // getParent() is null for a bare relative name; avoid building
            // URLs that start with the literal text "null/".
            final String parent = file.getParent();
            base = parent != null ? parent : ".";
        }
        if (file.isDirectory()) {
            indexOneDirectory(file, base);
        } else {
            indexOneFile(file, base);
        }
        return _count;
    }

    /**
     * Indexes every regular file found directly in the supplied directory.
     * Sub-directories are skipped; the directory is not traversed
     * recursively.
     *
     * @param indexDir
     *            directory containing Javadoc index files
     * @param base
     *            URL prefix for the stored links
     *
     * @throws IOException
     *             if the directory cannot be listed or a file cannot be read
     */
    public void indexOneDirectory(final File indexDir, final String base) throws IOException {
        final File[] indexFiles = indexDir.listFiles();
        if (indexFiles == null) {
            // listFiles() returns null when the path is not a directory or
            // an I/O error occurs.
            throw new IOException("Unable to list files in directory " + indexDir);
        }
        for (int i = 0; i < indexFiles.length; i++) {
            if (indexFiles[i].isFile()) {
                indexOneFile(indexFiles[i], base);
            }
        }
    }

    /**
     * Reads one Javadoc index file, decodes it as ISO-8859-15 and indexes
     * every anchor tag matched by {@link #PATTERN}. The name of the file is
     * written to <code>System.out</code>.
     *
     * @param file
     *            the Javadoc index file to read
     * @param base
     *            URL prefix for the stored links
     *
     * @throws IOException
     *             if the file cannot be opened, mapped or decoded
     */
    public void indexOneFile(final File file, final String base) throws IOException {
        System.out.println("Indexing " + file);
        final FileInputStream in = new FileInputStream(file);
        try {
            final FileChannel fc = in.getChannel();
            final MappedByteBuffer bb = fc.map(FileChannel.MapMode.READ_ONLY, 0, fc.size());
            // A fresh decoder per file: decoders carry state and must not be
            // shared between concurrent callers.
            final CharBuffer cb = CHARSET.newDecoder().decode(bb);
            final Matcher matcher = PATTERN.matcher(cb);
            while (matcher.find()) {
                final String wholeTag = matcher.group(1);
                final String href = matcher.group(2);
                final String url = base + "/" + fixDotSlash(href);
                final String title = matcher.group(4);
                final String text = cleanupNoise(matcher.group(5));
                indexOneTerm(wholeTag, href, url, title, text);
            }
        } finally {
            // Closing the stream also closes its channel.
            in.close();
        }
    }

    /**
     * Classifies one anchor tag by the shape of its href (package summary,
     * class/interface page, field or method fragment) and records the
     * resulting term(s) through {@link #saveTerm}.
     *
     * @param wholeTag
     *            the complete anchor element, used only for diagnostics
     * @param rawHref
     *            the href attribute as it appears in the file
     * @param url
     *            the URL to store for the term
     * @param title
     *            the title attribute, or <code>null</code> if the anchor has
     *            none
     * @param text
     *            the anchor text with markup removed. NOTE(review): not
     *            stored by the current implementation.
     */
    private void indexOneTerm(final String wholeTag, final String rawHref, final String url, final String title,
            final String text) {
        final String href = fixDotSlash(rawHref);
        final int pHtml = href.lastIndexOf(".html");
        if (pHtml == -1) {
            return;
        }

        final int pPackageSummary = href.indexOf("/package-summary");
        if (pPackageSummary > 0) {
            indexPackageSegments(href.substring(0, pPackageSummary).replace('/', '.'), url);
            return;
        }

        final boolean knownRoot = href.startsWith("com/") || href.startsWith("java/") || href.startsWith("javax/")
                || href.startsWith("org/") || href.startsWith("COM/") || href.startsWith("ORG/");
        if (!knownRoot) {
            return;
        }

        final int pHash = href.indexOf('#');
        if (pHash == -1) {
            // No fragment: this is a class or interface page. The title
            // attribute is optional in the pattern, so it may be null.
            final boolean isInterface = title != null && title.startsWith("interface");
            final String className = href.substring(0, pHtml).replace('/', '.');
            saveTerm(isInterface ? "Interface" : "Class", className, url);
            return;
        }

        final String name = href.substring(pHash + 1);
        final int pLeftParen = name.indexOf('(');
        if (pLeftParen == -1) {
            //
            // This is a field or a constant. We'll call it a constant if it
            // is spelled in upper case.
            //
            final String category = name.equals(name.toUpperCase()) ? "Constant" : "Field";
            saveTerm(category, name, url);
            return;
        }

        //
        // This is a method name. We will index it as a method, and then if
        // it conforms to the pattern for property set/get methods, we'll
        // also index the property name.
        //
        final int pRightParen = name.indexOf(')', pLeftParen);
        if (pRightParen == -1) {
            System.out.println("Missing right paren");
            System.out.println(wholeTag);
            return;
        }
        final String paramList = name.substring(pLeftParen + 1, pRightParen).trim();
        // The method term is the href with ".html" removed and '/' turned
        // into '.', e.g. com.persistit.Exchange#fetch().
        final String term = (href.substring(0, pHtml) + href.substring(pHtml + 5)).replace('/', '.');
        saveTerm("Method", term, url);

        final boolean noParams = paramList.length() == 0;
        final boolean oneParam = !noParams && paramList.indexOf(',') == -1;
        int prefixLength = 0;
        if (name.startsWith("get") && noParams) {
            prefixLength = 3;
        } else if (name.startsWith("is") && noParams) {
            prefixLength = 2;
        } else if (name.startsWith("set") && oneParam) {
            prefixLength = 3;
        }
        if (prefixLength > 0 && pLeftParen > prefixLength) {
            // Strip the get/is/set prefix from the method's simple name
            // (not from the qualified term, whose first characters belong
            // to the package name).
            saveTerm("Property", name.substring(prefixLength, pLeftParen), url);
        }
    }

    /**
     * Records each dot-separated segment of a package name as a "Package"
     * term.
     *
     * @param packageName
     *            dotted package name, e.g. <code>com.persistit.util</code>
     * @param url
     *            the URL of the package summary page
     */
    private void indexPackageSegments(final String packageName, final String url) {
        int end = -1;
        while (end < packageName.length()) {
            final int start = end + 1;
            end = packageName.indexOf('.', start);
            if (end < 0) {
                end = packageName.length();
            }
            saveTerm("Package", packageName.substring(start, end), url);
        }
    }

    /**
     * Stores a term in the map matching its type: "Method" terms go to the
     * method map, "Class" and "Interface" terms to the class map. All other
     * types are ignored. The running count is incremented only when the term
     * was not already present.
     *
     * @param type
     *            category of the term
     * @param term
     *            the key to store
     * @param url
     *            the documentation URL for the term
     */
    private void saveTerm(final String type, final String term, final String url) {
        // NOTE(review): looks like a leftover diagnostic; retained so console
        // output is unchanged.
        if (term.contains("#end")) {
            System.out.println(term);
        }
        final SortedMap<String, String> target;
        if ("Method".equals(type)) {
            target = methodMap;
        } else if ("Class".equals(type) || "Interface".equals(type)) {
            target = classMap;
        } else {
            return;
        }
        if (target.put(term, url) == null) {
            _count++;
        }
    }

    /**
     * Removes a leading "./" from a relative URL, if present.
     *
     * @param url
     *            the relative URL
     * @return the URL without the leading "./"
     */
    private String fixDotSlash(final String url) {
        return url.startsWith("./") ? url.substring(2) : url;
    }

    /**
     * Removes every occurrence of the strings in {@link #NOISY_STRINGS}
     * (line breaks and simple formatting tags) from the supplied text.
     *
     * @param term
     *            the text to clean
     * @return the text with all noise strings removed
     */
    private String cleanupNoise(final String term) {
        final StringBuilder sb = new StringBuilder(term);
        for (int i = 0; i < NOISY_STRINGS.length; i++) {
            final String tag = NOISY_STRINGS[i];
            // Deleting can only create a new match at or after p - length + 1,
            // so resume the search there instead of rescanning from the start.
            int p = sb.indexOf(tag);
            while (p >= 0) {
                sb.delete(p, p + tag.length());
                p = sb.indexOf(tag, Math.max(0, p - tag.length() + 1));
            }
        }
        return sb.toString();
    }

    /**
     * Indexes the Javadoc at the given path, using the published Akiban API
     * documentation location as the base URL for the stored links.
     *
     * @param javaDocPathname
     *            path of a Javadoc index file or directory
     * @throws Exception
     *             if the index cannot be built
     */
    public void index(final String javaDocPathname) throws Exception {
        final String base = "http://www.akiban.com/documentation/apidocs";
        buildIndex(javaDocPathname, base);
    }

    /**
     * @return the live map of fully-qualified class and interface names to
     *         documentation URLs
     */
    public SortedMap<String, String> getClassMap() {
        return classMap;
    }

    /**
     * @return the live map of fully-qualified method signatures to
     *         documentation URLs
     */
    public SortedMap<String, String> getMethodMap() {
        return methodMap;
    }
}