/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ package ca.weblite.netbeans.mirah.cc; /* * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. * * Copyright 1997-2010 Oracle and/or its affiliates. All rights reserved. * * Oracle and Java are registered trademarks of Oracle and/or its affiliates. * Other names may be trademarks of their respective owners. * * The contents of this file are subject to the terms of either the GNU * General Public License Version 2 only ("GPL") or the Common * Development and Distribution License("CDDL") (collectively, the * "License"). You may not use this file except in compliance with the * License. You can obtain a copy of the License at * http://www.netbeans.org/cddl-gplv2.html * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the * specific language governing permissions and limitations under the * License. When distributing the software, include this License Header * Notice in each file and include the License file at * nbbuild/licenses/CDDL-GPL-2-CP. Oracle designates this * particular file as subject to the "Classpath" exception as provided * by Oracle in the GPL Version 2 section of the License file that * accompanied this code. If applicable, add the following below the * License Header, with the fields enclosed by brackets [] replaced by * your own identifying information: * "Portions Copyrighted [year] [name of copyright owner]" * * Contributor(s): * * The Original Software is NetBeans. The Initial Developer of the Original * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun * Microsystems, Inc. All Rights Reserved. * * If you wish your version of this file to be governed by only the CDDL * or only the GPL Version 2, indicate your decision by adding * "[Contributor] elects to include this software in this distribution * under the [CDDL or GPL Version 2] license." 
/* -- license notice continues in the next comment -- */
/*
 * If you do not indicate a
 * single choice of license, a recipient has the option to distribute
 * your version of this file under either the CDDL, the GPL Version 2 or
 * to extend the choice of license to its licensees as provided above.
 * However, if you add GPL Version 2 code and therefore, elected the GPL
 * Version 2 license, then the option applies only if the new code is
 * made subject to such option by the copyright holder.
 */

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;
import java.net.URLDecoder;
import java.util.Arrays;
import java.util.Locale;
import java.util.StringTokenizer;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.swing.text.ChangedCharSetException;
import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;

/**
 * HTML Parser. It retrieves sections of the javadoc HTML file.
 *
 * <p>The page is parsed once with the Swing HTML parser to find the character
 * offsets of the wanted section (class, member or package description), and is
 * then read a second time to cut the raw HTML between those two offsets.
 *
 * @author Martin Roskanin
 */
final class HTMLJavadocParser {

    private static final Logger LOG = Logger.getLogger(HTMLJavadocParser.class.getName());

    public HTMLJavadocParser() {
        super();
    }

    /**
     * Gets the javadoc text from the given URL.
     *
     * <p>If the page announces its character set through a META tag, the parser
     * aborts the first pass with a {@link ChangedCharSetException}; the page is
     * then parsed exactly one more time, using the announced charset when one
     * could be extracted and the default reader otherwise. The second pass tells
     * the parser to ignore the META tag, so the method cannot loop forever.
     *
     * @param url nbfs protocol URL; a fragment ({@code #member}) selects a member
     * @param pkg true if URL should be retrieved for a package
     * @param isGDK passed through to the member parser, which currently does not use it
     * @return the raw HTML between the located offsets, or {@code null} when
     *         {@code url} is {@code null}, the section was not found, or the
     *         page could not be read
     */
    public static String getJavadocText(URL url, boolean pkg, boolean isGDK) {
        if (url == null) {
            return null;
        }

        String charset = null;
        // Becomes true after the first ChangedCharSetException. It is tracked
        // separately from 'charset' because getCharSet() may yield null, and the
        // retry must ignore the META tag even then.
        boolean ignoreCharset = false;

        for (;;) {
            InputStream is = null;
            try {
                is = url.openStream();
                Reader reader = (charset == null)
                        ? new InputStreamReader(is)
                        : new InputStreamReader(is, charset);

                int[] offsets = findSectionOffsets(url, reader, pkg, isGDK, ignoreCharset);
                if (offsets != null && offsets[0] != -1 && offsets[1] > offsets[0]) {
                    return getTextFromURLStream(url, offsets[0], offsets[1], charset);
                }
                return null;
            } catch (ChangedCharSetException e) {
                if (ignoreCharset) {
                    // Already retried once; give up instead of spinning.
                    LOG.log(Level.INFO, "Charset changed again while parsing " + url, e);
                    return null;
                }
                charset = getCharSet(e);
                ignoreCharset = true; // restart, this time ignoring the META tag
            } catch (IOException ioe) {
                LOG.log(Level.INFO, "Cannot read javadoc from " + url, ioe);
                return null;
            } finally {
                closeQuietly(is);
            }
        }
    }

    /** Picks the package, member or class parser for the URL and returns the offsets it found. */
    private static int[] findSectionOffsets(URL url, Reader reader, boolean pkg, boolean isGDK,
            boolean ignoreCharset) throws IOException {
        HTMLEditorKit.Parser parser = new ParserDelegator();
        String urlStr = URLDecoder.decode(url.toString(), "UTF-8"); //NOI18N

        if (pkg) {
            // package description
            return parsePackage(reader, parser, ignoreCharset);
        }
        int hashIndex = urlStr.indexOf('#');
        if (hashIndex > 0) {
            // member javadoc info
            String memberName = urlStr.substring(hashIndex + 1);
            if (memberName.length() == 0) {
                return null;
            }
            return parseMember(reader, memberName, parser, ignoreCharset, isGDK);
        }
        // class javadoc info
        return parseClass(reader, parser, ignoreCharset);
    }

    /** Closes the stream if it is open, logging (not propagating) a failure to close. */
    private static void closeQuietly(InputStream is) {
        if (is == null) {
            return;
        }
        try {
            is.close();
        } catch (IOException ioe) {
            LOG.log(Level.FINE, "Cannot close javadoc stream", ioe);
        }
    }

    /**
     * Extracts the charset name from the exception raised by the HTML parser.
     *
     * @return the charset name (lower-cased when it was taken from a
     *         {@code "text/html; charset=UTF-8"} style spec), or {@code null}
     *         when the spec carries no {@code charset=} parameter
     */
    private static String getCharSet(ChangedCharSetException e) {
        String spec = e.getCharSetSpec();
        if (spec == null) {
            return null;
        }
        if (e.keyEqualsCharSet()) {
            //charsetspec contains only charset
            return spec;
        }

        //charsetspec is in form "text/html; charset=UTF-8"
        int index = spec.indexOf(";"); // NOI18N
        if (index != -1) {
            spec = spec.substring(index + 1);
        }
        spec = spec.toLowerCase(Locale.ENGLISH);

        StringTokenizer st = new StringTokenizer(spec, " \t=", true); //NOI18N
        boolean foundCharSet = false;
        boolean foundEquals = false;
        while (st.hasMoreTokens()) {
            String token = st.nextToken();
            if (token.equals(" ") || token.equals("\t")) { //NOI18N
                continue;
            }
            if (!foundCharSet && !foundEquals && token.equals("charset")) { //NOI18N
                foundCharSet = true;
            } else if (!foundEquals && token.equals("=")) { //NOI18N
                foundEquals = true;
            } else if (foundEquals && foundCharSet) {
                return token;
            } else {
                // unexpected token: start looking for "charset" "=" again
                foundCharSet = false;
                foundEquals = false;
            }
        }
        return null;
    }

    /**
     * Reads the characters in {@code [startOffset, endOffset)} from the URL.
     *
     * @return the text read; shorter than the requested range when the stream
     *         ends early, or {@code null} when {@code url} is {@code null}
     * @throws IOException if {@code startOffset > endOffset} or reading fails
     */
    private static String getTextFromURLStream(URL url, int startOffset, int endOffset, String charset)
            throws IOException {
        if (url == null) {
            return null;
        }
        if (startOffset > endOffset) {
            throw new IOException("Invalid javadoc offsets: start " + startOffset
                    + " > end " + endOffset);
        }

        InputStream in = url.openStream();
        Reader reader = null;
        try {
            reader = (charset == null)
                    ? new InputStreamReader(in)
                    : new InputStreamReader(in, charset);

            // skip() may skip fewer characters than asked, so repeat until done
            long toSkip = startOffset;
            while (toSkip > 0) {
                long skipped = reader.skip(toSkip);
                if (skipped <= 0) {
                    break;
                }
                toSkip -= skipped;
            }

            char[] buffer = new char[endOffset - startOffset];
            int filled = 0;
            while (filled < buffer.length) {
                int count = reader.read(buffer, filled, buffer.length - filled);
                if (count < 0) {
                    break;
                }
                filled += count;
            }
            // only the characters actually read; no NUL padding on a short read
            return new String(buffer, 0, filled);
        } finally {
            if (reader != null) {
                reader.close();
            } else {
                in.close(); // reader construction failed; release the raw stream
            }
        }
    }

    /**
     * Retrieves the position (start offset and end offset) of class javadoc info
     * in the raw html file.
     *
     * @return two-element array {start, end}; an element is -1 when not found
     */
    private static int[] parseClass(Reader reader, final HTMLEditorKit.Parser parser, boolean ignoreCharset)
            throws IOException {
        final int INIT = 0;
        // javadoc HTML comment '======== START OF CLASS DATA ========'
        final int CLASS_DATA_START = 1;
        // start of the text we need. Located just after first P.
        final int TEXT_START = 2;

        final int[] offset = {-1, -1}; // {start offset, end offset}

        HTMLEditorKit.ParserCallback callback = new HTMLEditorKit.ParserCallback() {

            private int state = INIT;
            // most recent HR seen while in TEXT_START
            private int nextHRPos = -1;
            // most recent HR seen in any state
            private int lastHRPos = -1;

            @Override
            public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
                if (t != HTML.Tag.HR) {
                    return;
                }
                if (state == TEXT_START) {
                    nextHRPos = pos;
                }
                lastHRPos = pos;
            }

            @Override
            public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {
                if (t == HTML.Tag.P && state == CLASS_DATA_START) {
                    state = TEXT_START;
                }
                if (t == HTML.Tag.A && state == TEXT_START) {
                    String attrName = (String) a.getAttribute(HTML.Attribute.NAME);
                    if (attrName != null && attrName.length() > 0) {
                        // a named anchor ends the description; prefer the HR before it
                        offset[1] = (nextHRPos != -1) ? nextHRPos : pos;
                        state = INIT;
                    }
                }
            }

            @Override
            public void handleComment(char[] data, int pos) {
                String comment = String.valueOf(data);
                if (comment.contains("START OF CLASS DATA")) { //NOI18N
                    state = CLASS_DATA_START;
                } else if (comment.contains("NESTED CLASS SUMMARY")) { //NOI18N
                    offset[1] = (lastHRPos != -1) ? lastHRPos : pos;
                }
            }

            @Override
            public void handleText(char[] data, int pos) {
                if (state == TEXT_START && offset[0] < 0) {
                    offset[0] = pos;
                }
            }
        };

        parser.parse(reader, callback, ignoreCharset);
        return offset;
    }

    /**
     * Retrieves the position (start offset and end offset) of member javadoc info
     * in the raw html file.
     *
     * @param name  anchor name of the member, e.g. {@code endsWith(java.lang.String)}
     * @param isGDK currently unused; anchors are always compared for exact equality
     * @return two-element array {start, end}; an element is -1 when not found
     */
    private static int[] parseMember(Reader reader, final String name, final HTMLEditorKit.Parser parser,
            boolean ignoreCharset, final boolean isGDK) throws IOException {
        final int INIT = 0;
        // 'A' tag with the name we are looking for.
        final int A_OPEN = 1;
        // close tag of 'A'
        final int A_CLOSE = 2;
        // PRE close tag after the A_CLOSE
        final int PRE_CLOSE = 3;

        final int[] offset = {-1, -1}; // {start offset, end offset}

        HTMLEditorKit.ParserCallback callback = new HTMLEditorKit.ParserCallback() {

            private int state = INIT;
            private int hrPos = -1;

            @Override
            public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
                if (t == HTML.Tag.HR && state == PRE_CLOSE) {
                    hrPos = pos;
                }
            }

            @Override
            public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {
                if (t == HTML.Tag.A) {
                    String attrName = (String) a.getAttribute(HTML.Attribute.NAME);
                    if (name != null && name.equals(attrName)) {
                        // we have found desired javadoc member info anchor
                        state = A_OPEN;
                        offset[0] = pos;
                    } else if (state == PRE_CLOSE && attrName != null) {
                        // reach the end of retrieved javadoc info
                        state = INIT;
                        offset[1] = (hrPos != -1) ? hrPos : pos;
                    }
                } else if ((t == HTML.Tag.DIV || t == HTML.Tag.DD)
                        && state == PRE_CLOSE && offset[0] < 0) {
                    offset[0] = pos;
                }
            }

            @Override
            public void handleEndTag(HTML.Tag t, int pos) {
                if (t == HTML.Tag.A && state == A_OPEN) {
                    state = A_CLOSE;
                } else if (t == HTML.Tag.PRE && state == A_CLOSE) {
                    state = PRE_CLOSE;
                }
            }
        };

        parser.parse(reader, callback, ignoreCharset);
        return offset;
    }

    /**
     * Retrieves the position (start offset and end offset) of the package
     * description (the section behind the {@code package_description} anchor)
     * in the raw html file.
     *
     * @return two-element array {start, end}; an element is -1 when not found
     */
    private static int[] parsePackage(Reader reader, final HTMLEditorKit.Parser parser, boolean ignoreCharset)
            throws IOException {
        final String name = "package_description"; //NOI18N
        final int INIT = 0;
        // 'A' tag with the name we are looking for.
        final int A_OPEN = 1;

        final int[] offset = {-1, -1}; // {start offset, end offset}

        HTMLEditorKit.ParserCallback callback = new HTMLEditorKit.ParserCallback() {

            private int state = INIT;
            private int hrPos = -1;

            @Override
            public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
                if (t == HTML.Tag.HR && state == A_OPEN) {
                    hrPos = pos;
                    offset[1] = pos;
                }
            }

            @Override
            public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {
                if (t != HTML.Tag.A) {
                    return;
                }
                String attrName = (String) a.getAttribute(HTML.Attribute.NAME);
                if (name.equals(attrName)) {
                    // we have found desired javadoc package info anchor
                    state = A_OPEN;
                    offset[0] = pos;
                } else if (state == A_OPEN && attrName != null) {
                    // reach the end of retrieved javadoc info
                    state = INIT;
                    offset[1] = (hrPos != -1) ? hrPos : pos;
                }
            }
        };

        parser.parse(reader, callback, ignoreCharset);
        return offset;
    }
}