/* $Id$ */ /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.manifoldcf.crawler.connectors.webcrawler; import org.apache.manifoldcf.core.interfaces.*; import org.apache.manifoldcf.connectorcommon.fuzzyml.*; import java.util.*; /** This class interprets the tag stream generated by the BasicParseState class, and keeps track of the form tags. */ public class FormParseState extends LinkParseState { // States for form handling. protected final static int FORMPARSESTATE_NORMAL = 0; protected final static int FORMPARSESTATE_IN_FORM = 1; protected final static int FORMPARSESTATE_IN_SELECT = 2; protected final static int FORMPARSESTATE_IN_TEXTAREA = 3; protected final static int FORMPARSESTATE_IN_OPTION = 4; protected int formParseState = FORMPARSESTATE_NORMAL; protected String selectName = null; protected String selectMultiple = null; protected String optionValue = null; protected String optionSelected = null; protected StringBuilder optionValueText = null; public FormParseState(IHTMLHandler handler) { super(handler); } // Override methods having to do with notification of tag discovery @Override protected boolean noteNonscriptTag(String tagName, Map<String,String> attributes) throws ManifoldCFException { if (super.noteNonscriptTag(tagName,attributes)) return true; switch (formParseState) { case FORMPARSESTATE_NORMAL: if (tagName.equals("form")) { formParseState = FORMPARSESTATE_IN_FORM; handler.noteFormStart(attributes); } break; case FORMPARSESTATE_IN_FORM: if (tagName.equals("input")) { String type = (String)attributes.get("type"); // We're only interested in form elements that can actually transmit data if (type == null || (!type.toLowerCase(Locale.ROOT).equals("button") && !type.toLowerCase(Locale.ROOT).equals("reset") && !type.toLowerCase(Locale.ROOT).equals("image"))) handler.noteFormInput(attributes); } else if (tagName.equals("select")) { selectName = (String)attributes.get("name"); selectMultiple = (String)attributes.get("multiple"); formParseState = FORMPARSESTATE_IN_SELECT; } else if (tagName.equals("textarea")) { formParseState = FORMPARSESTATE_IN_TEXTAREA; Map textareaMap = new HashMap(); textareaMap.put("type","textarea"); // Default value is too tough to meaningfully compute because of the embedded tags etc. Known limitation. textareaMap.put("value",""); handler.noteFormInput(textareaMap); } else if (tagName.equals("button")) { String type = (String)attributes.get("type"); if (type == null || type.toLowerCase(Locale.ROOT).equals("submit")) { // Same as input type="submit" handler.noteFormInput(attributes); } } else if (tagName.equals("isindex")) { Map indexMap = new HashMap(); indexMap.put("type","text"); } break; case FORMPARSESTATE_IN_SELECT: if (tagName.equals("option")) { optionValue = (String)attributes.get("value"); optionSelected = (String)attributes.get("selected"); formParseState = FORMPARSESTATE_IN_OPTION; // In case there's no end tag, if we have everything we need, do it now. if (optionValue != null) { Map optionMap = new HashMap(); optionMap.put("type","select"); optionMap.put("name",selectName); optionMap.put("multiple",selectMultiple); optionMap.put("value",optionValue); optionMap.put("selected",optionSelected); handler.noteFormInput(optionMap); } else optionValueText = new StringBuilder(); } break; case FORMPARSESTATE_IN_OPTION: break; case FORMPARSESTATE_IN_TEXTAREA: break; default: throw new ManifoldCFException("Unknown form parse state: "+Integer.toString(formParseState)); } return false; } @Override protected boolean noteNonscriptEndTag(String tagName) throws ManifoldCFException { if (super.noteNonscriptEndTag(tagName)) return true; switch (formParseState) { case FORMPARSESTATE_NORMAL: break; case FORMPARSESTATE_IN_FORM: if (tagName.equals("form")) { handler.noteFormEnd(); formParseState = FORMPARSESTATE_NORMAL; } break; case FORMPARSESTATE_IN_SELECT: formParseState = FORMPARSESTATE_IN_FORM; selectName = null; selectMultiple = null; break; case FORMPARSESTATE_IN_OPTION: if (tagName.equals("option")) { // If we haven't already emitted the option, emit it now. if (optionValueText != null) { Map optionMap = new HashMap(); optionMap.put("type","select"); optionMap.put("name",selectName); optionMap.put("multiple",selectMultiple); optionMap.put("value",optionValueText.toString()); optionMap.put("selected",optionSelected); handler.noteFormInput(optionMap); } formParseState = FORMPARSESTATE_IN_SELECT; optionSelected = null; optionValue = null; optionValueText = null; } break; case FORMPARSESTATE_IN_TEXTAREA: if (tagName.equals("textarea")) formParseState = FORMPARSESTATE_IN_FORM; break; default: throw new ManifoldCFException("Unknown form parse state: "+Integer.toString(formParseState)); } return false; } @Override protected boolean noteNormalCharacter(char thisChar) throws ManifoldCFException { if (super.noteNormalCharacter(thisChar)) return true; if (formParseState == FORMPARSESTATE_IN_OPTION) { if (optionValueText != null) optionValueText.append(thisChar); } else handler.noteTextCharacter(thisChar); return false; } }