/* $Id$ */ /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.manifoldcf.crawler.connectors.webcrawler; import org.apache.manifoldcf.core.interfaces.*; import org.apache.manifoldcf.crawler.system.Logging; import java.util.regex.*; import java.util.*; /** This class is the handler for HTML form parsing during state transitions */ public class FindHTMLFormHandler extends FindHandler implements IHTMLHandler { protected final Pattern formNamePattern; protected FormDataAccumulator discoveredFormData = null; protected FormDataAccumulator currentFormData = null; public FindHTMLFormHandler(String parentURI, Pattern formNamePattern) { super(parentURI); this.formNamePattern = formNamePattern; } public void applyFormOverrides(LoginParameters lp) throws ManifoldCFException { if (discoveredFormData != null && lp != null) { if (lp.getOverrideTargetURL() != null) { super.noteDiscoveredLink(lp.getOverrideTargetURL()); discoveredFormData.overrideActionURI(getTargetURI()); } discoveredFormData.applyOverrides(lp); } } public FormData getFormData() { return discoveredFormData; } /** Note a character of text. * Structured this way to keep overhead low for handlers that don't use text. */ @Override public void noteTextCharacter(char textCharacter) throws ManifoldCFException { } /** Note a meta tag */ @Override public void noteMetaTag(Map metaAttributes) throws ManifoldCFException { } /** Note the start of a form */ @Override public void noteFormStart(Map formAttributes) throws ManifoldCFException { if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("WEB: Saw form with"+ " name "+((formAttributes.get("name")==null)?"null":"'"+formAttributes.get("name")+"'") + " id "+((formAttributes.get("id")==null)?"null":"'"+formAttributes.get("id")+"'") + " action "+((formAttributes.get("action")==null)?"null":"'"+formAttributes.get("action")+"'") ); // Is this a form element we can use? boolean canUse; if (formNamePattern != null) { // Find the identifier we will use for the form. If name isn't there, // we use id. If id isn't there, we use action. The only other thing we // could reasonably do is identify the form by its form elements. String formName = (String)formAttributes.get("name"); if (formName == null) formName = (String)formAttributes.get("id"); if (formName == null) formName = (String)formAttributes.get("action"); if (formName == null) formName = ""; Matcher m = formNamePattern.matcher(formName); canUse = m.find(); } else canUse = true; if (canUse) { String actionURI = (String)formAttributes.get("action"); if (actionURI == null) // Action URI is THIS uri! actionURI = parentURI; else if (actionURI.length() == 0) actionURI = ""; noteDiscoveredLink(actionURI); actionURI = getTargetURI(); if (actionURI != null) { String method = (String)formAttributes.get("method"); if (method == null || method.length() == 0) method = "get"; else method = method.toLowerCase(Locale.ROOT); // Start a new form currentFormData = new FormDataAccumulator(actionURI,method.equals("post")?FormData.SUBMITMETHOD_POST:FormData.SUBMITMETHOD_GET); } } } /** Note an input tag */ @Override public void noteFormInput(Map inputAttributes) throws ManifoldCFException { if (Logging.connectors.isDebugEnabled()) { String type = (String)inputAttributes.get("type"); if (type == null) type = "text"; String name = (String)inputAttributes.get("name"); if (name == null) name = "(null)"; Logging.connectors.debug("WEB: Saw form element of type '"+type+"' name '"+name+"'"); } if (currentFormData != null) currentFormData.addElement(inputAttributes); } /** Note the end of a form */ @Override public void noteFormEnd() throws ManifoldCFException { if (currentFormData != null) { discoveredFormData = currentFormData; currentFormData = null; } } /** Note discovered href */ @Override public void noteAHREF(String rawURL) throws ManifoldCFException { } /** Note discovered href */ @Override public void noteLINKHREF(String rawURL) throws ManifoldCFException { } /** Note discovered IMG SRC */ @Override public void noteIMGSRC(String rawURL) throws ManifoldCFException { } /** Note discovered FRAME SRC */ @Override public void noteFRAMESRC(String rawURL) throws ManifoldCFException { } @Override public void finishUp() throws ManifoldCFException { } }