/* $Id$ */
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.manifoldcf.connectorcommon.fuzzyml;
import org.apache.manifoldcf.core.interfaces.*;
import org.apache.manifoldcf.core.system.Logging;
import java.util.*;
import java.io.*;
/** This class takes the output of the basic tag parser and converts it for
* typical HTML usage. It takes the attribute lists, for instance, and converts
* them to lowercased maps. It also bashes all tag names etc to lower case as
* well.
*/
public class HTMLParseState extends TagParseState
{
/** Constructor.
*/
public HTMLParseState()
{
}
/** This method gets called for every tag. Override this method to intercept tag begins.
*@return true to halt further processing.
*/
@Override
protected final boolean noteTag(String tagName, List<AttrNameValue> attributes)
throws ManifoldCFException
{
Map<String,String> attrMap = new HashMap<String,String>(attributes.size());
for (AttrNameValue nv : attributes)
{
attrMap.put(nv.getName().toLowerCase(Locale.ROOT), nv.getValue());
}
return noteTag(tagName.toLowerCase(Locale.ROOT), attrMap);
}
/** Map version of the noteTag method.
*@return true to halt further processing.
*/
protected boolean noteTag(String tagName, Map<String,String> attributes)
throws ManifoldCFException
{
if (Logging.misc.isDebugEnabled())
Logging.misc.debug(" Saw tag '"+tagName+"'");
return false;
}
/** This method gets called for every end tag. Override this method to intercept tag ends.
*@return true to halt further processing.
*/
@Override
protected final boolean noteEndTag(String tagName)
throws ManifoldCFException
{
return noteTagEnd(tagName.toLowerCase(Locale.ROOT));
}
/** Note end tag.
*/
protected boolean noteTagEnd(String tagName)
throws ManifoldCFException
{
if (Logging.misc.isDebugEnabled())
Logging.misc.debug(" Saw end tag '"+tagName+"'");
return false;
}
/** This method is called for every <? ... ?> construct, or 'qtag'.
* This is not useful for HTML.
*@return true to halt further processing.
*/
@Override
protected final boolean noteQTag(String tagName, List<AttrNameValue> attributes)
throws ManifoldCFException
{
return super.noteQTag(tagName, attributes);
}
/** This method is called for every <! <token> ... > construct, or 'btag'.
* Override it to intercept these.
*@return true to halt further processing.
*/
@Override
protected final boolean noteBTag(String tagName)
throws ManifoldCFException
{
return super.noteBTag(tagName);
}
/** This method is called for the end of every btag, or any time
* there's a naked '>' in the document. Override it if you want to intercept these.
*@return true to halt further processing.
*/
@Override
protected final boolean noteEndBTag()
throws ManifoldCFException
{
return super.noteEndBTag();
}
/** Called for the start of every cdata-like tag, e.g. <![ <token> [ ... ]]>
*@param token may be empty!!!
*@return true to halt further processing.
*/
@Override
protected final boolean noteEscaped(String token)
throws ManifoldCFException
{
return super.noteEscaped(token);
}
/** Called for the end of every cdata-like tag.
*@return true to halt further processing.
*/
@Override
protected final boolean noteEndEscaped()
throws ManifoldCFException
{
return super.noteEndEscaped();
}
/** This method gets called for every token inside a btag.
*@return true to halt further processing.
*/
@Override
protected final boolean noteBTagToken(String token)
throws ManifoldCFException
{
return super.noteBTagToken(token);
}
/** This method gets called for every character that is found within an
* escape block, e.g. CDATA.
* Override this method to intercept such characters.
*@return true to halt further processing.
*/
@Override
protected final boolean noteEscapedCharacter(char thisChar)
throws ManifoldCFException
{
return super.noteEscapedCharacter(thisChar);
}
}