/* $Id$ */
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.manifoldcf.connectorcommon.fuzzyml;
import org.apache.manifoldcf.core.interfaces.*;
import java.util.*;
import java.io.*;
/** This is the XML encoding detector.
* It is basically looking for the preamble's <?xml ... ?> tag, which it parses
* looking for the "encoding" attribute. It stops either when it is beyond
* any possibility of finding the preamble, or it finds the tag, whichever comes first.
*/
public class XMLEncodingDetector extends XMLParseState implements EncodingDetector
{
protected String encoding = null;
/** Constructor.
*/
public XMLEncodingDetector()
{
}
/** Set initial encoding.
*/
@Override
public void setEncoding(String encoding)
{
this.encoding = encoding;
}
/** Retrieve final encoding determination.
*/
@Override
public String getEncoding()
{
return encoding;
}
/** Map version of the noteTag method.
*@return true to halt further processing.
*/
@Override
protected boolean noteTag(String tagName, Map<String,String> attributes)
throws ManifoldCFException
{
// Terminate immediately.
return true;
}
/** This method gets called for every end tag. Override this method to intercept tag ends.
*@return true to halt further processing.
*/
@Override
protected boolean noteEndTag(String tagName)
throws ManifoldCFException
{
return true;
}
/** Map version of noteQTag method.
*@return true to halt further processing.
*/
protected boolean noteQTag(String tagName, Map<String,String> attributes)
throws ManifoldCFException
{
if (tagName.equals("xml"))
{
// Look for "encoding" attribute
String value = attributes.get("encoding");
if (value != null)
encoding = value;
}
// Either way, stop now.
return true;
}
/** This method is called for every <! <token> ... > construct, or 'btag'.
* Override it to intercept these.
*@return true to halt further processing.
*/
@Override
protected boolean noteBTag(String tagName)
throws ManifoldCFException
{
return true;
}
/** This method is called for the end of every btag, or any time
* there's a naked '>' in the document. Override it if you want to intercept these.
*@return true to halt further processing.
*/
protected boolean noteEndBTag()
throws ManifoldCFException
{
return true;
}
/** Called for the start of every cdata-like tag, e.g. <![ <token> [ ... ]]>
*@param token may be empty!!!
*@return true to halt further processing.
*/
@Override
protected boolean noteEscaped(String token)
throws ManifoldCFException
{
return true;
}
/** Called for the end of every cdata-like tag.
*@return true to halt further processing.
*/
@Override
protected boolean noteEndEscaped()
throws ManifoldCFException
{
return true;
}
/** This method gets called for every token inside a btag.
*@return true to halt further processing.
*/
@Override
protected boolean noteBTagToken(String token)
throws ManifoldCFException
{
return true;
}
/** This method gets called for every character that is not part of a tag etc.
* Override this method to intercept such characters.
*@return true to halt further processing.
*/
@Override
protected boolean noteNormalCharacter(char thisChar)
throws ManifoldCFException
{
return true;
}
/** This method gets called for every character that is found within an
* escape block, e.g. CDATA.
* Override this method to intercept such characters.
*@return true to halt further processing.
*/
@Override
protected boolean noteEscapedCharacter(char thisChar)
throws ManifoldCFException
{
return true;
}
}