/*
* The Apache Software License, Version 1.1
*
*
* Copyright (c) 1999,2000 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.apache.org. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
//package org.apache.xerces.parsers;
package org.apache.xml.dtm.ref;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.Attributes;
import org.xml.sax.ext.LexicalHandler;
import java.io.IOException;
import org.apache.xml.dtm.ref.CoroutineManager;
import org.apache.xml.utils.FastStringBuffer;
/** <p>DTMBuilder is a glue layer which accepts input from the
* CoroutineParser and issues calls to construct DTM nodes.</p>
*
* <p>We're not really delighted with this; it's an extra layer of
* call-and-return per node created. A better approach, since this is
* so specific to DTM, might be to give it direct ("friend") access to
* DTM's storage... or to fold it directly into the DTM class. We've
* broken it out primarily to allow parallel development of the
* builder and the DTM model. (Or, in fact, to make it a subclass
* of CoroutineParser to avoid _that_ layer of call-and-return... though
* I think tail-call optimization will help us in that case.)</p>
*
* <p>In its current form, this is basically a SAX-to-DTM call adapter.
* SAX events coming in are restructured -- primarily by having their strings
* and text content copied into appropriate storage and re-expressed as
* index numbers. Note that this requires some negotiation between the DTM
* and the builder to agree on which string pools this content will be placed
* into.</p>
*
* <p>We also restructure the call sequence somewhat, by merging
* successive characters() calls into a single text-node creation, and
* by breaking the attributes out of the SAX startElement call and generating
* create-attribute (and create-namespace-declaration) calls.</p>
*
* <p>Status: MOSTLY DEAD CODE -- Merged into DTM implementations; this
* class is being retained just for reference and can probably be discarded
* some time soon -- Check with Joe.</p>
* */
public class DTMBuilder
  implements ContentHandler, LexicalHandler
{
  //
  // Data
  //

  /** Document being built.
   *
   * <p>%TBD% The DTM API doesn't currently have construction calls,
   * so this is explicitly defined as a DTMDocumentImpl. That needs to
   * be fixed.</p>
   * */
  private DTMDocumentImpl m_dtm;

  /** Coroutine manager created by the constructor; mediates between this
   * builder (the "application" coroutine) and the parser coroutine. */
  private CoroutineManager fCoroutineManager = null;

  /** Coroutine ID obtained for this builder; -1 until the constructor
   * has joined the coroutine set. */
  private int fAppCoroutine = -1;

  /** Coroutine ID reported by the parser wrapper; -1 until assigned. */
  private int fParserCoroutine = -1;

  /** Coroutine wrapper around the SAX parser feeding this builder. */
  private CoroutineParser co_parser;

  // Scott suggests sharing pools between DTMs. Note that this will require
  // threadsafety at the pool level.
  private static DTMStringPool commonLocalNames=new DTMSafeStringPool();
  private static DTMStringPool commonNamespaceNames=new DTMSafeStringPool();
  private static DTMStringPool commonPrefixes=new DTMSafeStringPool();

  private DTMStringPool localNames; // For this DTM, may be common
  private DTMStringPool namespaceNames; // For this DTM, may be common
  private DTMStringPool prefixes; // For this DTM, may be common
  private FastStringBuffer content; // Unique per DTM

  /** Offset into {@link #content} of the first character that has not yet
   * been handed to the DTM as part of a node. */
  int contentStart=0;

  // %TBD% The whole startup sequence has to be resolved --
  // How much is passed builder-to-DTM and how much the other way?
  // Who decides whether we're sharing string pools or creating new ones?
  // When do we hook up to the coroutine parser system?

  //
  // Constructors
  //

  /**
   * Attach a builder to a DTM, wire it to a coroutine-driven SAX parser,
   * and start parsing the first chunk of the source.
   *
   * <p>String pools and the content buffer are taken from the DTM when it
   * already has them; otherwise the shared static pools and a fresh
   * buffer are installed into the DTM. An existing content buffer is
   * reset.</p>
   *
   * <p>Note that the initial co_resume() call does not return until the
   * first chunk of parsing has been completed and the parser coroutine
   * yields.</p>
   *
   * @param dtm DTM object to be written into
   * <p>%TBD% The DTM API doesn't currently have construction calls,
   * so this is explicitly defined as a DTMDocumentImpl. That needs to
   * be fixed.</p>
   * @param source SAX InputSource to read the XML document from.
   * @param parser SAX XMLReader to be used to parse source into dtm
   * */
  public DTMBuilder(DTMDocumentImpl dtm, InputSource source, org.xml.sax.XMLReader parser)
  {
    m_dtm=dtm;

    // Start with persistent shared pools unless the DTM expresses
    // other preferences
    localNames=m_dtm.getLocalNameTable();
    if(localNames==null)
      m_dtm.setLocalNameTable(localNames=commonLocalNames);

    namespaceNames=m_dtm.getNsNameTable();
    if(namespaceNames==null)
      m_dtm.setNsNameTable(namespaceNames=commonNamespaceNames);

    prefixes=m_dtm.getPrefixNameTable();
    if(prefixes==null)
      m_dtm.setPrefixNameTable(prefixes=commonPrefixes);

    // Unlike the other strings, which may be shared and thus should be
    // reset elsewhere (if at all), content starts empty each time we parse.
    content=m_dtm.getContentBuffer();
    if(content==null)
      m_dtm.setContentBuffer(content=new FastStringBuffer());
    else
      content.reset();
    contentStart=0;

    // Establish incremental parsing hookups
    fCoroutineManager=new CoroutineManager();
    fAppCoroutine = fCoroutineManager.co_joinCoroutineSet(-1);
    co_parser=new CoroutineSAXParser(fCoroutineManager,fAppCoroutine,parser);
    fParserCoroutine=co_parser.getParserCoroutineID();
    co_parser.setContentHandler(this);
    co_parser.setLexHandler(this); // Needed for comments, I think.

    // %TBD% MAJOR CONCERN: Are we sure we'll have reached the
    // startup point before we start the call? CHECK THIS!

    // Begin incremental parsing
    try
    {
      fCoroutineManager.co_resume(source,fAppCoroutine,fParserCoroutine);
    }
    catch(NoSuchMethodException e)
    {
      // Shouldn't happen unless we've miscoded our coroutine logic
      // "Shut down the garbage smashers on the detention level!"
      e.printStackTrace(System.err);
      fCoroutineManager.co_exit(fAppCoroutine);
    }
  }

  //
  // Private helpers
  //

  /**
   * Flush any characters accumulated since the last node was emitted
   * into the DTM as a single text child.
   *
   * <p>Does nothing when no new characters have been buffered.</p>
   */
  private void processAccumulatedText()
  {
    int len=content.length();
    if(len!=contentStart)
    {
      // The FastStringBuffer has been previously agreed upon
      m_dtm.appendTextChild(contentStart,len-contentStart);
      contentStart=len;
    }
  }

  /**
   * Test whether an attribute's qualified name marks it as a namespace
   * declaration, i.e. it is exactly "xmlns" or begins with "xmlns:".
   *
   * @param qName attribute qualified name as reported by SAX
   * @return true if the attribute is a namespace declaration
   */
  private static boolean isNamespaceDeclaration(String qName)
  {
    return qName.startsWith("xmlns:") || "xmlns".equals(qName);
  }

  //
  // ContentHandler methods
  // Accept SAX events, reformat as DTM construction calls
  //

  /**
   * Buffer character data. The text node itself is created later by
   * processAccumulatedText(), so successive calls merge into one node.
   */
  public void characters(char[] ch, int start, int length)
    throws org.xml.sax.SAXException
  {
    content.append(ch,start,length);
  }

  /**
   * Tell the DTM that the document is complete.
   *
   * <p>Pending text is not flushed here; there _shouldn't_ be any
   * significant pending text at this point.</p>
   */
  public void endDocument()
    throws org.xml.sax.SAXException
  {
    m_dtm.appendEndDocument();
  }

  /**
   * Flush pending text, then tell the DTM to close the current element.
   * The name arguments are not used.
   */
  public void endElement(java.lang.String namespaceURI, java.lang.String localName,
                         java.lang.String qName)
    throws org.xml.sax.SAXException
  {
    processAccumulatedText();
    // No args but we do need to tell the low-level builder code to
    // pop up a level.
    m_dtm.appendEndElement();
  }

  /** No-op. */
  public void endPrefixMapping(java.lang.String prefix)
    throws org.xml.sax.SAXException
  {
    // No-op
  }

  /** Ignorable whitespace is currently discarded. */
  public void ignorableWhitespace(char[] ch, int start, int length)
    throws org.xml.sax.SAXException
  {
    // %TBD% I believe ignorable text isn't part of the DTM model...?
  }

  /**
   * Flush pending text. The processing instruction itself is not yet
   * recorded in the DTM.
   */
  public void processingInstruction(java.lang.String target, java.lang.String data)
    throws org.xml.sax.SAXException
  {
    processAccumulatedText();
    // %TBD% Which pools do target and data go into?
  }

  /** No-op for DTM. */
  public void setDocumentLocator(Locator locator)
  {
    // No-op for DTM
  }

  /**
   * Flush pending text. The skipped entity itself is not yet recorded
   * in the DTM.
   */
  public void skippedEntity(java.lang.String name)
    throws org.xml.sax.SAXException
  {
    processAccumulatedText();
    //%TBD%
  }

  /** Currently a no-op. */
  public void startDocument()
    throws org.xml.sax.SAXException
  {
    // No-op for DTM?
  }

  /**
   * Flush pending text, open a new element in the DTM, then emit its
   * namespace declarations followed by its ordinary attributes.
   *
   * <p>Attributes are walked twice so that all namespace declarations
   * are appended before any other attribute. Within each pass the
   * attributes are visited from last to first.</p>
   *
   * @param namespaceURI element namespace URI, pooled in the namespace pool
   * @param localName element local name, pooled in the local-name pool
   * @param qName element qualified name; only its prefix (if any) is used
   * @param atts attributes attached to the element
   */
  public void startElement(java.lang.String namespaceURI, java.lang.String localName,
                           java.lang.String qName, Attributes atts)
    throws org.xml.sax.SAXException
  {
    processAccumulatedText();

    // Split prefix off qname; stays null when the name has no prefix.
    String prefix=null;
    int colon=qName.indexOf(':');
    if(colon>0)
      prefix=qName.substring(0,colon);

    // %TBD% Where do we pool expandedName, or is it just the union, or...
    m_dtm.appendStartElement(namespaceNames.stringToIndex(namespaceURI),
                             localNames.stringToIndex(localName),
                             prefixes.stringToIndex(prefix));

    // %TBD% I'm assuming that DTM will require resequencing of
    // NS decls before other attrs, hence two passes are taken.
    int nAtts=atts.getLength();

    // Pass 1: namespace declarations only.
    // %TBD% Countdown is more efficient if nobody cares about sequence.
    for(int i=nAtts-1;i>=0;--i)
    {
      String attQName=atts.getQName(i);
      if(!isNamespaceDeclaration(attQName))
        continue;

      // NOTE(review): for "xmlns:foo" this passes the attribute's own
      // prefix ("xmlns"), not the declared prefix ("foo"); for a bare
      // "xmlns" it passes "". Confirm which one appendNSDeclaration
      // expects before changing it.
      int attColon=attQName.indexOf(':');
      String attPrefix=(attColon>0) ? attQName.substring(0,attColon) : "";

      m_dtm.appendNSDeclaration(
        prefixes.stringToIndex(attPrefix),
        namespaceNames.stringToIndex(atts.getValue(i)),
        atts.getType(i).equalsIgnoreCase("ID"));
    }

    // Pass 2: every attribute that is NOT a namespace declaration.
    // (Previously this loop repeated the pass-1 test, so it only entered
    // for namespace declarations and never appended any attribute.)
    for(int i=nAtts-1;i>=0;--i)
    {
      String attQName=atts.getQName(i);
      if(isNamespaceDeclaration(attQName))
        continue;

      // %TBD% I hate having to extract the prefix into a new
      // string when we may never use it. Consider pooling whole
      // qNames, which are already strings?
      String attPrefix;
      String attLocalName;
      int attColon=attQName.indexOf(':');
      if(attColon>0)
      {
        attPrefix=attQName.substring(0,attColon);
        attLocalName=attQName.substring(attColon+1);
      }
      else
      {
        attPrefix=""; // Default prefix
        attLocalName=attQName;
      }

      // The value lives in the shared content buffer; the DTM is given
      // its offset and length rather than the string itself.
      content.append(atts.getValue(i)); // Single-string value
      int contentEnd=content.length();

      m_dtm.appendAttribute(namespaceNames.stringToIndex(atts.getURI(i)),
                            localNames.stringToIndex(attLocalName),
                            prefixes.stringToIndex(attPrefix),
                            atts.getType(i).equalsIgnoreCase("ID"),
                            contentStart, contentEnd-contentStart);
      contentStart=contentEnd;
    }
  }

  /** No-op; namespace declarations are picked up from the attributes in startElement(). */
  public void startPrefixMapping(java.lang.String prefix, java.lang.String uri)
    throws org.xml.sax.SAXException
  {
    // No-op in DTM, handled during element/attr processing?
  }

  //
  // LexicalHandler support. Not all SAX2 parsers support these events
  // but we may want to pass them through when they exist...
  //

  /**
   * Flush pending text, then store the comment's characters in the
   * content buffer and append a comment node referring to them.
   */
  public void comment(char[] ch, int start, int length)
    throws org.xml.sax.SAXException
  {
    processAccumulatedText();

    // After the flush, contentStart is the end of the buffer, so the
    // comment occupies [contentStart, contentStart+length).
    content.append(ch,start,length); // Single-string value
    m_dtm.appendComment(contentStart,length);
    contentStart+=length;
  }

  /** No-op in DTM. */
  public void endCDATA()
    throws org.xml.sax.SAXException
  {
    // No-op in DTM
  }

  /** No-op in DTM. */
  public void endDTD()
    throws org.xml.sax.SAXException
  {
    // No-op in DTM
  }

  /** No-op in DTM. */
  public void endEntity(java.lang.String name)
    throws org.xml.sax.SAXException
  {
    // No-op in DTM
  }

  /** No-op in DTM. */
  public void startCDATA()
    throws org.xml.sax.SAXException
  {
    // No-op in DTM
  }

  /** No-op in DTM. */
  public void startDTD(java.lang.String name, java.lang.String publicId,
                       java.lang.String systemId)
    throws org.xml.sax.SAXException
  {
    // No-op in DTM
  }

  /** No-op in DTM. */
  public void startEntity(java.lang.String name)
    throws org.xml.sax.SAXException
  {
    // No-op in DTM
  }

  //
  // coroutine support
  //

  /**
   * @return the coroutine ID this builder obtained from its
   * CoroutineManager, or -1 if none has been assigned.
   */
  public int getAppCoroutine() {
    return fAppCoroutine;
  }

  /**
   * co_yield() hands control to the parser coroutine, telling it whether
   * we need more nodes, and waits to be resumed.
   *
   * The value sent to the parser is:
   *
   * null           (shutdown is true) terminate the parser coroutine.
   *
   * Boolean.TRUE   (getmore is true) indication to continue parsing the
   *                current document. Resumes normal SAX parsing.
   *
   * Boolean.FALSE  (getmore is false) indication to discontinue parsing
   *                and reset.
   *
   * We expect to be resumed with
   * co_resume(Boolean.TRUE, ...) on success with more remaining to parse.
   * co_resume(Boolean.FALSE, ...) on success after endDocument.
   *
   * @param getmore true to request more parsing, false to request a stop;
   * ignored when shutdown is true
   * @param shutdown true to send null rather than a Boolean
   * @return the boolean value we were resumed with, or false if the
   * coroutine manager threw NoSuchMethodException
   * @throws RuntimeException if we are resumed with anything other than
   * a Boolean (including null)
   */
  private boolean co_yield(boolean getmore, boolean shutdown)
  {
    Object arg=null;
    if(!shutdown)
      arg= getmore ? Boolean.TRUE : Boolean.FALSE;

    try
    {
      arg = fCoroutineManager.co_resume(arg,fAppCoroutine,fParserCoroutine);

      if (arg instanceof Boolean)
        return ((Boolean)arg).booleanValue();

      // Unexpected! Describe the class null-safely so that a null resume
      // value yields the intended diagnostic instead of a
      // NullPointerException.
      String argClass=(arg==null) ? "null" : arg.getClass().toString();
      System.err.println(
        "Active CoroutineSAXParser: unexpected resume parameter, "
        +argClass+" with value=\""+arg+'"');
      System.err.println("\tStopping parser rather than risk deadlock");
      throw new RuntimeException("Coroutine parameter error ("+arg+')');
    }
    catch(java.lang.NoSuchMethodException e)
    {
      // Shouldn't happen unless we've miscoded our coroutine logic
      // "Shut down the garbage smashers on the detention level!"
      e.printStackTrace(System.err);
      fCoroutineManager.co_exit(fAppCoroutine);
    }

    // Only reached if NoSuchMethodException was thrown (no coroutine)
    return(false);
  }
} // class DTMBuilder