DTMBuilder.java example

Explorer
xalan-j-master
- xalan-j-Bxalan-j_2_2L
/*
 * The Apache Software License, Version 1.1
 *
 *
 * Copyright (c) 1999,2000 The Apache Software Foundation.  All rights 
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer. 
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:  
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Xerces" and "Apache Software Foundation" must
 *    not be used to endorse or promote products derived from this
 *    software without prior written permission. For written 
 *    permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    nor may "Apache" appear in their name, without prior written
 *    permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation and was
 * originally based on software copyright (c) 1999, International
 * Business Machines, Inc., http://www.apache.org.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */

//package org.apache.xerces.parsers;
package org.apache.xml.dtm.ref;

import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.Attributes;
import org.xml.sax.ext.LexicalHandler;
import java.io.IOException;
import org.apache.xml.dtm.ref.CoroutineManager;
import org.apache.xml.utils.FastStringBuffer;


/** <p>DTMBuilder is a glue layer which accepts input from the
 * CoroutineParser and issues calls to construct DTM nodes.</p>
 *
 * <p>We're not really delighted with this; it's an extra layer of
 * call-and-return per node created. A better approach, since this is
 * so specific to DTM, might be to give it direct ("friend") access to
 * DTM's storage... or to fold it directly into the DTM class. We've
 * broken it out primarily to allow parallel development of the
 * builder and the DTM model. (Or, in fact, to make it a subclass
 * of CoroutineParser to avoid _that_ layer of call-and-return... though
 * I think tail-call optimization will help us in that case.</p>
 *
 * <p>In its current form, this is basically a SAX-to-DTM call adapter.
 * SAX events coming in are restructured -- primarily by having their strings
 * and text content copied into appropriate storage and re-expressed as
 * index numbers. Note that this requires some negotiation between the DTM
 * and the builder to agree on which string pools this content will be placed
 * into.</p>
 *
 * <p>We also restructure the call sequence somewhat, by merging
 * successive characters() calls into a single text-node creation, and
 * by breaking the attributes out of the SAX createEvent call and generating
 * create-attribute (and create-namespace-declaration) calls.</p>
 *
 * <p>Status: MOSTLY DEAD CODE -- Merged into DTM implementations; this
 * class is being retained just for reference and can probably be discarded
 * some time soon -- Check with Joe.</p>
 * */
public class DTMBuilder
implements ContentHandler, LexicalHandler
{

  //
  // Data
  //

  /** Document being built.
   *
   * <p>%TBD% The DTM API doesn't currently have construction calls,
   * so this is explicitly defined as a DTMDocumentImpl. That needs to
   * be fixed.</p>
   * */
  private DTMDocumentImpl m_dtm;

  private CoroutineManager fCoroutineManager = null;
  private int fAppCoroutine = -1;
  private int fParserCoroutine = -1;

  private CoroutineParser co_parser;

  // Scott suggests sharing pools between DTMs. Note that this will require
  // threadsafety at the pool level.
  private static DTMStringPool commonLocalNames=new DTMSafeStringPool();
  private static DTMStringPool commonNamespaceNames=new DTMSafeStringPool();
  private static DTMStringPool commonPrefixes=new DTMSafeStringPool();

  private DTMStringPool localNames; // For this DTM, may be common
  private DTMStringPool namespaceNames; // For this DTM, may be common
  private DTMStringPool prefixes; 
  private FastStringBuffer content; // Unique per DTM
  int contentStart=0;

  // %TBD% The whole startup sequence has to be resolved --
  // How much is passed builder-to-DTM and how much the other way?
  // Who decides whether we're sharing string pools or creating new ones?
  // When do we hook up to the coroutine parser system?

  //
  // Constructors
  //

  /*
   * @param dtm DTM object to be written into
   * <p>%TBD% The DTM API doesn't currently have construction calls,
   * so this is explicitly defined as a DTMDocumentImpl. That needs to
   * be fixed.</p>
   * @param source SAX InputSource to read the XML document from.
   * @param parser SAX XMLReader to be used to parse source into dtm
   * */
  public DTMBuilder(DTMDocumentImpl dtm, InputSource source, org.xml.sax.XMLReader parser)
  {
    m_dtm=dtm;
    
    // Start with persistant shared pools unless the DTM expresses
    // other preferences
    localNames=m_dtm.getLocalNameTable();
    if(localNames==null)
      m_dtm.setLocalNameTable(localNames=commonLocalNames);

    namespaceNames=m_dtm.getNsNameTable();
    if(namespaceNames==null)
      m_dtm.setNsNameTable(namespaceNames=commonNamespaceNames);

    prefixes=m_dtm.getPrefixNameTable();
    if(prefixes==null)
      m_dtm.setPrefixNameTable(prefixes=commonPrefixes);

    // Unlike the other strings, which may be shared and thus should be
    // reset elsewhere (if at all), content starts empty each time we parse.
    content=m_dtm.getContentBuffer();
    if(content==null)
      m_dtm.setContentBuffer(content=new FastStringBuffer());
    else
      content.reset();
    contentStart=0;

    // Establish incremental parsing hookups
    fCoroutineManager=new CoroutineManager();
    fAppCoroutine = fCoroutineManager.co_joinCoroutineSet(-1);
    // %TBD% parser should be passed in so we can plug in the
    // Xalan version of other specific instances.
    co_parser=new CoroutineSAXParser(fCoroutineManager,fAppCoroutine,parser);
    fParserCoroutine=co_parser.getParserCoroutineID();
    co_parser.setContentHandler(this);
    co_parser.setLexHandler(this); // Needed for comments, I think.
    
    // %TBD% MAJOR CONCERN: Are we sure we'll have reached the
    // startup point before we start the call? CHECK THIS!

    // Begin incremental parsing
    // Note that this doesn't return until the first chunk of parsing
    // has been completed and the parser coroutine yields.
    try
      {
        fCoroutineManager.co_resume(source,fAppCoroutine,fParserCoroutine);
      }
    catch(NoSuchMethodException e)
      {
        // Shouldn't happen unless we've miscoded our coroutine logic
        // "Shut down the garbage smashers on the detention level!"
        e.printStackTrace(System.err);
        fCoroutineManager.co_exit(fAppCoroutine);
      }
  }
  
  //
  // Public methods
  //
  
  // String accumulator support
  private void processAccumulatedText()
  {
    int len=content.length();
    if(len!=contentStart)
      {
        // The FastStringBuffer has been previously agreed upon
        m_dtm.appendTextChild(contentStart,len-contentStart);
        contentStart=len;
      }
  }

  //
  // ContentHandler methods
  // Accept SAX events, reformat as DTM construction calls
  public void characters(char[] ch, int start, int length)
       throws org.xml.sax.SAXException
  {
    // Actually creating the text node is handled by
    // processAccumulatedText(); here we just accumulate the
    // characters into the buffer.
    content.append(ch,start,length);
  }
  public void endDocument() 
       throws org.xml.sax.SAXException
  {
    // May need to tell the low-level builder code to pop up a level.
    // There _should't_ be any significant pending text at this point.
    m_dtm.appendEndDocument();
  }
  public void endElement(java.lang.String namespaceURI, java.lang.String localName,
      java.lang.String qName) 
       throws org.xml.sax.SAXException
  {
    processAccumulatedText();
    // No args but we do need to tell the low-level builder code to
    // pop up a level.
    m_dtm.appendEndElement();
  }
  public void endPrefixMapping(java.lang.String prefix) 
       throws org.xml.sax.SAXException
  {
    // No-op
  }
  public void ignorableWhitespace(char[] ch, int start, int length) 
       throws org.xml.sax.SAXException
  {
    // %TBD% I believe ignorable text isn't part of the DTM model...?
  }
  public void processingInstruction(java.lang.String target, java.lang.String data) 
       throws org.xml.sax.SAXException
  {
    processAccumulatedText();
    // %TBD% Which pools do target and data go into?
  }
  public void setDocumentLocator(Locator locator) 
  {
    // No-op for DTM
  }
  public void skippedEntity(java.lang.String name) 
       throws org.xml.sax.SAXException
  {
    processAccumulatedText();
    //%TBD%
  }
  public void startDocument() 
       throws org.xml.sax.SAXException
  {
    // No-op for DTM?
    //m_dtm.startDocument();
  }
  public void startElement(java.lang.String namespaceURI, java.lang.String localName,
      java.lang.String qName, Attributes atts) 
       throws org.xml.sax.SAXException
  {
    processAccumulatedText();

    // %TBD% Split prefix off qname
    String prefix=null;
    int colon=qName.indexOf(':');
    if(colon>0)
      prefix=qName.substring(0,colon);

    // %TBD% Where do we pool expandedName, or is it just the union, or...
    m_dtm.appendStartElement(namespaceNames.stringToIndex(namespaceURI),
                     localNames.stringToIndex(localName),
                     prefixes.stringToIndex(prefix)); /////// %TBD%

    // %TBD% I'm assuming that DTM will require resequencing of
    // NS decls before other attrs, hence two passes are taken.
    // %TBD% Is there an easier way to test for NSDecl?
    int nAtts=atts.getLength();
    // %TBD% Countdown is more efficient if nobody cares about sequence.
    for(int i=nAtts-1;i>=0;--i)	
      {
        qName=atts.getQName(i);
        if(qName.startsWith("xmlns:") || "xmlns".equals(qName))
          {
            prefix=null;
            colon=qName.indexOf(':');
            if(colon>0)
              {
                prefix=qName.substring(0,colon);
              }
            else
              {
                prefix=""; // Default prefix
              }
            

            m_dtm.appendNSDeclaration(
                                    prefixes.stringToIndex(prefix),
                                    namespaceNames.stringToIndex(atts.getValue(i)),
                                    atts.getType(i).equalsIgnoreCase("ID"));
          }
      }
    
    for(int i=nAtts-1;i>=0;--i)	
      {
        qName=atts.getQName(i);
        if(qName.startsWith("xmlns:") || "xmlns".equals(qName))
          {
            // %TBD% I hate having to extract the prefix into a new
            // string when we may never use it. Consider pooling whole
            // qNames, which are already strings?
            prefix=null;
            colon=qName.indexOf(':');
            if(colon>0)
              {
                prefix=qName.substring(0,colon);
                localName=qName.substring(colon+1);
              }
            else
              {
                prefix=""; // Default prefix
                localName=qName;
              }
            
            
            content.append(atts.getValue(i)); // Single-string value
            int contentEnd=content.length();
            
            if(!("xmlns".equals(prefix) || "xmlns".equals(qName)))
              m_dtm.appendAttribute(namespaceNames.stringToIndex(atts.getURI(i)),
                                  localNames.stringToIndex(localName),
                                  prefixes.stringToIndex(prefix),
                                  atts.getType(i).equalsIgnoreCase("ID"),
                                  contentStart, contentEnd-contentStart);
            contentStart=contentEnd;
          }
      }
  }
  public void startPrefixMapping(java.lang.String prefix, java.lang.String uri) 
       throws org.xml.sax.SAXException
  {
    // No-op in DTM, handled during element/attr processing?
  }

  //
  // LexicalHandler support. Not all SAX2 parsers support these events
  // but we may want to pass them through when they exist...
  //
  public void comment(char[] ch, int start, int length) 
       throws org.xml.sax.SAXException
  {
    processAccumulatedText();

    content.append(ch,start,length); // Single-string value
    m_dtm.appendComment(contentStart,length);
    contentStart+=length;    
  }
  public void endCDATA() 
       throws org.xml.sax.SAXException
  {
    // No-op in DTM
  }
  public void endDTD() 
       throws org.xml.sax.SAXException
  {
    // No-op in DTM
  }
  public void endEntity(java.lang.String name) 
       throws org.xml.sax.SAXException
  {
    // No-op in DTM
  }
  public void startCDATA() 
       throws org.xml.sax.SAXException
  {
    // No-op in DTM
  }
  public void startDTD(java.lang.String name, java.lang.String publicId,
      java.lang.String systemId) 
       throws org.xml.sax.SAXException
  {
    // No-op in DTM
  }
  public void startEntity(java.lang.String name) 
       throws org.xml.sax.SAXException
  {
    // No-op in DTM
  }

  //
  // coroutine support
  //

  public int getAppCoroutine() {
    return fAppCoroutine;
  }
  
  /**
   * getMoreNodes() tells the coroutine parser that we need more nodes.
   *
   * Parameter may be:
   *
   *      null            terminate the parser coroutine.
   *
   *      Boolean.TRUE    indication to continue parsing the current document.
   *			  Resumes normal SAX parsing.
   *
   *      Boolean.FALSE   indication to discontinue parsing and reset.
   *			  Throws UserRequestedStopException
   *			  to return control to the run() loop.
   *
   * We will be resumed with 
   *   co_resume(Boolean.TRUE, ...) on success with more remaining to parse.
   *   co_resume(Boolean.FALSE, ...) on success after endDocument.
   */
  private boolean co_yield(boolean getmore, boolean shutdown)
  {
    Object arg=null;
    if(!shutdown)
      arg= getmore ? Boolean.TRUE : Boolean.FALSE;
    try
      {
        arg = fCoroutineManager.co_resume(arg,fAppCoroutine,fParserCoroutine);
        if (arg instanceof Boolean) {
          return ((Boolean)arg).booleanValue();
          }

        else // Unexpected!
          {
            System.err.println(
                  "Active CoroutineSAXParser: unexpected resume parameter, "
                  +arg.getClass
                  ()+" with value=\""+arg+'"');
            System.err.println("\tStopping parser rather than risk deadlock");
            throw new RuntimeException("Coroutine parameter error ("+arg+')');
          }
      }
    catch(java.lang.NoSuchMethodException e)
      {
        // Shouldn't happen unless we've miscoded our coroutine logic
        // "Shut down the garbage smashers on the detention level!"
        e.printStackTrace(System.err);
        fCoroutineManager.co_exit(fAppCoroutine);
      }

    // Only reached if NoSuchMethodException was thrown (no coroutine)
    return(false); 
  }

} // class DTMBuilder