/*
 * The contents of this file are subject to the Mozilla Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
 * the License for the specific language governing rights and limitations
 * under the License.
 *
 * The Original Code is the Kowari Metadata Store.
 *
 * The Initial Developer of the Original Code is Plugged In Software Pty
 * Ltd (http://www.pisoftware.com, mailto:info@pisoftware.com). Portions
 * created by Plugged In Software Pty Ltd are Copyright (C) 2001,2002
 * Plugged In Software Pty Ltd. All Rights Reserved.
 *
 * Contributor(s): N/A.
 *
 * [NOTE: The text of this Exhibit A may differ slightly from the text
 * of the notices in the Source Code files of the Original Code. You
 * should use the text of this Exhibit A rather than the text found in the
 * Original Code Source Code for Your Modifications.]
 *
 */

package org.mulgara.resolver.url;

// Java 2 standard packages
import java.io.*;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.*;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipInputStream;

import org.xml.sax.*;

// Third party packages
import com.hp.hpl.jena.rdf.arp.ARP;        // ARP (Jena RDF/XML parser)
import com.hp.hpl.jena.rdf.arp.ALiteral;
import com.hp.hpl.jena.rdf.arp.AResource;
import com.hp.hpl.jena.rdf.arp.StatementHandler;
import org.apache.log4j.Logger;             // Apache Log4J
import org.jrdf.graph.*;                    // JRDF

// Locally written packages
import org.mulgara.query.Cursor;
import org.mulgara.query.TuplesException;
import org.mulgara.query.Variable;
import org.mulgara.query.rdf.*;
import org.mulgara.resolver.spi.LocalizeException;
import org.mulgara.resolver.spi.ResolverSession;
import org.mulgara.resolver.spi.Statements;
import org.mulgara.resolver.spi.StatementsWrapperResolution;
import org.mulgara.store.tuples.AbstractTuples;
import org.mulgara.store.tuples.Tuples;

/**
 * Parses the RDF/XML document at a {@link URL} into {@link Statements}.
 *
 * This particular implementation is complicated by the need to adapt the Jena
 * ARP RDF/XML "push" parser to be a "pull" parser instead.
 *
 * @created 2004-04-02
 * @author <a href="http://staff.pisoftware.com/raboczi">Simon Raboczi</a>
 * @version $Revision: 1.8 $
 * @modified $Date: 2005/01/05 04:58:56 $ @maintenanceAuthor $Author: newmana $
 * @company <a href="mailto:info@PIsoftware.com">Plugged In Software</a>
 * @copyright © 2004 <a href="http://www.PIsoftware.com/">Plugged In
 *   Software Pty Ltd</a>
 * @licence <a href="{@docRoot}/../../LICENCE">Mozilla Public License v1.1</a>
 */
public class URLStatements extends AbstractTuples implements Statements {

  /** Logger. */
  @SuppressWarnings("unused")
  private static final Logger logger =
      Logger.getLogger(URLStatements.class.getName());

  /** The session used to localize the RDF nodes from the stream. */
  private ResolverSession resolverSession;

  /** Map from ARP anonymous node IDs to {@link BlankNode}s, shared across parses. */
  private Map<String,BlankNode> blankNodeMap;

  /**
   * The current row.
   *
   * If the cursor is not on a row, this will be <code>null</code>.
   */
  private Triple triple;

  /** The location of the RDF/XML document. */
  private URL url;

  /** The background parsing thread, or <code>null</code> if none is running. */
  private Parser parser = null;

  /** The number of statements in the document, once known. */
  private long rowCount;

  /** Whether {@link #rowCount} holds a valid total. */
  private boolean rowCountIsValid = false;
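
  /*
   * Design note: ARP is a "push" parser that delivers statements through
   * callbacks, while Tuples consumers expect to "pull" rows one at a time
   * via next().  The Parser class at the bottom of this file bridges the
   * two models: it runs ARP on its own thread and pushes parsed triples
   * into a bounded queue, from which next() pulls them on the caller's
   * thread.
   */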

  //
  // Constructors
  //

  /**
   * Construct an RDF/XML stream parser.
   *
   * @param url the location of the RDF/XML formatted document
   * @param resolverSession session against which to localize RDF nodes
   * @param blankNodeMap map of blank node IDs populated by any earlier
   *   parsings of the same document within the scope of the same
   *   <var>resolverSession</var>
   * @throws IllegalArgumentException if <var>url</var>,
   *   <var>resolverSession</var> or <var>blankNodeMap</var> are
   *   <code>null</code>
   * @throws TuplesException if the document at <var>url</var> can't be parsed
   *   as RDF/XML
   */
  URLStatements(URL url, ResolverSession resolverSession,
      Map<String,BlankNode> blankNodeMap) throws TuplesException {
    // Validate "url" parameter
    if (url == null) throw new IllegalArgumentException("Null \"url\" parameter");

    // Validate "resolverSession" parameter
    if (resolverSession == null) throw new IllegalArgumentException("Null \"resolverSession\" parameter");

    // Validate "blankNodeMap" parameter
    if (blankNodeMap == null) throw new IllegalArgumentException("Null \"blankNodeMap\" parameter");

    // Initialize fields
    this.url = url;
    this.resolverSession = resolverSession;
    this.blankNodeMap = blankNodeMap;

    // Fix the magical column names for RDF statements
    setVariables(new Variable[] { new Variable("subject"),
                                  new Variable("predicate"),
                                  new Variable("object") });
  }

  //
  // Methods implementing Statements
  //

  public long getSubject() throws TuplesException {
    return getColumnValue(0);
  }

  public long getPredicate() throws TuplesException {
    return getColumnValue(1);
  }

  public long getObject() throws TuplesException {
    return getColumnValue(2);
  }

  //
  // Methods implementing AbstractTuples
  //

  /**
   * {@inheritDoc}
   *
   * Non-zero length <var>prefix</var> values don't need to be supported by
   * this class because prefix filtration is implemented by the
   * {@link StatementsWrapperResolution} which {@link URLResolver} always
   * applies to this class before returning one.
   *
   * @param prefix {@inheritDoc}; for this particular implementation, non-zero
   *   length prefixes are not supported
   * @throws {@inheritDoc}; also if <var>prefix</var> is non-zero length
   */
  public void beforeFirst(long[] prefix, int suffixTruncation) throws TuplesException {
    // Validate "prefix" parameter
    if (prefix == null) {
      throw new IllegalArgumentException("Null \"prefix\" parameter");
    }
    if (prefix.length != 0) {
      throw new TuplesException(
        getClass() + ".beforeFirst isn't implemented for non-zero length prefix"
      );
    }

    // Validate "suffixTruncation" parameter
    if (suffixTruncation != 0) {
      throw new IllegalArgumentException("Nonzero \"suffixTruncation\" parameter");
    }

    // Shut down any existing parsing thread
    if (parser != null) {
      stopThread();
    }

    // Create the parser and start the parsing thread
    parser = new Parser(url, blankNodeMap);
    parser.start();

    // TODO skip forward to the first triple that matches prefix
  }
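
  /*
   * A minimal sketch of how a caller might pull statements from this class,
   * assuming a hypothetical "statements" instance already constructed with a
   * URL, ResolverSession and blank node map:
   *
   *   statements.beforeFirst(new long[] {}, 0);  // start the parsing thread
   *   while (statements.next()) {                // pull one triple at a time
   *     long s = statements.getSubject();        // localized node IDs
   *     long p = statements.getPredicate();
   *     long o = statements.getObject();
   *     // ... use the localized triple ...
   *   }
   *   statements.close();                        // stop the parsing thread
   */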

  /**
   * The cursor position isn't cloned by this method.
   */
  public Object clone() {
    URLStatements cloned = (URLStatements) super.clone();

    // Copy immutable fields by reference
    cloned.resolverSession = resolverSession;
    cloned.url = url;

    // Even though this is a mutable field, we want to share the blank node IDs
    cloned.blankNodeMap = blankNodeMap;

    // The cursor position is not cloned.
    cloned.triple = null;
    cloned.parser = null;

    return cloned;
  }

  /**
   * Close the RDF/XML formatted input stream.
   */
  public void close() throws TuplesException {
    stopThread();
  }

  /**
   * @param column 0 for the subject, 1 for the predicate, 2 for the object
   */
  public long getColumnValue(int column) throws TuplesException {
    if (triple == null) {
      throw new TuplesException("There is no current row");
    }

    // Pull the appropriate field from the current triple as a JRDF Node
    Node node;
    switch (column) {
      case 0:  node = triple.getSubject();    break;
      case 1:  node = triple.getPredicate();  break;
      case 2:  node = triple.getObject();     break;
      default: throw new TuplesException("No such column " + column);
    }
    assert node != null;

    // Localize the node
    try {
      return resolverSession.localize(node);
    } catch (LocalizeException e) {
      throw new TuplesException("Couldn't get column " + column + " value", e);
    }
  }

  public List<Tuples> getOperands() {
    return Collections.emptyList();
  }

  public int getRowCardinality() throws TuplesException {
    long statementCount;

    if (rowCountIsValid) {
      statementCount = rowCount;
    } else {
      Parser p;
      boolean newParser;
      if (parser != null) {
        // Use the existing parser.
        p = parser;
        newParser = false;
      } else {
        // Create a new parser.
        p = new Parser(url, blankNodeMap);
        p.start();
        newParser = true;
      }

      // Two statements are enough to distinguish ZERO, ONE and MANY, and the
      // queue always has room for at least two triples, so this wait can't
      // deadlock against a full queue.
      try {
        synchronized (p) {
          while (p.getStatementCount() < 2 && !p.isStatementCountTotal()) {
            try {
              // Wait on the parser for changes to the statement count or
              // completion status.
              p.wait();
            } catch (InterruptedException ex) {
              throw new TuplesException("Abort");
            }
          }
          statementCount = p.getStatementCount();
        }
      } catch (TuplesException ex) {
        p.abort();
        if (!newParser) {
          // We just aborted the main parser, so nullify the reference.
          parser = null;
        }
        throw ex; // rethrow.
      } finally {
        if (newParser) {
          // Stop the thread.
          p.abort();
        }
      }
    }

    // Convert the statement count into a cardinality class
    return statementCount == 0 ? Cursor.ZERO :
           statementCount == 1 ? Cursor.ONE :
                                 Cursor.MANY;
  }

  public long getRowCount() throws TuplesException {
    if (!rowCountIsValid) {
      if (parser != null && parser.isStatementCountTotal()) {
        // Get the statement count from the parser.
        rowCount = parser.getStatementCount();
      } else {
        // Create a new parser and consume the entire file.  Note that this
        // makes an exact row count as expensive as a full parse of the
        // document.
        Parser p = new Parser(url, blankNodeMap);
        p.start();
        try {
          rowCount = p.waitForStatementTotal();
        } finally {
          p.abort();
        }
      }
      rowCountIsValid = true;
    }
    return rowCount;
  }

  public long getRowUpperBound() throws TuplesException {
    // If the row count isn't yet available, return an absurdly large value
    return parser != null && parser.isStatementCountTotal()
        ? parser.getStatementCount()
        : Long.MAX_VALUE;
  }

  /** Guess at the number of statements in a large file. */
  private static final long LARGE_FILE_SIZE = 1000000L;

  public long getRowExpectedCount() throws TuplesException {
    // If the row count isn't yet available, return a guess at a large value
    return parser != null && parser.isStatementCountTotal()
        ? parser.getStatementCount()
        : LARGE_FILE_SIZE;
  }

  public boolean hasNoDuplicates() throws TuplesException {
    return false;
  }

  public boolean isColumnEverUnbound(int column) throws TuplesException {
    switch (column) {
      case 0:
      case 1:
      case 2:
        return false;
      default:
        throw new TuplesException("No such column " + column);
    }
  }

  public boolean next() throws TuplesException {
    if (parser == null) {
      // No current row
      return false;
    }

    try {
      triple = parser.getTriple();
    } catch (TuplesException ex) {
      stopThread();
      throw ex; // rethrow
    }

    if (triple == null) {
      // Hit the end of the file.
      assert parser.isStatementCountTotal();
      rowCount = parser.getStatementCount();
      rowCountIsValid = true;
      stopThread();
    }
    return triple != null;
  }

  /**
   * Stops the thread if it is running, and clears the current row.
   */
  private void stopThread() {
    if (parser != null) {
      parser.abort();
      parser = null;
    }
    triple = null;
  }
}

/**
 * This {@link Runnable} parses an RDF/XML document on a separate thread,
 * queueing the resulting triples for consumption by {@link URLStatements}.
 */
class Parser extends Thread implements ErrorHandler, StatementHandler {

  /** Logger. */
  private static final Logger logger = Logger.getLogger(Parser.class.getName());

  /** The number of triples per buffer in {@link #queue}. */
  private final int BUFFER_SIZE = 1000;

  /**
   * Maximum size that {@link #queue} can attain without the
   * parser deliberately blocking and waiting for it to drain.
   */
  private final int QUEUE_MAX_BUFFERS = 10;

  /** The ARP parser instance to use. */
  private final ARP arp = new ARP();

  /** Map ARP anonymous node IDs to {@link BlankNode}s. */
  private final Map<String,BlankNode> blankNodeMap;

  /** The location of the RDF/XML document. */
  private URL url;

  /** The buffer currently being drained by {@link #getTriple}, if any. */
  private Triple[] headBuffer = null;

  /** The index of the next triple to return from {@link #headBuffer}. */
  private int headIndex = 0;

  /** The buffer currently being filled by {@link #addTriple}, if any. */
  private Triple[] tailBuffer = null;

  /** The index of the next free slot in {@link #tailBuffer}. */
  private int tailIndex = 0;

  /** The queue of buffers of triples generated by the RDF/XML parser. */
  private LinkedList<Triple[]> queue = new LinkedList<Triple[]>();

  /**
   * The number of statements parsed so far.
   *
   * When {@link #complete} is <code>true</code>, this will be the number of
   * statements in the RDF/XML document.
   */
  private long statementCount = 0;

  /**
   * true if statementCount is the count of the total number of statements in
   * the entire file because the parser has reached the end of the file without
   * error.
   */
  private boolean statementCountIsTotal = false;

  /** Flag used to indicate that the end of the RDF/XML file has been reached. */
  private boolean complete = false;

  /**
   * The exception which interrupted parsing, or <code>null</code> if parsing
   * is successful.
   */
  private Throwable exception = null;
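
  /*
   * How the buffering works: the ARP callback thread appends triples to
   * tailBuffer until it holds BUFFER_SIZE triples, then enqueues the full
   * buffer.  If the queue already holds QUEUE_MAX_BUFFERS buffers, the
   * producer wait()s until the consumer drains one, so a slow consumer
   * applies backpressure to the parser rather than exhausting memory.
   * The consumer thread drains buffers via getTriple()/getBufferFromQueue().
   */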

  //
  // Constructor
  //

  /**
   * Sole constructor.
   */
  @SuppressWarnings("deprecation")
  Parser(URL url, Map<String,BlankNode> blankNodeMap) {
    // Validate "url" parameter
    if (url == null) throw new IllegalArgumentException("Null \"url\" parameter");

    // Validate "blankNodeMap" parameter
    if (blankNodeMap == null) throw new IllegalArgumentException("Null \"blankNodeMap\" parameter");

    // Initialize fields
    this.url = url;
    this.blankNodeMap = blankNodeMap;

    // Configure the RDF/XML parser
    arp.setEmbedding(true);
    arp.setLaxErrorMode();
    arp.setErrorHandler(this);
    arp.setStatementHandler(this);
  }

  /**
   * @return the number of statements parsed so far
   */
  synchronized long getStatementCount() throws TuplesException {
    checkForException();
    return statementCount;
  }

  /**
   * Blocks until parsing finishes, discarding triples as they arrive.
   *
   * @return the total number of statements in the file
   */
  synchronized long waitForStatementTotal() throws TuplesException {
    while (!complete) {
      checkForException();

      // Keep the LinkedList drained so the producer never blocks.
      queue.clear();
      notifyAll();

      try {
        wait();
      } catch (InterruptedException ex) {
        throw new TuplesException("Abort");
      }
    }
    checkForException();
    assert statementCountIsTotal;
    return statementCount;
  }

  /**
   * Returns true if getStatementCount() would return the total number
   * of statements in the file.
   */
  synchronized boolean isStatementCountTotal() throws TuplesException {
    checkForException();
    return statementCountIsTotal;
  }

  //
  // Method implementing Runnable
  //

  public void run() {
    Throwable t = null;

    // Parse the stream into RDF statements
    try {
      InputStream in = url.openStream();

      // Guess at the transfer encoding (compression scheme) based on the
      // file extension
      if (url.getPath().endsWith(".gz")) {
        // The file name ends with ".gz", so assume it's a gzip'ed file
        in = new GZIPInputStream(in);
      } else if (url.getPath().endsWith(".zip")) {
        // The file name ends with ".zip", so assume it's a zip'ed file and
        // parse only its first entry
        in = new ZipInputStream(in);
        ((ZipInputStream) in).getNextEntry();
      }

      arp.load(in, url.toString());

      if (logger.isDebugEnabled()) {
        logger.debug("Parsed RDF/XML");
      }
      return;
    } catch (Throwable th) {
      t = th;
    } finally {
      flushQueue(t);
    }

    if (logger.isDebugEnabled()) {
      logger.debug("Exception while parsing RDF/XML", exception);
    }
  }

  //
  // Methods implementing StatementHandler
  //

  public void statement(AResource subject, AResource predicate, ALiteral object) {
    if (logger.isDebugEnabled()) {
      logger.debug("Parsed " + subject + " " + predicate + " " + object +
                   " from " + url);
    }

    Triple triple = new TripleImpl((SubjectNode) toNode(subject),
                                   (PredicateNode) toNode(predicate),
                                   (ObjectNode) toNode(object));
    addTriple(triple);
  }

  public void statement(AResource subject, AResource predicate, AResource object) {
    if (logger.isDebugEnabled()) {
      logger.debug("Parsed " + subject + " " + predicate + " " + object +
                   " from " + url);
    }

    Triple triple = new TripleImpl((SubjectNode) toNode(subject),
                                   (PredicateNode) toNode(predicate),
                                   (ObjectNode) toNode(object));
    addTriple(triple);
  }

  //
  // Methods implementing ErrorHandler
  //

  /**
   * Recoverable error.
   *
   * @param e the error reported by the XML parser
   */
  public synchronized void error(SAXParseException e) {
    exception = e;
    logger.error("Error, line " + e.getLineNumber() + ", column " +
                 e.getColumnNumber() + ": " + e.getMessage(), e);
  }

  /**
   * Non-recoverable error.
   *
   * @param e the fatal error reported by the XML parser
   */
  public synchronized void fatalError(SAXParseException e) {
    exception = e;
    logger.error("Fatal error, line " + e.getLineNumber() + ", column " +
                 e.getColumnNumber() + ": " + e.getMessage(), e);
  }

  /**
   * Warning.
   *
   * @param e the warning reported by the XML parser
   */
  public void warning(SAXParseException e) {
    logger.warn("Warning, line " + e.getLineNumber() + ", column " +
                e.getColumnNumber() + ": " + e.getMessage(), e);
  }

  //
  // Internal methods
  //

  /**
   * Create a JRDF {@link Literal} object from an ARP literal object.
   *
   * @param literal the ARP literal
   * @return a JRDF literal
   */
  private Literal toNode(ALiteral literal) {
    URI type = null;
    if (literal.getDatatypeURI() != null) {
      try {
        type = new URI(literal.getDatatypeURI());
      } catch (URISyntaxException e) {
        throw new Error("ARP generated datatype for " + literal +
                        " which isn't a URI", e);
      }
    }

    if (type != null) {
      // A datatyped literal carries no language tag
      return new LiteralImpl(literal.toString(), type);
    } else {
      // A plain literal carries a language tag, defaulting to the empty string
      String lang = literal.getLang();
      if (lang == null) lang = "";
      return new LiteralImpl(literal.toString(), lang);
    }
  }
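
  /*
   * Blank node identity: ARP assigns each anonymous resource a
   * document-scoped ID.  The shared blankNodeMap maps those IDs to JRDF
   * BlankNodes, so repeated parses of the same document within one
   * ResolverSession yield the same blank nodes rather than fresh ones on
   * each pass.
   */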

  /**
   * Create a JRDF {@link Node} from an ARP resource object.
   *
   * @param resource the ARP resource
   * @return the JRDF {@link URIReference} or {@link BlankNode}
   */
  private Node toNode(AResource resource) {
    if (resource.isAnonymous()) {
      // Generate a blank node, reusing any existing node for this ID
      String anonymousID = resource.getAnonymousID();
      BlankNode blankNode = blankNodeMap.get(anonymousID);
      if (blankNode == null) {
        // First occurrence of this ID: create a new blank node and associate
        // it with its anonymous ID
        blankNode = new BlankNodeImpl();
        blankNodeMap.put(anonymousID, blankNode);
      }
      assert blankNode != null;
      return blankNode;
    } else {
      // Generate a URI reference
      try {
        return new URIReferenceImpl(new URI(resource.getURI().toString()));
      } catch (URISyntaxException e) {
        throw new Error("ARP generated a malformed URI", e);
      }
    }
  }

  /**
   * If an exception occurred in the parser, throws a TuplesException that
   * wraps the exception.
   */
  private void checkForException() throws TuplesException {
    if (exception != null) {
      queue.clear();
      headIndex = 0;
      headBuffer = null;
      throw new TuplesException("Exception while reading " + url, exception);
    }
  }

  /**
   * Returns a new triple from the queue or null if there are no more triples.
   */
  Triple getTriple() throws TuplesException {
    if (headBuffer == null || headIndex >= headBuffer.length) {
      // Get another buffer from the queue.
      headIndex = 0;
      headBuffer = null;  // cleared first in case getBufferFromQueue() throws
      headBuffer = getBufferFromQueue();
      if (headBuffer == null) {
        // No more triples.
        return null;
      }
      assert headBuffer.length > 0;
    }

    // Get a triple from the headBuffer.
    Triple triple = headBuffer[headIndex];
    headBuffer[headIndex++] = null;
    assert triple != null;
    return triple;
  }

  private synchronized Triple[] getBufferFromQueue() throws TuplesException {
    while (queue.isEmpty()) {
      checkForException();
      if (complete) {
        // No more buffers in the queue.
        return null;
      }

      // Wait for a buffer.
      try {
        wait();
      } catch (InterruptedException ex) {
        throw new TuplesException("Abort");
      }
    }

    checkForException();
    // Wake the producer in case it is blocked on a full queue.
    notifyAll();
    return queue.removeFirst();
  }

  private void addTriple(Triple triple) {
    assert triple != null;
    if (tailBuffer == null) {
      tailBuffer = new Triple[BUFFER_SIZE];
      tailIndex = 0;
    }
    tailBuffer[tailIndex++] = triple;

    if (tailIndex >= tailBuffer.length) {
      // Add the full buffer to the queue.
      addBufferToQueue(tailBuffer);
      tailBuffer = null;
      tailIndex = 0;
    }
  }

  private synchronized void flushQueue(Throwable t) {
    if (interrupted()) {
      if (t == null) {
        t = new InterruptedException();
      }
    }

    if (t != null) {
      exception = t;
      queue.clear();
    } else if (exception == null) {
      // End of file has been reached without error.
      if (tailBuffer != null) {
        // There is at least one triple in the tailBuffer.
        assert tailIndex > 0;
        Triple[] buf = new Triple[tailIndex];
        System.arraycopy(tailBuffer, 0, buf, 0, tailIndex);
        addBufferToQueue(buf);
      }
      statementCountIsTotal = true;
    } else {
      // An exception has already been reported.
      queue.clear();
    }
    tailBuffer = null;
    tailIndex = 0;
    complete = true;
    notifyAll();
  }

  private synchronized void addBufferToQueue(Triple[] buffer) {
    assert buffer != null;
    // Wait for the queue to drain a bit if it's too full
    while (queue.size() >= QUEUE_MAX_BUFFERS) {
      try {
        wait();
      } catch (InterruptedException ex) {
        throw new RuntimeException("Abort");
      }
    }
    queue.addLast(buffer);
    statementCount += buffer.length;
    notifyAll();
  }

  /**
   * Stops the thread.
   */
  synchronized void abort() {
    interrupt();
    // Clear the queue and notify in case ARP uses an internal thread
    // which has become blocked on the queue being QUEUE_MAX_BUFFERS in size.
    queue.clear();
    notifyAll();
  }
}