/*
* The contents of this file are subject to the Mozilla Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
*
* The Original Code is the Kowari Metadata Store.
*
* The Initial Developer of the Original Code is Plugged In Software Pty
* Ltd (http://www.pisoftware.com, mailto:info@pisoftware.com). Portions
* created by Plugged In Software Pty Ltd are Copyright (C) 2001,2002
* Plugged In Software Pty Ltd. All Rights Reserved.
*
* Contributor(s): N/A.
*
* [NOTE: The text of this Exhibit A may differ slightly from the text
* of the notices in the Source Code files of the Original Code. You
* should use the text of this Exhibit A rather than the text found in the
* Original Code Source Code for Your Modifications.]
*
*/
package org.mulgara.extractor.tag;
//Mulgara utilities
// Java 2 enterprise packages
import java.io.*;
import java.net.*;
import javax.servlet.jsp.*;
import javax.servlet.jsp.tagext.*;
// Third party packages
import org.apache.log4j.*;
// Apache Soap packages
import org.apache.soap.*;
import org.apache.soap.rpc.*;
import org.mulgara.util.*;
/**
* Retrieves metadata for a given document.
*
* @created 2001-10-17
*
* @author Tom Adams
*
* @version $Revision: 1.9 $
*
* @modified $Date: 2005/01/05 04:59:08 $ by $Author: newmana $
*
* @company <a href="mailto:info@PIsoftware.com">Plugged In Software</a>
*
* @copyright ©2001 <a href="http://www.pisoftware.com/">Plugged In
* Software Pty Ltd</a>
*
* @licence <a href="{@docRoot}/../../LICENCE">Mozilla Public License v1.1</a>
*/
public class MetadataTag extends TagSupport {
private static final boolean DEBUG = false;
/** Serialization ID */
private static final long serialVersionUID = -2907904987343759108L;
/**
* the name of the id attribute
*/
protected final static String ATTR_ID = "id";
/**
* the name of the model attribute
*/
protected final static String ATTR_MODEL = "model";
/**
* the name of the server attribute
*/
protected final static String ATTR_SERVER = "server";
/**
* the name of the document attribute
*/
protected final static String ATTR_DOCUMENT = "document";
//
// Constants
//
/**
* the category to log to
*/
private final static Logger log =
Logger.getLogger(MetadataTag.class.getName());
/**
* the key to retrieve the document queries file
*/
private final static String DOCUMENT_QUERIES_FILE = "mulgara.document.queries";
/**
* the marker of a model URI for replacement
*/
private final static String MODEL_MARKER = "@@model@@";
/**
* the marker of a document URI for replacement
*/
private final static String DOCUMENT_MARKER = "@@document@@";
/**
* the system property that overrides the the query file location
*/
private final static String MULGARAV_QUERY = "MULGARAVQUERY";
//
// Document properties
//
/**
* the title property
*/
@SuppressWarnings("unused")
private final static String TITLE_PROPERTY =
"http://mulgara.org/mulgara/Document#title";
/**
* the HTML title property
*/
private final static String HTML_TITLE_PROPERTY =
"http://mulgara.org/mulgara/tool/HtmlExtractor#title";
/**
* the has HTML property
*/
private final static String HAS_HTML_PROPERTY =
"http://mulgara.org/mulgara/Document#hasHtml";
/**
* the date property
*/
@SuppressWarnings("unused")
private final static String DATE_PROPERTY =
"http://mulgara.org/mulgara/Document#containsDate";
/**
* the processed property
*/
@SuppressWarnings("unused")
private final static String PROCESSED_PROPERTY =
"http://mulgara.org/mulgara/Document#processed";
/**
* the company property
*/
@SuppressWarnings("unused")
private final static String COMPANY_PROPERTY =
"http://mulgara.org/mulgara/Document#hasCompanyName";
/**
* the person property
*/
@SuppressWarnings("unused")
private final static String PERSON_PROPERTY =
"http://mulgara.org/mulgara/Document#hasPersonalName";
/**
* the key phrase property
*/
@SuppressWarnings("unused")
private final static String KEYPHRASE_PROPERTY =
"http://mulgara.org/mulgara/Document#hasKeyPhrase";
/**
* the highlight property
*/
@SuppressWarnings("unused")
private final static String HIGHLIGHT_PROPERTY =
"http://mulgara.org/mulgara/Document#hasHighlight";
/**
* the link property
*/
@SuppressWarnings("unused")
private final static String LINK_PROPERTY = "";
/**
* the generic feature property
*/
@SuppressWarnings("unused")
private final static String GENERIC_FEATURE_PROPERTY =
"http://mulgara.org/mulgara/Document#hasGenericFeature";
/**
* the geographic feature property
*/
@SuppressWarnings("unused")
private final static String GEO_FEATURE_PROPERTY =
"http://mulgara.org/mulgara/Document#hasGeographicFeature";
/**
* the image property
*/
@SuppressWarnings("unused")
private final static String IMAGE_PROPERTY = "";
/**
* the email property
*/
@SuppressWarnings("unused")
private final static String EMAIL_PROPERTY =
"http://mulgara.org/mulgara/Document#hasEmailAddress";
/**
* the case property
*/
@SuppressWarnings("unused")
private final static String CASE_PROPERTY =
"http://mulgara.org/mulgara/Document#hasCaseName";
//
// Members
//
/**
* the model to issue queries against
*/
private URI model = null;
/**
* the Mulgara server containing metadata we're interested in, overrides the value
* set using the {@link InitTag} tag
*/
private URL server = null;
/**
* the URL of the document to retrieve metadata for
*/
private URL document = null;
// release()
//
// Public API
//
/**
* Sets the model to issue queries against. <p>
*
* Note. This method will be called if this tag is invoked with the <code>model</code>
* attribute set, overriding the model URI set using the <code>init</code>
* tag. </p>
*
* @param model the model to issue queries against
* @throws JspTagException if <code>model</code> violates
* <a href="http://www.isi.edu/in-notes/rfc2396.txt">RFC?2396</a>
*/
public void setModel(String model) throws JspTagException {
try {
this.model = new URI(model);
}
catch (URISyntaxException use) {
// log the error
log.error("Invalid model URI in metadata tag");
// wrap it and re-throw!
throw new JspTagException("metadata: Invalid URI specified as model " +
"attribute value");
}
// try-catch
}
// getModel()
/**
* Sets the URL of the SOAP endpoint of the Mulgara server containing metadata
* we're interested in. <p>
*
* Note. This method will be called if this tag is invoked with the <code>server</code>
* attribute set, overriding the server name set using the <code>init</code>
* tag. </p>
*
* @param server the URL of the SOAP endpoint of the Mulgara server containing
* metadata we're interested in
* @throws JspTagException if <code>server</code> specified is not a valid URL
*/
public void setServer(String server) throws JspTagException {
try {
this.server = new URL(server);
}
catch (MalformedURLException mue) {
// log the error
log.error("Invalid SOAP endpoint URL in metadata tag");
// wrap it and re-throw!
throw new JspTagException("metadata: Invalid URL specified as server " +
"attribute value");
}
// try-catch
}
// getServer()
/**
* Sets the URL of the document to retrieve metadata for.
*
* @param document the URL of the document to retrieve metadata for
* @throws JspTagException if the <code>document</code> specified is not a
* valid URL
*/
public void setDocument(String document) throws JspTagException {
try {
this.document = new URL(document);
}
catch (MalformedURLException mue) {
// log the error
log.error("Invalid document URL in metadata tag");
// wrap it and re-throw!
throw new JspTagException("metadata: Invalid URL specified as document " +
"attribute value");
}
// try-catch
}
// setModel()
/**
* Returns the model to issue queries against. <p>
*
* Note. This method may return null if this tag has not been invoked with the
* <code>model</code> attribute set. If this is the case, the model URI may be
* obtained by retrieving the value of the attribute
* "tmex.server.model". </p>
*
* @return the model to issue queries against, or <code>null</code> if the
* server hasn't been set
*/
public String getModel() {
if (this.model == null) {
return null;
}
else {
return this.model.toString();
}
// end if
}
// setServer()
/**
* Returns the URL of the SOAP endpoint of the Mulgara server containing metadata
* we're interested in. <p>
*
* Note. This method may return null if this tag has not been invoked with the
* <code>server</code> attribute set. If this is the case, the server URL may
* be obtained by retrieving the value of the attribute
* "tmex.server.soapendpoint". </p>
*
* @return the URL of the SOAP endpoint of the Mulgara server containing metadata
* we're interested in, or <code>null</code> if the server hasn't been
* set
*/
public String getServer() {
if (this.server == null) {
return null;
}
else {
return this.server.toString();
}
// end if
}
// setServer()
/**
* Returns the URL of the document to retrieve metadata for.
*
* @return the URL of the document to retrieve metadata for, or <code>null</code>
* if the document hasn't been set
*/
public String getDocument() {
if (this.document == null) {
return null;
}
else {
return this.document.toString();
}
// end if
}
//
// Methods overriding TagSupport
//
/**
* Retrieves metadata for a given document.
*
* @return a response code informing the servlet container how to proceed with
* JSP tag execution
* @throws JspTagException
*/
public int doStartTag() throws JspTagException {
try {
// get the SOAP endpoint
URL soapEndpoint = this.getSoapEndpoint();
if (soapEndpoint == null) {
throw new JspTagException("metadata:If invoked without \"server\" " +
"attribute, metadata tag must be preceeded by init tag");
}
// end if
// log that we've found the SOAP endpoint
log.debug("Found Mulgara server SOAP enpoint - " + soapEndpoint);
// retrieve the model we'll be looking for documents in
URI tmexModel = this.getTmexModel();
if (tmexModel == null) {
throw new JspTagException("metadata: If invoked without \"model\" " +
"attribute, metadata tag must be preceeded by init tag");
}
// end if
// log what model we're looking for documents in
log.debug("Retrieving document metadata from " + tmexModel);
// get the document to retrieve metadata for
String documentURL = this.getDocument();
if (documentURL == null) {
throw new JspTagException("metadata: Document URL cannot be null");
}
// end if
// log what document we're retrieving metadata for
log.debug("Retrieving metadata for " + document);
// ask the Mulgara server for the metadata of the document
Response metadataResponse =
this.retrieveDocumentMetadata(soapEndpoint, tmexModel,
new URL(documentURL));
// save the response if an id was set, otherwise output it
if (this.getId() != null) {
// set a page context variable containing the document metadata
pageContext.setAttribute(this.getId(), metadataResponse);
}
else {
pageContext.getOut().println(TagSoapClient.getContent(metadataResponse));
}
// end if
}
catch (MalformedURLException mue) {
// log the error
log.error("Invalid SOAP endpoint or document URL in metadata tag");
// wrap it and re-throw!
throw new JspTagException("metadata: Invalid URL specified as server " +
"or document attribute value");
}
catch (URISyntaxException use) {
// log the error
log.error("Invalid model URI in metadata tag");
// wrap it and re-throw!
throw new JspTagException("metadata: Invalid URI specified as model " +
"attribute value");
}
catch (SOAPException se) {
// log the error
log.error("Unable to package document metadata query into a SOAP message",
se);
// wrap it and re-throw!
throw new JspTagException("metadata: Unable to send documents metadata " +
"query as a SOAP message");
}
catch (IOException ioe) {
// log the error
log.error("Error writing SOAP response to page context output stream", ioe);
// wrap it and re-throw!
throw new JspTagException("metadata: Unable to print document metadata " +
"to JSP output stream");
}
// try-catch
// skip the body of the tag (which should be empty anyway...)
return SKIP_BODY;
}
// doStartTag()
/**
* Resets the tag to its default state.
*/
public void release() {
this.id = null;
this.model = null;
this.server = null;
this.document = null;
}
// retrieveDocumentMetadata()
/**
* Returns the URL of the SOAP endpoint of the Mulgara server containing metadata
* we're interested in. <p>
*
* This tag looks first for an attribute defined on this tag, and then in the
* <code>pageContext</code> for an attribute set using the <code>init</code>
* tag. </p>
*
* @return the URL of the SOAP endpoint of the Mulgara server containing metadata
* we're interested in, or null if the model URI has not been defined as
* an attribute to this tag or using the <code>init</code> tag
* @throws MalformedURLException if the SOAP endpoint is not a valid URL
*/
private URL getSoapEndpoint() throws MalformedURLException {
URL soapEndpoint = null;
// look for the endpoint as an attribute first, then in the page context
if (this.getServer() != null) {
// override the default server with the URL specified as an attribute
soapEndpoint = new URL(this.getServer());
}
else {
// get the endpoint set in the init tag (we don't know the scope...)
soapEndpoint = (URL) this.pageContext.findAttribute(InitTag.KEY_SERVER);
}
// end if
// return the endpoint
return soapEndpoint;
}
// getSoapEndpoint()
/**
* Returns the model containing the documents processed by TMex. <p>
*
* This tag looks first for an attribute defined on this tag, and then in the
* <code>pageContext</code> for an attribute set using the <code>init</code>
* tag. </p>
*
* @return the model containing the documents processed by TMex, or null if
* the model URI has not been defined as an attribute to this tag or
* using the <code>init</code> tag
* @throws URISyntaxException if the model URI violates <a
* href="http://www.isi.edu/in-notes/rfc2396.txt">RFC?2396</a>
*/
private URI getTmexModel() throws URISyntaxException {
URI model = null;
// look for the model as an attribute first, then in the page context
if (this.getModel() != null) {
// override the default server with the URL specified as an attribute
model = new URI(this.getModel());
}
else {
// get the model set in the init tag (we don't know the scope...)
model = (URI) this.pageContext.findAttribute(InitTag.KEY_MODEL);
}
// end if
// return the endpoint
return model;
}
// getTmexModel()
/**
* Returns a new-line terminated query to retreive the value of the HTML title
* property.
*
* @param model the Mulgara model
* @param document the URL of the document to find the title of
* @return a query to retrieve the
*/
@SuppressWarnings("unused")
private String getTitleQuery(URI model, URL document) {
return "select $docNode $predicate $classNode $propertyLabel $value " +
"from <" + model.toString() + "> " + "where ( <" + document.toString() +
"> <" + HAS_HTML_PROPERTY + "> $docNode) " + "and ( $docNode <" +
HTML_TITLE_PROPERTY + "> $value ) " +
"and ( $predicate <http://www.w3.org/2000/01/rdf-schema#label> $propertyLabel ) " +
"and ( $docNode $predicate $value ) ;";
}
// getTitleQuery()
/**
* Retrieves the document properties queries from the query file.
*
* @param model the Mulgara model
* @param document the URL of the document to find the title of
* @return the queries to retrieve document properties
*/
private String getQueries(URI model, URL document) {
// intialise the query buffer
StringBuffer queries = new StringBuffer();
try {
// get the location of the queries file
String virtualLocation =
this.pageContext.getServletContext().getInitParameter(DOCUMENT_QUERIES_FILE);
virtualLocation = System.getProperty(MULGARAV_QUERY, virtualLocation);
if (virtualLocation == null) {
throw new IOException("Unable to locate queries file path for key " +
DOCUMENT_QUERIES_FILE);
}
// end if
// log that we found the queries file
if (log.isDebugEnabled()) {
log.debug("Found queries file location " + virtualLocation);
}
// end if
// get the real location
String queriesFileLocation =
this.pageContext.getServletContext().getRealPath(virtualLocation);
if (queriesFileLocation == null) {
throw new IOException("Unable to retrieve real filename for virtual " +
"path " + virtualLocation);
}
// end if
// log that we found the queries file
if (log.isDebugEnabled()) {
log.debug("Found queries file at " + queriesFileLocation);
}
// end if
// create a buffered reader to read the contents of the file
BufferedReader in =
new BufferedReader(new FileReader(queriesFileLocation));
try {
// read in the contents of the file into the queries buffer
String line = in.readLine();
while (line != null) {
// log that we've read the line
if (log.isDebugEnabled()) {
log.debug("Read line: " + line);
}
// trim the line
line = line.trim();
// ignore comments and blank lines
if (!line.startsWith("#") && !line.equals("")) {
// replace any markers in the text with the real values
line = line.replaceAll(MODEL_MARKER, model.toString());
line = line.replaceAll(DOCUMENT_MARKER, document.toString());
// log that we've replaced the markers in the line
log.debug("Replaced markers in line: " + line);
// save the line
queries.append(line);
}
// end if
// read the next one...
line = in.readLine();
}
} finally {
in.close();
}
// end while
}
catch (IOException ioe) {
// log the error
log.error("Error reading queries file", ioe);
}
// try-catch
// return the queries
return queries.toString();
}
// getDocument()
//
// Internal methods
//
/**
* Issues a query to a Mulgara server asking for metadata on a given document.
*
* @param soapEndpoint the SOAP endpoint
* @param model the Mulgara model
* @param document the URL of the model to find metadata for
* @return RETURNED VALUE TO DO
* @throws SOAPException if an error occurs while sending information to, or
* retrieving information from the SOAP endpoint
* @throws URISyntaxException if a document property URI is not a valid URI
*/
private Response retrieveDocumentMetadata(URL soapEndpoint, URI model,
URL document) throws SOAPException, URISyntaxException {
// construct the query
StringBuffer metadataQuery = new StringBuffer();
if (DEBUG) {
metadataQuery.append(
"select $docNode $predicate $predicateLabel $classLabel $value $score $generator " +
"from <" + model.toString() + "> " + "where ( <" + document.toString() +
"> $predicate $docNode) " +
"and (( $predicate <http://www.w3.org/2000/01/rdf-schema#label> $predicateLabel ) " +
"and ( $z <http://www.w3.org/2000/01/rdf-schema#label> $classLabel ) " +
"and ( $docNode $z $value )) " +
"or (( $id <http://www.w3.org/1999/02/22-rdf-syntax-ns#object> $docNode ) " +
"and ( $id <http://mulgara.org/mulgara/Document#score> $score ) " +
"and ( $id <http://mulgara.org/mulgara/Document#generator> $generator )) ;");
}
else {
// append the queries
metadataQuery.append(this.getQueries(model, document));
/*
metadataQuery.append(this.getPropertyQuery(
model, document, new URI(DATE_PROPERTY)));
metadataQuery.append(this.getPropertyQuery(
model, document, new URI(PROCESSED_PROPERTY)));
metadataQuery.append(this.getPropertyQuery(
model, document, new URI(COMPANY_PROPERTY)));
metadataQuery.append(this.getPropertyQuery(
model, document, new URI(PERSON_PROPERTY)));
metadataQuery.append(this.getPropertyQuery(
model, document, new URI(KEYPHRASE_PROPERTY)));
metadataQuery.append(this.getPropertyQuery(
model, document, new URI(HIGHLIGHT_PROPERTY)));
//metadataQuery.append(this.getPropertyQuery(
// model, document, new URI(LINK_PROPERTY)));
metadataQuery.append(this.getPropertyQuery(
model, document, new URI(GENERIC_FEATURE_PROPERTY)));
metadataQuery.append(this.getPropertyQuery(
model, document, new URI(GEO_FEATURE_PROPERTY)));
//metadataQuery.append(this.getPropertyQuery(
// model, document, new URI(IMAGE_PROPERTY)));
metadataQuery.append(this.getPropertyQuery(
model, document, new URI(EMAIL_PROPERTY)));
metadataQuery.append(this.getPropertyQuery(
model, document, new URI(CASE_PROPERTY)));
*/
}
// end if
// log the query we're sending
if (log.isDebugEnabled()) {
log.debug("Sending query: " + metadataQuery.toString());
log.debug("Sending queries to " + soapEndpoint);
}
return TagSoapClient.invoke(soapEndpoint, metadataQuery.toString());
}
// getQueries()
}
// MetadataTag