/*---------------- FILE HEADER ------------------------------------------
This file is part of deegree.
Copyright (C) 2001-2006 by:
EXSE, Department of Geography, University of Bonn
http://www.giub.uni-bonn.de/deegree/
lat/lon GmbH
http://www.lat-lon.de
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Contact:
Andreas Poth
lat/lon GmbH
Aennchenstr. 19
53115 Bonn
Germany
E-Mail: poth@lat-lon.de
Prof. Dr. Klaus Greve
Department of Geography
University of Bonn
Meckenheimer Allee 166
53115 Bonn
Germany
E-Mail: greve@giub.uni-bonn.de
---------------------------------------------------------------------------*/
package org.deegree.ogcwebservices.csw.manager;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.sql.SQLException;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.httpclient.methods.StringRequestEntity;
import org.deegree.framework.log.ILogger;
import org.deegree.framework.log.LoggerFactory;
import org.deegree.framework.util.StringTools;
import org.deegree.framework.util.TimeTools;
import org.deegree.framework.xml.XMLException;
import org.deegree.framework.xml.XMLFragment;
import org.deegree.framework.xml.XMLParsingException;
import org.deegree.framework.xml.XMLTools;
import org.deegree.io.DBPoolException;
import org.deegree.ogcwebservices.OGCWebServiceException;
import org.deegree.ogcwebservices.csw.manager.HarvestRepository.Record;
import org.deegree.ogcwebservices.csw.manager.HarvestRepository.ResourceType;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.xml.sax.SAXException;
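/**
 * Harvester for remote catalogue services. On each run it reads all registered sources from the
 * {@link HarvestRepository} and starts an asynchronous {@code HarvestProcessor} for every source
 * that is due for harvesting as a catalogue. The processor reads the source's metadata records
 * one by one and inserts, updates or deletes them in the harvesting catalogue.
 *
 * @version $Revision: 1.18 $
 * @author <a href="mailto:poth@lat-lon.de">Andreas Poth</a>
 * @author last edited by: $Author: poth $
 *
 * @version 1.0. $Revision: 1.18 $, $Date: 2006/07/12 14:46:17 $
 *
 * @since 2.0
 */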
public class CatalogueHarvester extends AbstractHarvester {

    private static final ILogger LOG = LoggerFactory.getLogger( CatalogueHarvester.class );

    private static CatalogueHarvester ch = null;

    /** Operation to perform on the harvesting catalogue for a single harvested record. */
    private enum HarvestOperation {
        insert, update, delete, nothing
    }

    /**
     * Returns the singleton instance, creating it on first access. The method is synchronized so
     * that concurrent first calls cannot create two instances.
     *
     * @return the one and only <code>CatalogueHarvester</code>
     */
    public static synchronized CatalogueHarvester getInstance() {
        if ( ch == null ) {
            ch = new CatalogueHarvester();
        }
        return ch;
    }

    /**
     * Performs one harvest iteration: for every source known to the {@link HarvestRepository}
     * that shall be harvested as a catalogue, the source is marked as being in progress and a
     * {@link HarvestProcessor} is started for it. A failure for one source is logged and reported
     * to the response handlers; the remaining sources are still processed.
     */
    @Override
    public void run() {
        LOG.logDebug( "starting harvest iteration for CatalogueHarvester." );
        try {
            HarvestRepository repository = HarvestRepository.getInstance();
            List<URI> sources = repository.getSources();
            for ( URI source : sources ) {
                // remembers whether this iteration added the source to inProgress, so that
                // a failed start does not leave it marked forever
                boolean marked = false;
                try {
                    // determine if source shall be harvested
                    if ( shallHarvest( source, ResourceType.catalogue ) ) {
                        // mark source as currently being harvested
                        inProgress.add( source );
                        marked = true;
                        HarvestProcessor processor = new HarvestProcessor( this, source );
                        processor.start();
                    }
                } catch ( Exception e ) {
                    if ( marked ) {
                        // the processor never ran, so its own cleanup will not happen
                        inProgress.remove( source );
                    }
                    LOG.logError( Messages.format( "CatalogueHarvester.exception1", source ), e );
                    informResponseHandlers( source, e );
                }
            }
        } catch ( Exception e ) {
            LOG.logError( Messages.getString( "CatalogueHarvester.exception2" ), e );
        }
    }

    /**
     * inner class for processing asynchronous harvesting of a catalogue
     *
     * @version $Revision: 1.18 $
     * @author <a href="mailto:poth@lat-lon.de">Andreas Poth</a>
     * @author last edited by: $Author: poth $
     *
     * @version 1.0. $Revision: 1.18 $, $Date: 2006/07/12 14:46:17 $
     *
     * @since 2.0
     */
    protected class HarvestProcessor extends AbstractHarvestProcessor {

        /** records read from the source during the current run, keyed by file identifier */
        private final Map<String, Record> records = new HashMap<String, Record>( 10000 );

        /**
         * @param owner
         *            harvester that started this processor
         * @param source
         *            address of the catalogue to harvest
         */
        HarvestProcessor( AbstractHarvester owner, URI source ) {
            super( owner, source );
        }

        /**
         * Harvests the source record by record until no further record is returned, removes
         * records that are no longer offered by the source, updates the harvesting timestamps
         * and informs the response handlers. The source is removed from the in-progress list
         * in any case.
         */
        @Override
        public void run() {
            records.clear();
            try {
                HarvestRepository repository = HarvestRepository.getInstance();
                // NOTE(review): the index is sent as CSW startPosition, which the CSW
                // specification defines as 1-based; starting at 0 relies on the server
                // tolerating it - confirm against the harvested services.
                int index = 0;
                XMLFragment metaData = null;
                Date harvestingTimestamp = repository.getNextHarvestingTimestamp( source );
                do {
                    metaData = getNextMetadataRecord( source, index, "dataset" );
                    if ( metaData != null ) {
                        processRecord( repository, metaData, index );
                    }
                    index++;
                    // NOTE(review): explicit GC hint kept from the original implementation
                    if ( index % 1000 == 0 ) {
                        System.gc();
                    }
                } while ( metaData != null );

                // delete all records that are no longer hosted by the source from the
                // target catalogue and from the harvest cache
                deleteRecordsNoHostedAnymore( source );

                // timestamps are only updated if everything above succeeded
                writeLastHarvestingTimestamp( source, harvestingTimestamp );
                writeNextHarvestingTimestamp( source, harvestingTimestamp );

                informResponseHandlers( source );
                if ( repository.getHarvestInterval( source ) <= 0 ) {
                    repository.dropRequest( source );
                }
            } catch ( Exception e ) {
                LOG.logError( Messages.format( "CatalogueHarvester.exception4", source ), e );
                reportFailure( e );
            } finally {
                inProgress.remove( source );
            }
        }

        /**
         * Handles one metadata record read from the source: determines the required operation,
         * performs the matching transaction and stores the record in the harvest repository.
         * Failures of the transaction are logged and reported, and the record is removed from
         * the list of records read in this run.
         *
         * @param repository
         *            harvest repository used to store the record
         * @param metaData
         *            the metadata record read from the source
         * @param index
         *            position of the record at the source (used for error messages)
         * @throws XMLParsingException
         * @throws IOException
         * @throws DBPoolException
         * @throws SQLException
         */
        private void processRecord( HarvestRepository repository, XMLFragment metaData, int index )
                                throws XMLParsingException, IOException, DBPoolException, SQLException {
            Record record = createOrGetRecord( source, metaData );
            records.put( record.getFileIdentifier(), record );
            try {
                String trans = null;
                HarvestOperation ho = getHarvestOperation( record, metaData );
                if ( ho == HarvestOperation.insert ) {
                    trans = createInsertRequest( metaData );
                } else if ( ho == HarvestOperation.update ) {
                    trans = createUpdateRequest( getID( metaData ), getIdentifierXPath( metaData ),
                                                 metaData );
                }
                if ( ho != HarvestOperation.nothing ) {
                    performTransaction( trans );
                    repository.storeRecord( record );
                } else {
                    LOG.logInfo( "nothing to Harvest" );
                }
            } catch ( Exception e ) {
                LOG.logError( Messages.format( "CatalogueHarvester.exception3", index, source ), e );
                reportFailure( e );
                records.remove( record.getFileIdentifier() );
            }
        }

        /**
         * Passes a harvesting failure to the owner's response handlers. A failure while doing
         * so is only logged, so that it cannot hide the original problem.
         *
         * @param e
         *            the failure to report
         */
        private void reportFailure( Exception e ) {
            try {
                owner.informResponseHandlers( source, e );
            } catch ( Exception ee ) {
                LOG.logError( "could not inform response handlers about a harvesting failure", ee );
            }
        }

        /**
         * Returns the namespace URI of the passed document's root element with every
         * "http://" removed; this value is the suffix of the message keys holding the XPaths.
         *
         * @param metaData
         *            metadata record, must not be <code>null</code>
         * @return key suffix derived from the root element's namespace
         */
        private String getNamespaceKey( XMLFragment metaData ) {
            String nspace = metaData.getRootElement().getNamespaceURI();
            return StringTools.replace( nspace, "http://", "", true );
        }

        /**
         * returns the XPath to the metadata record's identifier
         *
         * @param metaData
         * @return XPath configured for the record's namespace, or the ISO 19115 default if
         *         <code>metaData</code> is <code>null</code>
         */
        private String getIdentifierXPath( XMLFragment metaData ) {
            if ( metaData == null ) {
                // default is iso 19115
                return "iso19115:fileIdentifier/smXML:CharacterString";
            }
            return Messages.getString( "Identifier_" + getNamespaceKey( metaData ) );
        }

        /**
         * returns the XPath to the metadata record's dateStamp
         *
         * @param metaData
         * @return XPath configured for the record's namespace, or <code>null</code> if
         *         <code>metaData</code> is <code>null</code>
         */
        private String getDateStampXPath( XMLFragment metaData ) {
            if ( metaData == null ) {
                return null;
            }
            return Messages.getString( "dateStamp_" + getNamespaceKey( metaData ) );
        }

        /**
         * returns the identifier of a metadata record to enable its update and deletion
         *
         * @param metaData
         * @return the record's file identifier
         * @throws XMLParsingException
         *             if the record has no identifier at the expected XPath
         */
        private String getID( XMLFragment metaData )
                                throws XMLParsingException {
            String xpath = getIdentifierXPath( metaData );
            return XMLTools.getRequiredNodeAsString( metaData.getRootElement(), xpath, nsc );
        }

        /**
         * reads the date stamp of a metadata record
         *
         * @param metaData
         * @return the record's date stamp
         * @throws XMLParsingException
         *             if the record has no date stamp at the expected XPath
         */
        private Date readDateStamp( XMLFragment metaData )
                                throws XMLParsingException {
            String xpath = getDateStampXPath( metaData );
            String s = XMLTools.getRequiredNodeAsString( metaData.getRootElement(), xpath, nsc );
            return TimeTools.createCalendar( s ).getTime();
        }

        /**
         * Creates a csw:Constraint holding a filter that compares the property addressed by the
         * passed XPath with the passed identifier for equality.
         *
         * @param identifier
         *            value the property must be equal to
         * @param xPath
         *            name of the property to compare
         * @return the constraint as XML text
         */
        @Override
        protected StringBuffer createConstraint( String identifier, String xPath ) {
            // NOTE(review): identifier and xPath are inserted without XML escaping; values
            // containing '<' or '&' would yield a malformed request - confirm whether the
            // callers guarantee safe values.
            StringBuffer sb = new StringBuffer( 1000 );
            String s = StringTools.concat( 200, "<csw:Constraint><ogc:Filter>",
                                           "<ogc:PropertyIsEqualTo>", "<ogc:PropertyName>", xPath,
                                           "</ogc:PropertyName>", "<ogc:Literal>", identifier,
                                           "</ogc:Literal>", "</ogc:PropertyIsEqualTo>",
                                           "</ogc:Filter></csw:Constraint>" );
            sb.append( s );
            return sb;
        }

        /**
         * Checks for each record stored in the harvester cache whether it has been read from the
         * harvested catalogue during the current run; if not, the record is removed from the
         * cache and from the harvesting catalogue.
         *
         * @param source
         *            the harvested catalogue
         * @throws DBPoolException
         * @throws SQLException
         * @throws IOException
         * @throws OGCWebServiceException
         * @throws SAXException
         */
        private void deleteRecordsNoHostedAnymore( URI source )
                                throws DBPoolException, SQLException, IOException,
                                OGCWebServiceException, SAXException {
            HarvestRepository repository = HarvestRepository.getInstance();
            List<String> cache = repository.getAllRecords( source );
            LOG.logDebug( "harvest cache holds " + cache.size() + " records, " + records.size()
                          + " records have been read from the source" );
            int id = repository.getSourceID( source );
            for ( String fid : cache ) {
                Record record = records.remove( fid );
                if ( record == null ) {
                    // cached but not delivered by the source any more
                    repository.dropRecord( repository.new Record( id, null, fid, source ) );
                    String trans = createDeleteRequest( fid, null );
                    performTransaction( trans );
                }
            }
        }

        /**
         * Tries to read the record matching the passed metadata from the harvest repository. If
         * it is not stored there yet, a new record with source id -1 is created from the
         * metadata's identifier and date stamp.
         *
         * @param source
         *            the harvested catalogue
         * @param metaData
         *            the metadata record read from the source
         * @return the stored or newly created record
         * @throws XMLParsingException
         * @throws IOException
         * @throws DBPoolException
         * @throws SQLException
         */
        private Record createOrGetRecord( URI source, XMLFragment metaData )
                                throws XMLParsingException, IOException, DBPoolException,
                                SQLException {
            String fileIdentifier = getID( metaData );
            HarvestRepository repository = HarvestRepository.getInstance();
            Record record = repository.getRecordByID( source, fileIdentifier );
            if ( record == null ) {
                Date date = readDateStamp( metaData );
                record = repository.new Record( -1, date, fileIdentifier, source );
            }
            return record;
        }

        /**
         * Determines what operation shall be performed on a metadata record read from a remote
         * catalogue: a record with a negative source id is inserted, a record whose date stamp
         * differs from the one of the passed metadata is updated, otherwise nothing is done.
         *
         * @param record
         *            record as returned by {@link #createOrGetRecord(URI, XMLFragment)}
         * @param metaData
         *            the metadata record read from the source
         * @return the operation to perform
         * @throws XMLParsingException
         *             if the metadata has no date stamp at the expected XPath
         */
        private HarvestOperation getHarvestOperation( Record record, XMLFragment metaData )
                                throws XMLParsingException {
            if ( record.getSourceId() < 0 ) {
                return HarvestOperation.insert;
            }
            Date date = readDateStamp( metaData );
            if ( !date.equals( record.getDatestamp() ) ) {
                return HarvestOperation.update;
            }
            return HarvestOperation.nothing;
        }

        /**
         * Reads a single metadata record from the source by posting a CSW GetRecords request
         * with <code>maxRecords='1'</code>. The HTTP connection is released once the response
         * has been parsed.
         *
         * @param source
         *            address the request is posted to
         * @param index
         *            value sent as <code>startPosition</code>
         * @param type
         *            value sent as <code>typeNames</code> of the query
         * @return the first child of csw:SearchResults as root of the returned document or
         *         <code>null</code> if the response contains no such element
         * @throws IOException
         * @throws XMLException
         * @throws SAXException
         * @throws XMLParsingException
         */
        private XMLFragment getNextMetadataRecord( URI source, int index, String type )
                                throws IOException, XMLException, SAXException, XMLParsingException {
            StringBuffer sb = new StringBuffer( 200 );
            sb.append( "<csw:GetRecords xmlns:csw=\"http://www.opengis.net/cat/csw\" " );
            sb.append( "service=\"CSW\" version=\"2.0.0\" resultType=\"RESULTS\" " );
            sb.append( "outputFormat=\"text/xml\" outputSchema=\"csw:profile\" " );
            sb.append( "startPosition='" ).append( index ).append( "' " );
            sb.append( "maxRecords='1'><csw:Query typeNames='" );
            sb.append( type ).append( "'>" );
            sb.append( "<csw:ElementSetName>full</csw:ElementSetName>" );
            sb.append( "</csw:Query></csw:GetRecords>" );

            StringRequestEntity re = new StringRequestEntity( sb.toString() );
            PostMethod post = new PostMethod( source.toASCIIString() );
            post.setRequestEntity( re );
            HttpClient client = new HttpClient();
            XMLFragment xml = new XMLFragment();
            try {
                client.executeMethod( post );
                InputStream is = post.getResponseBodyAsStream();
                xml.load( is, source.toURL().toExternalForm() );
            } finally {
                // without this each harvested record would leave a connection open
                post.releaseConnection();
            }

            String xpath = "csw:SearchResults/child::*[1]";
            Node node = XMLTools.getNode( xml.getRootElement(), xpath, nsc );
            if ( node == null ) {
                return null;
            }
            xml.setRootElement( (Element) node );
            return xml;
        }
    }
}
/* ********************************************************************
Changes to this class. What the people have been up to:
$Log: CatalogueHarvester.java,v $
Revision 1.18 2006/07/12 14:46:17 poth
comment footer added
********************************************************************** */