/* * Constellation - An open source and standard compliant SDI * http://www.constellation-sdi.org * * Copyright 2014 Geomatys. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.constellation.metadata.harvest; // J2SE dependencies import org.constellation.metadata.DistributedResults; import org.constellation.metadata.io.MetadataIoException; import org.constellation.metadata.io.MetadataWriter; import org.constellation.ws.CstlServiceException; import org.geotoolkit.csw.xml.GetRecordsRequest; import org.geotoolkit.csw.xml.v202.GetRecordByIdResponseType; import org.geotoolkit.ows.xml.v100.ExceptionReport; import org.w3c.dom.Node; import javax.xml.bind.JAXBElement; import javax.xml.bind.JAXBException; import javax.xml.bind.Unmarshaller; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.net.MalformedURLException; import java.net.URL; import java.net.URLConnection; import java.sql.SQLException; import java.util.ArrayList; import java.util.List; import java.util.logging.Level; import static org.geotoolkit.ows.xml.OWSExceptionCode.NO_APPLICABLE_CODE; import static org.geotoolkit.ows.xml.OWSExceptionCode.OPERATION_NOT_SUPPORTED; // JAXB dependencies // Constellation dependencies // Geotoolkit dependencies /** * This catalogue harvester is a special tool used to harvest a CSW. * we must have a list of identifier stored in n file named id0, id1, ....idn * * Each file must have one identifier by line. * * @author Guilhem Legal */ public class ByIDHarvester extends CatalogueHarvester { /** * Default GET GetRecordById request to complete with an identifier. */ private static final String GET_RECORD_BY_ID_REQUEST = "?service=CSW&request=getRecordbyid&version=2.0.2&outputSchema=http://www.isotc211.org/2005/gmd&outputformat=text/xml&ELEMENTSETNAME=full&id="; /** * The path of the directory containing the files fill with identifiers. */ private String identifierDirectoryPath; /** * Build a new catalogue harvester using a list of identifiers stored in one or more Files. * * @param metadataWriter The Writer allowing to store the metadata in the datasource. * @param identifierDirectory a Path of the directory containing the identifier files. * * @throws MetadataIoException If the parameters identifierDirectory does not point to a valid and existing directory, * or if its {@code null}. */ public ByIDHarvester(MetadataWriter metadataWriter, String identifierDirectory) throws MetadataIoException { super(metadataWriter); if (identifierDirectory != null) { identifierDirectoryPath = identifierDirectory; final File f = new File(identifierDirectoryPath); if (f.exists() && f.isDirectory()) { LOGGER.log(Level.INFO, "Getting identifier file from :{0}", f.getPath()); } else { throw new MetadataIoException("The identifierDirectory does not exist or is not a directory:" + f.getPath()); } } else { throw new MetadataIoException("The identifierDirectory is null."); } } /** * Parse the identifier file named "id + currentPath" and return a List of string. * The list is constitued of each line of the file. * * @param currentFile an integer pointing to the current file to read. * @return A list of identifier correspoundong of each line of the identifier file. */ private List<String> parseIdentifierFile(int currentFile) { final List<String> result = new ArrayList<>(); try { final File f = new File(identifierDirectoryPath + "id" + currentFile); if (!f.exists()) { LOGGER.log(Level.WARNING, "the file " + identifierDirectoryPath + "id{0} does not exist", currentFile); return result; } final FileInputStream in = new FileInputStream(f); final InputStreamReader ipsr = new InputStreamReader(in); final BufferedReader br = new BufferedReader(ipsr); //we skip the character already read String line; while ((line = br.readLine()) != null) { result.add(line); } in.close(); ipsr.close(); br.close(); } catch (FileNotFoundException ex) { LOGGER.log(Level.SEVERE, null, ex); } catch (IOException ex) { LOGGER.log(Level.SEVERE, null, ex); } return result; } /** * Harvest another CSW service by getting all this records ans storing it into the database * * @param sourceURL The URL of the distant CSW service * * @return An array containing: the number of inserted records, the number of updated records and the number of deleted records. */ @Override public int[] harvestCatalogue(String sourceURL) throws MalformedURLException, IOException, CstlServiceException, SQLException { if (metadataWriter == null) { throw new CstlServiceException("The Service can not write into the database", OPERATION_NOT_SUPPORTED, "Harvest"); } //we initialize the getRecords request int nbRecordInserted = 0; final int nbRecordUpdated = 0; boolean succeed = false; //we prepare to store the distant serviceException and send it later if this is necessary final List<CstlServiceException> distantException = new ArrayList<>(); for (int i = 0; i < 100; i++) { final List<String> identifiers = parseIdentifierFile(i); //we make multiple request by pack of 20 record for (String identifier : identifiers) { LOGGER.log(Level.INFO, "current identifier:{0}", identifier); final String currentSourceURL = sourceURL + GET_RECORD_BY_ID_REQUEST + identifier; final Object harvested = sendRequest(currentSourceURL); // if the service respond correctly if (harvested instanceof GetRecordByIdResponseType) { succeed = true; LOGGER.log(Level.INFO, "Response of distant service received for: {0}", identifier); final GetRecordByIdResponseType serviceResponse = (GetRecordByIdResponseType) harvested; //we looking for any record type for (Object otherRecordObj: serviceResponse.getAny()) { if (!(otherRecordObj instanceof Node)){ throw new CstlServiceException("object has been unmarshalled."); } else { LOGGER.log(Level.FINER, "record Type: {0}", otherRecordObj.getClass().getSimpleName()); } final Node otherRecord = (Node)otherRecordObj; //Temporary ugly patch TODO handle update in CSW try { if (metadataWriter.storeMetadata(otherRecord)) { nbRecordInserted++; } else { LOGGER.log(Level.INFO, "The record:{0} has not been recorded", identifier); } } catch (IllegalArgumentException e) { LOGGER.log(Level.WARNING, "Illegal argument while storing the record:" + identifier, e); } catch (MetadataIoException ex) { throw new CstlServiceException(ex, NO_APPLICABLE_CODE); } } /* * We have receved an error */ } else if (harvested instanceof ExceptionReport) { final ExceptionReport ex = (ExceptionReport) harvested; final CstlServiceException exe = new CstlServiceException("The distant service has throw a webService exception: " + ex.getException().get(0), NO_APPLICABLE_CODE); LOGGER.log(Level.WARNING, "The distant service has throw a webService exception: \n{0}", exe.toString()); distantException.add(exe); // if we obtain an object that we don't expect } else if (harvested == null) { final CstlServiceException exe = new CstlServiceException("The distant service does not respond correctly.", NO_APPLICABLE_CODE); LOGGER.severe("The distant service does not respond correctly"); distantException.add(exe); } else { throw new CstlServiceException("The distant service does not respond correctly: unexpected response type: " + harvested.getClass().getSimpleName(), NO_APPLICABLE_CODE); } } if (!succeed && distantException.size() > 0) { throw distantException.get(0); } LOGGER.info("file id" + i + " done. total inserted:" + nbRecordInserted + " total updated:" + nbRecordUpdated); } final int[] result = new int [3]; result[0] = nbRecordInserted; result[1] = nbRecordUpdated; result[2] = 0; return result; } /** * Send a request to another CSW service. * * @param sourceURL the url of the distant web-service * @param request The XML object to send in POST mode (if null the request is GET) * * @return The object correspounding to the XML response of the distant web-service * * @throws java.net.MalformedURLException * @throws java.io.IOException * @throws org.constellation.coverage.web.CstlServiceException */ private Object sendRequest(String sourceURL) throws MalformedURLException, CstlServiceException, IOException { final URL source = new URL(sourceURL); final URLConnection conec = source.openConnection(); Object harvested = null; try { // we get the response document final InputStream in = conec.getInputStream(); try { final Unmarshaller unmarshaller = marshallerPool.acquireUnmarshaller(); harvested = unmarshaller.unmarshal(in); marshallerPool.recycle(unmarshaller); if (harvested instanceof JAXBElement) { harvested = ((JAXBElement) harvested).getValue(); } in.close(); } catch (JAXBException | IllegalAccessError ex) { LOGGER.log(Level.WARNING, "The distant service does not respond correctly: unable to unmarshall response document.\ncause: {0}", ex.getMessage()); } } catch (IOException ex) { LOGGER.log(Level.WARNING, "The Distant service have made an error", ex); return null; } return harvested; } /** * Transfer The request to all the servers specified in distributedServers. * * @return */ @Override public DistributedResults transferGetRecordsRequest(GetRecordsRequest request, List<String> distributedServers, int startPosition, int maxRecords) { throw new UnsupportedOperationException("IGN Harvester only support harvesting"); } @Override protected InputStream getSingleMetadata(String sourceURL) throws CstlServiceException { try { final URL source = new URL(sourceURL); final URLConnection conec = source.openConnection(); return conec.getInputStream(); } catch (IOException ex) { throw new CstlServiceException(ex); } } }