package org.bbaw.wsp.cms.collections; import java.io.File; import java.io.FileOutputStream; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; import java.util.Date; import java.util.Hashtable; import java.util.List; import java.util.logging.Logger; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import org.bbaw.wsp.cms.dochandler.DocumentHandler; import org.bbaw.wsp.cms.dochandler.parser.text.parser.EdocIndexMetadataFetcherTool; import org.bbaw.wsp.cms.document.MetadataRecord; import org.bbaw.wsp.cms.document.XQuery; import org.bbaw.wsp.cms.scheduler.CmsDocOperation; import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; import org.w3c.dom.Document; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import com.sun.org.apache.xml.internal.serialize.XMLSerializer; public class CollectionManager { private static Logger LOGGER = Logger.getLogger(CollectionManager.class.getName()); private CollectionReader collectionReader; // has the collection infos of the configuration files private int counter = 0; private static CollectionManager confManager; public static CollectionManager getInstance() throws ApplicationException { if(confManager == null) { confManager = new CollectionManager(); confManager.init(); } return confManager; } private void init() throws ApplicationException { collectionReader = CollectionReader.getInstance(); } /** * Update of all collections (if update parameter in configuration is true) */ public void updateCollections() throws ApplicationException { updateCollections(false); } /** * Update of all collections * @param forceUpdate if true then update is always done (also if update parameter in configuration is false) * @throws ApplicationException */ public void updateCollections(boolean forceUpdate) throws ApplicationException { ArrayList<Collection> collections = collectionReader.getCollections(); for (Collection collection : collections) { updateCollection(collection, forceUpdate); } } /** * Update of the collection with that collectionId (if update parameter in configuration is true) * @param collectionId * @throws ApplicationException */ public void updateCollection(String collectionId) throws ApplicationException { updateCollection(collectionId, false); } /** * Update of the collection with that collectionId * @param collectionId * @param forceUpdate if true then update is always done (also if update parameter in configuration is false) * @throws ApplicationException */ public void updateCollection(String collectionId, boolean forceUpdate) throws ApplicationException { Collection collection = collectionReader.getCollection(collectionId); updateCollection(collection, forceUpdate); } private void updateCollection(Collection collection, boolean forceUpdate) throws ApplicationException { boolean isUpdateNecessary = collection.isUpdateNecessary(); if (isUpdateNecessary || forceUpdate) { String[] collectionDataUrls = collection.getDataUrls(); String excludesStr = collection.getExcludesStr(); if (collectionDataUrls != null) { List<String> collectionDocumentUrls = new ArrayList<String>(); for (int i=0; i<collectionDataUrls.length; i++) { String url = collectionDataUrls[i]; if (url.endsWith("/")) { url = url.substring(0, url.length() - 1); List<String> collectionDocumentUrlsTemp = extractDocumentUrls(url, excludesStr); collectionDocumentUrls.addAll(collectionDocumentUrlsTemp); } else { collectionDocumentUrls.add(url); } } collection.setDocumentUrls(collectionDocumentUrls); } addDocuments(collection); String configFileName = collection.getConfigFileName(); File configFile = new File(configFileName); setUpdate(configFile, false); } } private void addDocuments(Collection collection) throws ApplicationException { DocumentHandler docHandler = new DocumentHandler(); ArrayList<MetadataRecord> mdRecords = getMetadataRecords(collection); for (int i=0; i<mdRecords.size(); i++) { MetadataRecord mdRecord = mdRecords.get(i); String docUrl = mdRecord.getUri(); String docId = mdRecord.getDocId(); String collectionId = mdRecord.getCollectionNames(); counter++; Date now = new Date(); LOGGER.info(counter + ". " + now.toString() + " Collection: " + collectionId + ": Create: " + docId); CmsDocOperation docOp = new CmsDocOperation("create", docUrl, null, docId); docOp.setMdRecord(mdRecord); ArrayList<String> fields = collection.getFields(); if (fields != null) { String[] fieldsArray = new String[fields.size()]; for (int j=0;j<fields.size();j++) { String f = fields.get(j); fieldsArray[j] = f; } docOp.setElementNames(fieldsArray); } String mainLanguage = collection.getMainLanguage(); docOp.setMainLanguage(mainLanguage); try { docHandler.doOperation(docOp); } catch (Exception e) { e.printStackTrace(); } } } private ArrayList<MetadataRecord> getMetadataRecords(Collection collection) throws ApplicationException { ArrayList<MetadataRecord> mdRecords = null; try { String collectionId = collection.getId(); String dataUrlPrefix = collection.getDataUrlPrefix(); String metadataUrlPrefix = collection.getMetadataUrlPrefix(); List<String> documentUrls = collection.getDocumentUrls(); String[] metadataUrls = collection.getMetadataUrls(); String metadataUrlType = collection.getMetadataUrlType(); // single or many Hashtable<String, XQuery> xQueries = collection.getxQueries(); if (metadataUrls != null) { if (metadataUrlType != null && metadataUrlType.equals("single")) { mdRecords = new ArrayList<MetadataRecord>(); for (int i=0; i<metadataUrls.length; i++) { String metadataUrl = metadataUrls[i]; MetadataRecord mdRecord = new MetadataRecord(); String uri = null; String docId = null; if (collectionId.equals("edoc")) { EdocIndexMetadataFetcherTool.fetchHtmlDirectly(metadataUrl, mdRecord); String httpEdocUrl = mdRecord.getRealDocUrl(); if (httpEdocUrl != null) { String docIdTmp = httpEdocUrl.replaceAll(metadataUrlPrefix, ""); docId = "/" + collectionId + docIdTmp; String fileEdocUrl = "file:" + dataUrlPrefix + docIdTmp; uri = fileEdocUrl; } else { LOGGER.severe("Fetching metadata failed for: " + metadataUrl + " (no url in index.html found)"); } } else { // TODO } if (docId != null && uri != null) { mdRecord.setDocId(docId); mdRecord.setUri(uri); mdRecord.setCollectionNames(collectionId); String mainLanguage = collection.getMainLanguage(); mdRecord.setLanguage(mainLanguage); mdRecord.setSchemaName(null); mdRecord.setxQueries(xQueries); mdRecords.add(mdRecord); } } } else { // TODO } } if (metadataUrls == null && documentUrls != null) { mdRecords = new ArrayList<MetadataRecord>(); for (int i=0; i<documentUrls.size(); i++) { MetadataRecord mdRecord = new MetadataRecord(); String docUrl = documentUrls.get(i); URL uri = new URL(docUrl); String uriPath = uri.getPath(); String prefix = collection.getDataUrlPrefix(); if (prefix == null) prefix = "/exist/rest/db"; if(uriPath.startsWith(prefix)){ uriPath = uriPath.substring(prefix.length()); } String docId = "/" + collectionId + uriPath; mdRecord.setDocId(docId); mdRecord.setUri(docUrl); mdRecord.setCollectionNames(collectionId); mdRecord.setxQueries(xQueries); mdRecords.add(mdRecord); } } } catch (MalformedURLException e) { throw new ApplicationException(e); } return mdRecords; } private void setUpdate(File configFile, boolean update) throws ApplicationException { try { // flag im Konfigurations-File auf false setzen durch Serialisierung in das File DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance(); DocumentBuilder builder = docFactory.newDocumentBuilder(); Document configFileDocument = builder.parse(configFile); NodeList updateNodeList = configFileDocument.getElementsByTagName("update"); Node n = updateNodeList.item(0); if (update) n.setTextContent("true"); else n.setTextContent("false"); FileOutputStream os = new FileOutputStream(configFile); XMLSerializer ser = new XMLSerializer(os, null); ser.serialize(configFileDocument); // Vorsicht: wenn es auf true ist: es wird alles neu indexiert } catch (Exception e) { throw new ApplicationException(e); } } private List<String> extractDocumentUrls(String collectionDataUrl, String excludesStr) { List<String> documentUrls = null; if (! collectionDataUrl.equals("")){ PathExtractor extractor = new PathExtractor(); documentUrls = extractor.initExtractor(collectionDataUrl, excludesStr); } return documentUrls; } }