/* =============================================================================== * * Part of the InfoGlue Content Management Platform (www.infoglue.org) * * =============================================================================== * * Copyright (C) * * This program is free software; you can redistribute it and/or modify it under * the terms of the GNU General Public License version 2, as published by the * Free Software Foundation. See the file LICENSE.html for more information. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY, including the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program; if not, write to the Free Software Foundation, Inc. / 59 Temple * Place, Suite 330 / Boston, MA 02111-1307 / USA. * * =============================================================================== */ package org.infoglue.cms.controllers.kernel.impl.simple; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStreamWriter; import java.io.StringReader; import java.io.Writer; import java.nio.channels.OverlappingFileLockException; import java.util.ArrayList; import java.util.Calendar; import java.util.Collection; import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.DateTools; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.NumericField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; import org.apache.lucene.queryParser.MultiFieldQueryParser; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Sort; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.NIOFSDirectory; import org.apache.lucene.store.SingleInstanceLockFactory; import org.apache.lucene.util.Version; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.extractor.WordExtractor; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.exolab.castor.jdo.Database; import org.exolab.castor.jdo.OQLQuery; import org.exolab.castor.jdo.QueryResults; import org.infoglue.cms.applications.databeans.ProcessBean; import org.infoglue.cms.entities.content.Content; import org.infoglue.cms.entities.content.ContentCategory; import org.infoglue.cms.entities.content.ContentVO; import org.infoglue.cms.entities.content.ContentVersion; import org.infoglue.cms.entities.content.ContentVersionVO; import org.infoglue.cms.entities.content.DigitalAsset; import org.infoglue.cms.entities.content.DigitalAssetVO; import 
org.infoglue.cms.entities.content.SmallestContentVersionVO;
import org.infoglue.cms.entities.content.impl.simple.ContentImpl;
import org.infoglue.cms.entities.content.impl.simple.ContentVersionImpl;
import org.infoglue.cms.entities.content.impl.simple.DigitalAssetImpl;
import org.infoglue.cms.entities.content.impl.simple.MediumDigitalAssetImpl;
import org.infoglue.cms.entities.content.impl.simple.SmallestContentVersionImpl;
import org.infoglue.cms.entities.kernel.BaseEntityVO;
import org.infoglue.cms.entities.management.CategoryAttribute;
import org.infoglue.cms.entities.management.ContentTypeDefinitionVO;
import org.infoglue.cms.entities.management.LanguageVO;
import org.infoglue.cms.entities.structure.SiteNode;
import org.infoglue.cms.entities.structure.SiteNodeVO;
import org.infoglue.cms.entities.structure.SiteNodeVersion;
import org.infoglue.cms.entities.structure.SiteNodeVersionVO;
import org.infoglue.cms.entities.structure.impl.simple.PureSiteNodeImpl;
import org.infoglue.cms.entities.structure.impl.simple.SiteNodeImpl;
import org.infoglue.cms.entities.structure.impl.simple.SiteNodeVersionImpl;
import org.infoglue.cms.entities.structure.impl.simple.SmallSiteNodeImpl;
import org.infoglue.cms.exception.SystemException;
import org.infoglue.cms.util.CmsPropertyHandler;
import org.infoglue.cms.util.NotificationListener;
import org.infoglue.cms.util.NotificationMessage;
import org.infoglue.deliver.util.CacheController;
import org.infoglue.deliver.util.RequestAnalyser;
import org.infoglue.deliver.util.Timer;
import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.util.PDFTextStripper;

public class LuceneController extends BaseController implements NotificationListener
{
    private static Directory directory = null;
    private static IndexWriter writer = null;
    private static IndexReader indexReader = null;
    private static int reopened = 0;
    private final static Logger logger = Logger.getLogger(LuceneController.class.getName());
    private static int indexedDocumentsSinceLastOptimize = 0;
    private Integer lastCommitedContentVersionId = -1;
    private static Integer numberOfVersionToIndexInBatch = 1000;

    private static AtomicBoolean indexingInitialized = new AtomicBoolean(false);
    private static AtomicBoolean stopIndexing = new AtomicBoolean(false);
    private static AtomicBoolean deleteIndexOnStop = new AtomicBoolean(false);

    public static void setNumberOfVersionToIndexInBatch(Integer numberOfVersionToIndexInBatch)
    {
        //Assign to the static field - the parameter shadows it.
        LuceneController.numberOfVersionToIndexInBatch = numberOfVersionToIndexInBatch;
    }

    public static void stopIndexing()
    {
        stopIndexing.set(true);
    }

    /**
     * Factory method returning a new controller instance.
     */
    public static LuceneController getController()
    {
        return new LuceneController();
    }

    private static List<NotificationMessage> qeuedMessages = new ArrayList<NotificationMessage>();
    private static List<NotificationMessage> maturedQeuedMessages = new ArrayList<NotificationMessage>();

    private StandardAnalyzer getStandardAnalyzer() throws Exception
    {
        return new StandardAnalyzer(Version.LUCENE_34);
    }

    private Directory getDirectory() throws Exception
    {
        if(LuceneController.directory != null)
            return directory;

        String index = CmsPropertyHandler.getContextDiskPath() + File.separator + "lucene" + File.separator + "index";
        index = index.replaceAll("//", "/");
        //System.out.println("index:" + index);
        File INDEX_DIR = new File(index);
        directory = new NIOFSDirectory(INDEX_DIR);
        directory.setLockFactory(new SingleInstanceLockFactory());

        boolean indexExists = IndexReader.indexExists(directory);
        if(!indexExists)
        {
            createIndex(directory);
        }

        return directory;
    }

    private
void createIndex(Directory directory) throws Exception { IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_34, getStandardAnalyzer()); IndexWriter indexWriter = new IndexWriter(directory, config); indexWriter.deleteDocuments(new Term("initializer", "true")); indexWriter.close(true); } private IndexWriter getIndexWriter() throws Exception { //Singleton returns if(writer != null) return writer; Timer t = new Timer(); Directory directory = getDirectory(); StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_34); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_34, analyzer); if(getIsIndexedLocked(true)) { logger.warn("Directory is locked - leaving the messages in the qeuedMessages list..."); throw new Exception("Lock not granted"); } else { writer = new IndexWriter(directory, config); return writer; } } private IndexReader getIndexReader() throws Exception { if(indexReader == null) { indexReader = IndexReader.open(getDirectory(), true); } synchronized (indexReader) { if(!indexReader.isCurrent()) { reopened++; indexReader.close(); indexReader = IndexReader.open(getDirectory(), true); //indexReader = IndexReader.openIfChanged(indexReader, true); logger.info("reopened:" + reopened); } } return indexReader; } private IndexSearcher getIndexSearcher() throws Exception { return new IndexSearcher(getIndexReader()); } private Boolean getIsIndexedLocked() throws Exception { return getIsIndexedLocked(false); } private Boolean getIsIndexedLocked(boolean returnIfFileLockException) throws Exception { Directory directory = getDirectory(); try { return IndexWriter.isLocked(directory); } catch (OverlappingFileLockException e) { return returnIfFileLockException; } } private void unlockIndex() throws Exception { Directory directory = getDirectory(); IndexWriter.unlock(directory); } public Map<String,Object> getIndexInformation() throws Exception { Map<String,Object> info = new HashMap<String,Object>(); try { Directory directory = getDirectory(); IndexReader reader = getIndexReader(); int maxDoc = reader.maxDoc(); int numDoc = reader.numDocs(); long lastModified = getIndexReader().lastModified(directory); info.put("maxDoc", new Integer(maxDoc)); info.put("numDoc", new Integer(numDoc)); info.put("lastModified", new Date(lastModified)); info.put("lastCommitedContentVersionId", getLastCommitedContentVersionId()); List<LanguageVO> languageVOList = LanguageController.getController().getLanguageVOList(); Iterator<LanguageVO> languageVOListIterator = languageVOList.iterator(); outer:while(languageVOListIterator.hasNext()) { LanguageVO languageVO = (LanguageVO)languageVOListIterator.next(); info.put("indexAllLastCommittedContentVersionId_" + languageVO.getId(), getIndexAllLastCommittedContentVersionId(languageVO.getId())); info.put("indexAllLastCommittedMetaContentVersionId_" + languageVO.getId(), getIndexAllLastCommittedMetaContentVersionId(languageVO.getId())); } //reader.close(); //directory.close(); } catch (Exception e) { logger.error("Error creating index:" + e.getMessage(), e); throw e; } return info; } public Integer getIndexAllLastCommittedContentVersionId(Integer languageId) throws Exception { Integer indexAllLastCommittedContentVersionId = null; try { Document indexAllDocumentMetaData = getIndexAllStatusDocument(); if(indexAllDocumentMetaData != null && indexAllDocumentMetaData.get("lastCommitedContentVersionId_" + languageId) != null && !indexAllDocumentMetaData.get("lastCommitedContentVersionId_" + languageId).equals("null")) indexAllLastCommittedContentVersionId = 
new Integer(indexAllDocumentMetaData.get("lastCommitedContentVersionId_" + languageId)); } catch (Exception e) { logger.error("Error creating index:" + e.getMessage(), e); throw e; } return indexAllLastCommittedContentVersionId; } public Integer getIndexAllLastCommittedMetaContentVersionId(Integer languageId) throws Exception { Integer indexAllLastCommittedSiteNodeVersionId = null; try { Document indexAllDocumentMetaData = getIndexAllStatusDocument(); if(indexAllDocumentMetaData != null && indexAllDocumentMetaData.get("lastCommitedMetaContentVersionId_" + languageId) != null && !indexAllDocumentMetaData.get("lastCommitedMetaContentVersionId_" + languageId).equals("null")) indexAllLastCommittedSiteNodeVersionId = new Integer(indexAllDocumentMetaData.get("lastCommitedMetaContentVersionId_" + languageId)); } catch (Exception e) { logger.error("Error creating index:" + e.getMessage(), e); throw e; } return indexAllLastCommittedSiteNodeVersionId; } public Document createStatusDocument(Integer lastCommitedContentVersionId) throws Exception { Document doc = new Document(); doc.add(new Field("lastCommitedContentVersionId", "" + lastCommitedContentVersionId, Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("lastCommitedModifiedDate", "" + new Date().getTime(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("meta", new StringReader("lastCommitedContentVersionId"))); return doc; } public Document getStatusDocument() throws Exception { List<Document> docs = queryDocuments("meta", "lastCommitedContentVersionId", 5); logger.info(docs.size() + " total matching documents for 'lastCommitedContentVersionId'"); return (docs != null && docs.size() > 0 ? docs.get(0) : null); } public Document getIndexAllStatusDocument() throws Exception { List<Document> docs = queryDocuments(new Term("meta", "indexAllRunning"), 5); logger.info(docs.size() + " total matching documents for 'indexAllRunning'"); return (docs != null && docs.size() > 0 ? 
docs.get(0) : null); } public Integer getLastCommitedContentVersionId() throws Exception { Integer lastCommitedContentVersionId = -1; Document doc = getStatusDocument(); logger.info("STATUS doc:" + doc); if(doc != null) { String lastCommitedContentVersionIdString = doc.get("lastCommitedContentVersionId"); logger.info("doc:" + doc); logger.info("lastCommitedContentVersionId:" + lastCommitedContentVersionIdString); lastCommitedContentVersionId = Integer.parseInt(lastCommitedContentVersionIdString); } return lastCommitedContentVersionId; } private void setLastCommitedContentVersionId(IndexWriter writer, Integer lastCommitedContentVersionId) throws Exception { Integer prevLastCommitedContentVersionId = getLastCommitedContentVersionId(); logger.info("prevLastCommitedContentVersionId:" + prevLastCommitedContentVersionId); logger.info("lastCommitedContentVersionId:" + lastCommitedContentVersionId); if(lastCommitedContentVersionId == -1 || prevLastCommitedContentVersionId > lastCommitedContentVersionId) return; logger.info("setLastCommitedContentVersionId:" + lastCommitedContentVersionId); Query query = new QueryParser(Version.LUCENE_34, "meta", getStandardAnalyzer()).parse("lastCommitedContentVersionId"); writer.deleteDocuments(query); writer.addDocument(createStatusDocument(lastCommitedContentVersionId)); } public Date getLastCommitedModifiedDate() throws Exception { Date lastCommitedModifiedDate = new Date(10000); Document doc = getStatusDocument(); if(doc != null) { String lastCommitedModifiedDateString = doc.get("lastCommitedModifiedDate"); logger.info("doc:" + doc); logger.info("lastCommitedModifiedDate:" + lastCommitedModifiedDateString); Date d = new Date(); d.setTime(Long.parseLong(lastCommitedModifiedDateString)); lastCommitedModifiedDate = d; } return lastCommitedModifiedDate; } private void registerIndexAllProcessOngoing(Integer lastCommitedContentVersionId, Integer lastCommitedSiteNodeVersionId, Integer languageId) throws Exception { //Document doc = new Document(); IndexWriter writer = getIndexWriter(); IndexSearcher searcher = getIndexSearcher(); Term term = new Term("meta", "indexAllRunning"); TermQuery query = new TermQuery(term); //Query query = new QueryParser(Version.LUCENE_34, "meta", getStandardAnalyzer()).parse("indexAllRunning"); TopDocs hits = searcher.search(query, 50); //System.out.println("hits:" + hits); //System.out.println("hits.scoreDocs.length:" + hits.scoreDocs.length); if(hits.scoreDocs.length > 1) System.out.println("Must be wrong - should only be one of these docs:" + hits.scoreDocs.length); if(hits.scoreDocs.length > 0) { for(ScoreDoc scoreDoc : hits.scoreDocs) { org.apache.lucene.document.Document docExisting = searcher.doc(scoreDoc.doc); //System.out.println("Updating doc...:" + docExisting); //System.out.println("lastCommitedContentVersionId:" + lastCommitedContentVersionId); //System.out.println("lastCommitedSiteNodeVersionId:" + lastCommitedSiteNodeVersionId); //System.out.println("languageId:" + languageId); if(lastCommitedContentVersionId != null && lastCommitedContentVersionId != -1) { docExisting.removeFields("lastCommitedContentVersionId_" + languageId); docExisting.add(new Field("lastCommitedContentVersionId_" + languageId, "" + lastCommitedContentVersionId, Field.Store.YES, Field.Index.NOT_ANALYZED)); } if(lastCommitedSiteNodeVersionId != null && lastCommitedSiteNodeVersionId != -1) { docExisting.removeFields("lastCommitedMetaContentVersionId_" + languageId); docExisting.add(new Field("lastCommitedMetaContentVersionId_" + languageId, "" + 
lastCommitedSiteNodeVersionId, Field.Store.YES, Field.Index.NOT_ANALYZED));
                }

                docExisting.removeFields("lastCommitedModifiedDate");
                docExisting.add(new Field("lastCommitedModifiedDate", "" + new Date().getTime(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                //docExisting.add(new Field("meta", new StringReader("indexAllRunning")));
                //docExisting.add(new Field("meta", "indexAllRunning", Field.Store.YES, Field.Index.NOT_ANALYZED));

                writer.updateDocument(term, docExisting);
                //System.out.println("Updating doc...:" + docExisting);
                //Term t = new Term("meta", "indexAllRunning");
                break;
            }
        }
        else
        {
            Document docExisting = new Document();
            //System.out.println("lastCommitedContentVersionId:" + lastCommitedContentVersionId);
            //System.out.println("lastCommitedSiteNodeVersionId:" + lastCommitedSiteNodeVersionId);
            //System.out.println("languageId:" + languageId);
            if(lastCommitedContentVersionId != null)
                docExisting.add(new Field("lastCommitedContentVersionId_" + languageId, "" + lastCommitedContentVersionId, Field.Store.YES, Field.Index.NOT_ANALYZED));
            if(lastCommitedSiteNodeVersionId != null)
                docExisting.add(new Field("lastCommitedMetaContentVersionId_" + languageId, "" + lastCommitedSiteNodeVersionId, Field.Store.YES, Field.Index.NOT_ANALYZED));

            docExisting.add(new Field("lastCommitedModifiedDate", "" + new Date().getTime(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            //docExisting.add(new Field("meta", new StringReader("indexAllRunning")));
            docExisting.add(new Field("meta", "indexAllRunning", Field.Store.YES, Field.Index.NOT_ANALYZED));

            writer.addDocument(docExisting);
        }

        searcher.close();

        //Query query = new QueryParser(Version.LUCENE_34, "meta", getStandardAnalyzer()).parse("indexAllRunning");
        //writer.deleteDocuments(query);
        //writer.updateDocument(term, doc);
        //writer.addDocument(doc);
        //writer.close(true);
        writer.commit();
    }

    private void registerIndexAllProcessDone() throws Exception
    {
        IndexWriter writer = getIndexWriter();

        //Query query = new QueryParser(Version.LUCENE_34, "meta", getStandardAnalyzer()).parse("indexAllRunning");
        Term term = new Term("meta", "indexAllRunning");
        TermQuery query = new TermQuery(term);
        writer.deleteDocuments(query);
        writer.commit();
    }

    public void clearIndex() throws Exception
    {
        if (indexingInitialized.compareAndSet(false, true))
        {
            logger.warn("Clearing index..");
            try
            {
                logger.info("NumDocs:" + getIndexReader().numDocs());
                IndexWriter writer = getIndexWriter();
                writer.deleteAll();
                //writer.close(true);
                writer.commit();
                logger.info("NumDocs after delete:" + getIndexReader().numDocs());
            }
            catch (Exception e)
            {
                stopIndexing.set(true);
                deleteIndexOnStop.set(true);
                logger.error("Error clearing index:" + e.getMessage(), e);
            }
            finally
            {
                logger.info("Releasing indexing flag");
                this.indexingInitialized.set(false);
                stopIndexing.set(false);
            }
        }
        else
        {
            stopIndexing.set(true);
            deleteIndexOnStop.set(true);
            logger.error("Could not delete index while indexing. Queueing it....");
        }
    }

    public TopDocs query(String text, Integer numberOfHits) throws Exception
    {
        return query("contents", text, numberOfHits);
    }

    public TopDocs query(String field, String text, Integer numberOfHits) throws Exception
    {
        IndexSearcher searcher = getIndexSearcher();
        Query query = new QueryParser(Version.LUCENE_34, "contents", getStandardAnalyzer()).parse(text);
        TopDocs hits = searcher.search(query, numberOfHits);
        logger.info(hits.totalHits + " total matching documents for '" + text + "'");
        return hits;
    }

    public List<Document> queryDocuments(Term term, Integer numberOfHits) throws Exception
    {
        IndexSearcher searcher = getIndexSearcher();
        Query query = new TermQuery(term);
        TopDocs hits = searcher.search(query, numberOfHits);
        logger.info(hits.totalHits + " total matching documents for '" + term.field() + ":" + term.text() + "'");

        List<Document> docs = new ArrayList<Document>();
        for(ScoreDoc scoreDoc : hits.scoreDocs)
        {
            org.apache.lucene.document.Document doc = searcher.doc(scoreDoc.doc);
            docs.add(doc);
        }
        searcher.close();

        return docs;
    }

    public List<Document> queryDocuments(String field, String text, Integer numberOfHits) throws Exception
    {
        IndexSearcher searcher = getIndexSearcher();
        Query query = new QueryParser(Version.LUCENE_34, field, getStandardAnalyzer()).parse(text);
        logger.info("query:" + query);
        TopDocs hits = searcher.search(query, numberOfHits);
        logger.info(hits.totalHits + " total matching documents for '" + field + ":" + text + "'");

        List<Document> docs = new ArrayList<Document>();
        for(ScoreDoc scoreDoc : hits.scoreDocs)
        {
            org.apache.lucene.document.Document doc = searcher.doc(scoreDoc.doc);
            docs.add(doc);
        }
        searcher.close();

        return docs;
    }

    public TopDocs query(String[] fields, BooleanClause.Occur[] flags, String[] queries, Sort sort, Integer numberOfHits) throws Exception
    {
        IndexSearcher searcher = getIndexSearcher();
        Query query = MultiFieldQueryParser.parse(Version.LUCENE_34, queries, fields, flags, getStandardAnalyzer());
        //Query query = new QueryParser(Version.LUCENE_34, "contents", getStandardAnalyzer()).parse(text);
        TopDocs hits = searcher.search(query, numberOfHits);
        logger.info(hits.totalHits + " total matching documents for '" + queries + "'");
        return hits;
    }

    public List<Document> queryDocuments(String[] fields, BooleanClause.Occur[] flags, String[] queries, Sort sort, Integer numberOfHits, Map searchMetaData) throws Exception
    {
        IndexSearcher searcher = getIndexSearcher();
        Query query = MultiFieldQueryParser.parse(Version.LUCENE_34, queries, fields, flags, getStandardAnalyzer());
        logger.info("query:" + query);
        //Query query = new QueryParser(Version.LUCENE_34, "contents", getStandardAnalyzer()).parse(text);
        TopDocs hits = searcher.search(query, numberOfHits);
        searchMetaData.put("totalHits", hits.totalHits);
        logger.info(hits.totalHits + " total matching documents for '" + query + "'");
        //System.out.println(hits.totalHits + " total matching documents for '" + queries + "'");

        List<Document> docs = new ArrayList<Document>();
        for(ScoreDoc scoreDoc : hits.scoreDocs)
        {
            org.apache.lucene.document.Document doc = searcher.doc(scoreDoc.doc);
            docs.add(doc);
        }
        searcher.close();

        return docs;
    }

    private void query(IndexSearcher searcher, Analyzer analyzer, String text) throws Exception
    {
        Query query = new QueryParser(Version.LUCENE_34, "contents", analyzer).parse(text);
        TopDocs hits = searcher.search(query, 50);
        logger.info(hits.totalHits + " total matching documents for '" + text + "'");
        for(ScoreDoc scoreDoc : hits.scoreDocs)
        {
            org.apache.lucene.document.Document doc =
searcher.doc(scoreDoc.doc); String cvId = doc.get("contentVersionId"); logger.info("cvId: " + cvId); } } public boolean indexAll() throws Exception { if(!CmsPropertyHandler.getInternalSearchEngine().equalsIgnoreCase("lucene")) return false; logger.warn("INDEXING ALL - correct: " + indexingInitialized + "/" + deleteIndexOnStop + "/" + stopIndexing + "?"); Thread.currentThread().setPriority(Thread.MIN_PRIORITY); if(deleteIndexOnStop.get()) { clearIndex(); deleteIndexOnStop.set(false); stopIndexing.set(false); } else { stopIndexing.set(false); } logger.warn("Resetting stopIndexing to false...."); logger.warn("------------------------------Got indexAll directive...."); if (indexingInitialized.compareAndSet(false, true)) { //createTestIndex(); //indexingInitialized.set(false); //if(true) // return true; try { Timer t = new Timer(); Timer t2 = new Timer(); //Indexing all normal contents now logger.info("Indexing all normal contents: " + CmsPropertyHandler.getContextDiskPath()); List<LanguageVO> languageVOList = LanguageController.getController().getLanguageVOList(); Iterator<LanguageVO> languageVOListIterator = languageVOList.iterator(); outer:while(languageVOListIterator.hasNext()) { LanguageVO languageVO = (LanguageVO)languageVOListIterator.next(); logger.info("Getting notification messages for " + languageVO.getName()); Integer previousIndexAllLastContentVersionId = getIndexAllLastCommittedContentVersionId(languageVO.getId()); int startID = 0; if(previousIndexAllLastContentVersionId != null) startID = previousIndexAllLastContentVersionId; logger.info("Starting from " + startID); int newLastContentVersionId = getContentNotificationMessages(languageVO, startID); logger.info("newLastContentVersionId: " + newLastContentVersionId + " on " + languageVO.getName()); registerIndexAllProcessOngoing(newLastContentVersionId, null, languageVO.getId()); //previousIndexAllLastContentVersionId = newLastContentVersionId; RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getNotificationMessages", t.getElapsedTime()); logger.info("newLastContentVersionId " + newLastContentVersionId); while(newLastContentVersionId != -1) { logger.info("stopIndexing.get():" + stopIndexing.get()); if(stopIndexing.get()) break outer; Thread.sleep(5000); newLastContentVersionId = getContentNotificationMessages(languageVO, newLastContentVersionId); logger.info("newLastContentVersionId: " + newLastContentVersionId + " on " + languageVO.getName()); registerIndexAllProcessOngoing(newLastContentVersionId, null, languageVO.getId()); //previousIndexAllLastContentVersionId = newLastContentVersionId; RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getNotificationMessages 2", t.getElapsedTime()); logger.info("newLastContentVersionId " + newLastContentVersionId); } } languageVOList = LanguageController.getController().getLanguageVOList(); languageVOListIterator = languageVOList.iterator(); outer:while(languageVOListIterator.hasNext()) { LanguageVO languageVO = (LanguageVO)languageVOListIterator.next(); logger.info("languageVO from " + languageVO); List<NotificationMessage> notificationMessages = new ArrayList<NotificationMessage>(); Integer previousIndexAllLastMetaContentVersionId = getIndexAllLastCommittedMetaContentVersionId(languageVO.getId()); logger.info("previousIndexAllLastMetaContentVersionId: " + previousIndexAllLastMetaContentVersionId); int startID = 0; if(previousIndexAllLastMetaContentVersionId != null) startID = previousIndexAllLastMetaContentVersionId; logger.info("Starting from " + startID); 
int newLastMetaContentVersionId = getPageNotificationMessages(notificationMessages, languageVO, startID); logger.info("newLastSiteNodeVersionId " + newLastMetaContentVersionId + " on " + languageVO.getName()); logger.info("notificationMessages: " + notificationMessages.size()); registerIndexAllProcessOngoing(null, newLastMetaContentVersionId, languageVO.getId()); //previousIndexAllLastMetaContentVersionId = newLastMetaContentVersionId; RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getNotificationMessagesForStructure", t.getElapsedTime()); logger.info("newLastMetaContentVersionId " + newLastMetaContentVersionId); while(newLastMetaContentVersionId != -1) { logger.info("stopIndexing.get():" + stopIndexing.get()); if(stopIndexing.get()) break outer; Thread.sleep(5000); newLastMetaContentVersionId = getPageNotificationMessages(notificationMessages, languageVO, newLastMetaContentVersionId); logger.info("newLastMetaContentVersionId " + newLastMetaContentVersionId + " on " + languageVO.getName()); logger.info("notificationMessages: " + notificationMessages.size()); registerIndexAllProcessOngoing(null, newLastMetaContentVersionId, languageVO.getId()); //previousIndexAllLastMetaContentVersionId = newLastMetaContentVersionId; RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getNotificationMessages 2", t.getElapsedTime()); logger.info("newLastMetaContentVersionId " + newLastMetaContentVersionId); } } registerIndexAllProcessDone(); t2.printElapsedTime("All indexing took"); } catch (Exception e) { logger.error("Error indexing notifications:" + e.getMessage(), e); } finally { logger.error("Releasing indexing flag"); this.indexingInitialized.set(false); } } else { logger.warn("-------------------: Allready running index all..."); return false; } return true; } private void createTestIndex() { System.out.println("STARTING TEST"); try { clearIndex(); IndexWriter writer = getIndexWriter(); for(int i=0; i<10000; i++) { // make a new, empty document Document doc = new Document(); doc.add(new NumericField("publishDateTime", Field.Store.YES, true).setLongValue(23423423423L)); doc.add(new NumericField("modificationDateTime", Field.Store.YES, true).setLongValue(23423423423L)); doc.add(new Field("modified", DateTools.timeToString(23423423423L, DateTools.Resolution.MINUTE), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("contentVersionId", "324234234", Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("contentId", "324234234", Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("contentTypeDefinitionId", "344", Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("languageId", "33", Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("repositoryId", "22", Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("lastModifier", "Mattias Bogeblad", Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("stateId", "3", Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("isAsset", "false", Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("contents", new StringReader(i + " fwe foweif oiwejfoijweoifiweuhfi uehwiufh weiuhfiuwehfiew iufiuwehfi ewiufh iuwehfiuehwiufiweuhfiu ehwifhw eifew efiwehfiuwe" + "ff wehfiuehwiufiuwehfiuehw iufhwei uhfiehwiufweiuhf iwefihw eifiuwe ifhwe ifihew iufi weuhfiuwe" + "dfbsdjfsjdjfjksdf s f jdsjkfs dkjfh ksdfk sdkfhkds fksd " + "fjsd fsdhf uiweo p fiieowhf iehwiufiewhfiewfhw efn ewfowe ifioewf owehfowe"))); doc.add(new Field("uid", "" + i, Field.Store.NO, 
Field.Index.NOT_ANALYZED)); writer.addDocument(doc); if(i == 1000 || i == 2000 ||i == 3000 ||i == 4000 ||i == 5000 ||i == 6000 ||i == 7000 ||i == 8000 ||i == 9000) { //writer.optimize(); //writer.optimize(true); logger.info("Sleeping...:" + getIndexInformation().get("numDoc")); Thread.sleep(5000); } } //writer.close(true); writer.commit(); } catch (Exception e) { e.printStackTrace(); } } /** * This method gets called when a new notification has come. * It then iterates through the listeners and notifies them. */ public void addNotificationMessage(NotificationMessage notificationMessage) { if(notificationMessage.getClassName().equals(ContentImpl.class.getName()) || notificationMessage.getClassName().equals(ContentVersionImpl.class.getName()) || notificationMessage.getClassName().equals(SiteNodeImpl.class.getName()) || notificationMessage.getClassName().equals(PureSiteNodeImpl.class.getName()) || notificationMessage.getClassName().equals(SmallSiteNodeImpl.class.getName()) || notificationMessage.getClassName().equals(SiteNodeVersionImpl.class.getName()) || notificationMessage.getClassName().equals(DigitalAssetImpl.class.getName()) || notificationMessage.getClassName().equals(MediumDigitalAssetImpl.class.getName())) { if(qeuedMessages.size() == 1000) { logger.warn("qeuedMessages went over 1000 - seems wrong"); //Thread.dumpStack(); } synchronized (qeuedMessages) { qeuedMessages.add(notificationMessage); } } else { logger.info("Skipping indexing:" + notificationMessage.getClassName()); } } /** * This method gets called when a new NotificationMessage is available. * The writer just calls the transactionHistoryController which stores it. */ public void notify(NotificationMessage notificationMessage) { try { if(logger.isInfoEnabled()) logger.info("Indexing:" + notificationMessage.getName() + ":" + notificationMessage.getType() + ":" + notificationMessage.getObjectId() + ":" + notificationMessage.getObjectName()); addNotificationMessage(notificationMessage); } catch(Exception e) { logger.error("Error notifying: " + e.getMessage()); } } public void process() throws Exception { logger.info("Process inside LuceneController"); notifyListeners(false, true); } public void notifyListeners(boolean forceVersionIndexing, boolean checkForIndexingJobs) throws IOException, Exception { if(!CmsPropertyHandler.getInternalSearchEngine().equalsIgnoreCase("lucene") || CmsPropertyHandler.getContextDiskPath().contains("@deploy.dir")) return; boolean initDoneLocally = false; boolean finishDoneLocally = false; logger.info("------------------------------->notifyListeners before check in " + CmsPropertyHandler.getContextRootPath() + "/" + deleteIndexOnStop.get() + "/" + stopIndexing.get()); if(deleteIndexOnStop.get()) { clearIndex(); deleteIndexOnStop.set(false); stopIndexing.set(false); } else { stopIndexing.set(false); } if (!checkForIndexingJobs || indexingInitialized.compareAndSet(false, true)) { if(checkForIndexingJobs) initDoneLocally = true; List<NotificationMessage> internalMessageList = new ArrayList<NotificationMessage>(); List<NotificationMessage> revisitedInternalMessageList = new ArrayList<NotificationMessage>(); synchronized (qeuedMessages) { //logger.error("internalMessageList: " + internalMessageList.size() + "/" + qeuedMessages.size()); internalMessageList.addAll(qeuedMessages); //logger.error("internalMessageList: " + internalMessageList.size() + "/" + qeuedMessages.size()); qeuedMessages.clear(); //logger.error("internalMessageList: " + internalMessageList.size() + "/" + qeuedMessages.size()); } 
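            //Also drain messages that were deferred earlier (entities that could not be read from the database on the first attempt).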
synchronized (maturedQeuedMessages) { logger.info("maturedQeuedMessages:" + maturedQeuedMessages.size()); if(maturedQeuedMessages.size() > 0) { logger.info("Was a matured message - let's take it also"); internalMessageList.addAll(maturedQeuedMessages); revisitedInternalMessageList.addAll(maturedQeuedMessages); //logger.error("internalMessageList: " + internalMessageList.size() + "/" + qeuedMessages.size()); maturedQeuedMessages.clear(); } } //Should implement equals on NotificationMessage later List<NotificationMessage> baseEntitiesToIndexMessageList = new ArrayList<NotificationMessage>(); List<String> existingSignatures = new ArrayList<String>(); logger.info("Before AAAAA:" + internalMessageList.size() + ":" + existingSignatures.size()); Iterator<NotificationMessage> cleanupInternalMessageListIterator = internalMessageList.iterator(); while(cleanupInternalMessageListIterator.hasNext()) { NotificationMessage notificationMessage = cleanupInternalMessageListIterator.next(); logger.info("Indexing........:" + notificationMessage.getClassName()); if(notificationMessage.getClassName().equals(ContentImpl.class.getName()) || notificationMessage.getClassName().equals(Content.class.getName())) { ContentVO contentVO = ContentController.getContentController().getLocklessContentVOWithId((Integer)notificationMessage.getObjectId()); //ContentVO contentVO = ContentController.getContentController().getContentVOWithId((Integer)notificationMessage.getObjectId()); if(contentVO != null) { ContentTypeDefinitionVO ctdVO = null; try { ctdVO = ContentTypeDefinitionController.getController().getContentTypeDefinitionVOWithId(contentVO.getContentTypeDefinitionId()); } catch (SystemException sex) { logger.warn("Failed to get the content type definition for content with Id: " + contentVO.getContentId() + ". The content will not be indexed. 
Message: " + sex.getMessage()); logger.info("Failed to get the content type definition for content with Id: " + contentVO.getContentId(), sex); } if(ctdVO != null && ctdVO.getName().equals("Meta info")) { SiteNodeVO siteNodeVO = SiteNodeController.getController().getSiteNodeVOWithMetaInfoContentId(contentVO.getContentId()); if(siteNodeVO != null && notificationMessage != null) { NotificationMessage newNotificationMessage = new NotificationMessage("" + siteNodeVO.getName(), SiteNodeImpl.class.getName(), "SYSTEM", notificationMessage.getType(), siteNodeVO.getId(), "" + siteNodeVO.getName()); String key = "" + newNotificationMessage.getClassName() + "_" + newNotificationMessage.getObjectId() + "_" + "_" + newNotificationMessage.getType(); if(!existingSignatures.contains(key)) { logger.info("++++++++++++++Got an META PAGE notification - just adding it AS A PAGE instead: " + newNotificationMessage.getObjectId()); baseEntitiesToIndexMessageList.add(newNotificationMessage); existingSignatures.add(key); } else { logger.info("++++++++++++++Skipping Content notification - duplicate existed: " + notificationMessage.getObjectId()); } } } else { String key = "" + notificationMessage.getClassName() + "_" + notificationMessage.getObjectId() + "_" + "_" + notificationMessage.getType(); if(!existingSignatures.contains(key)) { logger.info("++++++++++++++Got an Content notification - just adding it: " + notificationMessage.getObjectId()); baseEntitiesToIndexMessageList.add(notificationMessage); existingSignatures.add(key); } else { logger.info("++++++++++++++Skipping Content notification - duplicate existed: " + notificationMessage.getObjectId()); } } } else { logger.info("The content seems to be missing from the database. A guess is that it's new or deleted. Let's try later."); if(!revisitedInternalMessageList.contains(notificationMessage)) maturedQeuedMessages.add(notificationMessage); else logger.info("No - allready tried it again.. skipping."); } } else if(notificationMessage.getClassName().equals(ContentVersionImpl.class.getName()) || notificationMessage.getClassName().equals(ContentVersion.class.getName())) { logger.info("++++++++++++++Got an ContentVersion notification - focus on content: " + notificationMessage.getObjectId()); //ContentVersionVO contentVersionVO = ContentVersionController.getContentVersionController().getContentVersionVOWithId((Integer)notificationMessage.getObjectId()); ContentVersionVO contentVersionVO = ContentVersionController.getContentVersionController().getLocklessContentVersionVOWithId((Integer)notificationMessage.getObjectId()); if(contentVersionVO != null) { ContentVO contentVO = ContentController.getContentController().getLocklessContentVOWithId(contentVersionVO.getContentId()); if(contentVO.getContentTypeDefinitionId() != null) { ContentTypeDefinitionVO ctdVO = null; try { ctdVO = ContentTypeDefinitionController.getController().getContentTypeDefinitionVOWithId(contentVO.getContentTypeDefinitionId()); } catch (SystemException sex) { logger.warn("Failed to get the content type definition for content with Id: " + contentVO.getContentId() + ". The content version will not be indexed. 
Message: " + sex.getMessage()); logger.info("Failed to get the content type definition for content with Id: " + contentVO.getContentId(), sex); } if(ctdVO != null && ctdVO.getName().equals("Meta info")) { SiteNodeVO siteNodeVO = SiteNodeController.getController().getSiteNodeVOWithMetaInfoContentId(contentVO.getContentId()); if (siteNodeVO == null) { logger.warn("Got meta info notification but could not find a page for the Content-id. Content.id: " + contentVO.getContentId()); } else { NotificationMessage newNotificationMessage = new NotificationMessage("" + siteNodeVO.getName(), SiteNodeImpl.class.getName(), "SYSTEM", notificationMessage.getType(), siteNodeVO.getId(), "" + siteNodeVO.getName()); String key = "" + newNotificationMessage.getClassName() + "_" + newNotificationMessage.getObjectId() + "_" + newNotificationMessage.getType(); if(!existingSignatures.contains(key)) { logger.info("++++++++++++++Got an META PAGE notification - just adding it AS A PAGE instead: " + newNotificationMessage.getObjectId()); baseEntitiesToIndexMessageList.add(newNotificationMessage); existingSignatures.add(key); } else { logger.info("++++++++++++++Skipping Content notification - duplicate existed: " + notificationMessage.getObjectId()); } } } else { NotificationMessage newNotificationMessage = new NotificationMessage("" + contentVersionVO.getContentName(), ContentImpl.class.getName(), "SYSTEM", notificationMessage.getType(), contentVersionVO.getContentId(), "" + contentVersionVO.getContentName()); String key = "" + newNotificationMessage.getClassName() + "_" + newNotificationMessage.getObjectId() + "_" + newNotificationMessage.getType(); if(!existingSignatures.contains(key)) { logger.info("++++++++++++++Got an Content notification - just adding it: " + newNotificationMessage.getObjectId()); baseEntitiesToIndexMessageList.add(newNotificationMessage); existingSignatures.add(key); } else { logger.info("++++++++++++++Skipping Content notification - duplicate existed: " + notificationMessage.getObjectId()); } } } } else { logger.info("The content version seems to be missing from the database. A guess is that it's new or deleted. Let's try later."); if(!revisitedInternalMessageList.contains(notificationMessage)) maturedQeuedMessages.add(notificationMessage); else logger.info("No - allready tried it again.. 
skipping."); } } else if(notificationMessage.getClassName().equals(DigitalAssetImpl.class.getName()) || notificationMessage.getClassName().equals(MediumDigitalAssetImpl.class.getName()) || notificationMessage.getClassName().equals(DigitalAsset.class.getName()) || notificationMessage.getClassName().equals(SiteNodeImpl.class.getName()) || notificationMessage.getClassName().equals(SmallSiteNodeImpl.class.getName()) || notificationMessage.getClassName().equals(PureSiteNodeImpl.class.getName()) || notificationMessage.getClassName().equals(SiteNode.class.getName()) || notificationMessage.getClassName().equals(SiteNodeVersionImpl.class.getName()) || notificationMessage.getClassName().equals(SiteNodeVersion.class.getName())) { logger.info("notificationMessage.getClassName():" + notificationMessage.getClassName()); String key = "" + notificationMessage.getClassName() + "_" + notificationMessage.getObjectId() + "_" + "_" + notificationMessage.getType(); if(notificationMessage.getClassName().equals(SiteNodeVersionImpl.class.getName()) || notificationMessage.getClassName().equals(SiteNodeVersion.class.getName())) { logger.info("PPPPPPPPPPPPPPPPPPPPPPPPPP:" + notificationMessage.getObjectId()); try { SiteNodeVersionVO siteNodeVersionVO = SiteNodeVersionController.getController().getSiteNodeVersionVOWithId((Integer)notificationMessage.getObjectId()); SiteNodeVO siteNodeVO = SiteNodeController.getController().getSiteNodeVOWithId(siteNodeVersionVO.getSiteNodeId()); NotificationMessage newNotificationMessage = new NotificationMessage("" + siteNodeVO.getName(), SiteNodeImpl.class.getName(), "SYSTEM", notificationMessage.getType(), siteNodeVO.getId(), "" + siteNodeVO.getName()); key = "" + newNotificationMessage.getClassName() + "_" + newNotificationMessage.getObjectId() + "_" + newNotificationMessage.getType(); if(!existingSignatures.contains(key)) { logger.info("++++++++++++++Got an SiteNodeVersionImpl notification - just adding it as SiteNodeImpl: " + newNotificationMessage.getClassName() + ":" + newNotificationMessage.getObjectId()); baseEntitiesToIndexMessageList.add(newNotificationMessage); existingSignatures.add(key); } else { logger.info("++++++++++++++Skipping notification - duplicate existed: " + notificationMessage.getClassName() + ":" + notificationMessage.getObjectId()); } } catch(Exception e) { logger.warn("Got an error handling SiteNodeVersion with ID: " + notificationMessage.getObjectId() + ":" + e.getMessage()); } } else if(notificationMessage.getClassName().equals(SiteNodeImpl.class.getName()) || notificationMessage.getClassName().equals(SiteNode.class.getName()) || notificationMessage.getClassName().equals(SmallSiteNodeImpl.class.getName()) || notificationMessage.getClassName().equals(PureSiteNodeImpl.class.getName())) { if(!existingSignatures.contains(key)) { logger.info("++++++++++++++Got an Page notification - just adding it: " + notificationMessage.getClassName() + ":" + notificationMessage.getObjectId()); baseEntitiesToIndexMessageList.add(notificationMessage); existingSignatures.add(key); } else { logger.info("++++++++++++++Skipping notification - duplicate existed: " + notificationMessage.getClassName() + ":" + notificationMessage.getObjectId()); } } else { NotificationMessage newNotificationMessage = new NotificationMessage("" + notificationMessage.getName(), DigitalAssetImpl.class.getName(), "SYSTEM", notificationMessage.getType(), notificationMessage.getObjectId(), "" + notificationMessage.getName()); key = "" + newNotificationMessage.getClassName() + "_" + 
newNotificationMessage.getObjectId() + "_" + "_" + newNotificationMessage.getType(); if(!existingSignatures.contains(key)) { logger.info("++++++++++++++Got an Content notification - just adding it: " + notificationMessage.getClassName() + ":" + notificationMessage.getObjectId()); baseEntitiesToIndexMessageList.add(newNotificationMessage); existingSignatures.add(key); } else { logger.info("++++++++++++++Skipping notification - duplicate existed: " + notificationMessage.getClassName() + ":" + notificationMessage.getObjectId()); } } } } internalMessageList = baseEntitiesToIndexMessageList; logger.info("After in [" + CmsPropertyHandler.getContextRootPath() + "]:" + internalMessageList.size() + ":" + existingSignatures.size()); try { logger.info("notifyListeners actually running"); if(getIsIndexedLocked()) { logger.warn("The index should not be locked as no indexing is registered to be carried out. Lets unlock it as it may be the result of a crash."); unlockIndex(); } //logger.error("Starting indexin of " + qeuedMessages.size()); Timer t = new Timer(); IndexWriter writer = getIndexWriter(); //t.printElapsedTime("Creating writer took"); Database db = CastorDatabaseService.getDatabase(); beginTransaction(db); try { int numberOfMessages = internalMessageList.size(); Iterator internalMessageListIterator = internalMessageList.iterator(); while(internalMessageListIterator.hasNext()) { NotificationMessage notificationMessage = (NotificationMessage)internalMessageListIterator.next(); try { if(logger.isInfoEnabled()) logger.info("Starting indexin of " + notificationMessage); indexInformation(notificationMessage, writer, internalMessageList, forceVersionIndexing, db); internalMessageListIterator.remove(); } catch (Exception e) { e.printStackTrace(); } } //t.printElapsedTime("Indexing " + numberOfMessages + " documents took"); //Map<String,String> commitUserData = new HashMap<String,String>(); //internalMessageList.clear(); //writer.commit(commitUserData); logger.info("##############lastCommitedContentVersionId before close:" + lastCommitedContentVersionId); if(lastCommitedContentVersionId > -1) { Integer previousLastCommittedContentVersionId = getLastCommitedContentVersionId(); logger.info("##############previousLastCommittedContentVersionId before close:" + previousLastCommittedContentVersionId); if(previousLastCommittedContentVersionId < lastCommitedContentVersionId) { try { logger.info("*************ADDING status doc " + lastCommitedContentVersionId + "**************"); setLastCommitedContentVersionId(writer, lastCommitedContentVersionId); } catch (Exception e) { logger.error("*************ERROR: ADDING status doc**************", e); } } else { logger.warn("The content version was not a higher number than what was allready indexed - lets not add status...."); } } commitTransaction(db); } catch(Exception e) { logger.error("An error occurred so we should not complete the transaction:" + e.getMessage(), e); rollbackTransaction(db); } finally { writer.commit(); //writer.close(true); } logger.info("OOOOOOOOOOOOOO:" + getLastCommitedContentVersionId()); } catch (Exception e) { logger.error("Error indexing notifications:" + e.getMessage()); logger.warn("Error indexing notifications:" + e.getMessage(), e); } finally { logger.info("Releasing indexing flag"); try { if(internalMessageList.size() > 0) { synchronized (qeuedMessages) { logger.info("Returning internalMessageList:" + internalMessageList.size() + " to qeuedMessages as some failed."); qeuedMessages.addAll(internalMessageList); 
internalMessageList.clear(); } } } catch (Exception e) { e.printStackTrace(); } if(checkForIndexingJobs) { this.indexingInitialized.set(false); finishDoneLocally = true; } } if(initDoneLocally && !finishDoneLocally) logger.error("EEEEEEEEEEEEEEERRRRRRRRRRRRRRROOOOOOOOOOOORRRRRRRR aaaaaaa"); logger.info("internalMessageList 1:" + internalMessageList.size() + " / " + qeuedMessages.size()); } else { logger.info("------------------------------->Indexing job allready running... skipping in " + CmsPropertyHandler.getContextRootPath()); } logger.info("queued messages 1:" + qeuedMessages.size()); } public void index() throws Exception { if(!CmsPropertyHandler.getInternalSearchEngine().equalsIgnoreCase("lucene")) return; logger.info("Start index: " + CmsPropertyHandler.getContextRootPath() + "/" + deleteIndexOnStop.get() + "/" + stopIndexing.get()); if(deleteIndexOnStop.get()) { clearIndex(); deleteIndexOnStop.set(false); stopIndexing.set(false); } else { stopIndexing.set(false); } logger.info("################# starting index"); //if (indexStarted.compareAndSet(false, true)) //{ IndexReader indexReader = null; try { Integer lastCommitedContentVersionId = getLastCommitedContentVersionId(); Document indexAllDocumentMetaData = getIndexAllStatusDocument(); //Integer previousIndexAllLastContentVersionId = getIndexAllLastCommittedContentVersionId(); logger.info("lastCommitedContentVersionId:" + lastCommitedContentVersionId); Date lastCommitedModifiedDate = getLastCommitedModifiedDate(); Calendar yesterday = Calendar.getInstance(); yesterday.add(Calendar.HOUR_OF_DAY, -1); logger.info("lastCommitedContentVersionId: " + lastCommitedContentVersionId); logger.info("lastCommitedModifiedDate: " + lastCommitedModifiedDate); indexReader = getIndexReader(); boolean didIndex = false; if(lastCommitedContentVersionId == -1 || indexAllDocumentMetaData != null || indexReader.numDocs() < 100) { logger.info("indexAll as it seemed to be not ready....."); logger.info("###########################IndexAll"); didIndex = indexAll(); } else //Skipping indexing for now.. 
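        //Otherwise only pick up versions committed after the last recorded content version id (incremental pass).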
{ logger.info("###########################indexIncremental"); didIndex = indexIncremental(lastCommitedContentVersionId, yesterday.getTime()); } if(didIndex) { CacheController.clearCache("pageCache"); CacheController.clearCache("pageCacheExtra"); } } catch (Exception e) { logger.error("Error indexing notifications:" + e.getMessage()); logger.warn("Error indexing notifications:" + e.getMessage(), e); } /* } else { logger.error("################# skipping index, was allready started"); } */ } public boolean indexIncremental(Integer lastCommitedContentVersionId, Date lastCommitedDateTime) throws Exception { if(!CmsPropertyHandler.getInternalSearchEngine().equalsIgnoreCase("lucene")) return false; Timer t = new Timer(); Timer t2 = new Timer(); logger.info("Indexing incremental:" + lastCommitedContentVersionId + "/" + lastCommitedDateTime); //Map<String,String> lastCommitData = reader.getCommitUserData(); List<LanguageVO> languageVOList = LanguageController.getController().getLanguageVOList(); Iterator<LanguageVO> languageVOListIterator = languageVOList.iterator(); outer:while(languageVOListIterator.hasNext()) { LanguageVO languageVO = (LanguageVO)languageVOListIterator.next(); List<NotificationMessage> notificationMessages = new ArrayList<NotificationMessage>(); //logger.error("Getting notification messages for " + languageVO.getName()); int newLastContentVersionId = getNotificationMessages(notificationMessages, languageVO, lastCommitedContentVersionId, lastCommitedDateTime, 1000); while(newLastContentVersionId != -1) { Thread.sleep(5000); if(stopIndexing.get()) break outer; logger.info("Queueing " + notificationMessages.size() + " notificationMessages for indexing"); for(NotificationMessage notificationMessage : notificationMessages) { notify(notificationMessage); } notifyListeners(true, false); notificationMessages.clear(); //t.printElapsedTime("Indexing size():" + notificationMessages.size() + " took"); Integer newLastContentVersionIdCandidate = getNotificationMessages(notificationMessages, languageVO, newLastContentVersionId, lastCommitedDateTime, 1000); logger.info("newLastContentVersionIdCandidate:" + newLastContentVersionIdCandidate + "=" + newLastContentVersionId); if(newLastContentVersionIdCandidate > newLastContentVersionId) newLastContentVersionId = newLastContentVersionIdCandidate; else break; //t.printElapsedTime("newLastContentVersionId:" + newLastContentVersionId + " took"); } } if(logger.isInfoEnabled()) t2.printElapsedTime("All indexing took"); return true; } private int getNotificationMessagesForStructure(List<NotificationMessage> notificationMessages, LanguageVO languageVO, int lastSiteNodeVersionId) throws Exception { Timer t = new Timer(); logger.info("getNotificationMessages:" + lastSiteNodeVersionId); int newLastSiteNodeVersionId = -1; Database db = CastorDatabaseService.getDatabase(); try { beginTransaction(db); ContentTypeDefinitionVO contentTypeDefinitionVO = ContentTypeDefinitionController.getController().getContentTypeDefinitionVOWithName("Meta info", db); ContentVersionVO lastContentVersionVO = ContentVersionController.getContentVersionController().getLatestContentVersionVO(languageVO.getId(), db); Integer maxContentVersionId = (lastContentVersionVO == null ? 
1000 : lastContentVersionVO.getId()); logger.info("maxContentVersionId:" + maxContentVersionId + " for " + languageVO.getName()); List<ContentVersionVO> versions = new ArrayList<ContentVersionVO>(); if(CmsPropertyHandler.getApplicationName().equalsIgnoreCase("cms")) { versions = ContentVersionController.getContentVersionController().getContentVersionVOList(contentTypeDefinitionVO.getId(), null, languageVO.getId(), false, 0, newLastSiteNodeVersionId, numberOfVersionToIndexInBatch, numberOfVersionToIndexInBatch*10, true, db, true, maxContentVersionId); } else { versions = ContentVersionController.getContentVersionController().getContentVersionVOList(contentTypeDefinitionVO.getId(), null, languageVO.getId(), false, Integer.parseInt(CmsPropertyHandler.getOperatingMode()), newLastSiteNodeVersionId, numberOfVersionToIndexInBatch, numberOfVersionToIndexInBatch*10, true, db, true, maxContentVersionId); } RequestAnalyser.getRequestAnalyser().registerComponentStatistics("Index all : getContentVersionVOList", t.getElapsedTime()); logger.info("versions in getNotificationMessagesForStructure:" + versions.size()); logger.info("Looping versions:" + versions.size()); for(ContentVersionVO version : versions) { NotificationMessage notificationMessage = new NotificationMessage("LuceneController", ContentVersionImpl.class.getName(), "SYSTEM", NotificationMessage.TRANS_UPDATE, version.getId(), "dummy"); notificationMessages.add(notificationMessage); newLastSiteNodeVersionId = version.getId().intValue(); } logger.info("Finished round 1:" + notificationMessages.size() + ":" + newLastSiteNodeVersionId); } catch ( Exception e ) { rollbackTransaction(db); throw new SystemException("An error occurred when we tried to fetch a list of users in this role. Reason:" + e.getMessage(), e); } commitTransaction(db); return newLastSiteNodeVersionId; } private int getContentNotificationMessages(LanguageVO languageVO, int lastContentVersionId) throws Exception { Timer t = new Timer(); logger.info("getNotificationMessages:" + languageVO.getName() + " : " + lastContentVersionId); logger.info("notifyListeners actually running"); if(getIsIndexedLocked()) { logger.info("The index should not be locked as no indexing is registered to be carried out. Lets unlock it as it may be the result of a crash."); unlockIndex(); } IndexWriter writer = getIndexWriter(); //t.printElapsedTime("Creating writer took"); int newLastContentVersionId = -1; Database db = CastorDatabaseService.getDatabase(); try { beginTransaction(db); logger.info("lastContentVersionId:" + lastContentVersionId); if(lastContentVersionId < 1) { SmallestContentVersionVO firstContentVersionVO = ContentVersionController.getContentVersionController().getFirstContentVersionId(languageVO.getId(), db); if(firstContentVersionVO != null) lastContentVersionId = firstContentVersionVO.getId(); } logger.info("lastContentVersionId 2:" + lastContentVersionId); ContentTypeDefinitionVO contentTypeDefinitionVO = ContentTypeDefinitionController.getController().getContentTypeDefinitionVOWithName("Meta info", db); ContentVersionVO lastContentVersionVO = ContentVersionController.getContentVersionController().getLatestContentVersionVO(languageVO.getId(), db); Integer maxContentVersionId = (lastContentVersionVO == null ? 
1000 : lastContentVersionVO.getId()); logger.info("maxContentVersionId 1:" + maxContentVersionId + " for " + languageVO.getName()); List<ContentVersionVO> versions = new ArrayList<ContentVersionVO>(); if(CmsPropertyHandler.getApplicationName().equalsIgnoreCase("cms")) { versions = ContentVersionController.getContentVersionController().getContentVersionVOList(null, contentTypeDefinitionVO.getId(), languageVO.getId(), false, 0, lastContentVersionId, numberOfVersionToIndexInBatch, numberOfVersionToIndexInBatch*10, true, db, false, maxContentVersionId); } else { versions = ContentVersionController.getContentVersionController().getContentVersionVOList(null, contentTypeDefinitionVO.getId(), languageVO.getId(), false, Integer.parseInt(CmsPropertyHandler.getOperatingMode()), lastContentVersionId, numberOfVersionToIndexInBatch, numberOfVersionToIndexInBatch*10, true, db, false, maxContentVersionId); } RequestAnalyser.getRequestAnalyser().registerComponentStatistics("Index all : getContentVersionVOList", t.getElapsedTime()); logger.info("versions in getContentNotificationMessages:" + versions.size()); logger.info("Looping versions:" + versions.size()); for(ContentVersionVO version : versions) { if(stopIndexing.get()) return newLastContentVersionId; Document document = getDocumentFromContentVersion(version, db); String uid = document.get("uid"); logger.info("document: " + document); writer.deleteDocuments(new Term("uid", "" + uid)); if(logger.isDebugEnabled()) logger.debug("Adding document with uid:" + uid + " - " + document); if(document != null) writer.addDocument(document); logger.info("version assetCount:" + version.getAssetCount()); if(version.getAssetCount() == null || version.getAssetCount() > 0) { List digitalAssetVOList = DigitalAssetController.getDigitalAssetVOList(version.getId(), db); RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getDigitalAssetVOList", (t.getElapsedTimeNanos() / 1000)); if(digitalAssetVOList.size() > 0) { logger.info("digitalAssetVOList:" + digitalAssetVOList.size()); Iterator digitalAssetVOListIterator = digitalAssetVOList.iterator(); while(digitalAssetVOListIterator.hasNext()) { DigitalAssetVO assetVO = (DigitalAssetVO)digitalAssetVOListIterator.next(); Document assetDocument = getDocumentFromDigitalAsset(assetVO, version, db); String assetUid = assetDocument.get("uid"); writer.deleteDocuments(new Term("uid", "" + assetUid)); if(logger.isDebugEnabled()) logger.debug("Adding document with assetUid:" + assetUid + " - " + assetDocument); if(assetDocument != null) writer.addDocument(assetDocument); } } } newLastContentVersionId = version.getId().intValue(); } //logger.info("Finished round 2:" + notificationMessages.size() + ":" + newLastContentVersionId); } catch ( Exception e ) { logger.error("Error in lucene indexing: " + e.getMessage(), e); rollbackTransaction(db); throw new SystemException("An error occurred when we tried to getContentNotificationMessages. 
Reason:" + e.getMessage(), e); } finally { try{setLastCommitedContentVersionId(writer, newLastContentVersionId); writer.commit(); /*writer.close(true);*/}catch (Exception e) {e.printStackTrace();} } commitTransaction(db); return newLastContentVersionId; } private int getPageNotificationMessages(List notificationMessages, LanguageVO languageVO, int lastContentVersionId) throws Exception { Timer t = new Timer(); logger.info("getNotificationMessages:" + languageVO.getName() + " : " + lastContentVersionId); logger.info("notifyListeners actually running"); if(getIsIndexedLocked()) { logger.info("The index should not be locked as no indexing is registered to be carried out. Lets unlock it as it may be the result of a crash."); unlockIndex(); } IndexWriter writer = getIndexWriter(); //t.printElapsedTime("Creating writer took"); int newLastContentVersionId = -1; Database db = CastorDatabaseService.getDatabase(); try { beginTransaction(db); ContentTypeDefinitionVO contentTypeDefinitionVO = ContentTypeDefinitionController.getController().getContentTypeDefinitionVOWithName("Meta info", db); ContentVersionVO lastContentVersionVO = ContentVersionController.getContentVersionController().getLatestContentVersionVO(languageVO.getId(), db); Integer maxContentVersionId = (lastContentVersionVO == null ? 1000 : lastContentVersionVO.getId()); logger.info("maxContentVersionId:" + maxContentVersionId + " for " + languageVO.getName()); List<ContentVersionVO> versions = new ArrayList<ContentVersionVO>(); if(CmsPropertyHandler.getApplicationName().equalsIgnoreCase("cms")) { versions = ContentVersionController.getContentVersionController().getContentVersionVOList(contentTypeDefinitionVO.getId(), null, languageVO.getId(), false, 0, lastContentVersionId, numberOfVersionToIndexInBatch, numberOfVersionToIndexInBatch*10, true, db, true, maxContentVersionId); } else { versions = ContentVersionController.getContentVersionController().getContentVersionVOList(contentTypeDefinitionVO.getId(), null, languageVO.getId(), false, Integer.parseInt(CmsPropertyHandler.getOperatingMode()), lastContentVersionId, numberOfVersionToIndexInBatch, numberOfVersionToIndexInBatch*10, true, db, true, maxContentVersionId); } logger.info("versions:" + versions.size()); RequestAnalyser.getRequestAnalyser().registerComponentStatistics("Index all : getContentVersionVOList", t.getElapsedTime()); logger.info("versions in getContentNotificationMessages:" + versions.size()); logger.info("Looping versions:" + versions.size()); for(ContentVersionVO version : versions) { if(stopIndexing.get()) return newLastContentVersionId; Document documents = getSiteNodeDocument(version, writer, db); if (documents != null) { String uid = documents.get("uid"); logger.debug("Regging doc: " + documents); writer.deleteDocuments(new Term("uid", "" + uid)); if(logger.isDebugEnabled()) logger.debug("Adding document with uid:" + uid + " - " + documents); writer.addDocument(documents); } else if(logger.isInfoEnabled()) { logger.info("Failed to get document for SiteNode. 
Meta info content.id: " + version.getContentVersionId()); } /* logger.info("version assetCount:" + version.getAssetCount()); if(version.getAssetCount() == null || version.getAssetCount() > 0) { List digitalAssetVOList = DigitalAssetController.getDigitalAssetVOList(version.getId(), db); RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getDigitalAssetVOList", (t.getElapsedTimeNanos() / 1000)); if(digitalAssetVOList.size() > 0) { logger.info("digitalAssetVOList:" + digitalAssetVOList.size()); Iterator digitalAssetVOListIterator = digitalAssetVOList.iterator(); while(digitalAssetVOListIterator.hasNext()) { DigitalAssetVO assetVO = (DigitalAssetVO)digitalAssetVOListIterator.next(); NotificationMessage assetNotificationMessage = new NotificationMessage("LuceneController", DigitalAssetImpl.class.getName(), "SYSTEM", NotificationMessage.TRANS_UPDATE, assetVO.getId(), "dummy"); notificationMessages.add(assetNotificationMessage); } } } NotificationMessage notificationMessage = new NotificationMessage("LuceneController", ContentVersionImpl.class.getName(), "SYSTEM", NotificationMessage.TRANS_UPDATE, version.getId(), "dummy"); notificationMessages.add(notificationMessage); */ newLastContentVersionId = version.getId().intValue(); } logger.info("Finished round 3:" + notificationMessages.size() + ":" + newLastContentVersionId); } catch ( Exception e ) { rollbackTransaction(db); throw new SystemException("An error occurred when we tried to index the page documents. Reason:" + e.getMessage(), e); } finally { try{setLastCommitedContentVersionId(writer, newLastContentVersionId); writer.commit(); /*writer.close(true);*/}catch (Exception e) {e.printStackTrace();} } commitTransaction(db); return newLastContentVersionId; } public void testSQL() { try { getNotificationMessages(new ArrayList(), LanguageController.getController().getLanguageVOWithCode("sv"), 100000, new Date(), 1000); } catch (Exception e) { logger.error("Error:" + e.getMessage(), e); } } private int getNotificationMessages(List notificationMessages, LanguageVO languageVO, int lastContentVersionId, Date lastCheckDateTime, int batchSize) throws Exception { Timer t = new Timer(); logger.info("getNotificationMessages:" + languageVO.getName() + " : " + lastContentVersionId + ":" + lastCheckDateTime); int newLastContentVersionId = -1; Database db = CastorDatabaseService.getDatabase(); try { beginTransaction(db); logger.info("**************Getting contents start:" + t.getElapsedTime() + ":" + lastCheckDateTime); Calendar date = Calendar.getInstance(); date.setTime(lastCheckDateTime); date.add(Calendar.DAY_OF_YEAR, -1); //String SQL = "select cv.contentVersionId, cv.stateId, cv.modifiedDateTime, cv.versionComment, cv.isCheckedOut, cv.isActive, cv.contentId, cv.languageId, cv.versionModifier FROM cmContentVersion cv where cv.languageId = $1 AND cv.isActive = $2 AND ((cv.contentVersionId > $3 AND cv.contentVersionId < $4) OR cv.modifiedDateTime > $5) ORDER BY cv.contentVersionId"; //if(CmsPropertyHandler.getUseShortTableNames() != null && CmsPropertyHandler.getUseShortTableNames().equalsIgnoreCase("true")) // SQL = "select cv.contVerId, cv.stateId, cv.modifiedDateTime, cv.verComment, cv.isCheckedOut, cv.isActive, cv.contId, cv.languageId, cv.versionModifier FROM cmContVer cv where cv.languageId = $1 AND cv.isActive = $2 AND ((cv.contVerId > $3 AND cv.contVerId < $4) OR cv.modifiedDateTime > TO_DATE('2013-03-20','YYYY-MM-DD')) ORDER BY cv.contVerId"; 
//System.out.println("SQL:" + SQL); //OQLQuery oql = db.getOQLQuery("CALL SQL " + SQL + " AS org.infoglue.cms.entities.content.impl.simple.SmallestContentVersionImpl"); //if(CmsPropertyHandler.getUseShortTableNames() != null && CmsPropertyHandler.getUseShortTableNames().equalsIgnoreCase("true")) // oql = db.getOQLQuery("CALL SQL " + SQL + " AS org.infoglue.cms.entities.content.impl.simple.SmallestContentVersionImpl"); //oracle.sql.DATE oracleDate = new oracle.sql.DATE(new java.sql.Date(date.getTime().getTime())); OQLQuery oql = db.getOQLQuery( "SELECT cv FROM " + SmallestContentVersionImpl.class.getName() + " cv WHERE cv.languageId = $1 AND cv.isActive = $2 AND ((cv.contentVersionId > $3 AND cv.contentVersionId < $4) OR cv.modifiedDateTime > $5) ORDER BY cv.contentVersionId limit $6"); //OQLQuery oql = db.getOQLQuery( "SELECT cv FROM " + SmallestContentVersionImpl.class.getName() + " cv WHERE cv.languageId = $1 AND cv.isActive = $2 AND ((cv.contentVersionId > $3 AND cv.contentVersionId < $4)) ORDER BY cv.contentVersionId limit $5"); oql.bind(languageVO.getId()); oql.bind(true); oql.bind(lastContentVersionId); oql.bind(lastContentVersionId+(batchSize*10)); //oql.bind(date.getTime()); oql.bind(date.getTime()); oql.bind(batchSize); QueryResults results = oql.execute(Database.READONLY); if(logger.isInfoEnabled()) logger.info("Getting contents took: " + t.getElapsedTime()); int processedItems = 0; Integer previousContentId = null; while (results.hasMore()) { SmallestContentVersionImpl smallestContentVersionImpl = (SmallestContentVersionImpl)results.next(); if(previousContentId == null || !previousContentId.equals(smallestContentVersionImpl.getContentId())) { List digitalAssetVOList = DigitalAssetController.getDigitalAssetVOList(smallestContentVersionImpl.getId(), db); if(digitalAssetVOList.size() > 0) { logger.info("digitalAssetVOList:" + digitalAssetVOList.size()); Iterator digitalAssetVOListIterator = digitalAssetVOList.iterator(); while(digitalAssetVOListIterator.hasNext()) { DigitalAssetVO assetVO = (DigitalAssetVO)digitalAssetVOListIterator.next(); if(assetVO.getAssetFileSize() < 10000000) //Do not index large files { NotificationMessage assetNotificationMessage = new NotificationMessage("LuceneController", DigitalAssetImpl.class.getName(), "SYSTEM", NotificationMessage.TRANS_UPDATE, assetVO.getId(), "dummy"); notificationMessages.add(assetNotificationMessage); } } } NotificationMessage notificationMessage = new NotificationMessage("LuceneController", ContentVersionImpl.class.getName(), "SYSTEM", NotificationMessage.TRANS_UPDATE, smallestContentVersionImpl.getId(), "dummy"); notificationMessages.add(notificationMessage); previousContentId = smallestContentVersionImpl.getContentId(); } newLastContentVersionId = smallestContentVersionImpl.getId().intValue(); lastCommitedContentVersionId = newLastContentVersionId; processedItems++; logger.info("previousContentId:" + previousContentId + "/" + processedItems); if(processedItems > batchSize) { System.out.println("Batch full..."); break; } } results.close(); logger.info("Finished round 4:" + processedItems + ":" + newLastContentVersionId); } catch ( Exception e ) { rollbackTransaction(db); throw new SystemException("An error occurred when we tried to fetch a list of users in this role. 
Reason:" + e.getMessage(), e); } commitTransaction(db); return newLastContentVersionId; } private void indexInformation(NotificationMessage notificationMessage, IndexWriter writer, List<NotificationMessage> internalMessageList, Boolean forceVersionIndexing, Database db) { Timer t = new Timer(); try { try { //writer.setMaxMergeDocs(500000); if(logger.isInfoEnabled()) logger.info("Indexing to directory '" + writer.getDirectory().toString() + "'..."); List<Document> documents = getDocumentsForIncremental(notificationMessage, writer, forceVersionIndexing, db); Iterator<Document> documentsIterator = documents.iterator(); while(documentsIterator.hasNext()) { Document indexingDocument = documentsIterator.next(); String uid = indexingDocument.get("uid"); if(logger.isDebugEnabled()) logger.debug("Adding document with uid:" + uid + " - " + indexingDocument); //logger.error("Adding document with uid:" + uid + " - " + indexingDocument); if(indexingDocument != null) writer.addDocument(indexingDocument); } } catch (Exception e) { logger.error("Error indexing:" + e.getMessage(), e); } finally { indexedDocumentsSinceLastOptimize++; if(indexedDocumentsSinceLastOptimize > 1000) { indexedDocumentsSinceLastOptimize = 0; } } } catch (Exception e) { logger.error("Error indexing:" + e.getMessage(), e); } } private List<Document> getDocumentsForIncremental(NotificationMessage notificationMessage, IndexWriter writer, Boolean forceVersionIndexing, Database db) throws Exception { Timer t = new Timer(); List<Document> returnDocuments = new ArrayList<Document>(); logger.info("2222222222 notificationMessage.getClassName():" + notificationMessage.getClassName() + " in " + CmsPropertyHandler.getApplicationName()); Set<Integer> contentIdsToIndex = new HashSet<Integer>(); Set<Integer> siteNodeIdsToIndex = new HashSet<Integer>(); if(notificationMessage.getClassName().equals(ContentImpl.class.getName()) || notificationMessage.getClassName().equals(Content.class.getName())) { logger.info("++++++++++++++Got an Content notification: " + notificationMessage.getObjectId()); ContentVO contentVO = ContentController.getContentController().getContentVOWithId((Integer)notificationMessage.getObjectId(), db); //ContentVO contentVO = ContentController.getContentController().getContentVOWithId((Integer)notificationMessage.getObjectId()); RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getContentVOWithId", (t.getElapsedTimeNanos() / 1000)); contentIdsToIndex.add(contentVO.getId()); } else if(notificationMessage.getClassName().equals(ContentVersionImpl.class.getName()) || notificationMessage.getClassName().equals(ContentVersion.class.getName())) { logger.info("++++++++++++++Got an ContentVersion notification: " + notificationMessage.getObjectId()); ContentVersionVO contentVersionVO = ContentVersionController.getContentVersionController().getContentVersionVOWithId((Integer)notificationMessage.getObjectId(), db); RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getContentVersionVOWithId", t.getElapsedTime()); contentIdsToIndex.add(contentVersionVO.getContentId()); } else if(notificationMessage.getClassName().equals(DigitalAssetImpl.class.getName()) || notificationMessage.getClassName().equals(DigitalAsset.class.getName())) { logger.info("++++++++++++++Got an DigitalAssetImpl notification: " + notificationMessage.getObjectId()); Database db2 = CastorDatabaseService.getDatabase(); beginTransaction(db2); try { DigitalAssetVO asset = 
DigitalAssetController.getController().getLocklessSmallDigitalAssetVOWithId((Integer)notificationMessage.getObjectId(), db2); //DigitalAssetVO asset = DigitalAssetController.getSmallDigitalAssetVOWithId((Integer)notificationMessage.getObjectId(), db2); if(asset != null) { List<SmallestContentVersionVO> contentVersionVOList = DigitalAssetController.getContentVersionVOListConnectedToAssetWithId((Integer)notificationMessage.getObjectId()); if(logger.isInfoEnabled()) logger.info("contentVersionVOList:" + contentVersionVOList.size()); Iterator<SmallestContentVersionVO> contentVersionsIterator = contentVersionVOList.iterator(); while(contentVersionsIterator.hasNext()) { SmallestContentVersionVO version = contentVersionsIterator.next(); RequestAnalyser.getRequestAnalyser().registerComponentStatistics("contentVersionsIterator", t.getElapsedTime()); ContentVersionVO cvVO = ContentVersionController.getContentVersionController().getContentVersionVOWithId(version.getId(), db2); Document document = getDocumentFromDigitalAsset(asset, cvVO, db); RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getDocumentFromDigitalAsset", t.getElapsedTime()); logger.info("00000000000000000: Adding asset document:" + document); if(document != null) returnDocuments.add(document); } } else { logger.info("Asset was probably deleted - ignoring it."); } commitTransaction(db2); } catch(Exception e) { logger.error("An error occurred so we should not complete the transaction:" + e, e); rollbackTransaction(db2); throw new SystemException(e.getMessage()); } } else if(notificationMessage.getClassName().equals(SiteNodeImpl.class.getName()) || notificationMessage.getClassName().equals(SiteNode.class.getName()) || notificationMessage.getClassName().equals(SmallSiteNodeImpl.class.getName()) || notificationMessage.getClassName().equals(PureSiteNodeImpl.class.getName())) { SiteNodeVO siteNodeVO = SiteNodeController.getController().getSiteNodeVOWithId((Integer)notificationMessage.getObjectId(), db); if (siteNodeVO == null) { logger.warn("Could not find SiteNode with id: " + notificationMessage.getObjectId()); } else { siteNodeIdsToIndex.add(siteNodeVO.getId()); } } logger.info("Indexing:" + siteNodeIdsToIndex.size()); for(Integer siteNodeId : siteNodeIdsToIndex) { //Deleting all info based on content Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_34); logger.info("Deleting all info on:" + siteNodeId); Query query = new QueryParser(Version.LUCENE_34, "siteNodeId", analyzer).parse("" + siteNodeId); writer.deleteDocuments(query); //End logger.info("QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ:" + notificationMessage.getObjectId()); SiteNodeVO siteNodeVO = SiteNodeController.getController().getSiteNodeVOWithId((Integer)notificationMessage.getObjectId(), db); logger.info("$$$$$$$$$$Getting doc for " + siteNodeVO.getName()); Document document = getDocumentFromSiteNode(siteNodeVO, writer, db); logger.info("document " + document); RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getDocumentFromSiteNode", t.getElapsedTime()); if(document != null) returnDocuments.add(document); } logger.info("Indexing contentIdsToIndex:" + contentIdsToIndex.size()); for(Integer contentId : contentIdsToIndex) { //Deleting all info based on content Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_34); logger.info("Deleting all info on:" + contentId); String[] fields = new String[]{"isAsset","contentId"}; String[] queries = new String[]{"true","" + contentId}; BooleanClause.Occur[] flags = new 
BooleanClause.Occur[]{BooleanClause.Occur.MUST_NOT,BooleanClause.Occur.MUST}; Query query = MultiFieldQueryParser.parse(Version.LUCENE_34, queries, fields, flags, analyzer); //Query query = new QueryParser(Version.LUCENE_34, "contentId", analyzer).parse("" + contentId); writer.deleteDocuments(query); //End ContentVO contentVO = ContentController.getContentController().getContentVOWithId(contentId, db); Document document = getDocumentFromContent(contentVO, notificationMessage, writer, forceVersionIndexing, db); RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getDocumentFromContent", (t.getElapsedTimeNanos() / 1000)); if(document != null) { returnDocuments.add(document); logger.info("++++++++++++++Forcing cv indexing"); List<ContentVersionVO> versions = new ArrayList<ContentVersionVO>(); if(CmsPropertyHandler.getApplicationName().equalsIgnoreCase("cms")) { //List<LanguageVO> languages = LanguageController.getController().getLanguageVOList(contentVO.getRepositoryId()); List<LanguageVO> languages = LanguageController.getController().getLanguageVOList(contentVO.getRepositoryId(), db); RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getLanguageVOList", (t.getElapsedTimeNanos() / 1000)); for(LanguageVO language : languages) { ContentVersionVO latestVersion = ContentVersionController.getContentVersionController().getLatestActiveContentVersionVO(contentVO.getId(), language.getId(), Integer.parseInt(CmsPropertyHandler.getOperatingMode()), db); RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getLatestActiveContentVersionVO", (t.getElapsedTimeNanos() / 1000)); if(latestVersion != null) versions.add(latestVersion); ContentVersionVO latestVersionPublishedVersion = ContentVersionController.getContentVersionController().getLatestActiveContentVersionVO(contentVO.getId(), language.getId(), ContentVersionVO.PUBLISHED_STATE, db); RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getLatestActiveContentVersionVO", (t.getElapsedTimeNanos() / 1000)); if(latestVersionPublishedVersion != null && latestVersionPublishedVersion.getId().intValue() != latestVersion.getId().intValue()) versions.add(latestVersionPublishedVersion); } } else { List<LanguageVO> languages = LanguageController.getController().getLanguageVOList(contentVO.getRepositoryId(), db); RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getLanguageVOList", (t.getElapsedTimeNanos() / 1000)); for(LanguageVO language : languages) { ContentVersionVO version = ContentVersionController.getContentVersionController().getLatestActiveContentVersionVO(contentVO.getId(), language.getId(), Integer.parseInt(CmsPropertyHandler.getOperatingMode()), db); RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getLatestActiveContentVersionVO", (t.getElapsedTimeNanos() / 1000)); if(version != null) versions.add(version); } } logger.info("versions:" + versions.size()); for(ContentVersionVO version : versions) { logger.info("version:" + version.getId()); Document versionDocument = getDocumentFromContentVersion(version, db); RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getDocumentFromContentVersion", t.getElapsedTime()); if(versionDocument != null) returnDocuments.add(versionDocument); if(version.getId() > this.lastCommitedContentVersionId) lastCommitedContentVersionId = version.getId(); } } } return returnDocuments; } private List<Document> getDocumentsForContentVersion(ContentVersionVO contentVersionVO, Database db) throws Exception { Timer t = new Timer(); 
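// Note: this helper only returns the Lucene document for the content version itself; documents for the version's digital assets are built separately (see getDocumentsForIncremental and getContentNotificationMessages).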
List<Document> returnDocuments = new ArrayList<Document>(); //ContentVO contentVO = ContentController.getContentController().getContentVOWithId(contentVersionVO.getContentId(), db); //RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getContentVOWithId", (t.getElapsedTimeNanos() / 1000)); Document document = getDocumentFromContentVersion(contentVersionVO, db); RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getDocumentFromContentVersion", t.getElapsedTime()); if(document != null) returnDocuments.add(document); return returnDocuments; } public Document getDocumentFromSiteNode(SiteNodeVO siteNodeVO, IndexWriter writer, Database db) throws Exception, InterruptedException { logger.info("getDocumentFromSiteNode:" + siteNodeVO.getName() + ":" + siteNodeVO.getIsDeleted()); if(siteNodeVO == null || siteNodeVO.getIsDeleted()) { logger.info("Adding a delete directive to the indexer"); String uid = "siteNodeId_" + siteNodeVO.getId(); logger.info("Deleting documents:" + "uid=" + uid); logger.info("Before delete:" + writer.numDocs()); //writer.deleteDocuments(new Term("uid", "" + uid)); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_34); Query query = new QueryParser(Version.LUCENE_34, "siteNodeId", analyzer).parse("" + siteNodeVO.getId()); writer.deleteDocuments(query); logger.info("After delete:" + writer.numDocs()); return null; } // make a new, empty document Document doc = new Document(); // Add the last modified date of the file a field named "modified". // Use a field that is indexed (i.e. searchable), but don't tokenize // the field into words. doc.add(new NumericField("publishDateTime", Field.Store.YES, true).setLongValue(siteNodeVO.getPublishDateTime().getTime())); doc.add(new Field("modified", DateTools.timeToString(new Date().getTime(), DateTools.Resolution.MINUTE), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("siteNodeId", "" + siteNodeVO.getId(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("repositoryId", "" + siteNodeVO.getRepositoryId(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("lastModifier", "" + siteNodeVO.getCreatorName(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("isAsset", "false", Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("isSiteNode", "true", Field.Store.YES, Field.Index.NOT_ANALYZED)); SiteNodeVersionVO siteNodeVersionVO = SiteNodeVersionController.getController().getLatestActiveSiteNodeVersionVO(db, siteNodeVO.getId()); if(siteNodeVersionVO != null) { doc.add(new NumericField("modificationDateTime", Field.Store.YES, true).setLongValue(siteNodeVersionVO.getModifiedDateTime().getTime())); doc.add(new Field("siteNodeVersionId", "" + siteNodeVersionVO.getId(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("stateId", "" + siteNodeVersionVO.getStateId(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("path", "" + getSiteNodePath(siteNodeVO.getId(), db), Field.Store.YES, Field.Index.NOT_ANALYZED)); } // Add the uid as a field, so that index can be incrementally // maintained. // This field is not stored with document, it is indexed, but it is not // tokenized prior to indexing. doc.add(new Field("uid", "siteNodeId_" + siteNodeVO.getId(), Field.Store.NO, Field.Index.NOT_ANALYZED)); // Add the tag-stripped contents as a Reader-valued Text field so it // will // get tokenized and indexed. 
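// The site node name is added first; the latest active meta info version for each language in the repository is appended to the same "contents" field below.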
doc.add(new Field("contents", new StringReader(siteNodeVO.getName()))); if(siteNodeVO.getMetaInfoContentId() != null && siteNodeVO.getMetaInfoContentId() > -1) { List<LanguageVO> languages = LanguageController.getController().getLanguageVOList(siteNodeVO.getRepositoryId(), db); for(LanguageVO language : languages) { ContentVersionVO cvVO = ContentVersionController.getContentVersionController().getLatestActiveContentVersionVO(siteNodeVO.getMetaInfoContentId(), language.getId(), Integer.parseInt(CmsPropertyHandler.getOperatingMode()), db); if(cvVO != null) doc.add(new Field("contents", new StringReader(cvVO.getVersionValue()))); } } // return the document return doc; } public Document getSiteNodeDocument(ContentVersionVO contentVersionVO, IndexWriter writer, Database db) throws Exception, InterruptedException { Timer t = new Timer(); ContentVO contentVO = ContentController.getContentController().getContentVOWithId(contentVersionVO.getContentId(), db); RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getContentVOWithId", (t.getElapsedTimeNanos() / 1000)); if(contentVO.getIsDeleted()) return null; if (contentVersionVO.getSiteNodeId() == null || contentVersionVO.getSiteNodeName() == null) { logger.warn("Content version does not have a SiteNode connected. Will not index content version. ContentVersion.id: " + contentVersionVO.getContentVersionId()); return null; } // make a new, empty document Document doc = new Document(); // Add the last modified date of the file a field named "modified". // Use a field that is indexed (i.e. searchable), but don't tokenize // the field into words. doc.add(new NumericField("publishDateTime", Field.Store.YES, true).setLongValue(contentVersionVO.getModifiedDateTime().getTime())); doc.add(new Field("modified", DateTools.timeToString(new Date().getTime(), DateTools.Resolution.MINUTE), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("siteNodeId", "" + contentVersionVO.getSiteNodeId(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("repositoryId", "" + contentVO.getRepositoryId(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("lastModifier", "" + contentVersionVO.getVersionModifier(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("isAsset", "false", Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("isSiteNode", "true", Field.Store.YES, Field.Index.NOT_ANALYZED)); //doc.add(new Field("contentTypeDefinitionId", "" + ContentTypeDefinitionController.getController().getContentTypeDefinitionVOWithName("Meta info", db).getId(), Field.Store.YES, Field.Index.NOT_ANALYZED)); try { SiteNodeVersionVO siteNodeVersionVO = SiteNodeVersionController.getController().getLatestActiveSiteNodeVersionVO(db, contentVersionVO.getSiteNodeId()); if(siteNodeVersionVO != null) doc.add(new Field("siteNodeVersionId", "" + siteNodeVersionVO.getId(), Field.Store.YES, Field.Index.NOT_ANALYZED)); else logger.warn("No site node version found on siteNode: " + contentVersionVO.getSiteNodeId()); } catch (Exception e) { e.printStackTrace(); } doc.add(new NumericField("modificationDateTime", Field.Store.YES, true).setLongValue(contentVersionVO.getModifiedDateTime().getTime())); doc.add(new Field("stateId", "" + contentVersionVO.getStateId(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("path", "" + getSiteNodePath(contentVersionVO.getSiteNodeId(), db), Field.Store.YES, Field.Index.NOT_ANALYZED)); // Add the uid as a field, so that index can be incrementally // maintained. 
// This field is not stored with document, it is indexed, but it is not // tokenized prior to indexing. doc.add(new Field("uid", "siteNodeId_" + contentVersionVO.getSiteNodeId(), Field.Store.NO, Field.Index.NOT_ANALYZED)); // Add the tag-stripped contents as a Reader-valued Text field so it // will // get tokenized and indexed. String pageName = contentVersionVO.getSiteNodeName(); if(pageName == null) { logger.info("Have to read again..."); SiteNodeVO siteNodeVO = SiteNodeController.getController().getSiteNodeVOWithId(contentVersionVO.getSiteNodeId(), db); pageName = siteNodeVO.getName(); } String versionValue = contentVersionVO.getVersionValue(); if(versionValue == null) { logger.info("Have to read version again..."); ContentVersionVO cvVO = ContentVersionController.getContentVersionController().getContentVersionVOWithId(contentVersionVO.getContentVersionId(), db); versionValue = cvVO.getVersionValue(); } doc.add(new Field("contents", new StringReader(versionValue))); doc.add(new Field("contents", new StringReader(pageName))); // return the document return doc; } public Document getDocumentFromContent(ContentVO contentVO, NotificationMessage message, IndexWriter writer, boolean indexVersions, Database db) throws Exception, InterruptedException { logger.info("getDocumentFromContent:" + contentVO.getName() + ":" + contentVO.getIsDeleted()); if(contentVO == null || contentVO.getIsDeleted()) { //NotificationMessage notificationMessage = new NotificationMessage(message.getName(), message.getClassName(), message.getSystemUserName(), NotificationMessage.TRANS_DELETE, message.getObjectId(), message.getObjectName()); logger.info("Adding a delete directive to the indexer"); //internalMessageList.add(notificationMessage); String uid = "contentId_" + contentVO.getId(); logger.info("Deleting documents:" + "uid=" + uid); logger.info("Before delete:" + writer.numDocs()); //writer.deleteDocuments(new Term("uid", "" + uid)); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_34); Query query = new QueryParser(Version.LUCENE_34, "contentId", analyzer).parse("" + contentVO.getId()); writer.deleteDocuments(query); logger.info("After delete:" + writer.numDocs()); return null; } // make a new, empty document Document doc = new Document(); // Add the last modified date of the file a field named "modified". // Use a field that is indexed (i.e. searchable), but don't tokenize // the field into words. doc.add(new NumericField("publishDateTime", Field.Store.YES, true).setLongValue(contentVO.getPublishDateTime().getTime())); doc.add(new Field("modified", DateTools.timeToString(new Date().getTime(), DateTools.Resolution.MINUTE), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("contentId", "" + contentVO.getContentId(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("contentTypeDefinitionId", "" + contentVO.getContentTypeDefinitionId(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("repositoryId", "" + contentVO.getRepositoryId(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("lastModifier", "" + contentVO.getCreatorName(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("isAsset", "false", Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("path", "" + getContentPath(contentVO.getId(), db), Field.Store.YES, Field.Index.NOT_ANALYZED)); // Add the uid as a field, so that index can be incrementally // maintained. // This field is not stored with document, it is indexed, but it is not // tokenized prior to indexing. 
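// uid naming convention used throughout this controller: "contentId_<id>" for contents, "contentVersionId_<id>" for content versions, "siteNodeId_<id>" for pages and "digitalAssetId_<id>" for assets; incremental updates delete on this field before re-adding a document.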
doc.add(new Field("uid", "contentId_" + contentVO.getId(), Field.Store.NO, Field.Index.NOT_ANALYZED)); // Add the tag-stripped contents as a Reader-valued Text field so it // will // get tokenized and indexed. doc.add(new Field("contents", new StringReader(contentVO.getName()))); // return the document return doc; } public Document getDocumentFromContentVersion(ContentVersionVO contentVersionVO, Database db) throws Exception, InterruptedException { logger.info("getting document from content version:" + contentVersionVO.getContentName()); Timer t = new Timer(); //ContentVO contentVO = ContentController.getContentController().getContentVOWithId(contentVersionVO.getContentId()); ContentVO contentVO = ContentController.getContentController().getContentVOWithId(contentVersionVO.getContentId(), db); RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getContentVOWithId", (t.getElapsedTimeNanos() / 1000)); if(contentVO.getIsDeleted()) return null; // make a new, empty document Document doc = new Document(); // Add the last modified date of the file a field named "modified". // Use a field that is indexed (i.e. searchable), but don't tokenize // the field into words. logger.info("contentVersionVO:" + contentVersionVO.getContentName()); doc.add(new NumericField("publishDateTime", Field.Store.YES, true).setLongValue(contentVO.getPublishDateTime().getTime())); doc.add(new NumericField("modificationDateTime", Field.Store.YES, true).setLongValue(contentVersionVO.getModifiedDateTime().getTime())); doc.add(new Field("modified", DateTools.timeToString(contentVersionVO.getModifiedDateTime().getTime(), DateTools.Resolution.MINUTE), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("contentVersionId", "" + contentVersionVO.getId(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("contentId", "" + contentVersionVO.getContentId(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("contentTypeDefinitionId", "" + contentVO.getContentTypeDefinitionId(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("languageId", "" + contentVersionVO.getLanguageId(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("repositoryId", "" + contentVO.getRepositoryId(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("lastModifier", "" + contentVersionVO.getVersionModifier(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("stateId", "" + contentVersionVO.getStateId(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("isAsset", "false", Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("path", "" + getContentPath(contentVO.getId(), db), Field.Store.YES, Field.Index.NOT_ANALYZED)); RequestAnalyser.getRequestAnalyser().registerComponentStatistics("Indexing normalFields", (t.getElapsedTimeNanos() / 1000)); //Testing adding the categories for this version try { if(contentVO.getContentTypeDefinitionId() != null) { ContentTypeDefinitionVO ctdVO = null; try { ctdVO = ContentTypeDefinitionController.getController().getContentTypeDefinitionVOWithId(contentVO.getContentTypeDefinitionId(), db); } catch (SystemException sex) { logger.warn("Failed to get the content type definition for content with Id: " + contentVO.getContentId() + ". The categories for the content will not be indexed. 
Message: " + sex.getMessage()); logger.info("Failed to get the content type definition for content with Id: " + contentVO.getContentId(), sex); } if (ctdVO != null) { RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getContentTypeDefinitionVOWithId", (t.getElapsedTimeNanos() / 1000)); List<CategoryAttribute> categoryKeys = ContentTypeDefinitionController.getController().getDefinedCategoryKeys(ctdVO, true); RequestAnalyser.getRequestAnalyser().registerComponentStatistics("getDefinedCategoryKeys", (t.getElapsedTimeNanos() / 1000)); for(CategoryAttribute categoryKey : categoryKeys) { logger.info("categoryKey:" + categoryKey.getValue() + " for content:" + contentVO.getName()); //List<ContentCategoryVO> contentCategoryVOList = ContentCategoryController.getController().findByContentVersionAttribute(categoryKey.getValue(), contentVersionVO.getId()); List<ContentCategory> contentCategoryVOList = ContentCategoryController.getController().findByContentVersionAttribute(categoryKey.getValue(), contentVersionVO.getId(), db, true); RequestAnalyser.getRequestAnalyser().registerComponentStatistics("Indexing categories", (t.getElapsedTimeNanos() / 1000)); logger.info("contentCategoryVOList:" + contentCategoryVOList.size()); for(ContentCategory contentCategory : contentCategoryVOList) { doc.add(new Field("categories", "" + contentCategory.getAttributeName().replaceAll(" ", "_").toLowerCase() + "eq" + contentCategory.getCategory().getId(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("categories", "" + contentCategory.getAttributeName() + "=" + contentCategory.getCategory().getId(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("" + contentCategory.getAttributeName() + "_categoryId", "" + contentCategory.getCategory().getId(), Field.Store.YES, Field.Index.NOT_ANALYZED)); } } } } } catch (Exception e) { logger.error("Problem indexing categories for contentVO: " + contentVO.getName() + "(" + contentVO.getId() + "): " + e.getMessage(), e); } RequestAnalyser.getRequestAnalyser().registerComponentStatistics("Indexing categories", (t.getElapsedTimeNanos() / 1000)); //End test // Add the uid as a field, so that index can be incrementally // maintained. // This field is not stored with document, it is indexed, but it is not // tokenized prior to indexing. doc.add(new Field("uid", "contentVersionId_" + contentVersionVO.getId(), Field.Store.NO, Field.Index.NOT_ANALYZED)); // Add the tag-stripped contents as a Reader-valued Text field so it // will // get tokenized and indexed. doc.add(new Field("contents", new StringReader(contentVersionVO.getVersionValue()))); doc.add(new Field("contents", new StringReader(contentVersionVO.getContentName()))); RequestAnalyser.getRequestAnalyser().registerComponentStatistics("Indexing end fields", (t.getElapsedTimeNanos() / 1000)); // return the document return doc; } public Document getDocumentFromDigitalAsset(DigitalAssetVO digitalAssetVO, ContentVersionVO contentVersionVO, Database db) throws Exception, InterruptedException { //ContentVO contentVO = ContentController.getContentController().getContentVOWithId(contentVersionVO.getContentId(), db); ContentVO contentVO = ContentController.getContentController().getLocklessContentVOWithId(contentVersionVO.getContentId(), db); if(contentVO == null || contentVO.getIsDeleted()) return null; // make a new, empty document Document doc = new Document(); // Add the last modified date of the file a field named "modified". // Use a field that is indexed (i.e. 
searchable), but don't tokenize // the field into words. //doc.add(new Field("modified", DateTools.timeToString(contentVersionVO.getModifiedDateTime().getTime(), DateTools.Resolution.MINUTE), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new NumericField("modificationDateTime", Field.Store.YES, true).setLongValue(contentVersionVO.getModifiedDateTime().getTime())); doc.add(new Field("digitalAssetId", "" + digitalAssetVO.getId(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("contentVersionId", "" + contentVersionVO.getId(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("contentId", "" + contentVersionVO.getContentId(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("contentTypeDefinitionId", "" + contentVO.getContentTypeDefinitionId(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("languageId", "" + contentVersionVO.getLanguageId(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("repositoryId", "" + contentVO.getRepositoryId(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("lastModifier", "" + contentVersionVO.getVersionModifier(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("stateId", "" + contentVersionVO.getStateId(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("isAsset", "true", Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("path", "" + getContentPath(contentVO.getId(), db), Field.Store.YES, Field.Index.NOT_ANALYZED)); // Add the uid as a field, so that index can be incrementally // maintained. // This field is not stored with document, it is indexed, but it is not // tokenized prior to indexing. doc.add(new Field("uid", "digitalAssetId_" + digitalAssetVO.getId(), Field.Store.NO, Field.Index.NOT_ANALYZED)); //doc.add(new Field("uid", "" + contentVersionVO.getId(), Field.Store.NO, Field.Index.NOT_ANALYZED)); // Add the tag-stripped contents as a Reader-valued Text field so it // will // get tokenized and indexed. 
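// The asset key, file name and content type are always indexed as text; the actual file contents are only extracted and added when indexDigitalAssetContent is enabled (handled further down).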
doc.add(new Field("contents", new StringReader(digitalAssetVO.getAssetKey() + " " + digitalAssetVO.getAssetFileName() + " " + digitalAssetVO.getAssetContentType()))); if (CmsPropertyHandler.getIndexDigitalAssetContent()) { //String url = DigitalAssetController.getController().getDigitalAssetUrl(digitalAssetVO, db); //if(logger.isInfoEnabled()) // logger.info("url if we should index file:" + url); try { String filePath = DigitalAssetController.getController().getDigitalAssetFilePath(digitalAssetVO, db); if(logger.isInfoEnabled()) logger.info("filePath if we should index file:" + filePath); File file = new File(filePath); String text = extractTextToIndex(digitalAssetVO, file); doc.add(new Field("contents", new StringReader(text))); } catch(Exception e) { logger.warn("Problem getting asset:" + digitalAssetVO.getId() + ": " + e.getMessage()); } } return doc; } private String extractTextToIndex(DigitalAssetVO digitalAssetVO, File file) { String text = ""; if(logger.isInfoEnabled()) logger.info("Asset content type:" + digitalAssetVO.getAssetContentType()); if(digitalAssetVO.getAssetContentType().equalsIgnoreCase("application/pdf")) { try { Writer output = null; PDDocument document = null; try { document = PDDocument.load(file); ByteArrayOutputStream baos = new ByteArrayOutputStream(); if(!document.isEncrypted()) { output = new OutputStreamWriter(baos, "UTF-8"); PDFTextStripper stripper = new PDFTextStripper(); //stripper.setSortByPosition( sort ); //stripper.setStartPage( startPage ); //stripper.setEndPage( endPage ); stripper.writeText( document, output ); text = baos.toString("UTF-8"); if(logger.isInfoEnabled()) logger.info("PDF Document has " + text.length() + " chars\n\n" + text); } } catch (Exception e) { logger.warn("Error indexing file: " + file + "\nMessage: " + e.getMessage()); } finally { if( output != null ) { output.close(); } if( document != null ) { document.close(); } } } catch (Exception e) { logger.warn("Error indexing:" + e.getMessage()); } } else if(digitalAssetVO.getAssetContentType().equalsIgnoreCase("application/msword")) { try { InputStream is = new FileInputStream(file); POIFSFileSystem fs = new POIFSFileSystem(new FileInputStream(file)); is.close(); // Create a document for this file HWPFDocument doc = new HWPFDocument(fs); // Create a WordExtractor to read the text of the word document WordExtractor we = new WordExtractor(doc); // Extract all paragraphs in the document as strings text = we.getText(); // Output the document if(logger.isInfoEnabled()) logger.info("Word Document has " + text.length() + " chars\n\n" + text); } catch (Exception e) { logger.warn("Error indexing file: " + file + "\nMessage: " + e.getMessage()); } } return text; } public void deleteVersionFromIndex(String contentVersionId) { try { IndexWriter writer = getIndexWriter(); logger.info("Deleting contentVersionId:" + contentVersionId); writer.deleteDocuments(new Term("contentVersionId", "" + contentVersionId)); writer.commit(); } catch (Exception e) { logger.error("Error deleteVersionFromIndex:" + e.getMessage(), e); } } public String getContentPath(Integer contentId, Database db) throws Exception { StringBuffer sb = new StringBuffer(); ContentVO contentVO = ContentController.getContentController().getContentVOWithId(contentId, db); if (contentVO.getName() == null || contentVO.getName().equals("")) { sb.insert(0, "]"); sb.insert(0, contentVO.getId()); sb.insert(0, "["); } else { sb.insert(0, contentVO.getName()); } while(contentVO.getParentContentId() != null) { contentVO = 
ContentController.getContentController().getContentVOWithId(contentVO.getParentContentId(), db); sb.insert(0, "/"); if (contentVO.getName() == null || contentVO.getName().equals("")) { sb.insert(0, "]"); sb.insert(0, contentVO.getId()); sb.insert(0, "["); } else { sb.insert(0, contentVO.getName()); } } sb.insert(0, "/"); return sb.toString(); } public String getSiteNodePath(Integer siteNodeId, Database db) throws Exception { StringBuffer sb = new StringBuffer(); SiteNodeVO siteNodeVO = SiteNodeController.getController().getSiteNodeVOWithId(siteNodeId, db); while(siteNodeVO != null) { sb.insert(0, "/" + siteNodeVO.getName()); if(siteNodeVO.getParentSiteNodeId() != null) siteNodeVO = SiteNodeController.getController().getSiteNodeVOWithId(siteNodeVO.getParentSiteNodeId(), db); else siteNodeVO = null; } return sb.toString(); } /** * This is a method that never should be called. */ public BaseEntityVO getNewVO() { return null; } public void setContextParameters(Map map) { // TODO Auto-generated method stub } }