/* * Copyright (C) 2008-2013 University of Dundee & Open Microscopy Environment. * All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ package ome.services.fulltext; import ome.model.IAnnotated; import ome.model.IGlobal; import ome.model.IMutable; import ome.model.IObject; import ome.model.meta.EventLog; import ome.services.eventlogs.EventLogFailure; import ome.services.eventlogs.EventLogLoader; import ome.services.eventlogs.PersistentEventLogLoader; import ome.services.util.Executor.SimpleWork; import ome.system.OmeroContext; import ome.system.ServiceFactory; import ome.system.metrics.Histogram; import ome.system.metrics.Metrics; import ome.system.metrics.NullMetrics; import ome.system.metrics.Timer; import ome.tools.hibernate.QueryBuilder; import ome.util.SqlAction; import org.hibernate.CacheMode; import org.hibernate.FlushMode; import org.hibernate.Session; import org.hibernate.search.FullTextSession; import org.hibernate.search.Search; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.context.ApplicationContext; import org.springframework.context.ApplicationContextAware; import org.springframework.transaction.annotation.Isolation; import org.springframework.transaction.annotation.Transactional; /** * Simple action which can be done in an asynchronous thread in order to index * Hibernate entities. Attempts to index each {@link EventLog} passed from the * {@link EventLogLoader} multiple times on failure. Eventually * * @author Josh Moore, josh at glencoesoftware.com * @since 3.0-Beta3 */ public class FullTextIndexer extends SimpleWork implements ApplicationContextAware { private final static Logger log = LoggerFactory.getLogger(FullTextIndexer.class); /** * Default number of loops to wait if no external value is set. */ public final static int DEFAULT_REPORTING_LOOPS = 100; abstract class Action { Class type; long id; IObject obj; abstract void go(FullTextSession session); abstract void log(Logger log); } class Purge extends Action { Purge(Class type, long id) { this.type = type; this.id = id; } @Override void go(FullTextSession session) { session.purge(type, id); session.flushToIndexes(); } @Override void log(Logger log) { if (log.isDebugEnabled()) { log.debug(String.format("Purged: %s:Id_%d", type, id)); } } } class Index extends Action { Index(IObject obj) { this.obj = obj; } @Override void go(FullTextSession session) { session.index(obj); session.flushToIndexes(); } @Override void log(Logger log) { if (log.isDebugEnabled()) { log.debug(String.format("Indexed: %s", obj)); } } } final protected EventLogLoader loader; final protected ParserSession parserSession; final protected Timer batchTimer; final protected Histogram completeSlow, completeFast; protected int reps = 5; protected long batch; /** * Frequency with which the percentage done should be calculated. */ protected int reportingLoops = DEFAULT_REPORTING_LOOPS; protected boolean dryRun = false; protected OmeroContext context = null; /** * Spring injector. Sets the number of indexing runs will be made if there * is a substantial backlog. */ public void setRepetitions(int reps) { this.reps = reps; } public void setReportingLoops(int loops) { this.reportingLoops = loops; } public void setDryRun(boolean dryRun) { this.dryRun = dryRun; } public void setApplicationContext(ApplicationContext ctx) { this.context = (OmeroContext) ctx; } public FullTextIndexer(EventLogLoader ll) { this(ll, new NullMetrics()); } public FullTextIndexer(EventLogLoader ll, Metrics metrics) { super("FullTextIndexer", "index"); this.loader = ll; this.parserSession = new ParserSession(); this.batchTimer = metrics.timer(this, "batch"); this.completeSlow= metrics.histogram(this, "percentCompleteSlow"); this.completeFast = metrics.histogram(this, "percentCompleteFast"); } /** * Since these instances are used repeatedly, we need to check for * already set SqlAction */ @Override public synchronized void setSqlAction(SqlAction sql) { if (getSqlAction() == null) { super.setSqlAction(sql); } } /** * Runs {@link #doIndexing(FullTextSession)} within a Lucene transaction. * {@link #doIndexing(FullTextSession)} will also be called */ @Transactional(readOnly = false, isolation = Isolation.SERIALIZABLE) public Object doWork(Session session, ServiceFactory sf) { int count = 1; int perbatch = 0; long start = System.currentTimeMillis(); Timer.Context timer = null; do { batch++; timer = batchTimer.time(); try { // ticket:1254 - // The following is non-portable and can later be refactored // for a more general solution. getSqlAction().deferConstraints(); // s.execute("set statement_timeout=10000"); // The Postgresql Driver does not currently support the // "timeout" value on @Transactional and so if a query timeout // is required, then this must be set. FullTextSession fullTextSession = Search .getFullTextSession(session); fullTextSession.setFlushMode(FlushMode.MANUAL); fullTextSession.setCacheMode(CacheMode.IGNORE); perbatch = doIndexingWithWorldRead(sf, fullTextSession); } finally { timer.stop(); count++; } } while (doMore(count)); if (perbatch == 0) { log.debug("No objects indexed"); } else { final long elapsed = (System.currentTimeMillis() - start); if (loader instanceof PersistentEventLogLoader) { long currId = ((PersistentEventLogLoader) loader).getCurrentId(); long lastId = loader.lastEventLog().getId(); String which = "~"; double perc = 0.0f; if (batchTimer.getCount() % reportingLoops == 0) { which = ""; perc = getSqlAction().getEventLogPercent( ((PersistentEventLogLoader) loader).getKey()); completeSlow.update((int) perc); } else { perc = 100.0 * ((float) currId) / ((float) lastId); completeFast.update((int) perc); } log.info(String.format("INDEXED %4s objects in batch#%-6s " + "[%7d ms.] %s%2d%% done (%d of %d)", perbatch, batch, elapsed, which, ((int) perc), currId, lastId)); } else { log.info(String.format("INDEXED %4s objects in batch#%-6s " + "[%7d ms.]", perbatch, batch, elapsed)); } } return null; } private int doIndexingWithWorldRead(ServiceFactory sf, FullTextSession session) { int rc = doIndexing(session); return rc; } public int doIndexing(FullTextSession session) { int count = 0; for (EventLog eventLog : loader) { if (dryRun) { continue; } if (eventLog != null) { handleEventLog(session, eventLog); count++; } session.flush(); parserSession.closeParsedFiles(); } return count; } protected void handleEventLog(FullTextSession session, EventLog eventLog) { String act = eventLog.getAction(); Class type = asClassOrNull(eventLog.getEntityType()); if (type != null) { long id = eventLog.getEntityId(); Action action = null; if ("DELETE".equals(act)) { action = new Purge(type, id); } else if ("REINDEX".equals(act) || "UPDATE".equals(act) || "INSERT".equals(act)) { IObject obj = get(session, type, id); if (obj == null) { // This object was deleted before the indexer caught up with // the INSERT/UDPDATE log. Though this isn't a problem itself, // this does mean that the indexer is likely going too slow. log.debug(String.format("Null returned! Purging " + "since cannot index %s:Id_%s for %s", type .getName(), id, eventLog)); action = new Purge(type, id); } else { action = new Index(obj); } } else { // Likely CHGRP-VALIDATION, PIXELDATA or similar. if (log.isDebugEnabled()) { log.debug("Unknown action type: " + act); } } if (action != null) { try { action.go(session); } catch (Exception e) { try { this.context.publishMessage(new EventLogFailure(loader, eventLog, e)); } catch (RuntimeException re) { throw re; } catch (Throwable e1) { throw new RuntimeException(e1); } } action.log(log); } } } /** * Default implementation suggests doing more if fewer than {@link #reps} * runs have been made and if there are still more than * {@link EventLogLoader#batchSize} x 100 backlog entries. * * This is based on the assumption that indexing runs roughly 120 times an * hour, so if there are more than an hours worth of batches, do extra work * to catch up. */ public boolean doMore(int count) { if (count < this.reps && loader.more() > loader.getBatchSize() * 100) { log.info(String .format("Suggesting round %s of " + "indexing to reduce backlog of %s:", count, loader.more())); return true; } return false; } protected Class asClassOrNull(String str) { try { return Class.forName(str); } catch (ClassNotFoundException e) { log.warn("Unknown entity type found in database: " + str); return null; } } protected IObject get(Session session, Class type, long id) { QueryBuilder qb = new QueryBuilder(); qb.select("this").from(type.getName(), "this"); if (IAnnotated.class.isAssignableFrom(type)) { qb.join("this.annotationLinks", "l1", true, true); qb.join("l1.child", "a1", true, true); qb.join("a1.annotationLinks", "l2", true, true); qb.join("l2.child", "a2", true, true); } if (!IGlobal.class.isAssignableFrom(type)) { if (IMutable.class.isAssignableFrom(type)) { qb.join("this.details.updateEvent", "update", false, true); } qb.join("this.details.creationEvent", "create", false, true); qb.join("this.details.owner", "owner", false, true); qb.join("this.details.group", "group", false, true); } qb.where().and("this.id = :id"); qb.param("id", id); return (IObject) qb.query(session).uniqueResult(); } }