/*******************************************************************************
 * Copyright (c) 2013 aegif.
 *
 * This file is part of NemakiWare.
 *
 * NemakiWare is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * NemakiWare is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with NemakiWare.
 * If not, see <http://www.gnu.org/licenses/>.
 *
 * Contributors:
 *     linzhixing(https://github.com/linzhixing) - initial API and implementation
 ******************************************************************************/
package jp.aegif.nemaki.tracker;

import static org.apache.solr.handler.extraction.ExtractingParams.UNKNOWN_FIELD_PREFIX;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.Set;

import jp.aegif.nemaki.NemakiCoreAdminHandler;
import jp.aegif.nemaki.util.CmisSessionFactory;
import jp.aegif.nemaki.util.Constant;
import jp.aegif.nemaki.util.PropertyKey;
import jp.aegif.nemaki.util.PropertyManager;
import jp.aegif.nemaki.util.StringPool;
import jp.aegif.nemaki.util.NemakiTokenManager;
import jp.aegif.nemaki.util.impl.PropertyManagerImpl;
import jp.aegif.nemaki.util.yaml.RepositorySettings;

import org.apache.chemistry.opencmis.client.api.ChangeEvent;
import org.apache.chemistry.opencmis.client.api.ChangeEvents;
import org.apache.chemistry.opencmis.client.api.Session;
import org.apache.chemistry.opencmis.commons.exceptions.CmisRuntimeException;
import org.apache.chemistry.opencmis.commons.spi.CmisBinding;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.core.CloseHook;
import org.apache.solr.core.SolrCore;

/**
 * Index tracking class.
 *
 * <p>
 * Reads CMIS change logs per repository, hands the resulting change events to
 * {@link Registration} for indexing, and persists the latest processed change
 * token in a separate Solr server ({@code tokenServer}).
 *
 * <p>
 * {@link #initCore()}, {@link #initCore(String)} and
 * {@link #index(String, String)} all synchronize on one static lock, so they
 * never run concurrently with each other, even across instances.
 *
 * @author linzhixing
 *
 */
public class CoreTracker extends CloseHook {

	/** Static monitor shared by every instance; guards initialization and indexing. */
	private static final Object LOCK = new Object();

	Logger logger = Logger.getLogger(CoreTracker.class);

	NemakiCoreAdminHandler adminHandler;
	SolrCore core;
	SolrServer indexServer;
	SolrServer tokenServer;
	CmisBinding cmisBinding;
	NemakiTokenManager nemakiTokenManager;

	/**
	 * @param adminHandler
	 *            admin handler that owns this tracker
	 * @param core
	 *            Solr core being tracked (its name is used in log messages)
	 * @param indexServer
	 *            Solr server holding the indexed documents
	 * @param tokenServer
	 *            Solr server holding the latest change token per repository
	 */
	public CoreTracker(NemakiCoreAdminHandler adminHandler, SolrCore core, SolrServer indexServer,
			SolrServer tokenServer) {
		super();

		this.adminHandler = adminHandler;
		this.core = core;
		this.indexServer = indexServer;
		this.tokenServer = tokenServer;
		this.nemakiTokenManager = new NemakiTokenManager();
	}

	public SolrServer getIndexServer() {
		return indexServer;
	}

	/** No-op close hook. */
	@Override
	public void preClose(SolrCore core) {
	}

	/** No-op close hook. */
	@Override
	public void postClose(SolrCore core) {
	}

	/**
	 * Initialize a specified Solr core: deletes every document from the index
	 * server and from the token server. Failures are logged, not thrown.
	 */
	public void initCore() {
		synchronized (LOCK) {
			try {
				// Initialize all documents
				indexServer.deleteByQuery("*:*");
				indexServer.commit();
				logger.info(core.getName() + ":Successfully initialized!");

				tokenServer.deleteByQuery("*:*");
				tokenServer.commit();
				logger.info(core.getName() + ":Successfully initialized!");
			} catch (SolrServerException e) {
				logger.error(core.getName() + ":Initialization failed!", e);
			} catch (IOException e) {
				logger.error(core.getName() + ":Initialization failed!", e);
			}
		}
	}

	/**
	 * Initialize the index of a single repository: deletes the documents whose
	 * repository id field matches and stores a blank change token for it.
	 * Failures are logged, not thrown.
	 *
	 * @param repositoryId
	 *            id of the repository whose documents are removed
	 */
	public void initCore(String repositoryId) {
		synchronized (LOCK) {
			try {
				// Initialize all documents of this repository
				indexServer.deleteByQuery(Constant.FIELD_REPOSITORY_ID + ":" + repositoryId);
				indexServer.commit();
				logger.info(core.getName() + ":Successfully initialized!");

				storeLatestChangeToken("", repositoryId);
			} catch (SolrServerException e) {
				logger.error(core.getName() + ":Initialization failed!", e);
			} catch (IOException e) {
				logger.error(core.getName() + ":Initialization failed!", e);
			}
		}
	}

	/**
	 * Read CMIS change logs and index them, for every configured repository.
	 *
	 * @param trackingType
	 *            {@code Constant.MODE_DELTA} or {@code Constant.MODE_FULL}
	 */
	public void index(String trackingType) {
		RepositorySettings settings = CmisSessionFactory.getRepositorySettings();
		for (String repositoryId : settings.getIds()) {
			index(trackingType, repositoryId);
			// TODO multi-threading
		}
	}

	/**
	 * Read CMIS change logs of one repository and index them.
	 *
	 * <p>
	 * In FULL mode batches are processed repeatedly until the change log yields
	 * no further events. The repetition is a loop rather than recursion so a
	 * long change log cannot exhaust the stack, and it stops as soon as a batch
	 * cannot be completed or its token cannot be stored (otherwise the same
	 * batch would be fetched again indefinitely).
	 *
	 * @param trackingType
	 *            {@code Constant.MODE_DELTA} or {@code Constant.MODE_FULL}
	 * @param repositoryId
	 *            id of the repository to crawl
	 * @throws CmisRuntimeException
	 *             if fetching the change log fails
	 */
	public void index(String trackingType, String repositoryId) {
		synchronized (LOCK) {
			boolean fullMode = Constant.MODE_FULL.equals(trackingType);
			boolean keepGoing;
			do {
				keepGoing = indexNextBatch(trackingType, repositoryId) && fullMode;
			} while (keepGoing);
		}
	}

	/**
	 * Fetch and index one batch of change events, then persist the new token.
	 * Must be called while holding {@link #LOCK}.
	 *
	 * @return {@code true} if a batch was indexed and its token stored;
	 *         {@code false} if there was nothing to index, the batch was
	 *         interrupted, or the token could not be stored
	 */
	private boolean indexNextBatch(String trackingType, String repositoryId) {
		// Read the token once so that fetching and first-item handling agree on it
		String token = readLatestChangeToken(repositoryId);

		ChangeEvents changeEvents = getCmisChangeLog(trackingType, repositoryId, token);
		if (changeEvents == null) {
			return false;
		}

		List<ChangeEvent> events = changeEvents.getChangeEvents();
		if (events == null || events.isEmpty()) {
			return false;
		}

		// After 2nd crawling, discard the first item
		// Because the specs say that it's included in the results
		if (!StringUtils.isEmpty(token)) {
			events.remove(0);
			if (events.isEmpty()) {
				return false;
			}
		}

		// Parse filtering configuration
		PropertyManager pm = new PropertyManagerImpl(StringPool.PROPERTIES_NAME);
		boolean fulltextEnabled = Boolean.TRUE.toString()
				.equalsIgnoreCase(pm.readValue(PropertyKey.SOLR_TRACKING_FULLTEXT_ENABLED));
		boolean mimeTypeFilterEnabled = false; // default
		List<String> allowedMimeTypeFilter = new ArrayList<String>(); // default
		if (fulltextEnabled) {
			String filterEnabled = pm.readValue(PropertyKey.SOLR_TRACKING_MIMETYPE_FILTER_ENABLED);
			mimeTypeFilterEnabled = Boolean.TRUE.toString().equalsIgnoreCase(filterEnabled);
			if (mimeTypeFilterEnabled) {
				allowedMimeTypeFilter = pm.readValues(PropertyKey.SOLR_TRACKING_MIMETYPE);
			}
		}

		// Extract only the last events of each objectId
		List<ChangeEvent> list = extractChangeEvent(events);

		// Split into contiguous chunks using ceiling division, so that every
		// event lands in exactly one chunk whatever the remainder is.
		// 'list' is non-empty here, hence threadCount >= 1 and chunkSize >= 1.
		int threadCount = Math.min(readThreadCount(pm), list.size());
		int chunkSize = (list.size() + threadCount - 1) / threadCount;

		for (int from = 0; from < list.size(); from += chunkSize) {
			int to = Math.min(from + chunkSize, list.size());
			List<ChangeEvent> listPerThread = list.subList(from, to);

			Session cmisSession = CmisSessionFactory.getSession(repositoryId);
			Registration registration = new Registration(cmisSession, core, indexServer, listPerThread,
					fulltextEnabled, mimeTypeFilterEnabled, allowedMimeTypeFilter);

			// Each chunk is joined before the next one starts, i.e. chunks are
			// processed one after another in change-log order.
			Thread t = new Thread(registration);
			t.start();
			try {
				t.join();
			} catch (InterruptedException e) {
				// Keep the interrupt visible to the caller and do not advance the
				// token: the current chunk may be unfinished, so the batch has to
				// be picked up again by the next crawl.
				Thread.currentThread().interrupt();
				logger.error("Interrupted while indexing change events for repository: " + repositoryId, e);
				return false;
			}
		}

		// Save the latest token
		return storeLatestChangeToken(changeEvents.getLatestChangeLogToken(), repositoryId);
	}

	/**
	 * Read the configured number of indexing threads.
	 *
	 * @return the configured value, or 1 if it is missing, unparsable or below 1
	 */
	private int readThreadCount(PropertyManager pm) {
		String raw = pm.readValue(PropertyKey.SOLR_TRACKING_NUMBER_OF_THREAD);
		int configured;
		try {
			// parseInt(null) also raises NumberFormatException
			configured = Integer.parseInt(raw == null ? null : raw.trim());
		} catch (NumberFormatException e) {
			logger.warn("Invalid number of tracking threads '" + raw + "'; falling back to 1");
			return 1;
		}
		if (configured < 1) {
			logger.warn("Invalid number of tracking threads '" + raw + "'; falling back to 1");
			return 1;
		}
		return configured;
	}

	/**
	 * Get the last change token stored in Solr.
	 *
	 * <p>
	 * When no token document exists for the repository, a blank token is stored
	 * and returned. When the query itself fails, blank is returned but nothing
	 * is written, so a transient Solr error cannot overwrite a stored token.
	 *
	 * @return the stored token, or an empty string if none could be read
	 */
	private String readLatestChangeToken(String repositoryId) {
		SolrQuery solrQuery = new SolrQuery();
		solrQuery.setQuery(Constant.FIELD_REPOSITORY_ID + ":" + repositoryId);

		QueryResponse resp;
		try {
			resp = tokenServer.query(solrQuery);
		} catch (SolrServerException e) {
			logger.error("Failed to read latest change token from Solr for repository: " + repositoryId, e);
			return "";
		}

		if (resp != null && resp.getResults() != null && resp.getResults().getNumFound() != 0) {
			SolrDocument doc = resp.getResults().get(0);
			return (String) doc.get(Constant.FIELD_TOKEN);
		}

		logger.info("No latest change token found for repository: " + repositoryId);
		logger.info("Set blank latest change token for repository: " + repositoryId);
		storeLatestChangeToken("", repositoryId);
		return "";
	}

	/**
	 * Store the last change token in Solr. Failures are logged, not thrown.
	 *
	 * @return {@code true} if the update request was sent without an exception
	 */
	private boolean storeLatestChangeToken(String token, String repositoryId) {
		Map<String, Object> map = new HashMap<String, Object>();
		map.put(Constant.FIELD_REPOSITORY_ID, repositoryId);
		map.put(Constant.FIELD_TOKEN, token);

		AbstractUpdateRequest req = buildUpdateRequest(map);

		try {
			tokenServer.request(req);
			return true;
		} catch (SolrServerException e) {
			logger.error("Failed to store latest change token in Solr!", e);
		} catch (IOException e) {
			logger.error("Failed to store latest change token in Solr!", e);
		}
		return false;
	}

	/**
	 * Get CMIS change logs.
	 *
	 * @param trackingType
	 *            selects which configured item limit applies (DELTA or FULL);
	 *            any other value requests 0 items
	 * @param repositoryId
	 *            id of the repository to query
	 * @param latestToken
	 *            token to continue from; blank or {@code null} means "from the
	 *            beginning"
	 * @return the change events, or {@code null} if no CMIS session is available
	 * @throws CmisRuntimeException
	 *             if the repository call fails (the cached session is cleared
	 *             first)
	 */
	private ChangeEvents getCmisChangeLog(String trackingType, String repositoryId, String latestToken) {
		PropertyManager propMgr = new PropertyManagerImpl(StringPool.PROPERTIES_NAME);

		long configuredItems = 0;
		if (Constant.MODE_DELTA.equals(trackingType)) {
			configuredItems = Long.parseLong(propMgr.readValue(PropertyKey.CMIS_CHANGELOG_ITEMS_DELTA));
		} else if (Constant.MODE_FULL.equals(trackingType)) {
			configuredItems = Long.parseLong(propMgr.readValue(PropertyKey.CMIS_CHANGELOG_ITEMS_FULL));
		}
		// A configured -1 means "no limit"
		long numItems = (configuredItems == -1) ? Long.MAX_VALUE : configuredItems;

		Session cmisSession = CmisSessionFactory.getSession(repositoryId);
		if (cmisSession == null) {
			return null;
		}

		String fromToken = StringUtils.isEmpty(latestToken) ? null : latestToken;
		try {
			// No need for Sorting
			// (Specification requires they are returned by ASCENDING)
			return cmisSession.getContentChanges(fromToken, false, numItems);
		} catch (CmisRuntimeException ex) {
			// On error reset session.
			CmisSessionFactory.clearSession(repositoryId);
			throw ex;
		}
	}

	/**
	 * Keep only the last event of each object id, preserving the relative order
	 * of the kept events.
	 *
	 * @param events
	 *            change events in change-log order
	 * @return a new list with at most one (the latest) event per object id
	 */
	private List<ChangeEvent> extractChangeEvent(List<ChangeEvent> events) {
		List<ChangeEvent> latestFirst = new ArrayList<ChangeEvent>();
		Set<String> seenObjectIds = new HashSet<String>();

		// Walk backwards so the first occurrence seen is the most recent one
		ListIterator<ChangeEvent> iterator = events.listIterator(events.size());
		while (iterator.hasPrevious()) {
			ChangeEvent event = iterator.previous();
			if (seenObjectIds.add(event.getObjectId())) {
				latestFirst.add(event);
			}
		}

		Collections.reverse(latestFirst);
		return latestFirst;
	}

	/**
	 * Build an update request to Solr without file.
	 *
	 * @param map
	 *            field name to field value of the single document to send
	 * @return an update request carrying that document, set to commit
	 */
	// TODO Unify that of Registration class
	private AbstractUpdateRequest buildUpdateRequest(Map<String, Object> map) {
		UpdateRequest up = new UpdateRequest();
		SolrInputDocument sid = new SolrInputDocument();

		// Set SolrDocument parameters
		for (Map.Entry<String, Object> field : map.entrySet()) {
			sid.addField(field.getKey(), field.getValue());
		}

		// Set UpdateRequest
		up.add(sid);
		// Ignored(for schema.xml, ignoring some SolrCell meta fields)
		up.setParam(UNKNOWN_FIELD_PREFIX, "ignored_");

		// Set Solr action parameter
		up.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);

		return up;
	}
}