/*******************************************************************************
* Copyright (c) 2013 aegif.
*
* This file is part of NemakiWare.
*
* NemakiWare is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NemakiWare is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with NemakiWare.
* If not, see <http://www.gnu.org/licenses/>.
*
* Contributors:
* linzhixing(https://github.com/linzhixing) - initial API and implementation
******************************************************************************/
package jp.aegif.nemaki.tracker;
import static org.apache.solr.handler.extraction.ExtractingParams.UNKNOWN_FIELD_PREFIX;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.Set;
import jp.aegif.nemaki.NemakiCoreAdminHandler;
import jp.aegif.nemaki.util.CmisSessionFactory;
import jp.aegif.nemaki.util.Constant;
import jp.aegif.nemaki.util.PropertyKey;
import jp.aegif.nemaki.util.PropertyManager;
import jp.aegif.nemaki.util.StringPool;
import jp.aegif.nemaki.util.NemakiTokenManager;
import jp.aegif.nemaki.util.impl.PropertyManagerImpl;
import jp.aegif.nemaki.util.yaml.RepositorySettings;
import org.apache.chemistry.opencmis.client.api.ChangeEvent;
import org.apache.chemistry.opencmis.client.api.ChangeEvents;
import org.apache.chemistry.opencmis.client.api.Session;
import org.apache.chemistry.opencmis.commons.exceptions.CmisRuntimeException;
import org.apache.chemistry.opencmis.commons.spi.CmisBinding;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.core.CloseHook;
import org.apache.solr.core.SolrCore;
/**
* Index tracking class
*
* @author linzhixing
*
*/
public class CoreTracker extends CloseHook {
private static final Object LOCK = new Object();
Logger logger = Logger.getLogger(CoreTracker.class);
NemakiCoreAdminHandler adminHandler;
SolrCore core;
SolrServer indexServer;
SolrServer tokenServer;
CmisBinding cmisBinding;
NemakiTokenManager nemakiTokenManager;
public CoreTracker(NemakiCoreAdminHandler adminHandler, SolrCore core, SolrServer indexServer,
SolrServer tokenServer) {
super();
this.adminHandler = adminHandler;
this.core = core;
this.indexServer = indexServer;
this.tokenServer = tokenServer;
this.nemakiTokenManager = new NemakiTokenManager();
}
public SolrServer getIndexServer() {
return indexServer;
}
@Override
public void preClose(SolrCore core) {
}
@Override
public void postClose(SolrCore core) {
}
/**
* Initialize a specified Solr core
*/
public void initCore() {
synchronized (LOCK) {
try {
// Initialize all documents
indexServer.deleteByQuery("*:*");
indexServer.commit();
logger.info(core.getName() + ":Successfully initialized!");
tokenServer.deleteByQuery("*:*");
tokenServer.commit();
logger.info(core.getName() + ":Successfully initialized!");
} catch (SolrServerException e) {
logger.error(core.getName() + ":Initialization failed!", e);
} catch (IOException e) {
logger.error(core.getName() + ":Initialization failed!", e);
}
}
}
public void initCore(String repositoryId) {
synchronized (LOCK) {
try {
// Initialize all documents
indexServer.deleteByQuery(Constant.FIELD_REPOSITORY_ID + ":" + repositoryId);
indexServer.commit();
logger.info(core.getName() + ":Successfully initialized!");
storeLatestChangeToken("", repositoryId);
} catch (SolrServerException e) {
logger.error(core.getName() + ":Initialization failed!", e);
} catch (IOException e) {
logger.error(core.getName() + ":Initialization failed!", e);
}
}
}
/**
* Read CMIS change logs and Index them
*
* @param trackingType
*/
public void index(String trackingType) {
RepositorySettings settings = CmisSessionFactory.getRepositorySettings();
for (String repositoryId : settings.getIds()) {
index(trackingType, repositoryId); // TODO multi-threding
}
}
public void index(String trackingType, String repositoryId) {
synchronized (LOCK) {
ChangeEvents changeEvents = getCmisChangeLog(trackingType, repositoryId);
if (changeEvents == null) {
return;
}
List<ChangeEvent> events = changeEvents.getChangeEvents();
// After 2nd crawling, discard the first item
// Because the specs say that it's included in the results
String token = readLatestChangeToken(repositoryId);
if (!StringUtils.isEmpty(token)) {
if (!org.apache.commons.collections.CollectionUtils.isEmpty(events)) {
events.remove(0);
}
}
if (events.isEmpty())
return;
// Parse filtering configuration
PropertyManager pm = new PropertyManagerImpl(StringPool.PROPERTIES_NAME);
boolean fulltextEnabled = Boolean.TRUE.toString()
.equalsIgnoreCase(pm.readValue(PropertyKey.SOLR_TRACKING_FULLTEXT_ENABLED));
boolean mimeTypeFilterEnabled = false; // default
List<String> allowedMimeTypeFilter = new ArrayList<String>(); // default
if (fulltextEnabled) {
String _filter = pm.readValue(PropertyKey.SOLR_TRACKING_MIMETYPE_FILTER_ENABLED);
mimeTypeFilterEnabled = Boolean.TRUE.toString().equalsIgnoreCase(_filter);
if (mimeTypeFilterEnabled) {
allowedMimeTypeFilter = pm.readValues(PropertyKey.SOLR_TRACKING_MIMETYPE);
}
}
// Extract only the last events of each objectId
List<ChangeEvent> list = extractChangeEvent(events);
PropertyManager propMgr = new PropertyManagerImpl(StringPool.PROPERTIES_NAME);
int numberOfThread = Integer.valueOf(propMgr.readValue(PropertyKey.SOLR_TRACKING_NUMBER_OF_THREAD));
int numberPerThread = list.size() / numberOfThread;
if (list.size() < numberOfThread) {
numberOfThread = list.size();
numberPerThread = 1;
}
for (int i = 0; i <= numberOfThread; i++) {
int toIndex = (numberPerThread * (i + 1) > list.size()) ? list.size() : numberPerThread * (i + 1);
List<ChangeEvent> listPerThread = list.subList(numberPerThread * i, toIndex);
Session cmisSession = CmisSessionFactory.getSession(repositoryId);
Registration registration = new Registration(cmisSession, core, indexServer, listPerThread,
fulltextEnabled, mimeTypeFilterEnabled, allowedMimeTypeFilter);
Thread t = new Thread(registration);
t.start();
try {
t.join();
} catch (InterruptedException e) {
logger.error(e);
}
}
// Save the latest token
storeLatestChangeToken(changeEvents.getLatestChangeLogToken(), repositoryId);
// In case of FUll mode, repeat until indexing all change logs
if (Constant.MODE_FULL.equals(trackingType)) {
index(Constant.MODE_FULL, repositoryId);
}
}
}
/**
* Get the last change token stored in Solr
*
* @return
*/
private String readLatestChangeToken(String repositoryId) {
SolrQuery solrQuery = new SolrQuery();
solrQuery.setQuery(Constant.FIELD_REPOSITORY_ID + ":" + repositoryId);
QueryResponse resp = null;
try {
resp = tokenServer.query(solrQuery);
} catch (SolrServerException e) {
e.printStackTrace();
}
String latestChangeToken = "";
if (resp != null && resp.getResults() != null && resp.getResults().getNumFound() != 0) {
SolrDocument doc = resp.getResults().get(0);
latestChangeToken = (String) doc.get(Constant.FIELD_TOKEN);
} else {
logger.info("No latest change token found for repository: " + repositoryId);
logger.info("Set blank latest change token for repository: " + repositoryId);
storeLatestChangeToken("", repositoryId);
}
return latestChangeToken;
}
/**
* Store the last change token in Solr
*
* @return
*/
private void storeLatestChangeToken(String token, String repositoryId) {
Map<String, Object> map = new HashMap<String, Object>();
map.put(Constant.FIELD_REPOSITORY_ID, repositoryId);
map.put(Constant.FIELD_TOKEN, token);
AbstractUpdateRequest req = buildUpdateRequest(map);
try {
tokenServer.request(req);
} catch (SolrServerException e) {
logger.error("Failed to store latest change token in Solr!", e);
} catch (IOException e) {
logger.error("Failed to store latest change token in Solr!", e);
}
}
/**
* Get CMIS change logs
*
* @param trackingType
* @return
*/
private ChangeEvents getCmisChangeLog(String trackingType, String repositoryId) {
PropertyManager propMgr = new PropertyManagerImpl(StringPool.PROPERTIES_NAME);
String _latestToken = readLatestChangeToken(repositoryId);
String latestToken = (StringUtils.isEmpty(_latestToken)) ? null : _latestToken;
long _numItems = 0;
if (Constant.MODE_DELTA.equals(trackingType)) {
_numItems = Long.valueOf(propMgr.readValue(PropertyKey.CMIS_CHANGELOG_ITEMS_DELTA));
} else if (Constant.MODE_FULL.equals(trackingType)) {
_numItems = Long.valueOf(propMgr.readValue(PropertyKey.CMIS_CHANGELOG_ITEMS_FULL));
}
long numItems = (-1 == _numItems) ? Long.MAX_VALUE : Long.valueOf(_numItems);
Session cmisSession = CmisSessionFactory.getSession(repositoryId);
if (cmisSession == null) {
return null;
}
try {
// No need for Sorting
// (Specification requires they are returned by ASCENDING)
return cmisSession.getContentChanges(latestToken, false, numItems);
} catch (CmisRuntimeException ex) {
// On error reset session.
CmisSessionFactory.clearSession(repositoryId);
throw ex;
}
}
/**
*
* @param events
* @return
*/
private List<ChangeEvent> extractChangeEvent(List<ChangeEvent> events) {
List<ChangeEvent> list = new ArrayList<ChangeEvent>();
Set<String> objectIds = new HashSet<String>();
int size = events.size();
ListIterator<ChangeEvent> iterator = events.listIterator(size);
while (iterator.hasPrevious()) {
ChangeEvent event = iterator.previous();
if (objectIds.contains(event.getObjectId())) {
continue;
} else {
objectIds.add(event.getObjectId());
list.add(event);
}
}
Collections.reverse(list);
return list;
}
/**
* Build an update request to Solr without file
*
* @param content
* @return
*/
// TODO Unify that of Registration class
private AbstractUpdateRequest buildUpdateRequest(Map<String, Object> map) {
UpdateRequest up = new UpdateRequest();
SolrInputDocument sid = new SolrInputDocument();
// Set SolrDocument parameters
Iterator<String> iterator = map.keySet().iterator();
while (iterator.hasNext()) {
String key = iterator.next();
sid.addField(key, map.get(key));
}
// Set UpdateRequest
up.add(sid);
// Ignored(for schema.xml, ignoring some SolrCell meta fields)
up.setParam(UNKNOWN_FIELD_PREFIX, "ignored_");
// Set Solr action parameter
up.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
return up;
}
}