/*
* Copyright 2012
* Ubiquitous Knowledge Processing (UKP) Lab and FG Language Technology
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.clarin.webanno.api.dao;
import static de.tudarmstadt.ukp.clarin.webanno.api.ProjectService.ANNOTATION;
import static de.tudarmstadt.ukp.clarin.webanno.api.ProjectService.DOCUMENT;
import static de.tudarmstadt.ukp.clarin.webanno.api.ProjectService.PROJECT;
import java.io.File;
import java.io.FileFilter;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Arrays;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.annotation.Resource;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.comparator.LastModifiedFileComparator;
import org.apache.uima.UIMAException;
import org.apache.uima.cas.CAS;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.metadata.TypeSystemDescription;
import org.apache.uima.util.CasCreationUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.MDC;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.dao.DataRetrievalFailureException;
import org.springframework.stereotype.Component;
import de.tudarmstadt.ukp.clarin.webanno.api.CasStorageService;
import de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor;
import de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctorException;
import de.tudarmstadt.ukp.clarin.webanno.model.AnnotationDocument;
import de.tudarmstadt.ukp.clarin.webanno.model.Project;
import de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument;
import de.tudarmstadt.ukp.clarin.webanno.security.model.User;
import de.tudarmstadt.ukp.clarin.webanno.support.logging.Logging;
import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData;
@Component(CasStorageService.SERVICE_NAME)
public class CasStorageServiceImpl
implements CasStorageService
{
private final Logger log = LoggerFactory.getLogger(getClass());
private final Object lock = new Object();
@Value(value = "${repository.path}")
private File dir;
@Value(value = "${backup.keep.time}")
private long backupKeepTime;
@Value(value = "${backup.interval}")
private long backupInterval;
@Value(value = "${backup.keep.number}")
private int backupKeepNumber;
@Resource(name = "casDoctor")
private CasDoctor casDoctor;
public CasStorageServiceImpl()
{
// Nothing to do
}
/**
* Creates an annotation document (either user's annotation document or CURATION_USER's
* annotation document)
*
* @param aDocument
* the {@link SourceDocument}
* @param aJcas
* The annotated CAS object
* @param aUserName
* the user who annotates the document if it is user's annotation document OR the
* CURATION_USER
*/
@Override
public void writeCas(SourceDocument aDocument, JCas aJcas, String aUserName)
throws IOException
{
log.debug("Writing annotation document [{}]({}) for user [{}] in project [{}]({})",
aDocument.getName(), aDocument.getId(), aUserName, aDocument.getProject().getName(),
aDocument.getProject().getId());
// DebugUtils.smallStack();
try {
casDoctor.analyze(aDocument.getProject(), aJcas.getCas());
}
catch (CasDoctorException e) {
StringBuilder detailMsg = new StringBuilder();
detailMsg.append("CAS Doctor found problems for user ["
+ aUserName + "] in source document [" + aDocument.getName() + "] ("
+ aDocument.getId() + ") in project["
+ aDocument.getProject().getName() + "] ("
+ aDocument.getProject().getId() + ")\n");
e.getDetails().forEach(m -> detailMsg.append(
String.format("- [%s] %s%n", m.level, m.message)));
throw new DataRetrievalFailureException(detailMsg.toString());
}
catch (Exception e) {
throw new DataRetrievalFailureException("Error analyzing CAS of user ["
+ aUserName + "] in source document [" + aDocument.getName() + "] ("
+ aDocument.getId() + ") in project ["
+ aDocument.getProject().getName() + "] ("
+ aDocument.getProject().getId() + ")", e);
}
synchronized (lock) {
File annotationFolder = getAnnotationFolder(aDocument);
FileUtils.forceMkdir(annotationFolder);
final String username = aUserName;
File currentVersion = new File(annotationFolder, username + ".ser");
File oldVersion = new File(annotationFolder, username + ".ser.old");
// Save current version
try {
// Make a backup of the current version of the file before overwriting
if (currentVersion.exists()) {
renameFile(currentVersion, oldVersion);
}
// Now write the new version to "<username>.ser" or CURATION_USER.ser
DocumentMetaData md;
try {
md = DocumentMetaData.get(aJcas);
}
catch (IllegalArgumentException e) {
md = DocumentMetaData.create(aJcas);
}
md.setDocumentId(aUserName);
File targetPath = getAnnotationFolder(aDocument);
CasPersistenceUtils.writeSerializedCas(aJcas,
new File(targetPath, aUserName + ".ser"));
try (MDC.MDCCloseable closable = MDC.putCloseable(Logging.KEY_PROJECT_ID,
String.valueOf(aDocument.getProject().getId()))) {
Project project = aDocument.getProject();
log.info(
"Updated annotations for user [{}] on document [{}]({}) in project [{}]({})",
aUserName, aDocument.getName(), aDocument.getId(), project.getName(),
project.getId());
}
// If the saving was successful, we delete the old version
if (oldVersion.exists()) {
FileUtils.forceDelete(oldVersion);
}
}
catch (IOException e) {
// If we could not save the new version, restore the old one.
FileUtils.forceDelete(currentVersion);
// If this is the first version, there is no old version, so do not restore anything
if (oldVersion.exists()) {
renameFile(oldVersion, currentVersion);
}
// Now abort anyway
throw e;
}
// Manage history
if (backupInterval > 0) {
// Determine the reference point in time based on the current version
long now = currentVersion.lastModified();
// Get all history files for the current user
File[] history = annotationFolder.listFiles(new FileFilter()
{
private final Matcher matcher = Pattern.compile(
Pattern.quote(username) + "\\.ser\\.[0-9]+\\.bak").matcher("");
@Override
public boolean accept(File aFile)
{
// Check if the filename matches the pattern given above.
return matcher.reset(aFile.getName()).matches();
}
});
// Sort the files (oldest one first)
Arrays.sort(history, LastModifiedFileComparator.LASTMODIFIED_COMPARATOR);
// Check if we need to make a new history file
boolean historyFileCreated = false;
File historyFile = new File(annotationFolder, username + ".ser." + now + ".bak");
if (history.length == 0) {
// If there is no history yet but we should keep history, then we create a
// history file in any case.
FileUtils.copyFile(currentVersion, historyFile);
historyFileCreated = true;
}
else {
// Check if the newest history file is significantly older than the current one
File latestHistory = history[history.length - 1];
if (latestHistory.lastModified() + backupInterval < now) {
FileUtils.copyFile(currentVersion, historyFile);
historyFileCreated = true;
}
}
// Prune history based on number of backup
if (historyFileCreated) {
// The new version is not in the history, so we keep that in any case. That
// means we need to keep one less.
int toKeep = Math.max(backupKeepNumber - 1, 0);
if ((backupKeepNumber > 0) && (toKeep < history.length)) {
// Copy the oldest files to a new array
File[] toRemove = new File[history.length - toKeep];
System.arraycopy(history, 0, toRemove, 0, toRemove.length);
// Restrict the history to what is left
File[] newHistory = new File[toKeep];
if (toKeep > 0) {
System.arraycopy(history, toRemove.length, newHistory, 0,
newHistory.length);
}
history = newHistory;
// Remove these old files
for (File file : toRemove) {
FileUtils.forceDelete(file);
try (MDC.MDCCloseable closable = MDC.putCloseable(
Logging.KEY_PROJECT_ID,
String.valueOf(aDocument.getProject().getId()))) {
Project project = aDocument.getProject();
log.info(
"Removed surplus history file [{}] of user [{}] for "
+ "document [{}]({}) in project [{}]({})",
file.getName(), aUserName, aDocument.getName(),
aDocument.getId(), project.getName(), project.getId());
}
}
}
// Prune history based on time
if (backupKeepTime > 0) {
for (File file : history) {
if ((file.lastModified() + backupKeepTime) < now) {
FileUtils.forceDelete(file);
try (MDC.MDCCloseable closable = MDC.putCloseable(
Logging.KEY_PROJECT_ID,
String.valueOf(aDocument.getProject().getId()))) {
Project project = aDocument.getProject();
log.info(
"Removed outdated history file [{}] of user [{}] for "
+ "document [{}]({}) in project [{}]({})",
file.getName(), aUserName, aDocument.getName(),
aDocument.getId(), project.getName(), project.getId());
}
}
}
}
}
}
}
}
/**
* For a given {@link SourceDocument}, return the {@link AnnotationDocument} for the user or for
* the CURATION_USER
*
* @param aDocument
* the {@link SourceDocument}
* @param aUsername
* the {@link User} who annotates the {@link SourceDocument} or the CURATION_USER
*/
@Override
public JCas readCas(SourceDocument aDocument, String aUsername)
throws IOException
{
log.debug("Reading annotation document [{}] ({}) for user [{}] in project [{}] ({})",
aDocument.getName(), aDocument.getId(), aUsername, aDocument.getProject().getName(),
aDocument.getProject().getId());
// DebugUtils.smallStack();
synchronized (lock) {
File annotationFolder = getAnnotationFolder(aDocument);
String file = aUsername + ".ser";
try {
File serializedCasFile = new File(annotationFolder, file);
if (!serializedCasFile.exists()) {
throw new FileNotFoundException("Annotation document of user [" + aUsername
+ "] for source document [" + aDocument.getName() + "] ("
+ aDocument.getId() + ") not found in project["
+ aDocument.getProject().getName() + "] ("
+ aDocument.getProject().getId() + ")");
}
CAS cas = CasCreationUtils.createCas((TypeSystemDescription) null, null, null);
CasPersistenceUtils.readSerializedCas(cas.getJCas(), serializedCasFile);
analyzeAndRepair(aDocument, aUsername, cas);
return cas.getJCas();
}
catch (UIMAException e) {
throw new DataRetrievalFailureException("Unable to parse annotation", e);
}
}
}
@Override
public void analyzeAndRepair(SourceDocument aDocument, String aUsername, CAS aCas)
{
// Check if repairs are active - if this is the case, we only need to run the repairs
// because the repairs do an analysis as a pre- and post-condition.
if (casDoctor.isRepairsActive()) {
try {
casDoctor.repair(aDocument.getProject(), aCas);
}
catch (Exception e) {
throw new DataRetrievalFailureException("Error repairing CAS of user ["
+ aUsername + "] for document ["
+ aDocument.getName() + "] (" + aDocument.getId() + ") in project["
+ aDocument.getProject().getName() + "] ("
+ aDocument.getProject().getId() + ")", e);
}
}
// If the repairs are not active, then we run the analysis explicitly
else {
try {
casDoctor.analyze(aDocument.getProject(), aCas);
}
catch (CasDoctorException e) {
StringBuilder detailMsg = new StringBuilder();
detailMsg.append("CAS Doctor found problems for user ["
+ aUsername + "] in document [" + aDocument.getName() + "] ("
+ aDocument.getId() + ") in project["
+ aDocument.getProject().getName() + "] ("
+ aDocument.getProject().getId() + ")\n");
e.getDetails().forEach(m -> detailMsg.append(
String.format("- [%s] %s%n", m.level, m.message)));
throw new DataRetrievalFailureException(detailMsg.toString());
}
catch (Exception e) {
throw new DataRetrievalFailureException("Error analyzing CAS of user ["
+ aUsername + "] in document [" + aDocument.getName() + "] ("
+ aDocument.getId() + ") in project["
+ aDocument.getProject().getName() + "] ("
+ aDocument.getProject().getId() + ")", e);
}
}
}
/**
* Get the folder where the annotations are stored. Creates the folder if necessary.
*
* @throws IOException
* if the folder cannot be created.
*/
@Override
public File getAnnotationFolder(SourceDocument aDocument)
throws IOException
{
File annotationFolder = new File(dir, PROJECT + aDocument.getProject().getId() + DOCUMENT
+ aDocument.getId() + ANNOTATION);
FileUtils.forceMkdir(annotationFolder);
return annotationFolder;
}
/**
* Renames a file.
*
* @throws IOException
* if the file cannot be renamed.
* @return the target file.
*/
private static File renameFile(File aFrom, File aTo)
throws IOException
{
if (!aFrom.renameTo(aTo)) {
throw new IOException("Cannot renamed file [" + aFrom + "] to [" + aTo + "]");
}
// We are not sure if File is mutable. This makes sure we get a new file
// in any case.
return new File(aTo.getPath());
}
}