/*
* Copyright 2017
* Ubiquitous Knowledge Processing (UKP) Lab and FG Language Technology
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.clarin.webanno.ui.project;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.uima.UIMAException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import de.tudarmstadt.ukp.clarin.webanno.api.AnnotationSchemaService;
import de.tudarmstadt.ukp.clarin.webanno.api.DocumentService;
import de.tudarmstadt.ukp.clarin.webanno.api.ImportExportService;
import de.tudarmstadt.ukp.clarin.webanno.api.ProjectService;
import de.tudarmstadt.ukp.clarin.webanno.api.WebAnnoConst;
import de.tudarmstadt.ukp.clarin.webanno.automation.model.MiraTemplate;
import de.tudarmstadt.ukp.clarin.webanno.automation.service.AutomationService;
import de.tudarmstadt.ukp.clarin.webanno.constraints.ConstraintsService;
import de.tudarmstadt.ukp.clarin.webanno.export.model.AnnotationDocument;
import de.tudarmstadt.ukp.clarin.webanno.export.model.ProjectPermission;
import de.tudarmstadt.ukp.clarin.webanno.export.model.SourceDocument;
import de.tudarmstadt.ukp.clarin.webanno.model.AnnotationDocumentState;
import de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature;
import de.tudarmstadt.ukp.clarin.webanno.model.AnnotationLayer;
import de.tudarmstadt.ukp.clarin.webanno.model.ConstraintSet;
import de.tudarmstadt.ukp.clarin.webanno.model.Mode;
import de.tudarmstadt.ukp.clarin.webanno.model.Project;
import de.tudarmstadt.ukp.clarin.webanno.model.SourceDocumentState;
import de.tudarmstadt.ukp.clarin.webanno.model.Tag;
import de.tudarmstadt.ukp.clarin.webanno.model.TagSet;
import de.tudarmstadt.ukp.clarin.webanno.security.UserDao;
import de.tudarmstadt.ukp.clarin.webanno.security.model.User;
import de.tudarmstadt.ukp.clarin.webanno.tsv.WebannoTsv3Writer;
import de.tudarmstadt.ukp.clarin.webanno.ui.project.ImportUtil;
import de.tudarmstadt.ukp.clarin.webanno.ui.project.ProjectExportException;
import de.tudarmstadt.ukp.clarin.webanno.ui.project.ProjectPage;
import de.tudarmstadt.ukp.clarin.webanno.ui.project.ProjectExportPanel.ProjectExportModel;
public class ExportUtil
{
// Logger of this class. It was previously created for ProjectPage, which attributed the
// messages written by the export helpers to the wrong class.
private static final Logger LOG = LoggerFactory.getLogger(ExportUtil.class);

// Value of the export model's format that selects the format recorded on each source
// document instead of one fixed format.
private static final String FORMAT_AUTO = "AUTO";

// Sub-folders of the export directory. They are appended to the export directory path by
// plain string concatenation, hence the leading (and, where a document name follows, the
// trailing) slash.
private static final String ANNOTATION_ORIGINAL_FOLDER = "/annotation/";
private static final String CONSTRAINTS = "/constraints/";
private static final String LOG_FOLDER = "/" + ProjectService.LOG_DIR;
private static final String GUIDELINES_FOLDER = "/" + ImportUtil.GUIDELINE;
private static final String ANNOTATION_CAS_FOLDER = "/"
        + ImportUtil.ANNOTATION_AS_SERIALISED_CAS + "/";
private static final String META_INF = "/" + ImportUtil.META_INF;
private static final String SOURCE_FOLDER = "/" + ImportUtil.SOURCE;

// User name under which the CAS of automation/correction projects is looked up.
private static final String CORRECTION_USER = "CORRECTION_USER";

private static final String CURATION_AS_SERIALISED_CAS = "/"
        + ImportUtil.CURATION_AS_SERIALISED_CAS + "/";
private static final String CURATION_FOLDER = "/curation/";
/**
 * Creates an instance. All methods of this class are static, so the instance carries no state.
 */
public ExportUtil()
{
// Nothing to initialize.
}
/**
 * Builds the export model of a project: its basic properties, layers and features, tag sets,
 * the metadata of its source and annotation documents, the project permissions and the MIRA
 * templates. This method itself performs no file operations.
 *
 * @param annotationService
 *            used to list layers, tag sets and tags.
 * @param automationService
 *            used to list tab-separated documents and MIRA templates.
 * @param documentService
 *            used to list source and annotation documents.
 * @param projectService
 *            used to list users and their permission levels.
 * @param aProject
 *            the project to export.
 * @param aProjectSettings
 *            currently not used by this method.
 * @param aExportTempDir
 *            currently not used by this method.
 * @return the populated export model.
 */
public static de.tudarmstadt.ukp.clarin.webanno.export.model.Project exportProjectSettings(
        AnnotationSchemaService annotationService, AutomationService automationService,
        DocumentService documentService, ProjectService projectService, Project aProject,
        File aProjectSettings, File aExportTempDir)
{
    de.tudarmstadt.ukp.clarin.webanno.export.model.Project exProjekt = new de.tudarmstadt.ukp.clarin.webanno.export.model.Project();
    exProjekt.setDescription(aProject.getDescription());
    exProjekt.setName(aProject.getName());
    // In older versions of WebAnno, the mode was an enum which was serialized as upper-case
    // during export but as lower-case in the database. This is compensating for this case.
    exProjekt.setMode(StringUtils.upperCase(aProject.getMode(), Locale.US));
    exProjekt.setScriptDirection(aProject.getScriptDirection());
    exProjekt.setVersion(aProject.getVersion());
    exProjekt.setDisableExport(aProject.isDisableExport());

    // Mapping from each feature to its exported counterpart. It is filled while the layers
    // are exported and is needed afterwards by the documents and the MIRA templates.
    Map<AnnotationFeature, de.tudarmstadt.ukp.clarin.webanno.export.model.AnnotationFeature> featureToExFeatures = new HashMap<>();

    exProjekt.setLayers(collectExportLayers(annotationService, aProject, featureToExFeatures));
    exProjekt.setTagSets(collectExportTagSets(annotationService, aProject));
    addExportDocuments(exProjekt, documentService, automationService, aProject,
            featureToExFeatures);
    exProjekt.setProjectPermissions(collectExportPermissions(projectService, aProject));
    exProjekt.setMiraTemplates(
            collectExportMiraTemplates(automationService, aProject, featureToExFeatures));
    return exProjekt;
}

/**
 * Exports all layers of the project and afterwards links attach types and attach features
 * between the exported layers.
 */
private static List<de.tudarmstadt.ukp.clarin.webanno.export.model.AnnotationLayer> collectExportLayers(
        AnnotationSchemaService annotationService, Project aProject,
        Map<AnnotationFeature, de.tudarmstadt.ukp.clarin.webanno.export.model.AnnotationFeature> aFeatureToExFeatures)
{
    List<de.tudarmstadt.ukp.clarin.webanno.export.model.AnnotationLayer> exLayers = new ArrayList<>();
    // Mapping from each layer to its exported counterpart so that the attach type can be
    // resolved once all layers are known. NOTE(review): presumably filled by
    // ImportUtil.exportLayerDetails together with the feature map - confirm.
    Map<AnnotationLayer, de.tudarmstadt.ukp.clarin.webanno.export.model.AnnotationLayer> layerToExLayers = new HashMap<>();
    for (AnnotationLayer layer : annotationService.listAnnotationLayer(aProject)) {
        exLayers.add(ImportUtil.exportLayerDetails(layerToExLayers, aFeatureToExFeatures,
                layer, annotationService));
    }

    // Add the attach type and attach feature to the exported layers.
    for (Map.Entry<AnnotationLayer, de.tudarmstadt.ukp.clarin.webanno.export.model.AnnotationLayer> entry : layerToExLayers
            .entrySet()) {
        AnnotationLayer layer = entry.getKey();
        de.tudarmstadt.ukp.clarin.webanno.export.model.AnnotationLayer exLayer = entry
                .getValue();
        if (layer.getAttachType() != null) {
            exLayer.setAttachType(layerToExLayers.get(layer.getAttachType()));
        }
        if (layer.getAttachFeature() != null) {
            exLayer.setAttachFeature(aFeatureToExFeatures.get(layer.getAttachFeature()));
        }
    }
    return exLayers;
}

/**
 * Exports all tag sets of the project together with their tags.
 */
private static List<de.tudarmstadt.ukp.clarin.webanno.export.model.TagSet> collectExportTagSets(
        AnnotationSchemaService annotationService, Project aProject)
{
    List<de.tudarmstadt.ukp.clarin.webanno.export.model.TagSet> exTagSets = new ArrayList<>();
    for (TagSet tagSet : annotationService.listTagSets(aProject)) {
        de.tudarmstadt.ukp.clarin.webanno.export.model.TagSet exTagSet = new de.tudarmstadt.ukp.clarin.webanno.export.model.TagSet();
        exTagSet.setCreateTag(tagSet.isCreateTag());
        exTagSet.setDescription(tagSet.getDescription());
        exTagSet.setLanguage(tagSet.getLanguage());
        exTagSet.setName(tagSet.getName());

        List<de.tudarmstadt.ukp.clarin.webanno.export.model.Tag> exTags = new ArrayList<>();
        for (Tag tag : annotationService.listTags(tagSet)) {
            de.tudarmstadt.ukp.clarin.webanno.export.model.Tag exTag = new de.tudarmstadt.ukp.clarin.webanno.export.model.Tag();
            exTag.setDescription(tag.getDescription());
            exTag.setName(tag.getName());
            exTags.add(exTag);
        }
        exTagSet.setTags(exTags);
        exTagSets.add(exTagSet);
    }
    return exTagSets;
}

/**
 * Adds the metadata of all source documents (including tab-separated documents) and of their
 * annotation documents to the export model.
 */
private static void addExportDocuments(
        de.tudarmstadt.ukp.clarin.webanno.export.model.Project aExProject,
        DocumentService documentService, AutomationService automationService,
        Project aProject,
        Map<AnnotationFeature, de.tudarmstadt.ukp.clarin.webanno.export.model.AnnotationFeature> aFeatureToExFeatures)
{
    List<SourceDocument> sourceDocuments = new ArrayList<SourceDocument>();
    List<AnnotationDocument> annotationDocuments = new ArrayList<AnnotationDocument>();

    // Work on a copy: the list handed out by the service is not owned by this method and
    // must not be extended in place.
    List<de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument> documents = new ArrayList<>(
            documentService.listSourceDocuments(aProject));
    documents.addAll(automationService.listTabSepDocuments(aProject));

    for (de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument sourceDocument : documents) {
        SourceDocument exDocument = new SourceDocument();
        exDocument.setFormat(sourceDocument.getFormat());
        exDocument.setName(sourceDocument.getName());
        exDocument.setState(sourceDocument.getState());
        exDocument.setTimestamp(sourceDocument.getTimestamp());
        exDocument.setTrainingDocument(sourceDocument.isTrainingDocument());
        exDocument.setSentenceAccessed(sourceDocument.getSentenceAccessed());
        // The processed flag is always exported as false. The earlier code first copied the
        // flag of the source document and then overwrote it with false right away.
        exDocument.setProcessed(false);
        if (sourceDocument.getFeature() != null) {
            exDocument.setFeature(aFeatureToExFeatures.get(sourceDocument.getFeature()));
        }

        // Add the annotation documents belonging to this source document.
        for (de.tudarmstadt.ukp.clarin.webanno.model.AnnotationDocument annotationDocument : documentService
                .listAnnotationDocuments(sourceDocument)) {
            AnnotationDocument annotationDocumentToExport = new AnnotationDocument();
            annotationDocumentToExport.setName(annotationDocument.getName());
            annotationDocumentToExport.setState(annotationDocument.getState());
            annotationDocumentToExport.setUser(annotationDocument.getUser());
            annotationDocumentToExport.setTimestamp(annotationDocument.getTimestamp());
            annotationDocumentToExport
                    .setSentenceAccessed(annotationDocument.getSentenceAccessed());
            annotationDocuments.add(annotationDocumentToExport);
        }
        sourceDocuments.add(exDocument);
    }

    aExProject.setSourceDocuments(sourceDocuments);
    aExProject.setAnnotationDocuments(annotationDocuments);
}

/**
 * Exports one permission entry per user and permission level of the project.
 */
private static List<ProjectPermission> collectExportPermissions(ProjectService projectService,
        Project aProject)
{
    List<ProjectPermission> projectPermissions = new ArrayList<ProjectPermission>();
    for (User user : projectService.listProjectUsersWithPermissions(aProject)) {
        for (de.tudarmstadt.ukp.clarin.webanno.model.ProjectPermission permission : projectService
                .listProjectPermissionLevel(user, aProject)) {
            ProjectPermission permissionToExport = new ProjectPermission();
            permissionToExport.setLevel(permission.getLevel());
            permissionToExport.setUser(user.getUsername());
            projectPermissions.add(permissionToExport);
        }
    }
    return projectPermissions;
}

/**
 * Exports the automation MIRA templates of the project, mapping their features to the
 * exported features.
 */
private static List<de.tudarmstadt.ukp.clarin.webanno.export.model.MiraTemplate> collectExportMiraTemplates(
        AutomationService automationService, Project aProject,
        Map<AnnotationFeature, de.tudarmstadt.ukp.clarin.webanno.export.model.AnnotationFeature> aFeatureToExFeatures)
{
    List<de.tudarmstadt.ukp.clarin.webanno.export.model.MiraTemplate> exTemplates = new ArrayList<>();
    for (MiraTemplate template : automationService.listMiraTemplates(aProject)) {
        de.tudarmstadt.ukp.clarin.webanno.export.model.MiraTemplate exTemplate = new de.tudarmstadt.ukp.clarin.webanno.export.model.MiraTemplate();
        exTemplate.setAnnotateAndPredict(template.isAnnotateAndRepeat());
        exTemplate.setAutomationStarted(template.isAutomationStarted());
        exTemplate.setCurrentLayer(template.isCurrentLayer());
        exTemplate.setResult(template.getResult());
        exTemplate.setTrainFeature(aFeatureToExFeatures.get(template.getTrainFeature()));
        // The other features are only set on the exported template when there are any.
        if (!template.getOtherFeatures().isEmpty()) {
            Set<de.tudarmstadt.ukp.clarin.webanno.export.model.AnnotationFeature> exOtherFeatures = new HashSet<>();
            for (AnnotationFeature feature : template.getOtherFeatures()) {
                exOtherFeatures.add(aFeatureToExFeatures.get(feature));
            }
            exTemplate.setOtherFeatures(exOtherFeatures);
        }
        exTemplates.add(exTemplate);
    }
    return exTemplates;
}
/**
 * Copy source documents (including tab-separated documents) from the file system of this
 * project to the source folder below the export folder. The progress of the export model is
 * advanced up to 10 while the documents are copied.
 *
 * @param documentService
 *            used to list the source documents and to locate their files.
 * @param automationService
 *            used to list the tab-separated documents.
 * @param model
 *            the export model receiving progress updates and error messages.
 * @param aProject
 *            the project whose source documents are exported.
 * @param aCopyDir
 *            the export folder.
 * @throws IOException
 *             if the target folder cannot be created or a file cannot be copied.
 * @throws ProjectExportException
 *             if the file of a source document cannot be found; the export stops at the
 *             first missing file.
 */
public static void exportSourceDocuments(DocumentService documentService,
        AutomationService automationService, ProjectExportModel model, Project aProject,
        File aCopyDir)
    throws IOException, ProjectExportException
{
    File sourceDocumentDir = new File(aCopyDir + SOURCE_FOLDER);
    FileUtils.forceMkdir(sourceDocumentDir);

    // Get all the source documents from the project. Work on a copy: the list handed out by
    // the service is not owned by this method and must not be extended in place.
    List<de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument> documents = new ArrayList<>(
            documentService.listSourceDocuments(aProject));
    documents.addAll(automationService.listTabSepDocuments(aProject));

    int i = 1;
    for (de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument sourceDocument : documents) {
        try {
            FileUtils.copyFileToDirectory(
                    documentService.getSourceDocumentFile(sourceDocument), sourceDocumentDir);
            model.progress = (int) Math.ceil(((double) i) / documents.size() * 10.0);
            i++;
        }
        catch (FileNotFoundException e) {
            String errorMessage = "Source file '" + sourceDocument.getName()
                    + "' related to project couldn't be located in repository";
            // Log the exception itself. ExceptionUtils.getRootCause(e) may yield null for an
            // exception without a nested cause (depends on the commons-lang3 version), in
            // which case the stack trace would be missing from the log.
            LOG.error(errorMessage, e);
            model.messages.add(errorMessage);

            // Keep the original failure reachable for callers via getCause().
            ProjectExportException failure = new ProjectExportException(
                    "Couldn't find some source file(s) related to project");
            failure.initCause(e);
            throw failure;
        }
    }
}
/**
 * Copy annotation documents as serialized CAS from the file system of this project to the
 * export folder and additionally export them in a secondary format. For automation and
 * correction projects, the CAS of the correction user is exported to the curation folders as
 * well. The progress of the export model is advanced by up to 80.
 *
 * @param documentService
 *            used to list documents and to locate CAS files.
 * @param importExportService
 *            used to resolve the writer and to export a document in the secondary format.
 * @param userRepository
 *            used to check that the annotator of a document is a known user.
 * @param aModel
 *            the export model providing project and format and receiving progress updates
 *            and messages.
 * @param aCopyDir
 *            the export folder.
 * @throws IOException
 *             if a folder cannot be created or a file cannot be copied or deleted.
 * @throws UIMAException
 *             if exporting a document in the secondary format fails.
 * @throws ClassNotFoundException
 *             if exporting a document in the secondary format fails.
 */
public static void exportAnnotationDocuments(DocumentService documentService,
        ImportExportService importExportService, UserDao userRepository,
        ProjectExportModel aModel, File aCopyDir)
    throws IOException, UIMAException, ClassNotFoundException
{
    List<de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument> documents = documentService
            .listSourceDocuments(aModel.project);
    int i = 1;
    int initProgress = aModel.progress;
    for (de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument sourceDocument : documents) {
        // Determine which format to use for export
        String formatId;
        if (FORMAT_AUTO.equals(aModel.format)) {
            formatId = sourceDocument.getFormat();
        }
        else {
            formatId = importExportService.getWritableFormatId(aModel.format);
        }

        // From here on the writer is never null: WebAnno TSV is the fallback.
        Class<?> writer = importExportService.getWritableFormats().get(formatId);
        if (writer == null) {
            String msg = "[" + sourceDocument.getName()
                    + "] No writer found for format [" + formatId
                    + "] - exporting as WebAnno TSV instead.";
            // Avoid repeating the same message over for different users
            if (!aModel.messages.contains(msg)) {
                aModel.messages.add(msg);
            }
            writer = WebannoTsv3Writer.class;
        }

        // Export annotations from regular users
        for (de.tudarmstadt.ukp.clarin.webanno.model.AnnotationDocument annotationDocument : documentService
                .listAnnotationDocuments(sourceDocument)) {
            // Copy annotation documents only for users known to the user repository and
            // only if the state of the annotation document is not NEW/IGNORE.
            if (userRepository.get(annotationDocument.getUser()) == null
                    || annotationDocument.getState().equals(AnnotationDocumentState.NEW)
                    || annotationDocument.getState()
                            .equals(AnnotationDocumentState.IGNORE)) {
                continue;
            }

            File annotationDocumentAsSerialisedCasDir = new File(
                    aCopyDir.getAbsolutePath() + ANNOTATION_CAS_FOLDER
                            + sourceDocument.getName());
            File annotationDocumentDir = new File(aCopyDir.getAbsolutePath()
                    + ANNOTATION_ORIGINAL_FOLDER + sourceDocument.getName());
            FileUtils.forceMkdir(annotationDocumentAsSerialisedCasDir);
            FileUtils.forceMkdir(annotationDocumentDir);

            File annotationFileAsSerialisedCas = documentService.getCasFile(sourceDocument,
                    annotationDocument.getUser());
            if (!annotationFileAsSerialisedCas.exists()) {
                continue;
            }

            File annotationFile = importExportService.exportAnnotationDocument(
                    sourceDocument, annotationDocument.getUser(), writer,
                    annotationDocument.getUser(), Mode.ANNOTATION, false);
            try {
                FileUtils.copyFileToDirectory(annotationFileAsSerialisedCas,
                        annotationDocumentAsSerialisedCasDir);
                FileUtils.copyFileToDirectory(annotationFile, annotationDocumentDir);
            }
            catch (IOException | RuntimeException e) {
                // Do not leave the temporary export file behind when copying fails.
                FileUtils.deleteQuietly(annotationFile);
                throw e;
            }
            FileUtils.forceDelete(annotationFile);
        }

        // BEGIN FIXME #1224 CURATION_USER and CORRECTION_USER files should be exported in annotation_ser
        // If this project is a correction project, add the auto-annotated CAS to same
        // folder as CURATION_FOLDER
        String projectMode = aModel.project.getMode();
        if (WebAnnoConst.PROJECT_TYPE_AUTOMATION.equals(projectMode)
                || WebAnnoConst.PROJECT_TYPE_CORRECTION.equals(projectMode)) {
            File correctionCasFile = documentService.getCasFile(sourceDocument,
                    CORRECTION_USER);
            if (correctionCasFile.exists()) {
                // Copy CAS - this is used when importing the project again
                File curationCasDir = new File(
                        aCopyDir + CURATION_AS_SERIALISED_CAS + sourceDocument.getName());
                FileUtils.forceMkdir(curationCasDir);
                FileUtils.copyFileToDirectory(correctionCasFile, curationCasDir);

                // Copy secondary export format for convenience - not used during import
                File curationDir = new File(
                        aCopyDir + CURATION_FOLDER + sourceDocument.getName());
                FileUtils.forceMkdir(curationDir);
                File correctionFile = importExportService.exportAnnotationDocument(
                        sourceDocument, CORRECTION_USER, writer, CORRECTION_USER,
                        Mode.CORRECTION);
                try {
                    FileUtils.copyFileToDirectory(correctionFile, curationDir);
                }
                catch (IOException | RuntimeException e) {
                    // Do not leave the temporary export file behind when copying fails.
                    FileUtils.deleteQuietly(correctionFile);
                    throw e;
                }
                FileUtils.forceDelete(correctionFile);
            }
        }
        // END FIXME #1224 CURATION_USER and CORRECTION_USER files should be exported in annotation_ser

        aModel.progress = initProgress
                + (int) Math.ceil(((double) i) / documents.size() * 80.0);
        i++;
    }
}
/**
 * Copies the log file of the project into the log folder below the export folder. The target
 * folder is always created; the log file is only copied if it exists.
 *
 * @param projectService
 *            used to locate the project log file.
 * @param aProject
 *            the project whose log is exported.
 * @param aCopyDir
 *            the export folder.
 * @throws IOException
 *             if the target folder cannot be created or the file cannot be copied.
 */
public static void exportProjectLog(ProjectService projectService, Project aProject, File aCopyDir)
    throws IOException
{
    File targetDir = new File(aCopyDir + LOG_FOLDER);
    FileUtils.forceMkdir(targetDir);

    // Nothing to copy when the project has not written a log file.
    if (!projectService.getProjectLogFile(aProject).exists()) {
        return;
    }
    FileUtils.copyFileToDirectory(projectService.getProjectLogFile(aProject), targetDir);
}
/**
 * Copy Project guidelines from the file system of this project to the guidelines folder
 * below the export folder. The target folder is always created, even if the project has no
 * guidelines.
 *
 * @param projectService
 *            used to locate the guidelines folder of the project.
 * @param aProject
 *            the project whose guidelines are exported.
 * @param aCopyDir
 *            the export folder.
 * @throws IOException
 *             if the target folder cannot be created or a file cannot be copied.
 */
public static void exportGuideLine(ProjectService projectService, Project aProject, File aCopyDir)
    throws IOException
{
    File guidelineDir = new File(aCopyDir + GUIDELINES_FOLDER);
    FileUtils.forceMkdir(guidelineDir);

    File annotationGuidlines = projectService.getGuidelinesFile(aProject);
    if (!annotationGuidlines.exists()) {
        return;
    }

    // File.listFiles() returns null if the path is not a directory or cannot be read;
    // iterating over that result directly would fail with a NullPointerException.
    File[] guidelineFiles = annotationGuidlines.listFiles();
    if (guidelineFiles == null) {
        return;
    }
    for (File annotationGuideline : guidelineFiles) {
        FileUtils.copyFileToDirectory(annotationGuideline, guidelineDir);
    }
}
/**
 * Copies the META-INF folder of the project into the corresponding folder below the export
 * folder. The target folder is always created; its content is only copied if the project has
 * such a folder.
 *
 * @param projectService
 *            used to locate the META-INF folder of the project.
 * @param aProject
 *            the project whose META-INF folder is exported.
 * @param aCopyDir
 *            the export folder.
 * @throws IOException
 *             if the target folder cannot be created or the content cannot be copied.
 */
public static void exportProjectMetaInf(ProjectService projectService, Project aProject, File aCopyDir)
    throws IOException
{
    File targetDir = new File(aCopyDir + META_INF);
    FileUtils.forceMkdir(targetDir);

    File projectMetaInf = projectService.getMetaInfFolder(aProject);
    if (!projectMetaInf.exists()) {
        return;
    }
    FileUtils.copyDirectory(projectMetaInf, targetDir);
}
/**
 * Writes every constraint set of the project as a file into the constraints folder below the
 * export folder.
 *
 * @param constraintsService
 *            used to list the constraint sets and to obtain each one as a file.
 * @param project
 *            the project whose constraints are exported.
 * @param exportTempDir
 *            the export folder.
 * @throws IOException
 *             if the target folder cannot be created or a file cannot be copied.
 */
public static void exportProjectConstraints(ConstraintsService constraintsService, Project project,
        File exportTempDir)
    throws IOException
{
    File targetDir = new File(exportTempDir + CONSTRAINTS);
    FileUtils.forceMkdir(targetDir);

    for (ConstraintSet constraintSet : constraintsService.listConstraintSets(project)) {
        // The exported file is named after the constraint set so that the name of the set
        // is preserved in the export.
        String targetName = constraintSet.getName();
        File constraintFile = constraintsService.exportConstraintAsFile(constraintSet);
        FileUtils.copyFile(constraintFile, new File(targetDir, targetName));
    }
}
/**
 * Copy, if exists, curation documents to a folder that will be exported as Zip file. For
 * each source document the curation folders are created; the curation CAS and its secondary
 * export are only added for documents whose curation is finished or, if requested, in
 * progress.
 *
 * @param documentService
 *            used to list the source documents and to locate the curation CAS files.
 * @param importExportService
 *            used to resolve the writer and to export a document in the secondary format.
 * @param aModel
 *            the export model providing project and format and receiving progress updates.
 * @param aCopyDir
 *            The folder where curated documents are copied to be exported as Zip File
 * @param aIncludeInProgress
 *            whether documents whose curation is still in progress are exported as well.
 * @throws IOException
 *             if a folder cannot be created or the curation CAS cannot be copied.
 * @throws ProjectExportException
 *             if exporting a document in the secondary format fails; the original failure
 *             is available as the cause.
 */
public static void exportCuratedDocuments(DocumentService documentService,
        ImportExportService importExportService, ProjectExportModel aModel, File aCopyDir,
        boolean aIncludeInProgress)
    throws FileNotFoundException, UIMAException, IOException, ClassNotFoundException,
    ProjectExportException
{
    // Get all the source documents from the project
    List<de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument> documents = documentService
            .listSourceDocuments(aModel.project);

    // Determine which format to use for export. WebAnno TSV is used for the AUTO format and
    // as fallback when no writer is registered for the requested format.
    Class<?> writer = null;
    if (!FORMAT_AUTO.equals(aModel.format)) {
        writer = importExportService.getWritableFormats()
                .get(importExportService.getWritableFormatId(aModel.format));
    }
    if (writer == null) {
        writer = WebannoTsv3Writer.class;
    }

    int initProgress = aModel.progress - 1;
    int i = 1;
    for (de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument sourceDocument : documents) {
        File curationCasDir = new File(
                aCopyDir + CURATION_AS_SERIALISED_CAS + sourceDocument.getName());
        FileUtils.forceMkdir(curationCasDir);
        File curationDir = new File(aCopyDir + CURATION_FOLDER + sourceDocument.getName());
        FileUtils.forceMkdir(curationDir);

        // Depending on aIncludeInProgress, include only the curation documents that are
        // finished or also the ones that are in progress
        boolean export = (aIncludeInProgress
                && SourceDocumentState.CURATION_IN_PROGRESS.equals(sourceDocument.getState()))
                || SourceDocumentState.CURATION_FINISHED.equals(sourceDocument.getState());
        if (export) {
            File curationCasFile = documentService.getCasFile(sourceDocument,
                    WebAnnoConst.CURATION_USER);
            if (curationCasFile.exists()) {
                // Copy CAS - this is used when importing the project again
                FileUtils.copyFileToDirectory(curationCasFile, curationCasDir);

                // Copy secondary export format for convenience - not used during import
                File curationFile = null;
                try {
                    curationFile = importExportService.exportAnnotationDocument(
                            sourceDocument, WebAnnoConst.CURATION_USER, writer,
                            WebAnnoConst.CURATION_USER, Mode.CURATION);
                    FileUtils.copyFileToDirectory(curationFile, curationDir);
                    FileUtils.forceDelete(curationFile);
                }
                catch (Exception e) {
                    // Do not leave the temporary export file behind (null-safe).
                    FileUtils.deleteQuietly(curationFile);

                    // Keep the original failure reachable for callers via getCause()
                    // instead of discarding it.
                    ProjectExportException failure = new ProjectExportException(
                            "Aborting due to unrecoverable error while exporting!");
                    failure.initCause(e);
                    throw failure;
                }
            }
        }

        aModel.progress = initProgress
                + (int) Math.ceil(((double) i) / documents.size() * 10.0);
        i++;
    }
}
}