/**
* This file is part of General Entity Annotator Benchmark.
*
* General Entity Annotator Benchmark is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* General Entity Annotator Benchmark is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with General Entity Annotator Benchmark. If not, see <http://www.gnu.org/licenses/>.
*/
package org.aksw.gerbil.execute;
import java.io.File;
import java.io.FileOutputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.GZIPOutputStream;
import org.aksw.gerbil.datatypes.ExperimentTaskConfiguration;
import org.aksw.gerbil.io.nif.NIFWriter;
import org.aksw.gerbil.io.nif.impl.TurtleNIFWriter;
import org.aksw.gerbil.matching.Matching;
import org.aksw.gerbil.transfer.nif.Document;
import org.aksw.gerbil.transfer.nif.Marking;
import org.aksw.gerbil.transfer.nif.data.DocumentImpl;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Writes the markings returned by an annotator for a dataset into a
 * GZIP-compressed Turtle NIF file inside a configurable output directory.
 *
 * <p>
 * The output is only written if the dataset configuration reports that it
 * could be cached and, additionally, either the annotator configuration
 * reports the same or the annotator name contains the storable name part
 * (by default {@value #DEFAULT_STORABLE_ANNOTATOR_NAME_PART}).
 * </p>
 */
public class AnnotatorOutputWriter {

    private static final Logger LOGGER = LoggerFactory.getLogger(AnnotatorOutputWriter.class);

    /** Default substring of an annotator name that marks its output as storable. */
    private static final String DEFAULT_STORABLE_ANNOTATOR_NAME_PART = "(store)";

    /** Directory into which the result files are written. */
    private final File outputDirectory;

    /** Substring of an annotator name that marks its output as storable. */
    private String storableAnnotatorNamePart = DEFAULT_STORABLE_ANNOTATOR_NAME_PART;

    /**
     * Creates a writer that stores its files in the given directory. If the
     * directory does not exist, the constructor tries to create it (including
     * missing parent directories) and logs a warning if that fails.
     *
     * @param outputDirectory
     *            path of the directory the result files should be written to
     */
    public AnnotatorOutputWriter(String outputDirectory) {
        this.outputDirectory = new File(outputDirectory);
        // mkdirs() may return false if another thread or process created the
        // directory in the meantime, hence the additional isDirectory() check.
        if (!this.outputDirectory.exists() && !this.outputDirectory.mkdirs()
                && !this.outputDirectory.isDirectory()) {
            LOGGER.warn("Couldn't create the output directory \"{}\".", this.outputDirectory.getAbsolutePath());
        }
    }

    /**
     * Stores the given annotator results as a GZIP-compressed Turtle NIF file
     * if the given configuration allows it. Errors are logged and not
     * propagated to the caller.
     *
     * @param configuration
     *            the experiment task configuration the results belong to; it
     *            is used to decide whether the output is stored and to derive
     *            the name of the output file
     * @param results
     *            the markings of the annotator; the list at position
     *            {@code d} is attached to the document at position {@code d}
     * @param documents
     *            the documents of the dataset
     */
    public <T extends Marking> void storeAnnotatorOutput(ExperimentTaskConfiguration configuration,
            List<List<T>> results, List<Document> documents) {
        if (!outputShouldBeStored(configuration)) {
            return;
        }
        FileOutputStream fout = null;
        GZIPOutputStream gout = null;
        try {
            // Prepare everything that can fail before the file is opened so
            // that no empty file is left behind in these cases.
            File file = generateOutputFile(configuration);
            List<Document> resultDocuments = generateResultDocuments(results, documents);
            fout = new FileOutputStream(file);
            gout = new GZIPOutputStream(fout);
            NIFWriter writer = new TurtleNIFWriter();
            writer.writeNIF(resultDocuments, gout);
            // Close the GZIP stream explicitly: closing writes the remaining
            // compressed data and the GZIP trailer. If that fails, the file is
            // corrupt and the error has to be logged instead of being
            // swallowed by closeQuietly(). Closing an already closed stream
            // is harmless.
            gout.close();
        } catch (Exception e) {
            LOGGER.error("Couldn't write annotator result to file.", e);
        } finally {
            // Safety net for the error case; no-ops after a successful close.
            IOUtils.closeQuietly(gout);
            IOUtils.closeQuietly(fout);
        }
    }

    /**
     * Checks whether the output of the given experiment task should be stored.
     *
     * @param configuration
     *            the configuration of the experiment task
     * @return {@code true} if the dataset could be cached and either the
     *         annotator could be cached or its name contains the storable
     *         annotator name part
     */
    private boolean outputShouldBeStored(ExperimentTaskConfiguration configuration) {
        return configuration.datasetConfig.couldBeCached() && (configuration.annotatorConfig.couldBeCached()
                || configuration.annotatorConfig.getName().contains(storableAnnotatorNamePart));
    }

    /**
     * Generates the output file for the given configuration. The file name
     * has the form
     * {@code <annotator>-<dataset>-<w|s>-<experiment type>.ttl.gz} where the
     * names are cleaned by {@link #appendCleanedString(StringBuilder, String)}
     * and {@code w} is used for {@link Matching#WEAK_ANNOTATION_MATCH} while
     * {@code s} is used for every other matching.
     *
     * @param configuration
     *            the configuration of the experiment task
     * @return the file located inside the output directory
     */
    private File generateOutputFile(ExperimentTaskConfiguration configuration) {
        StringBuilder fileBuilder = new StringBuilder();
        fileBuilder.append(outputDirectory.getAbsolutePath());
        fileBuilder.append(File.separator);
        appendCleanedString(fileBuilder, configuration.annotatorConfig.getName());
        fileBuilder.append('-');
        appendCleanedString(fileBuilder, configuration.datasetConfig.getName());
        if (configuration.matching == Matching.WEAK_ANNOTATION_MATCH) {
            fileBuilder.append("-w-");
        } else {
            fileBuilder.append("-s-");
        }
        appendCleanedString(fileBuilder, configuration.type.name());
        fileBuilder.append(".ttl.gz");
        return new File(fileBuilder.toString());
    }

    /**
     * Appends the given String to the builder and replaces every character
     * that is neither a letter nor a digit with an underscore.
     *
     * @param builder
     *            the builder the cleaned String is appended to
     * @param s
     *            the String that should be cleaned and appended
     */
    private void appendCleanedString(StringBuilder builder, String s) {
        for (int i = 0; i < s.length(); ++i) {
            char c = s.charAt(i);
            builder.append(Character.isLetterOrDigit(c) ? c : '_');
        }
    }

    /**
     * Creates one new document per dataset document, carrying the text and
     * the URI of the dataset document and the markings of the result list at
     * the same position. Documents without a matching result list (missing
     * results, fewer result lists than documents or a {@code null} entry) are
     * created without markings.
     *
     * @param results
     *            the markings of the annotator, aligned by position with the
     *            given documents
     * @param documents
     *            the documents of the dataset
     * @return the newly created documents in the order of the given documents
     */
    private <T extends Marking> List<Document> generateResultDocuments(List<List<T>> results,
            List<Document> documents) {
        List<Document> resultDocuments = new ArrayList<Document>(documents.size());
        // A missing result list is handled like an empty one.
        int numberOfResults = (results == null) ? 0 : results.size();
        for (int d = 0; d < documents.size(); ++d) {
            Document datasetDocument = documents.get(d);
            Document resultDocument = new DocumentImpl(datasetDocument.getText(),
                    datasetDocument.getDocumentURI());
            List<T> markings = (d < numberOfResults) ? results.get(d) : null;
            if (markings != null) {
                for (T marking : markings) {
                    resultDocument.addMarking(marking);
                }
            }
            resultDocuments.add(resultDocument);
        }
        return resultDocuments;
    }
}