/*
* Copyright 2014
* Ubiquitous Knowledge Processing (UKP) Lab and FG Language Technology
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.dkpro.core.io.tcf;
import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription;
import static org.apache.uima.fit.factory.CollectionReaderFactory.createReaderDescription;
import static org.junit.Assert.assertEquals;
import java.io.File;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.collection.CollectionReaderDescription;
import org.apache.uima.fit.factory.JCasFactory;
import org.apache.uima.fit.pipeline.SimplePipeline;
import org.apache.uima.jcas.JCas;
import org.junit.Rule;
import org.junit.Test;
import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import de.tudarmstadt.ukp.dkpro.core.testing.DkproTestContext;
/**
 * Tests for {@link TcfWriter} that write a CAS to a TCF file and read it back with
 * {@link TcfReader}.
 */
public class TcfWriterTest
{
    /**
     * Supplies the output folder into which the test writes its TCF file.
     */
    @Rule
    public DkproTestContext testContext = new DkproTestContext();

    /**
     * Writes a CAS whose document metadata points to an original file that is not a TCF file and
     * checks that the written output can be read back as TCF with the same document text.
     *
     * @throws Exception
     *             if an error occurs.
     * @see <a href="https://code.google.com/p/dkpro-core-asl/issues/detail?id=436">Issue 436: XML
     *      preamble written twice if original file exists and is not TCF</a>
     */
    @Test
    public void testOriginalNotTcf()
        throws Exception
    {
        final String text = "okeydokey";
        File targetFolder = testContext.getTestOutputFolder();

        JCas jcas = JCasFactory.createJCas();

        // Generate a fake metadata that points to a non-TCF file
        DocumentMetaData meta = DocumentMetaData.create(jcas);
        meta.setDocumentBaseUri(new File("src/test/resources").toURI().toURL().toString());
        meta.setDocumentUri(
                new File("src/test/resources/not-a-tcf-file.txt").toURI().toURL().toString());

        // Add some content
        jcas.setDocumentText(text);

        // TCF files are usually written without Token offset information, so the TcfReader
        // expects that text is covered by tokens, otherwise it cannot read it.
        new Token(jcas, 0, jcas.getDocumentText().length()).addToIndexes();

        // Write as TCF
        AnalysisEngineDescription writer = createEngineDescription(TcfWriter.class,
                TcfWriter.PARAM_TARGET_LOCATION, targetFolder);
        SimplePipeline.runPipeline(jcas, writer);

        // Read again as TCF
        CollectionReaderDescription reader = createReaderDescription(TcfReader.class,
                TcfReader.PARAM_SOURCE_LOCATION, targetFolder.getPath() + "/*.tcf");

        int documentsRead = 0;
        for (JCas jcas2 : SimplePipeline.iteratePipeline(reader)) {
            assertEquals(text, jcas2.getDocumentText());
            documentsRead++;
        }

        // The loop body is skipped entirely when the reader yields no CAS, which would let the
        // test pass without checking anything. Exactly one CAS was written above, so exactly one
        // document is expected back.
        // NOTE(review): assumes the output folder holds no other *.tcf files - confirm that
        // DkproTestContext provides a folder that is not shared with other tests.
        assertEquals("Unexpected number of TCF documents read back from " + targetFolder, 1,
                documentsRead);
    }
}