/*
* Copyright 2010
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.dkpro.core.io.bincas;
import static org.apache.commons.io.FileUtils.readFileToString;
import static org.apache.uima.fit.factory.TypeSystemDescriptionFactory.createTypeSystemDescription;
import static org.apache.uima.fit.pipeline.SimplePipeline.runPipeline;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.File;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.cas.CAS;
import org.apache.uima.collection.CollectionReader;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.factory.CollectionReaderFactory;
import org.apache.uima.util.CasCreationUtils;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import de.tudarmstadt.ukp.dkpro.core.io.text.TextReader;
import de.tudarmstadt.ukp.dkpro.core.testing.DkproTestContext;
/**
 * Round-trip test for {@link SerializedCasWriter} and {@link SerializedCasReader}: plain text
 * files are read with {@link TextReader}, written as serialized CASes into a temporary folder
 * and then read back and compared against the original text.
 */
public class SerializedCasWriterReaderTest
{
    /** Folder containing the plain text files used as input. */
    private static final String SOURCE_TEXTS = "src/test/resources/texts";

    /** Name of the file used when the type system is stored separately from the CAS data. */
    private static final String TYPE_SYSTEM_FILE = "typesystem.ser";

    /** Language code set by the text reader and expected after the round trip. */
    private static final String LANGUAGE = "latin";

    @Rule
    public TemporaryFolder testFolder = new TemporaryFolder();

    @Rule
    public DkproTestContext testContext = new DkproTestContext();

    /**
     * Writes the CASes without configuring a separate type system location and reads them back.
     */
    @Test
    public void testCasWithTypeSystemEmbedded() throws Exception
    {
        write(true);
        read();
    }

    /**
     * Writes the CASes with a separate type system file in the temporary folder and reads them
     * back.
     */
    @Test
    public void testCasWithTypeSystemSeparate() throws Exception
    {
        write(false);
        read();
    }

    /**
     * Runs a pipeline that reads the test texts and writes them as serialized CASes into the
     * temporary folder, then asserts that the output for {@code example1.txt} exists.
     *
     * @param aIncludeTypeSystem
     *            if {@code true}, no type system location is passed to the writer; if
     *            {@code false}, a new {@code typesystem.ser} file in the temporary folder is
     *            passed as the type system location.
     * @throws Exception
     *             if the pipeline cannot be created or fails.
     */
    public void write(boolean aIncludeTypeSystem) throws Exception
    {
        CollectionReader reader = CollectionReaderFactory.createReader(
                TextReader.class,
                TextReader.PARAM_SOURCE_LOCATION, SOURCE_TEXTS,
                TextReader.PARAM_PATTERNS, "*.txt",
                TextReader.PARAM_LANGUAGE, LANGUAGE);

        AnalysisEngine writer = AnalysisEngineFactory.createEngine(
                SerializedCasWriter.class,
                SerializedCasWriter.PARAM_TARGET_LOCATION, testFolder.getRoot(),
                SerializedCasWriter.PARAM_TYPE_SYSTEM_LOCATION,
                aIncludeTypeSystem ? null : testFolder.newFile(TYPE_SYSTEM_FILE));

        runPipeline(reader, writer);

        assertTrue(new File(testFolder.getRoot(), "example1.txt.ser").exists());
    }

    /**
     * Reads the first serialized CAS from the temporary folder and asserts that its document
     * text and language match the original {@code example1.txt} input.
     *
     * @throws Exception
     *             if the reader cannot be created or reading fails.
     */
    public void read() throws Exception
    {
        // The pattern is restricted to "*.txt.ser" so that the separate type system file
        // ("typesystem.ser"), which lives in the same folder, is not picked up as a candidate
        // CAS file. With a plain "*.ser" the outcome would depend on file listing order.
        CollectionReader reader = CollectionReaderFactory.createReader(
                SerializedCasReader.class,
                SerializedCasReader.PARAM_SOURCE_LOCATION, testFolder.getRoot(),
                SerializedCasReader.PARAM_PATTERNS, "*.txt.ser",
                SerializedCasReader.PARAM_TYPE_SYSTEM_LOCATION,
                new File(testFolder.getRoot(), TYPE_SYSTEM_FILE));

        try {
            CAS cas = CasCreationUtils.createCas(createTypeSystemDescription(), null, null);

            // Fail with a clear message instead of an obscure error from getNext().
            assertTrue("No serialized CAS found in " + testFolder.getRoot(), reader.hasNext());
            reader.getNext(cas);

            // Decode the reference text with an explicit charset rather than the platform
            // default. NOTE(review): assumes the test texts are UTF-8 encoded and that this
            // matches the encoding TextReader uses by default - confirm.
            String refText = readFileToString(new File(SOURCE_TEXTS, "example1.txt"), "UTF-8");

            assertEquals(refText, cas.getDocumentText());
            assertEquals(LANGUAGE, cas.getDocumentLanguage());
        }
        finally {
            // Release the reader once the test is done with it.
            reader.destroy();
        }
    }

    // Disabled test and helpers, kept commented out for reference.
    //
    // @Test
    // public void lenientTest() throws Exception
    // {
    // TypeSystemDescription tsdMeta = TypeSystemDescriptionFactory
    // .createTypeSystemDescription("desc.type.metadata");
    //
    //
    // // Create a CAS initialized with that type system and set the text
    // CAS casOut = createCas(tsdMeta, null, null);
    // casOut.setDocumentText("This is a test.");
    // DocumentMetaData meta = DocumentMetaData.create(casOut);
    // meta.setDocumentId("document");
    //
    // // Write out
    // AnalysisEngine writer = AnalysisEngineFactory.createEngine(
    // SerializedCasWriter.class, tsdMeta,
    // SerializedCasWriter.PARAM_TARGET_LOCATION, testFolder.getRoot().getPath());
    // writer.process(casOut);
    //
    // // Create a new type system from scratch
    // TypeSystemDescription tsd = new TypeSystemDescription_impl();
    // TypeDescription tokenTypeDesc = tsd.addType("Token", "", CAS.TYPE_NAME_ANNOTATION);
    // tokenTypeDesc.addFeature("length", "", CAS.TYPE_NAME_INTEGER);
    // tsd = CasCreationUtils.mergeTypeSystems(asList(tsd, tsdMeta));
    //
    // // Now read in to CAS with different type system
    // CollectionReader reader = CollectionReaderFactory.createReader(
    // SerializedCasReader.class,
    // SerializedCasReader.PARAM_SOURCE_LOCATION, testFolder.getRoot().getPath(),
    // SerializedCasReader.PARAM_PATTERNS, new String [] {
    // SerializedCasReader.INCLUDE_PREFIX+"*.ser"
    // });
    //
    // CAS casIn = CasCreationUtils.createCas(tsd, null, null);
    // reader.getNext(casIn);
    //
    // upgrade(casIn, tsd);
    //
    // // Try to create an annotation with the extra type
    // AnnotationFS fs = casOut.createAnnotation(casIn.getTypeSystem().getType("Token"), 0, 1);
    // casOut.addFsToIndexes(fs);
    // }
    //
    // private void upgrade(CAS aCas, TypeSystemDescription aTsd) throws Exception
    // {
    // // Prepare template for new CAS
    // CAS newCas = CasCreationUtils.createCas(aTsd, null, null);
    // CASCompleteSerializer serializer = Serialization.serializeCASComplete((CASImpl) newCas);
    //
    // // Save old type system
    // TypeSystem oldTypeSystem = aCas.getTypeSystem();
    //
    // // Save old CAS contents
    // ByteArrayOutputStream os2 = new ByteArrayOutputStream();
    // Serialization.serializeWithCompression(aCas, os2, oldTypeSystem);
    //
    // // Prepare CAS with new type system
    // Serialization.deserializeCASComplete(serializer, (CASImpl) aCas);
    //
    // // Restore CAS data to new type system
    // Serialization.deserializeCAS(aCas, new ByteArrayInputStream(os2.toByteArray()), oldTypeSystem, null);
    // }
    //
    // private void upgrade(CAS aCas) throws Exception
    // {
    // // Prepare template for new CAS
    // CAS newCas = JCasFactory.createJCas().getCas();
    // CASCompleteSerializer serializer = Serialization.serializeCASComplete((CASImpl) newCas);
    //
    // // Save old type system
    // TypeSystem oldTypeSystem = aCas.getTypeSystem();
    //
    // // Save old CAS contents
    // ByteArrayOutputStream os2 = new ByteArrayOutputStream();
    // Serialization.serializeWithCompression(aCas, os2, oldTypeSystem);
    //
    // // Prepare CAS with new type system
    // Serialization.deserializeCASComplete(serializer, (CASImpl) aCas);
    //
    // // Restore CAS data to new type system
    // Serialization.deserializeCAS(aCas, new ByteArrayInputStream(os2.toByteArray()), oldTypeSystem, null);
    // }
}