/* * Copyright 2012 * Ubiquitous Knowledge Processing (UKP) Lab and FG Language Technology * Technische Universität Darmstadt * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package de.tudarmstadt.ukp.dkpro.core.io.tcf; import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription; import static org.apache.uima.fit.factory.CollectionReaderFactory.createReaderDescription; import static org.apache.uima.fit.pipeline.SimplePipeline.runPipeline; import static org.junit.Assert.assertEquals; import java.io.File; import java.io.FileInputStream; import java.io.InputStream; import org.apache.commons.io.FileUtils; import org.apache.uima.analysis_engine.AnalysisEngineDescription; import org.apache.uima.collection.CollectionReaderDescription; import org.apache.uima.fit.component.CasDumpWriter; import org.custommonkey.xmlunit.XMLAssert; import org.junit.Rule; import org.junit.Test; import org.xml.sax.InputSource; import de.tudarmstadt.ukp.dkpro.core.testing.DkproTestContext; import eu.clarin.weblicht.wlfxb.io.WLDObjector; import eu.clarin.weblicht.wlfxb.tc.api.TextCorpus; import eu.clarin.weblicht.wlfxb.tc.api.TextCorpusLayer; import eu.clarin.weblicht.wlfxb.tc.xb.TextCorpusStored; import eu.clarin.weblicht.wlfxb.xb.WLData; public class TcfReaderWriterTest { @Test public void test1() throws Exception { testOneWay("tcf-after.xml", "tcf-after-expected.xml"); } @Test public void testWithCmdMetadata() throws Exception { testOneWay("tcf04-karin-wl.xml", "tcf04-karin-wl_expected.xml"); } public void testOneWay(String aInputFile, String aExpectedFile) throws Exception { CollectionReaderDescription reader = createReaderDescription(TcfReader.class, TcfReader.PARAM_SOURCE_LOCATION, "src/test/resources/", TcfReader.PARAM_PATTERNS, aInputFile); AnalysisEngineDescription writer = createEngineDescription( TcfWriter.class, TcfWriter.PARAM_TARGET_LOCATION, "target/test-output/oneway", TcfWriter.PARAM_OVERWRITE, true, TcfWriter.PARAM_FILENAME_EXTENSION, ".xml", TcfWriter.PARAM_STRIP_EXTENSION, true); AnalysisEngineDescription dumper = createEngineDescription(CasDumpWriter.class, CasDumpWriter.PARAM_OUTPUT_FILE, "target/test-output/oneway/dump.txt"); runPipeline(reader, writer, dumper); InputStream isReference = new FileInputStream(new File("src/test/resources/" + aExpectedFile)); InputStream isActual = new FileInputStream(new File("target/test-output/oneway/" + aInputFile)); WLData wLDataReference = WLDObjector.read(isReference); TextCorpusStored aCorpusDataReference = wLDataReference.getTextCorpus(); WLData wLDataActual = WLDObjector.read(isActual); TextCorpusStored aCorpusDataActual = wLDataActual.getTextCorpus(); // check if layers maintained assertEquals(aCorpusDataReference.getLayers().size(), aCorpusDataActual.getLayers().size()); // Check if every layers have the same number of annotations for (TextCorpusLayer layer : aCorpusDataReference.getLayers()) { assertEquals( "Layer size mismatch in ["+layer.getClass().getName()+"]", layer.size(), getLayer(aCorpusDataActual, layer.getClass()).size()); } XMLAssert.assertXMLEqual( new InputSource("src/test/resources/" + aExpectedFile), new InputSource(new File("target/test-output/oneway/" + aInputFile).getPath())); } private static TextCorpusLayer getLayer(TextCorpus aCorpus, Class<? extends TextCorpusLayer> aLayerType) { for (TextCorpusLayer layer : aCorpus.getLayers()) { if (layer.getClass().equals(aLayerType)) { return layer; } } throw new IllegalArgumentException("No layer of type [" + aLayerType.getName() + "]"); } @Test public void testRoundtrip() throws Exception { CollectionReaderDescription reader = createReaderDescription(TcfReader.class, TcfReader.PARAM_SOURCE_LOCATION, "src/test/resources/", TcfReader.PARAM_PATTERNS, "wlfxb.xml"); AnalysisEngineDescription writer = createEngineDescription( TcfWriter.class, TcfWriter.PARAM_TARGET_LOCATION, "target/test-output/roundtrip", TcfWriter.PARAM_OVERWRITE, true, TcfWriter.PARAM_FILENAME_EXTENSION, ".xml", TcfWriter.PARAM_STRIP_EXTENSION, true); runPipeline(reader, writer); String reference = FileUtils.readFileToString( new File("src/test/resources/wlfxb.xml"), "UTF-8"); String actual = FileUtils.readFileToString( new File("target/test-output/roundtrip/wlfxb.xml"), "UTF-8"); assertEquals(reference, actual); } @Rule public DkproTestContext testContext = new DkproTestContext(); }