/*
 * Copyright 2016
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universität Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package de.tudarmstadt.ukp.dkpro.core.io.gate;

import static org.apache.uima.fit.util.JCasUtil.selectAll;

import java.io.OutputStream;
import java.util.Iterator;

import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.BooleanArrayFS;
import org.apache.uima.cas.ByteArrayFS;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CommonArrayFS;
import org.apache.uima.cas.DoubleArrayFS;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.FloatArrayFS;
import org.apache.uima.cas.IntArrayFS;
import org.apache.uima.cas.LongArrayFS;
import org.apache.uima.cas.ShortArrayFS;
import org.apache.uima.cas.StringArrayFS;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.MimeTypeCapability;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.fit.util.CasUtil;
import org.apache.uima.fit.util.FSUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.TOP;
import org.apache.uima.resource.ResourceInitializationException;

import de.tudarmstadt.ukp.dkpro.core.api.io.JCasFileWriter_ImplBase;
import de.tudarmstadt.ukp.dkpro.core.api.parameter.ComponentParameters;
import de.tudarmstadt.ukp.dkpro.core.api.parameter.MimeTypes;
import gate.Annotation;
import gate.AnnotationSet;
import gate.Document;
import gate.DocumentExporter;
import gate.FeatureMap;
import gate.corpora.DocumentContentImpl;
import gate.corpora.DocumentImpl;
import gate.corpora.export.GateXMLExporter;
import gate.util.InvalidOffsetException;
import gate.util.SimpleFeatureMapImpl;
import it.unimi.dsi.fastutil.ints.Int2IntMap;
import it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap;

/**
 * Writes the content of a CAS as a GATE XML document.
 * <p>
 * The document text becomes the content of a new GATE document. Every feature structure returned
 * by {@code selectAll} (except the sofa) is added to the document's default annotation set as one
 * GATE annotation whose type is the full UIMA type name and whose feature map holds the converted
 * UIMA feature values. The document is then exported with a {@link GateXMLExporter}.
 */
@MimeTypeCapability({MimeTypes.APPLICATION_X_GATE_XML})
@TypeCapability(
        inputs = {
                "de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData"})
public class GateXmlWriter2
    extends JCasFileWriter_ImplBase
{
    /**
     * Specify the suffix of output files. Default value <code>.xml</code>. If the suffix is not
     * needed, provide an empty string as value.
     */
    public static final String PARAM_FILENAME_EXTENSION =
            ComponentParameters.PARAM_FILENAME_EXTENSION;
    @ConfigurationParameter(name = PARAM_FILENAME_EXTENSION, mandatory = true, defaultValue = ".xml")
    private String filenameSuffix;

    private DocumentExporter exporter;

    /**
     * Initializes the base writer and creates the GATE XML exporter used for all documents.
     *
     * @param aContext
     *            the UIMA context.
     * @throws ResourceInitializationException
     *             if the base class initialization fails.
     */
    @Override
    public void initialize(UimaContext aContext)
        throws ResourceInitializationException
    {
        super.initialize(aContext);

        exporter = new GateXMLExporter();
    }

    /**
     * Converts the given CAS into a GATE document and writes it to the target location.
     *
     * @param aJCas
     *            the CAS to export.
     * @throws AnalysisEngineProcessException
     *             if a feature structure cannot be added to the GATE document or if writing the
     *             document fails.
     */
    @Override
    public void process(JCas aJCas)
        throws AnalysisEngineProcessException
    {
        Document document = new DocumentImpl();
        document.setContent(new DocumentContentImpl(aJCas.getDocumentText()));

        AnnotationSet as = document.getAnnotations();

        // Maps the low-level reference of each converted feature structure to the id of the
        // GATE annotation created for it, so that every feature structure is converted only once.
        Int2IntMap processed = new Int2IntOpenHashMap();

        Iterator<TOP> fses = selectAll(aJCas).iterator();
        while (fses.hasNext()) {
            TOP fs = fses.next();
            try {
                process(processed, as, fs);
            }
            catch (InvalidOffsetException e) {
                throw new AnalysisEngineProcessException(e);
            }
        }

        try (OutputStream docOS = getOutputStream(aJCas, filenameSuffix)) {
            exporter.export(document, docOS);
        }
        catch (Exception e) {
            throw new AnalysisEngineProcessException(e);
        }
    }

    /**
     * Converts a single feature structure into a GATE annotation and adds it to the annotation
     * set. Feature structures referenced through features are converted recursively. A feature
     * structure that has already been converted is not converted again; the annotation created
     * earlier is returned instead.
     *
     * @param aProcessed
     *            map from the low-level FS reference to the id of the GATE annotation already
     *            created for that feature structure. Updated by this method.
     * @param aAs
     *            the GATE annotation set receiving the annotations.
     * @param aFS
     *            the feature structure to convert.
     * @return the GATE annotation for the feature structure or {@code null} if the feature
     *         structure is the sofa of its CAS view.
     * @throws InvalidOffsetException
     *             if GATE rejects the offsets of the annotation.
     * @throws IllegalArgumentException
     *             if a feature has a range type that cannot be converted.
     * @throws UnsupportedOperationException
     *             if a feature is an array or list of feature structures.
     */
    private Annotation process(Int2IntMap aProcessed, AnnotationSet aAs, FeatureStructure aFS)
        throws InvalidOffsetException
    {
        // The sofa itself is not exported as an annotation.
        if (aFS.getCAS().getSofa() == aFS) {
            return null;
        }

        int fsAddr = aFS.getCAS().getLowLevelCAS().ll_getFSRef(aFS);
        if (aProcessed.containsKey(fsAddr)) {
            return aAs.get(aProcessed.get(fsAddr));
        }

        // NOTE(review): the feature structure is registered in aProcessed only after all of its
        // features have been converted, so feature structures referencing each other in a cycle
        // would recurse without bound here.
        FeatureMap fm = new SimpleFeatureMapImpl();
        for (Feature feature : aFS.getType().getFeatures()) {
            String fullName = feature.getName();
            // Sofa and offsets are not stored as features; the offsets are passed to GATE
            // separately when the annotation is added below.
            if (CAS.FEATURE_FULL_NAME_SOFA.equals(fullName)
                    || CAS.FEATURE_FULL_NAME_BEGIN.equals(fullName)
                    || CAS.FEATURE_FULL_NAME_END.equals(fullName)) {
                continue;
            }

            String key = feature.getShortName();
            if (feature.getRange().isPrimitive()) {
                fm.put(key, getPrimitiveValue(aFS, feature));
            }
            // "null" case
            else if (aFS.getFeatureValue(feature) == null) {
                fm.put(key, null);
            }
            // Handle case where feature is an array
            else if (feature.getRange().isArray()) {
                fm.put(key, getArrayValue(aFS, feature));
            }
            // Handle case where feature is a list
            else if (FSUtil.isMultiValuedFeature(aFS, feature)) {
                fm.put(key, getListValue(aFS, feature));
            }
            // Handle case where feature refers to another feature structure
            else if (aFS.getCAS().getTypeSystem()
                    .subsumes(CasUtil.getType(aFS.getCAS(), TOP.class), feature.getRange())) {
                fm.put(key, process(aProcessed, aAs, aFS.getFeatureValue(feature)));
            }
            else {
                throw new IllegalArgumentException("Unable to convert value of feature ["
                        + feature.getName() + "] with type [" + feature.getRange().getName()
                        + "]");
            }
        }

        // NOTE(review): for feature structures that are not annotations, begin and end remain
        // null and are passed to AnnotationSet.add as-is - confirm that GATE accepts this.
        Long begin = null;
        Long end = null;
        if (aFS instanceof AnnotationFS) {
            AnnotationFS annotation = (AnnotationFS) aFS;
            begin = Long.valueOf(annotation.getBegin());
            end = Long.valueOf(annotation.getEnd());
        }

        Integer aid = aAs.add(begin, end, aFS.getType().getName(), fm);
        aProcessed.put(fsAddr, aid.intValue());

        return aAs.get(aid);
    }

    /**
     * Reads the value of a feature with a primitive range as a boxed Java value (or a
     * {@link String}).
     */
    private static Object getPrimitiveValue(FeatureStructure aFS, Feature aFeature)
    {
        switch (aFeature.getRange().getName()) {
        case CAS.TYPE_NAME_BOOLEAN:
            return aFS.getBooleanValue(aFeature);
        case CAS.TYPE_NAME_BYTE:
            return aFS.getByteValue(aFeature);
        case CAS.TYPE_NAME_DOUBLE:
            return aFS.getDoubleValue(aFeature);
        case CAS.TYPE_NAME_FLOAT:
            return aFS.getFloatValue(aFeature);
        case CAS.TYPE_NAME_INTEGER:
            return aFS.getIntValue(aFeature);
        case CAS.TYPE_NAME_LONG:
            return aFS.getLongValue(aFeature);
        case CAS.TYPE_NAME_SHORT:
            return aFS.getShortValue(aFeature);
        case CAS.TYPE_NAME_STRING:
            return aFS.getStringValue(aFeature);
        default:
            throw new IllegalArgumentException("Unable to convert value of feature ["
                    + aFeature.getName() + "] with type [" + aFeature.getRange().getName()
                    + "]");
        }
    }

    /**
     * Copies the non-null value of an array-valued feature into a new Java array of the matching
     * primitive (or {@link String}) component type.
     */
    private static Object getArrayValue(FeatureStructure aFS, Feature aFeature)
    {
        CommonArrayFS source = (CommonArrayFS) aFS.getFeatureValue(aFeature);
        int length = source.size();

        switch (aFeature.getRange().getComponentType().getName()) {
        case CAS.TYPE_NAME_BOOLEAN: {
            boolean[] target = new boolean[length];
            ((BooleanArrayFS) source).copyToArray(0, target, 0, length);
            return target;
        }
        case CAS.TYPE_NAME_BYTE: {
            byte[] target = new byte[length];
            ((ByteArrayFS) source).copyToArray(0, target, 0, length);
            return target;
        }
        case CAS.TYPE_NAME_DOUBLE: {
            double[] target = new double[length];
            ((DoubleArrayFS) source).copyToArray(0, target, 0, length);
            return target;
        }
        case CAS.TYPE_NAME_FLOAT: {
            float[] target = new float[length];
            ((FloatArrayFS) source).copyToArray(0, target, 0, length);
            return target;
        }
        case CAS.TYPE_NAME_INTEGER: {
            int[] target = new int[length];
            ((IntArrayFS) source).copyToArray(0, target, 0, length);
            return target;
        }
        case CAS.TYPE_NAME_LONG: {
            long[] target = new long[length];
            ((LongArrayFS) source).copyToArray(0, target, 0, length);
            return target;
        }
        case CAS.TYPE_NAME_SHORT: {
            short[] target = new short[length];
            ((ShortArrayFS) source).copyToArray(0, target, 0, length);
            return target;
        }
        case CAS.TYPE_NAME_STRING: {
            String[] target = new String[length];
            ((StringArrayFS) source).copyToArray(0, target, 0, length);
            return target;
        }
        default:
            throw new UnsupportedOperationException("Cannot convert FS arrays yet");
        }
    }

    /**
     * Copies the non-null value of a list-valued feature (float, integer or string list) into a
     * new Java array holding the list elements in list order.
     */
    private static Object getListValue(FeatureStructure aFS, Feature aFeature)
    {
        FeatureStructure first = aFS.getFeatureValue(aFeature);

        // Get length of list
        int length = 0;
        for (FeatureStructure cur = first; isNonEmptyListNode(cur); cur = tailOf(cur)) {
            length++;
        }

        switch (aFeature.getRange().getName()) {
        case CAS.TYPE_NAME_FLOAT_LIST: {
            float[] target = new float[length];
            int i = 0;
            for (FeatureStructure cur = first; isNonEmptyListNode(cur); cur = tailOf(cur)) {
                target[i++] = cur.getFloatValue(
                        cur.getType().getFeatureByBaseName(CAS.FEATURE_BASE_NAME_HEAD));
            }
            return target;
        }
        case CAS.TYPE_NAME_INTEGER_LIST: {
            int[] target = new int[length];
            int i = 0;
            for (FeatureStructure cur = first; isNonEmptyListNode(cur); cur = tailOf(cur)) {
                target[i++] = cur.getIntValue(
                        cur.getType().getFeatureByBaseName(CAS.FEATURE_BASE_NAME_HEAD));
            }
            return target;
        }
        case CAS.TYPE_NAME_STRING_LIST: {
            String[] target = new String[length];
            int i = 0;
            for (FeatureStructure cur = first; isNonEmptyListNode(cur); cur = tailOf(cur)) {
                target[i++] = cur.getStringValue(
                        cur.getType().getFeatureByBaseName(CAS.FEATURE_BASE_NAME_HEAD));
            }
            return target;
        }
        default:
            throw new UnsupportedOperationException("Cannot convert FS lists yet");
        }
    }

    /**
     * Checks whether the given list node carries an element. We assume to be facing a non-empty
     * element if it has a "head" feature. A {@code null} node is treated as the end of the list.
     */
    private static boolean isNonEmptyListNode(FeatureStructure aNode)
    {
        return aNode != null
                && aNode.getType().getFeatureByBaseName(CAS.FEATURE_BASE_NAME_HEAD) != null;
    }

    /**
     * Returns the value of the "tail" feature of the given non-empty list node.
     */
    private static FeatureStructure tailOf(FeatureStructure aNode)
    {
        return aNode.getFeatureValue(
                aNode.getType().getFeatureByBaseName(CAS.FEATURE_BASE_NAME_TAIL));
    }
}