/*
 * Licensed to the Technische Universität Darmstadt under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The Technische Universität Darmstadt
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.dkpro.core.io.lxf.internal;

import static org.apache.uima.fit.util.JCasUtil.indexCovered;
import static org.apache.uima.fit.util.JCasUtil.select;
import static org.dkpro.core.io.lxf.internal.model.LxfVocabulary.FEAT_LABEL;
import static org.dkpro.core.io.lxf.internal.model.LxfVocabulary.FEAT_LEMMA;
import static org.dkpro.core.io.lxf.internal.model.LxfVocabulary.FEAT_POS;
import static org.dkpro.core.io.lxf.internal.model.LxfVocabulary.LAYER_DEPENDENCY;
import static org.dkpro.core.io.lxf.internal.model.LxfVocabulary.LAYER_MORPHOLOGY;
import static org.dkpro.core.io.lxf.internal.model.LxfVocabulary.LAYER_SENTENCE;
import static org.dkpro.core.io.lxf.internal.model.LxfVocabulary.LAYER_TOKEN;

import java.util.Collection;
import java.util.HashMap;
import java.util.Map;

import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.Marker;
import org.apache.uima.jcas.JCas;
import org.dkpro.core.io.lxf.internal.model.LxfEdge;
import org.dkpro.core.io.lxf.internal.model.LxfGraph;
import org.dkpro.core.io.lxf.internal.model.LxfNode;
import org.dkpro.core.io.lxf.internal.model.LxfRegion;
import org.dkpro.core.io.lxf.internal.model.LxfText;

import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency;

/**
 * Converts DKPro Core CAS annotations (sentences, tokens, POS, lemmas, dependencies) into an
 * LXF graph. Supports a delta mode: when an original LXF graph is supplied, only annotations
 * added to the CAS after the current mark are exported; pre-existing annotations are looked up
 * in the source graph instead of being re-created.
 */
public class DKPro2Lxf
{
    /**
     * Convert the full CAS to LXF (no delta mode).
     *
     * @param aJCas
     *            the source CAS.
     * @param aTarget
     *            the target LXF graph.
     */
    public static void convert(JCas aJCas, LxfGraph aTarget)
    {
        convert(aJCas, null, aTarget);
    }

    /**
     * Convert the CAS to LXF. If {@code aSource} is non-null, delta mode is enabled and tool
     * names recorded in the source graph are reused; new layers are attributed to the default
     * tool name {@code "dkpro"}.
     *
     * @param aJCas
     *            the source CAS.
     * @param aSource
     *            the original LXF; may be {@code null} to disable delta mode.
     * @param aTarget
     *            the target LXF graph.
     */
    public static void convert(JCas aJCas, LxfGraph aSource, LxfGraph aTarget)
    {
        convert(aJCas, aSource, aTarget, createIdMap("dkpro", aSource), "dkpro");
    }

    /**
     * Creates an id map that consists of the correspondence between tools and annotation layers.
     * The key in the map is the layer and the value is the annotation tool that created the
     * layer. If the layer was present in the source, then the tool from the source will be used
     * for the layer. Otherwise the given tool name will be used.
     *
     * @param toolName
     *            tool name to attribute new layers to.
     * @param aSource
     *            original LXF graph; may be {@code null}.
     * @return map from annotation layer to the tool responsible for it; always contains entries
     *         for the dependency, morphology, sentence and token layers.
     */
    public static Map<String, String> createIdMap(String toolName, LxfGraph aSource)
    {
        Map<String, String> ids = new HashMap<>();
        if (aSource != null) {
            // Reuse the tool recorded in the source graph for each layer it already contains.
            for (LxfNode n : aSource.getNodes()) {
                ids.put(n.getType(), n.getOrigin());
            }
        }

        // Any layer not present in the source is attributed to the given tool.
        if (!ids.containsKey(LAYER_DEPENDENCY)) {
            ids.put(LAYER_DEPENDENCY, toolName);
        }
        if (!ids.containsKey(LAYER_MORPHOLOGY)) {
            ids.put(LAYER_MORPHOLOGY, toolName);
        }
        if (!ids.containsKey(LAYER_SENTENCE)) {
            ids.put(LAYER_SENTENCE, toolName);
        }
        if (!ids.containsKey(LAYER_TOKEN)) {
            ids.put(LAYER_TOKEN, toolName);
        }
        return ids;
    }

    /**
     * Convert from CAS to LXF.
     *
     * @param aJCas
     *            the source CAS.
     * @param aSource
     *            the original LXF. If this is non-null, then delta-mode is enabled.
     * @param aTarget
     *            the target LXF.
     * @param ids
     *            the ids of the tools responsible for generating each annotation layer. The key
     *            is the annotation layer, the value is the tool that generates the annotation.
     * @param toolName
     *            the name of the tool generating the new annotations.
     */
    public static void convert(JCas aJCas, LxfGraph aSource, LxfGraph aTarget,
            Map<String, String> ids, String toolName)
    {
        if (aSource == null) {
            // Not in delta mode - the target carries the document text itself.
            aTarget.setMedia(new LxfText(aJCas.getDocumentText()));
        }

        // Per-tool running counters for edge, node and region ids.
        ToolGeneratorIndex toolEdgeIndex = new ToolGeneratorIndex(ids.values());
        ToolGeneratorIndex toolNodeIndex = new ToolGeneratorIndex(ids.values());
        ToolGeneratorIndex toolRegionIndex = new ToolGeneratorIndex(ids.values());

        // Iterates over the nodes already present in the source graph (delta mode).
        NodeIterator iter = new NodeIterator(aSource);

        Map<Sentence, Collection<Token>> idxSentTok =
                indexCovered(aJCas, Sentence.class, Token.class);
        Map<Sentence, Collection<Dependency>> idxSentDep =
                indexCovered(aJCas, Sentence.class, Dependency.class);

        for (Sentence sentence : select(aJCas, Sentence.class)) {
            LxfNode sentenceNode;
            String toolid = ids.get(LAYER_SENTENCE);
            if (aSource == null || needsExport(aJCas, sentence)) {
                // Sentence region
                LxfRegion sentenceRegion = new LxfRegion(toolid,
                        toolRegionIndex.nextIndex(toolid), sentence.getBegin(),
                        sentence.getEnd());
                aTarget.addRegion(sentenceRegion);
                sentenceNode = new LxfNode(LAYER_SENTENCE, toolid,
                        toolNodeIndex.nextIndex(toolid), 0, sentenceRegion);
                // Setting this to the base text as per discussion
                sentenceNode.setFeature(FEAT_LABEL, sentence.getCoveredText());
                aTarget.addNode(sentenceNode);
            }
            else {
                // Sentence pre-exists in the source graph - reuse its node.
                sentenceNode = iter.next(toolid, LAYER_SENTENCE);
            }

            // Tokens, POS, lemma
            // Maps each token to its morphology node so dependencies can attach to it.
            Map<Token, LxfNode> idxMorph = new HashMap<>();
            Collection<Token> tokens = idxSentTok.get(sentence);
            for (Token token : tokens) {
                // Convert or obtain token node
                LxfNode tokenNode;
                toolid = ids.get(LAYER_TOKEN);
                if (aSource == null || needsExport(aJCas, token)) {
                    LxfRegion tokenRegion = new LxfRegion(toolid,
                            toolRegionIndex.nextIndex(toolid), token.getBegin(), token.getEnd());
                    aTarget.addRegion(tokenRegion);
                    tokenNode = new LxfNode(LAYER_TOKEN, toolid, toolNodeIndex.nextIndex(toolid),
                            0, tokenRegion);
                    String form = token.getText();
                    tokenNode.setFeature(FEAT_LABEL, form);
                    aTarget.addNode(tokenNode);
                    int edgeIndex = toolEdgeIndex.nextIndex(toolid);
                    aTarget.addEdge(new LxfEdge(tokenNode.getOrigin(), edgeIndex, 0, tokenNode,
                            sentenceNode));
                }
                else {
                    tokenNode = iter.next(toolid, LAYER_TOKEN);
                }

                toolid = ids.get(LAYER_MORPHOLOGY);

                // Convert POS if exists - if we create a node, pass it on to the lemma
                // conversion as well
                POS pos = token.getPos();
                LxfNode morphNode = null;
                boolean newMorphNode = false;
                if (pos != null) {
                    if ((aSource == null || needsExport(aJCas, pos))) {
                        morphNode = new LxfNode(LAYER_MORPHOLOGY, toolid,
                                toolNodeIndex.nextIndex(toolid), 0);
                        morphNode.setFeature(FEAT_POS, token.getPos().getPosValue());
                        aTarget.addNode(morphNode);
                        aTarget.addEdge(new LxfEdge(morphNode.getOrigin(),
                                toolEdgeIndex.nextIndex(toolid), 0, morphNode, tokenNode));
                        newMorphNode = true;
                        // Need to remember this because we may want to connect the dependencies
                        // to this node
                        idxMorph.put(token, morphNode);
                    }
                    else {
                        morphNode = iter.next(toolid, LAYER_MORPHOLOGY);
                        idxMorph.put(token, morphNode);
                    }
                }

                // Convert lemma if exists
                Lemma lemma = token.getLemma();
                if (lemma != null && (aSource == null || needsExport(aJCas, lemma))) {
                    LxfNode lemmaNode = newMorphNode ? morphNode : null;
                    if (lemmaNode == null) {
                        // FIX: previously attributed to toolName while drawing the node index
                        // from the toolid counter; use the morphology layer's tool consistently
                        // like every other node created here.
                        lemmaNode = new LxfNode(LAYER_MORPHOLOGY, toolid,
                                toolNodeIndex.nextIndex(toolid), 0);
                        aTarget.addNode(lemmaNode);
                        aTarget.addEdge(new LxfEdge(lemmaNode.getOrigin(),
                                toolEdgeIndex.nextIndex(toolid), 0, lemmaNode, tokenNode));
                        //idxMorph.put(token, lemmaNode);
                    }
                    lemmaNode.setFeature(FEAT_LEMMA, token.getLemma().getValue());
                }
            }

            toolid = ids.get(LAYER_DEPENDENCY);

            // Dependencies
            Collection<Dependency> deps = idxSentDep.get(sentence);
            for (Dependency dep : deps) {
                if (aSource != null && !needsExport(aJCas, dep)) {
                    continue;
                }
                LxfNode depNode = new LxfNode(LAYER_DEPENDENCY, toolid,
                        toolNodeIndex.nextIndex(toolid), 0);
                depNode.setFeature(FEAT_LABEL, dep.getDependencyType());
                aTarget.addNode(depNode);

                // NOTE(review): if the governor/dependent token has no POS annotation, no
                // morphology node was registered in idxMorph (see the commented-out
                // idxMorph.put above) and these lookups return null, causing an NPE below.
                // Confirm whether tokens are guaranteed to carry POS when dependencies exist.
                LxfNode govMorphNode = idxMorph.get(dep.getGovernor());
                LxfNode depMorphNode = idxMorph.get(dep.getDependent());

                aTarget.addEdge(new LxfEdge(depNode.getOrigin(),
                        toolEdgeIndex.nextIndex(toolid), 0, depNode, depMorphNode));
                // For root dependencies governor == dependent; emit only the one edge above.
                if (!govMorphNode.getId().equals(depMorphNode.getId())) {
                    aTarget.addEdge(new LxfEdge(depNode.getOrigin(),
                            toolEdgeIndex.nextIndex(toolid), 0, govMorphNode, depNode));
                }
            }
        }
    }

    /**
     * In delta mode, an annotation needs to be exported only if it was created after the
     * current CAS mark (i.e. it is not part of the original document).
     *
     * @param aCas
     *            the CAS being converted.
     * @param aFS
     *            the feature structure to check.
     * @return whether the feature structure was added after the current mark.
     */
    private static boolean needsExport(JCas aCas, FeatureStructure aFS)
    {
        Marker marker = aCas.getCasImpl().getCurrentMark();
        return marker.isNew(aFS);
    }
}