/* * Licensed to the Technische Universität Darmstadt under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The Technische Universität Darmstadt * licenses this file to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */package org.dkpro.core.io.lxf.internal; import static org.dkpro.core.io.lxf.internal.model.LxfVocabulary.FEAT_LABEL; import static org.dkpro.core.io.lxf.internal.model.LxfVocabulary.FEAT_LEMMA; import static org.dkpro.core.io.lxf.internal.model.LxfVocabulary.FEAT_POS; import static org.dkpro.core.io.lxf.internal.model.LxfVocabulary.LAYER_DEPENDENCY; import static org.dkpro.core.io.lxf.internal.model.LxfVocabulary.LAYER_MORPHOLOGY; import static org.dkpro.core.io.lxf.internal.model.LxfVocabulary.LAYER_SENTENCE; import static org.dkpro.core.io.lxf.internal.model.LxfVocabulary.LAYER_TOKEN; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.stream.Collectors; import org.apache.uima.jcas.JCas; import org.dkpro.core.io.lxf.internal.model.LxfEdge; import org.dkpro.core.io.lxf.internal.model.LxfGraph; import org.dkpro.core.io.lxf.internal.model.LxfNode; import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS; import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma; import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; import 
de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.DependencyFlavor;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.ROOT;

/**
 * Converts an {@link LxfGraph} into DKPro Core annotations (sentences, tokens, POS tags, lemmas
 * and dependency relations) stored in a {@link JCas}.
 */
public class Lxf2DKPro {
    /**
     * Format of the token node ID that is tried when a morphology node carries no edge to the
     * token layer. The placeholder is filled with the morphology node's index plus one.
     */
    private static final String FALLBACK_TOKEN_ID_FORMAT = "repp-n%d@1";

    /**
     * Populates the given CAS from the given LXF graph.
     * <p>
     * The document text is taken from the graph's media data. Afterwards the nodes of the
     * sentence, token, morphology and dependency layers are converted, in that order.
     *
     * @param aLxf
     *            the graph to read from.
     * @param aJCas
     *            the CAS receiving the document text and the annotations.
     * @throws IllegalStateException
     *             if a morphology node carrying a POS or lemma cannot be resolved to a token, or
     *             if a dependency node has no resolvable dependent token.
     */
    public static void convert(LxfGraph aLxf, JCas aJCas) {
        aJCas.setDocumentText(aLxf.getMedia().getData());

        // Convert sentences
        List<LxfNode> sentenceNodes = aLxf.getNodes().stream()
                .filter(n -> LAYER_SENTENCE.equals(n.getLayer()))
                .collect(Collectors.toList());
        for (LxfNode sentenceNode : sentenceNodes) {
            int[] anchors = aLxf.getRegion(sentenceNode.getLinks().get(0).get(0)).getAnchors();
            // The label feature of the sentence node is deliberately not transferred: it seems
            // redundant because the tokens carry their own labels.
            Sentence sentence = new Sentence(aJCas, anchors[0], anchors[1]);
            sentence.addToIndexes();
        }

        // Convert tokens and remember them by node ID so that the other layers can refer to them
        Map<String, Token> idxToken = new HashMap<>();
        List<LxfNode> tokenNodes = aLxf.getNodes().stream()
                .filter(n -> LAYER_TOKEN.equals(n.getLayer()))
                .collect(Collectors.toList());
        for (LxfNode tokenNode : tokenNodes) {
            int[] anchors = aLxf.getRegion(tokenNode.getLinks().get(0).get(0)).getAnchors();
            Token token = new Token(aJCas, anchors[0], anchors[1]);
            token.setText(tokenNode.getFeature(FEAT_LABEL));
            token.addToIndexes();
            idxToken.put(tokenNode.getId(), token);
        }

        // Convert morphology (pos, lemma)
        List<LxfNode> morphNodes = aLxf.getNodes().stream()
                .filter(n -> LAYER_MORPHOLOGY.equals(n.getLayer()))
                .collect(Collectors.toList());
        for (LxfNode morphNode : morphNodes) {
            String posValue = morphNode.getFeature(FEAT_POS);
            String lemmaValue = morphNode.getFeature(FEAT_LEMMA);
            if (posValue == null && lemmaValue == null) {
                // Nothing to attach, so there is no need to resolve the token at all
                continue;
            }

            // We assume that a morphology node is attached to exactly one token node. A missing
            // link is recovered the same way as for the dependencies below; without this, the
            // recovery in the dependency conversion could never be reached.
            Token token = findToken(aLxf, morphNode, idxToken);
            if (token == null) {
                throw new IllegalStateException("Morphology node [" + morphNode.getId()
                        + "] cannot be resolved to a token.");
            }

            // Convert POS if pos feature is set
            if (posValue != null) {
                POS pos = new POS(aJCas, token.getBegin(), token.getEnd());
                pos.setPosValue(posValue);
                pos.addToIndexes();
                token.setPos(pos);
            }

            // Convert Lemma if lemma feature is set
            if (lemmaValue != null) {
                Lemma lemma = new Lemma(aJCas, token.getBegin(), token.getEnd());
                lemma.setValue(lemmaValue);
                lemma.addToIndexes();
                token.setLemma(lemma);
            }
        }

        // Convert dependencies
        List<LxfNode> dependencyNodes = aLxf.getNodes().stream()
                .filter(n -> LAYER_DEPENDENCY.equals(n.getLayer()))
                .collect(Collectors.toList());
        for (LxfNode dependencyNode : dependencyNodes) {
            // We assume that if there is a dependency it must be attached to exactly one governor
            // and one dependent
            List<LxfEdge> govEdges = aLxf.getEdges(LAYER_MORPHOLOGY, dependencyNode);
            List<LxfEdge> depEdges = aLxf.getEdges(dependencyNode, LAYER_MORPHOLOGY);
            LxfNode govMorphNode = govEdges.isEmpty() ? null
                    : aLxf.getNode(govEdges.get(0).getFrom());
            LxfNode depMorphNode = depEdges.isEmpty() ? null
                    : aLxf.getNode(depEdges.get(0).getTo());

            // We assume that the gov and dep nodes are attached each to exactly one token
            Token govToken = govMorphNode != null ? findToken(aLxf, govMorphNode, idxToken)
                    : null;
            Token depToken = depMorphNode != null ? findToken(aLxf, depMorphNode, idxToken)
                    : null;

            // Create dependency relation according to DKPro Core conventions
            if (depToken != null && govToken != null) {
                Dependency dep = new Dependency(aJCas);
                dep.setDependencyType(dependencyNode.getFeature(FEAT_LABEL));
                dep.setFlavor(DependencyFlavor.BASIC);
                dep.setGovernor(govToken);
                dep.setDependent(depToken);
                dep.setBegin(dep.getDependent().getBegin());
                dep.setEnd(dep.getDependent().getEnd());
                dep.addToIndexes();
            }
            else if (depToken != null) {
                // No governor: the dependent is a root and governs itself
                Dependency dep = new ROOT(aJCas);
                dep.setDependencyType("ROOT");
                dep.setFlavor(DependencyFlavor.BASIC);
                dep.setGovernor(depToken);
                dep.setDependent(depToken);
                dep.setBegin(dep.getGovernor().getBegin());
                dep.setEnd(dep.getDependent().getEnd());
                dep.addToIndexes();
            }
            else {
                throw new IllegalStateException("Illegal dependency relation.");
            }
        }
    }

    /**
     * Looks up the token a morphology node belongs to.
     * <p>
     * The first edge from the node to the token layer is used if it exists and names a target
     * ID. Otherwise somebody apparently forgot to link the node to its token, and the token ID
     * is derived from the node's index instead.
     *
     * @return the token, or {@code null} if no token is registered under the resulting ID.
     */
    private static Token findToken(LxfGraph aLxf, LxfNode aMorphNode,
            Map<String, Token> aIdxToken) {
        List<LxfEdge> tokenEdges = aLxf.getEdges(aMorphNode, LAYER_TOKEN);
        if (!tokenEdges.isEmpty()) {
            String[] target = tokenEdges.get(0).getTo();
            // The node ID is expected in the second slot of the edge target
            if (target != null && target.length > 1) {
                return aIdxToken.get(target[1]);
            }
        }
        return aIdxToken.get(String.format(FALLBACK_TOKEN_ID_FORMAT, aMorphNode.getIndex() + 1));
    }
}