/*
 * Copyright 2011 Corpuslinguistic working group Humboldt University Berlin.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package annis.utils;

import annis.CommonHelper;
import static annis.model.AnnisConstants.ANNIS_NS;
import static annis.model.AnnisConstants.FEAT_MATCHEDIDS;
import static annis.model.AnnisConstants.FEAT_RELANNIS_NODE;
import annis.model.AnnisNode;
import annis.model.Annotation;
import annis.model.AnnotationGraph;
import annis.model.Edge;
import annis.model.Edge.EdgeType;
import annis.model.RelannisEdgeFeature;
import annis.model.RelannisNodeFeature;
import annis.service.ifaces.AnnisResultSet;
import annis.service.objects.AnnisResultImpl;
import annis.service.objects.AnnisResultSetImpl;
import annis.service.objects.Match;
import com.google.common.base.Preconditions;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.corpus_tools.salt.common.SCorpusGraph;
import org.corpus_tools.salt.common.SDocument;
import org.corpus_tools.salt.common.SDocumentGraph;
import org.corpus_tools.salt.common.SDominanceRelation;
import org.corpus_tools.salt.common.SPointingRelation;
import org.corpus_tools.salt.common.SSequentialDS;
import org.corpus_tools.salt.common.SSpanningRelation;
import org.corpus_tools.salt.common.SToken;
import org.corpus_tools.salt.common.SaltProject;
import org.corpus_tools.salt.core.SAnnotation;
import org.corpus_tools.salt.core.SFeature;
import org.corpus_tools.salt.core.SLayer;
import org.corpus_tools.salt.core.SNode;
import org.corpus_tools.salt.core.SRelation;
import org.corpus_tools.salt.util.DataSourceSequence;
import org.corpus_tools.salt.SALT_TYPE;
import org.corpus_tools.salt.util.SaltUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * This class can convert the current Salt graph model into the legacy model
 * AOM (Annis Object Model)
 * and
 * "PaulaInline"
 *
 * @author Thomas Krause &lt;krauseto@hu-berlin.de&gt;
 */
public class LegacyGraphConverter
{

  private static final Logger log = LoggerFactory.getLogger(
    LegacyGraphConverter.class);

  /**
   * Converts every document of a Salt project and wraps each resulting
   * annotation graph into an {@link AnnisResultImpl}.
   *
   * @param p the Salt project to convert, may be {@code null}
   * @return a result set with one entry per converted document
   */
  public static AnnisResultSet convertToResultSet(SaltProject p)
  {
    List<AnnotationGraph> annotationGraphs = convertToAOM(p);
    AnnisResultSetImpl annisResultSet = new AnnisResultSetImpl();
    for (AnnotationGraph annotationGraph : annotationGraphs)
    {
      annisResultSet.add(new AnnisResultImpl(annotationGraph));
    }
    return annisResultSet;
  }

  /**
   * Converts every document of every corpus graph in the project.
   *
   * @param p the Salt project to convert, may be {@code null}
   * @return the converted graphs; empty if {@code p} is {@code null}
   */
  public static List<AnnotationGraph> convertToAOM(SaltProject p)
  {
    List<AnnotationGraph> result = new ArrayList<AnnotationGraph>();
    if (p != null)
    {
      for (SCorpusGraph corpusGraph : p.getCorpusGraphs())
      {
        for (SDocument doc : corpusGraph.getDocuments())
        {
          result.add(convertToAnnotationGraph(doc));
        }
      }
    }
    return result;
  }

  /**
   * Converts a single document. The matched nodes are read from the
   * {@code FEAT_MATCHEDIDS} feature of the document graph and translated to
   * the internal IDs stored in the relANNIS node feature of each node.
   *
   * @param document the document whose graph should be converted
   * @return the converted annotation graph
   */
  public static AnnotationGraph convertToAnnotationGraph(SDocument document)
  {
    SDocumentGraph docGraph = document.getDocumentGraph();
    SFeature featMatchedIDs = docGraph.getFeature(ANNIS_NS, FEAT_MATCHEDIDS);

    Match match = new Match();
    if (featMatchedIDs != null && featMatchedIDs.getValue_STEXT() != null)
    {
      match = Match.parseFromString(featMatchedIDs.getValue_STEXT(), ',');
    }

    // Resolve the matched Salt IDs to internal IDs. Unresolvable entries are
    // kept as -1 so that the list positions stay aligned with the match:
    // the position is later used to compute the number of the matched node.
    List<Long> matchedNodeIDs = new ArrayList<>();
    for (URI u : match.getSaltIDs())
    {
      String saltID = u.toASCIIString();
      SNode node = docGraph.getNode(saltID);
      if (node == null)
      {
        // that's weird, fallback to the id
        log.warn("Could not get matched node from id {}", saltID);
        matchedNodeIDs.add(-1L);
        continue;
      }

      SFeature featNode = node.getFeature(
        SaltUtil.createQName(ANNIS_NS, FEAT_RELANNIS_NODE));
      if (featNode == null)
      {
        // without the feature there is no internal ID to report
        log.warn("Matched node {} has no relANNIS node feature", saltID);
        matchedNodeIDs.add(-1L);
      }
      else
      {
        RelannisNodeFeature relANNISFeat
          = (RelannisNodeFeature) featNode.getValue();
        matchedNodeIDs.add(relANNISFeat.getInternalID());
      }
    }

    return convertToAnnotationGraph(docGraph, matchedNodeIDs);
  }

  /**
   * Converts a document graph. Only nodes carrying the relANNIS node feature
   * are converted; relations are converted when
   * {@link RelannisEdgeFeature#extract} yields a feature for them.
   *
   * @param docGraph the Salt document graph
   * @param matchedNodeIDs internal IDs of the matched nodes; the (1-based)
   *                       position in this list becomes the matched node
   *                       number of the corresponding node
   * @return the converted annotation graph
   */
  public static AnnotationGraph convertToAnnotationGraph(
    SDocumentGraph docGraph, List<Long> matchedNodeIDs)
  {
    Set<Long> matchSet = new HashSet<>(matchedNodeIDs);
    AnnotationGraph annoGraph = new AnnotationGraph();

    List<String> pathList = CommonHelper.getCorpusPath(
      docGraph.getDocument().getGraph(), docGraph.getDocument());
    annoGraph.setPath(pathList.toArray(new String[pathList.size()]));
    annoGraph.setDocumentName(docGraph.getDocument().getName());

    Map<SNode, AnnisNode> allNodes = new HashMap<>();

    for (SNode sNode : docGraph.getNodes())
    {
      SFeature featNodeRaw = sNode.getFeature(
        SaltUtil.createQName(ANNIS_NS, FEAT_RELANNIS_NODE));
      if (featNodeRaw == null)
      {
        continue;
      }
      RelannisNodeFeature featNode
        = (RelannisNodeFeature) featNodeRaw.getValue();

      AnnisNode aNode = new AnnisNode(featNode.getInternalID());

      for (SAnnotation sAnno : sNode.getAnnotations())
      {
        aNode.addNodeAnnotation(new Annotation(sAnno.getNamespace(),
          sAnno.getName(), sAnno.getValue_STEXT()));
      }

      aNode.setName(sNode.getName());
      Set<SLayer> layers = sNode.getLayers();
      if (!layers.isEmpty())
      {
        // the name of the first layer is used as namespace
        aNode.setNamespace(layers.iterator().next().getName());
      }

      if (sNode instanceof SToken)
      {
        applyTokenText(docGraph, sNode, featNode, aNode);
      }
      else
      {
        aNode.setToken(false);
        aNode.setTokenIndex(null);
      }

      aNode.setCorpus(featNode.getCorpusRef());
      aNode.setTextId(featNode.getTextRef());
      aNode.setLeft(featNode.getLeft());
      aNode.setLeftToken(featNode.getLeftToken());
      aNode.setRight(featNode.getRight());
      aNode.setRightToken(featNode.getRightToken());

      if (matchSet.contains(aNode.getId()))
      {
        aNode.setMatchedNodeInQuery(
          (long) matchedNodeIDs.indexOf(aNode.getId()) + 1);
        annoGraph.getMatchedNodeIds().add(aNode.getId());
      }
      else
      {
        aNode.setMatchedNodeInQuery(null);
      }

      annoGraph.addNode(aNode);
      allNodes.put(sNode, aNode);
    }

    for (SRelation rel : docGraph.getRelations())
    {
      RelannisEdgeFeature featRelation = RelannisEdgeFeature.extract(rel);
      if (featRelation != null)
      {
        addRelation(rel, featRelation.getPre(), featRelation.getComponentID(),
          allNodes, annoGraph);
      }
    }

    // add relations with empty relation name for every dominance relation
    List<SDominanceRelation> dominanceRelations
      = new ArrayList<>(docGraph.getDominanceRelations());
    for (SDominanceRelation rel : dominanceRelations)
    {
      RelannisEdgeFeature featEdge = RelannisEdgeFeature.extract(rel);
      if (featEdge != null
        && featEdge.getArtificialDominanceComponent() != null
        && featEdge.getArtificialDominancePre() != null)
      {
        addRelation(SDominanceRelation.class, null, rel.getAnnotations(),
          rel.getSource(), rel.getTarget(), rel.getLayers(),
          featEdge.getArtificialDominancePre(),
          featEdge.getArtificialDominanceComponent(),
          allNodes, annoGraph);
      }
    }

    return annoGraph;
  }

  /**
   * Copies the covered text and the token index of a Salt token to the legacy
   * node. Nothing is set if the graph reports no overlapped text sequence for
   * the token.
   *
   * @param docGraph the graph the token belongs to
   * @param sNode the Salt token
   * @param featNode the relANNIS feature of the token
   * @param aNode the legacy node to update
   */
  private static void applyTokenText(SDocumentGraph docGraph, SNode sNode,
    RelannisNodeFeature featNode, AnnisNode aNode)
  {
    List<DataSourceSequence> seqList = docGraph.
      getOverlappedDataSourceSequence(sNode,
        SALT_TYPE.STEXT_OVERLAPPING_RELATION);
    // an empty list is treated like a missing one instead of failing on get(0)
    if (seqList == null || seqList.isEmpty())
    {
      return;
    }

    DataSourceSequence seq = seqList.get(0);
    Preconditions.checkNotNull(seq,
      "DataSourceSequence is null for token %s", sNode.getId());

    SSequentialDS seqDS = seq.getDataSource();
    Preconditions.checkNotNull(seqDS,
      "SSequentalDS is null for token %s", sNode.getId());
    Preconditions.checkNotNull(seqDS.getData(),
      "SSequentalDS data is null for token %s", sNode.getId());
    String seqDSData = (String) seqDS.getData();

    Preconditions.checkNotNull(seq.getStart(),
      "SSequentalDS start is null for token %s", sNode.getId());
    Preconditions.checkNotNull(seq.getEnd(),
      "SSequentalDS end is null for supposed token %s", sNode.getId());

    int start = seq.getStart().intValue();
    int end = seq.getEnd().intValue();

    // one placeholder per argument, otherwise the ID is reported as "start"
    Preconditions.checkState(
      start >= 0 && start <= end && end <= seqDSData.length(),
      "Illegal start or end of textual DS for token %s (start %s, end: %s)",
      sNode.getId(), start, end);

    aNode.setSpannedText(seqDSData.substring(start, end));
    aNode.setToken(true);
    aNode.setTokenIndex(featNode.getTokenIndex());
  }

  /**
   * Adds a legacy edge for a Salt relation, taking class, type, annotations,
   * end points and layers from the relation itself.
   *
   * @param rel the Salt relation
   * @param pre the pre-order value of the edge
   * @param componentID the component the edge belongs to
   * @param allNodes mapping from Salt nodes to already converted legacy nodes
   * @param annoGraph the graph the edge is added to
   */
  private static void addRelation(
    SRelation<? extends SNode, ? extends SNode> rel,
    long pre, long componentID,
    Map<SNode, AnnisNode> allNodes, AnnotationGraph annoGraph)
  {
    addRelation(rel.getClass(), rel.getType(), rel.getAnnotations(),
      rel.getSource(), rel.getTarget(), rel.getLayers(),
      pre, componentID, allNodes, annoGraph);
  }

  /**
   * Creates a legacy edge and registers it at the graph and at its end points.
   * A relation whose target has no converted legacy node is skipped with a
   * warning; a missing source node is tolerated.
   *
   * @param clazz the relation class, used to determine the edge type
   * @param type the edge name, may be {@code null}
   * @param annotations the annotations to copy to the edge
   * @param source the Salt source node
   * @param target the Salt target node
   * @param relLayers the layers of the relation; the name of the first one
   *                  becomes the namespace of the edge
   * @param pre the pre-order value of the edge
   * @param componentID the component the edge belongs to
   * @param allNodes mapping from Salt nodes to already converted legacy nodes
   * @param annoGraph the graph the edge is added to
   */
  private static void addRelation(
    Class<? extends SRelation> clazz, String type,
    Collection<SAnnotation> annotations,
    SNode source, SNode target, Set<SLayer> relLayers,
    long pre, long componentID,
    Map<SNode, AnnisNode> allNodes, AnnotationGraph annoGraph)
  {
    AnnisNode destination = allNodes.get(target);
    if (destination == null)
    {
      // the target was not converted, the edge would be dangling
      log.warn("Skipping relation with pre-order {} in component {} "
        + "because its target node was not converted", pre, componentID);
      return;
    }

    Edge aEdge = new Edge();
    aEdge.setSource(allNodes.get(source));
    aEdge.setDestination(destination);

    aEdge.setEdgeType(EdgeType.UNKNOWN);
    aEdge.setPre(pre);
    aEdge.setComponentID(componentID);

    if (!relLayers.isEmpty())
    {
      aEdge.setNamespace(relLayers.iterator().next().getName());
    }
    aEdge.setName(type);

    if (SDominanceRelation.class.isAssignableFrom(clazz))
    {
      aEdge.setEdgeType(EdgeType.DOMINANCE);
    }
    else if (SPointingRelation.class.isAssignableFrom(clazz))
    {
      aEdge.setEdgeType(EdgeType.POINTING_RELATION);
    }
    else if (SSpanningRelation.class.isAssignableFrom(clazz))
    {
      aEdge.setEdgeType(EdgeType.COVERAGE);
    }

    for (SAnnotation sAnno : annotations)
    {
      aEdge.addAnnotation(new Annotation(sAnno.getNamespace(),
        sAnno.getName(), sAnno.getValue_STEXT()));
    }

    annoGraph.addEdge(aEdge);
    destination.addIncomingEdge(aEdge);
    if (aEdge.getSource() != null)
    {
      aEdge.getSource().addOutgoingEdge(aEdge);
    }
  }
}