/* * Copyright 2013 SFB 632. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package annis.libgui; import annis.libgui.visualizers.VisualizerInput; import annis.model.AnnisConstants; import static annis.model.AnnisConstants.ANNIS_NS; import static annis.model.AnnisConstants.FEAT_RELANNIS_NODE; import annis.model.RelannisNodeFeature; import java.util.List; import java.util.Properties; import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; import org.corpus_tools.salt.common.SDocumentGraph; import org.corpus_tools.salt.common.SSpan; import org.corpus_tools.salt.core.SAnnotation; import org.corpus_tools.salt.core.SLayer; import org.corpus_tools.salt.core.SNode; import org.corpus_tools.salt.util.SaltUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Helps to extract page number annotations from {@link SSpan} of a salt * document. * * <p>It uses the following algorithm:</p> * <ul> * <li>Get all spans which are annoteted with a page number.</li> * <li>Create intervalls left and right token index of the annis model with the * help of SFeatures and {@link AnnisConstants} and build a mapping from these * intervalls to the sspan.</li> * <li>Get the best fitting intervall for a specific span.</li> * <ul> * * * @author Benjamin Weißenfels <b.pixeldrama@gmail.com> */ public class PDFPageHelper { private static final Logger log = LoggerFactory.getLogger(PDFPageHelper.class); public static final String MAPPING_PAGE_KEY = "pdf_page_key"; public static final String DEFAULT_PAGE_NUMBER_ANNOTATION_NAME = "page"; public static final String PAGE_NUMBER_SEPERATOR = "-"; public static final String PAGE_NO_VALID_NUMBER = "-1"; private SortedMap<Integer, TreeMap<Integer, SSpan>> sspans = new TreeMap<Integer, TreeMap<Integer, SSpan>>(); private VisualizerInput input; public PDFPageHelper(VisualizerInput visInput) { this.input = visInput; getAllSSpanWithPageNumber(visInput.getDocument().getDocumentGraph()); } /** * Returns a page annotation for a span, if the span is overlapped by a page * annotation. */ public String getPageAnnoForGridEvent(SSpan span) { int left = getLeftIndexFromSNode(span); int right = getRightIndexFromSNode(span); if (sspans == null) { log.warn("no page annos found"); return null; } // lookup left index int leftIdx = -1; for (Integer i : sspans.keySet()) { if (i <= left) { leftIdx = i; } } if (leftIdx == -1) { log.debug("no left index found"); return null; } // lookup right key int rightIdx = -1; for (Integer i : sspans.get(leftIdx).keySet()) { if (i >= right) { rightIdx = i; } } if (rightIdx == -1) { log.debug("no right index found"); return null; } return getPageFromAnnotation(span); } /** * Returns the value of page annotiation for a node. It takes the visualizer * mappings into account. If no mapping is used, this definition is used: {@link * #PAGE_NUMBER_ANNOATATION_NAME} * */ public String getPageFromAnnotation(SNode node) { if (node != null && node.getAnnotations() != null) { Set<SLayer> layers = node.getLayers(); String nodeNamespace = null; if(layers != null) { for (SLayer l : layers) { nodeNamespace = l.getName(); } for (SAnnotation anno : node.getAnnotations()) { if ((nodeNamespace == null || input.getNamespace() == null) && getPDFPageAnnotationName().equals(anno.getName())) { return anno.getValue_STEXT(); } else if (nodeNamespace.equals(input.getNamespace()) && getPDFPageAnnotationName().equals(anno.getName())) { return anno.getValue_STEXT(); } } } } return null; } private void getAllSSpanWithPageNumber( SDocumentGraph graph) { if (graph == null) { log.error("could not get page annos from empty graph"); return; } List<SSpan> sSpans = graph.getSpans(); if (sSpans != null) { for (SSpan s : sSpans) { Set<SAnnotation> sAnnotations = s.getAnnotations(); if (sAnnotations != null) { for (SAnnotation anno : sAnnotations) { // TODO support mappings of resolver vis map if (getPDFPageAnnotationName().equals(anno.getName())) { int leftIdx = getLeftIndexFromSNode(s); int rightIdx = getRightIndexFromSNode(s); if (sspans.containsKey(leftIdx)) { if (sspans.get(leftIdx).containsKey(rightIdx)) { log.warn("an intervall {}-{} is overrided by: {}", s); } sspans.get(leftIdx).put(rightIdx, s); } else { sspans.put(leftIdx, new TreeMap<Integer, SSpan>()); sspans.get(leftIdx).put(rightIdx, s); } } } } } } } /** * Get the most left token index of a SSpan. * */ public int getLeftIndexFromSNode(SSpan s) { RelannisNodeFeature feat = (RelannisNodeFeature) s.getFeature(SaltUtil.createQName(ANNIS_NS, FEAT_RELANNIS_NODE)).getValue(); return (int) feat.getLeftToken(); } /** * Get the most right token index of a SSpan. * */ public int getRightIndexFromSNode(SSpan s) { RelannisNodeFeature feat = (RelannisNodeFeature) s.getFeature(SaltUtil.createQName(ANNIS_NS, FEAT_RELANNIS_NODE)).getValue_SOBJECT(); return (int) feat.getRightToken(); } /** * Gets the pdf page annotation name. It takes into acount the mappings * defined in {@link VisualizerInput#mappings}. * */ public String getPDFPageAnnotationName() { Properties mappings = input.getMappings(); if (mappings != null) { return mappings.getProperty(MAPPING_PAGE_KEY, DEFAULT_PAGE_NUMBER_ANNOTATION_NAME); } return DEFAULT_PAGE_NUMBER_ANNOTATION_NAME; } /** * Creates a String (eg. <b>3-9</b> or <b>3</b>), based on the most left and * most right page annotation. * * <p>The page annotation is detected with * {@link #getPageFromAnnotation(de.hu_berlin.german.korpling.saltnpepper.salt.saltCommon.sDocumentStructure.SSpan)}</p> * * @return A String which represents the start and the end page of a pdf, * seperated by {@link #PAGE_NUMBER_SEPERATOR}. If there is no end page, or * exactly one page annotation, only a String with one number is returned. */ public String getMostLeftAndMostRightPageAnno() { if (sspans == null || sspans.isEmpty()) { return null; } TreeMap<Integer, SSpan> rightTokIdxToSSpan = sspans.get(sspans.firstKey()); SSpan leftSpan = rightTokIdxToSSpan.get(rightTokIdxToSSpan.firstKey()); SSpan rightSpan = null; Integer rightIdx = null; for (Integer leftIdxKey : sspans.keySet()) { for (Integer rightIdxKey : sspans.get(leftIdxKey).keySet()) { if (rightIdx == null || rightIdx <= rightIdxKey) { rightIdx = rightIdxKey; rightSpan = sspans.get(leftIdxKey).get(rightIdx); } } } if (rightIdx != null) { return getPageFromAnnotation(leftSpan) + PAGE_NUMBER_SEPERATOR + getPageFromAnnotation(rightSpan); } return getPageFromAnnotation(leftSpan); } }