/*
 * WPCleaner: A tool to help on Wikipedia maintenance tasks.
 * Copyright (C) 2013 Nicolas Vervelle
 *
 * See README.txt file for licensing information.
 */

package org.wikipediacleaner.api.data;

import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;


/**
 * Management of non wiki text areas.
 *
 * Areas are stored as a list of <code>[beginIndex, endIndex)</code> ranges that
 * is kept sorted by begin index, with overlapping or touching ranges merged
 * into a single area (see {@link #addArea(int, int)}).
 */
public class PageElementAreas {

  /** Flag to activate areas checking (debugging aid, see {@link #checkAreas()}) */
  private static final boolean CHECK_AREAS = false;

  /** List of non wiki text areas */
  private final List<Area> areas;

  /**
   * Initialize areas.
   */
  public PageElementAreas() {
    areas = new LinkedList<PageElementAreas.Area>();
  }

  /**
   * @return List of areas (unmodifiable view of the internal list).
   */
  public List<Area> getAreas() {
    return Collections.unmodifiableList(areas);
  }

  /**
   * Find where the area containing an index ends.
   *
   * @param index Current index.
   * @return First index after area: the end index of the area containing
   *         <code>index</code>, or <code>index</code> itself if it is not
   *         inside any area.
   */
  public int getEndArea(int index) {
    for (Area area : areas) {
      // Areas are sorted: once an area begins after index, none can contain it
      if (area.beginIndex > index) {
        return index;
      }
      if (area.endIndex > index) {
        return area.endIndex;
      }
    }
    return index;
  }

  /**
   * Add comments to non wiki text areas.
   *
   * @param comments List of comments.
   */
  public void addComments(List<PageElementComment> comments) {
    addPageElements(comments);
  }

  /**
   * Add tags to non wiki text areas.
   *
   * For the tags recognized by {@link #isUnparsedContentTag(PageElementTag)},
   * the complete range reported by the tag is added; for every other tag,
   * only the tag itself is added.
   *
   * @param tags List of tags.
   */
  public void addTags(List<PageElementTag> tags) {
    if (tags == null) {
      return;
    }
    for (PageElementTag tag : tags) {
      if (isUnparsedContentTag(tag)) {
        // A complete end tag is skipped
        // (presumably its opening tag already registered the complete range)
        if (!tag.isEndTag() || !tag.isComplete()) {
          addArea(tag.getCompleteBeginIndex(), tag.getCompleteEndIndex());
        }
      } else {
        addArea(tag.getBeginIndex(), tag.getEndIndex());
      }
    }
  }

  /**
   * Tell if a tag is one for which the complete range should be added.
   *
   * @param tag Tag.
   * @return True if the tag is not a full tag and is one of nowiki, math,
   *         math chem, pre, source or syntaxhighlight.
   */
  private static boolean isUnparsedContentTag(PageElementTag tag) {
    if (tag.isFullTag()) {
      return false;
    }
    String name = tag.getNormalizedName();
    return PageElementTag.TAG_WIKI_NOWIKI.equals(name) ||
           PageElementTag.TAG_WIKI_MATH.equals(name) ||
           PageElementTag.TAG_WIKI_MATH_CHEM.equals(name) ||
           PageElementTag.TAG_WIKI_PRE.equals(name) ||
           PageElementTag.TAG_WIKI_SOURCE.equals(name) ||
           PageElementTag.TAG_WIKI_SYNTAXHIGHLIGHT.equals(name);
  }

  /**
   * Add internal links to non wiki text areas.
   *
   * When a link has a text, only the part before the text and the last two
   * characters of the link are added; otherwise the whole link is added.
   *
   * @param links List of internal links.
   */
  public void addInternalLinks(List<PageElementInternalLink> links) {
    if (links == null) {
      return;
    }
    for (PageElementInternalLink link : links) {
      int beginIndex = link.getBeginIndex();
      int endIndex = link.getEndIndex();
      if (link.getText() != null) {
        addArea(beginIndex, beginIndex + link.getTextOffset());
        addArea(endIndex - 2, endIndex);
      } else {
        addArea(beginIndex, endIndex);
      }
    }
  }

  /**
   * Add images to non wiki text areas.
   *
   * An image without pipe or without parameters is added entirely.
   * Otherwise, the beginning of the image up to and including the first pipe
   * is added, then each parameter having a magic word is added, except for
   * the "alt" parameter for which only the part up to and including the
   * "=" sign is added. Parameters without magic word are not added.
   *
   * @param images List of images.
   */
  public void addImages(List<PageElementImage> images) {
    if (images == null) {
      return;
    }
    for (PageElementImage image : images) {
      int beginIndex = image.getBeginIndex();
      int endIndex = image.getEndIndex();
      if ((image.getFirstPipeOffset() < 0) || (image.getParameters() == null)) {
        addArea(beginIndex, endIndex);
        continue;
      }
      addArea(beginIndex, beginIndex + image.getFirstPipeOffset() + 1);
      for (PageElementImage.Parameter param : image.getParameters()) {
        MagicWord magicWord = param.getMagicWord();
        if (magicWord == null) {
          continue;
        }
        int paramBegin = beginIndex + param.getBeginOffset();
        int paramEnd = beginIndex + param.getEndOffset() + 1;
        if (MagicWord.IMG_ALT.equals(magicWord.getName())) {
          int equalIndex = param.getContents().indexOf("=");
          if (equalIndex >= 0) {
            // Only the part up to and including "=" is added
            paramEnd = paramBegin + equalIndex + 1;
          }
        }
        addArea(paramBegin, paramEnd);
      }
    }
  }

  /**
   * Add categories to non wiki text areas.
   *
   * @param categories List of categories.
   */
  public void addCategories(List<PageElementCategory> categories) {
    addPageElements(categories);
  }

  /**
   * Add interwiki links to non wiki text areas.
   *
   * When a link has a text, only the part before the text and the last two
   * characters of the link are added; otherwise the whole link is added.
   *
   * @param links List of interwiki links.
   */
  public void addInterwikiLinks(List<PageElementInterwikiLink> links) {
    if (links == null) {
      return;
    }
    for (PageElementInterwikiLink link : links) {
      int beginIndex = link.getBeginIndex();
      int endIndex = link.getEndIndex();
      if (link.getText() != null) {
        addArea(beginIndex, beginIndex + link.getTextOffset());
        addArea(endIndex - 2, endIndex);
      } else {
        addArea(beginIndex, endIndex);
      }
    }
  }

  /**
   * Add language links to non wiki text areas.
   *
   * @param links List of language links.
   */
  public void addLanguageLinks(List<PageElementLanguageLink> links) {
    addPageElements(links);
  }

  /**
   * Add ISBNs to non wiki text areas.
   *
   * @param isbns List of ISBNs.
   */
  public void addISBN(List<PageElementISBN> isbns) {
    addPageElements(isbns);
  }

  /**
   * Add ISSNs to non wiki text areas.
   *
   * @param issns List of ISSNs.
   */
  public void addISSN(List<PageElementISSN> issns) {
    addPageElements(issns);
  }

  /**
   * Add PMIDs to non wiki text areas.
   *
   * @param pmids List of PMIDs.
   */
  public void addPMID(List<PageElementPMID> pmids) {
    addPageElements(pmids);
  }

  /**
   * Add RFCs to non wiki text areas.
   *
   * @param rfcs List of RFCs.
   */
  public void addRFC(List<PageElementRFC> rfcs) {
    addPageElements(rfcs);
  }

  /**
   * Add templates to non wiki text areas.
   *
   * A template without parameters is added entirely. Otherwise, the following
   * parts are added: the beginning of the template up to the first parameter
   * (its name if it has one, its value otherwise), for each parameter the
   * part from its pipe to its value (or only the pipe character when the
   * parameter has no name), and the last two characters of the template.
   *
   * @param templates List of templates.
   */
  public void addTemplates(List<PageElementTemplate> templates) {
    if (templates == null) {
      return;
    }
    for (PageElementTemplate template : templates) {
      int beginIndex = template.getBeginIndex();
      int endIndex = template.getEndIndex();
      int paramCount = template.getParameterCount();
      if (paramCount <= 0) {
        addArea(beginIndex, endIndex);
        continue;
      }

      // Beginning of the template, up to the first parameter
      if (template.getParameterName(0) != null) {
        addArea(beginIndex, template.getParameterNameStartIndex(0));
      } else {
        addArea(beginIndex, template.getParameterValueStartIndex(0));
      }

      // Separator (and name if any) of each parameter
      for (int numParam = 0; numParam < paramCount; numParam++) {
        String paramName = template.getParameterName(numParam);
        int pipeIndex = template.getParameterPipeIndex(numParam);
        if ((paramName != null) && (paramName.length() > 0)) {
          addArea(pipeIndex, template.getParameterValueStartIndex(numParam));
        } else {
          addArea(pipeIndex, pipeIndex + 1);
        }
      }

      // Last two characters of the template
      addArea(endIndex - 2, endIndex);
    }
  }

  /**
   * Add functions to non wiki text areas.
   *
   * A function with at most one parameter is added entirely. Otherwise, the
   * following parts are added: the beginning of the function up to and
   * including the first separator, each following separator character,
   * and the last two characters of the function.
   *
   * @param functions List of functions.
   */
  public void addFunctions(List<PageElementFunction> functions) {
    if (functions == null) {
      return;
    }
    for (PageElementFunction function : functions) {
      int beginIndex = function.getBeginIndex();
      int endIndex = function.getEndIndex();
      int paramCount = function.getParameterCount();
      if (paramCount <= 1) {
        addArea(beginIndex, endIndex);
        continue;
      }
      // NOTE(review): the value of getParameterSeparatorOffset() is used
      // here as an absolute index (beginIndex is not added) -- to be confirmed
      addArea(beginIndex, function.getParameterSeparatorOffset(0) + 1);
      for (int numParam = 1; numParam < paramCount; numParam++) {
        int separatorIndex = function.getParameterSeparatorOffset(numParam);
        addArea(separatorIndex, separatorIndex + 1);
      }
      addArea(endIndex - 2, endIndex);
    }
  }

  /**
   * Add magic words to non wiki text areas.
   *
   * @param magicWords List of magic words.
   */
  public void addMagicWords(List<PageElementMagicWord> magicWords) {
    addPageElements(magicWords);
  }

  /**
   * Add parameters to non wiki text areas.
   *
   * @param parameters List of parameters.
   */
  public void addParameters(List<PageElementParameter> parameters) {
    addPageElements(parameters);
  }

  /**
   * Add titles to non wiki text areas.
   *
   * Two areas are added for each title: the first characters of the title
   * (as many as its first level), and the characters (as many as its second
   * level) located just before the text found after the title.
   *
   * @param titles Titles.
   */
  public void addTitles(List<PageElementTitle> titles) {
    if (titles == null) {
      return;
    }
    for (PageElementTitle title : titles) {
      int beginIndex = title.getBeginIndex();
      int endIndex = title.getEndIndex();
      String afterTitle = title.getAfterTitle();
      int after = (afterTitle != null) ? afterTitle.length() : 0;
      addArea(beginIndex, beginIndex + title.getFirstLevel());
      addArea(endIndex - after - title.getSecondLevel(), endIndex - after);
    }
  }

  /**
   * Add external links to non wiki text areas.
   *
   * When a link has a text, only the part before the text is added, plus
   * the last character of the link if the link has square brackets;
   * otherwise the whole link is added.
   *
   * @param links Links.
   */
  public void addExternalLinks(List<PageElementExternalLink> links) {
    if (links == null) {
      return;
    }
    for (PageElementExternalLink link : links) {
      int beginIndex = link.getBeginIndex();
      int endIndex = link.getEndIndex();
      if (link.getText() != null) {
        addArea(beginIndex, beginIndex + link.getTextOffset());
        if (link.hasSquare()) {
          addArea(endIndex - 1, endIndex);
        }
      } else {
        addArea(beginIndex, endIndex);
      }
    }
  }

  /**
   * Add elements to non wiki text areas.
   *
   * Each element is added entirely, from its begin index to its end index.
   *
   * @param elements List of elements.
   */
  private void addPageElements(List<? extends PageElement> elements) {
    if (elements == null) {
      return;
    }
    for (PageElement element : elements) {
      addArea(element.getBeginIndex(), element.getEndIndex());
    }
  }

  /**
   * Add an area to the list of non wiki text areas.
   *
   * The list is kept sorted by begin index. The new area is merged with
   * every existing area that it overlaps or touches. Empty or inverted
   * ranges (<code>endIndex &lt;= beginIndex</code>) are ignored.
   *
   * @param beginIndex Begin index.
   * @param endIndex End index.
   */
  private void addArea(int beginIndex, int endIndex) {

    // Ignore degenerate ranges: they cover no character, they break the
    // invariant verified by checkAreas(), and an inverted range could be
    // inserted before an area that actually begins earlier, breaking the
    // ordering on which getEndArea() relies.
    if (endIndex <= beginIndex) {
      if (CHECK_AREAS) {
        System.err.println("Error " + new Area(beginIndex, endIndex));
      }
      return;
    }

    Iterator<PageElementAreas.Area> itArea = areas.iterator();
    int currentIndex = 0;
    while (itArea.hasNext()) {
      Area area = itArea.next();
      if (beginIndex <= area.endIndex) {

        // New area is strictly before the current one: simple insertion
        // (the iterator is not used after modifying the list)
        if (endIndex < area.beginIndex) {
          areas.add(currentIndex, new Area(beginIndex, endIndex));
          if (CHECK_AREAS) {
            checkAreas();
          }
          return;
        }

        // New area overlaps or touches the current one: extend the current one
        area.beginIndex = Math.min(area.beginIndex, beginIndex);
        if (endIndex <= area.endIndex) {
          return;
        }
        area.endIndex = endIndex;

        // Absorb the following areas that are now overlapped or touched
        while (itArea.hasNext()) {
          Area tmpArea = itArea.next();
          if (tmpArea.beginIndex > endIndex) {
            if (CHECK_AREAS) {
              checkAreas();
            }
            return;
          }
          area.endIndex = Math.max(area.endIndex, tmpArea.endIndex);
          itArea.remove();
        }
        if (CHECK_AREAS) {
          checkAreas();
        }
        return;
      }
      currentIndex++;
    }

    // New area is after all existing areas
    areas.add(new Area(beginIndex, endIndex));
    if (CHECK_AREAS) {
      checkAreas();
    }
  }

  /**
   * Internal checking of the areas.
   *
   * Prints an error on the standard error stream for each area that is
   * empty or inverted, and for each area that begins at or before the end
   * of the previous area.
   */
  public void checkAreas() {
    int previousEnd = -1;
    for (PageElementAreas.Area area : areas) {
      if (area.beginIndex >= area.endIndex) {
        System.err.println("Error " + area);
      }
      if (previousEnd >= area.beginIndex) {
        System.err.println("Error " + area + "/" + previousEnd);
      }
      previousEnd = area.endIndex;
    }
  }

  /**
   * Print the list of areas on the standard error stream (debugging aid).
   *
   * @param text Label printed before the list of areas.
   */
  public void printAreas(String text) {
    System.err.println("Areas " + text + " :");
    for (Area area : areas) {
      System.err.println(" " + area.getBeginIndex() + "->" + area.getEndIndex());
    }
  }

  /**
   * Utility class for memorizing an area.
   */
  public static class Area {

    /**
     * Begin index of the area.
     */
    int beginIndex;

    /**
     * End index of the area.
     */
    int endIndex;

    /**
     * @param beginIndex Begin index.
     * @param endIndex End index.
     */
    Area(int beginIndex, int endIndex) {
      this.beginIndex = beginIndex;
      this.endIndex = endIndex;
    }

    /**
     * @return Begin index.
     */
    public int getBeginIndex() {
      return beginIndex;
    }

    /**
     * @return End index.
     */
    public int getEndIndex() {
      return endIndex;
    }

    /**
     * @return Textual description.
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {
      return "Area: " + beginIndex + "->" + endIndex;
    }
  }
}