/* * WPCleaner: A tool to help on Wikipedia maintenance tasks. * Copyright (C) 2013 Nicolas Vervelle * * See README.txt file for licensing information. */ package org.wikipediacleaner.api.data; import org.wikipediacleaner.api.constants.EnumWikipedia; /** * Class containing information about a complete internal link ([[link#anchor|text]]). */ public class PageElementInternalLink extends PageElement { private final String linkNotTrimmed; private final String link; private final String anchorNotTrimmed; private final String anchor; private final String textNotTrimmed; private final String text; private final int textOffset; /** * Analyze contents to check if it matches an internal link. * * @param wiki Wiki. * @param contents Contents. * @param index Block start index. * @return Block details it there's a block. */ public static PageElementInternalLink analyzeBlock( EnumWikipedia wiki, String contents, int index) { // Verify arguments if (contents == null) { return null; } // Look for '[[' int tmpIndex = index; if ((tmpIndex >= contents.length()) || (!contents.startsWith("[[", tmpIndex))) { return null; } tmpIndex += 2; int beginIndex = tmpIndex; // Retrieve link int anchorIndex = -1; int pipeIndex = -1; int endIndex = -1; int level3CurlyBrackets = 0; while ((tmpIndex < contents.length()) && (endIndex < 0) && (pipeIndex < 0)) { switch (contents.charAt(tmpIndex)) { case '[': return null; case ']': if ((level3CurlyBrackets == 0) && (contents.startsWith("]]", tmpIndex))) { endIndex = tmpIndex; tmpIndex++; } else { return null; } break; case '{': if (contents.startsWith("{{{", tmpIndex)) { level3CurlyBrackets++; tmpIndex += 2; } else { return null; } break; case '}': if ((contents.startsWith("}}}", tmpIndex)) && (level3CurlyBrackets > 0)) { level3CurlyBrackets--; tmpIndex += 2; } else { return null; } break; case '#': if ((level3CurlyBrackets == 0) && (anchorIndex < 0)) { anchorIndex = tmpIndex; } break; case '|': if (level3CurlyBrackets == 0) { pipeIndex = tmpIndex; } } tmpIndex++; } // Retrieve link text if (endIndex < 0) { int level1Brackets = 0; int level2CurlyBrackets = 0; level3CurlyBrackets = 0; while ((tmpIndex < contents.length()) && (endIndex < 0)) { switch (contents.charAt(tmpIndex)) { case '[': if (contents.startsWith("[[", tmpIndex)) { return null; } level1Brackets++; break; case ']': if (contents.startsWith("]]]", tmpIndex)) { if (level1Brackets > 0) { level1Brackets--; } else { endIndex = tmpIndex; tmpIndex++; } } else if (contents.startsWith("]]", tmpIndex)) { endIndex = tmpIndex; tmpIndex++; } else if (level1Brackets > 0) { level1Brackets--; } break; case '{': if (contents.startsWith("{{{", tmpIndex)) { level3CurlyBrackets++; tmpIndex += 2; } else if (contents.startsWith("{{", tmpIndex)) { level2CurlyBrackets++; tmpIndex++; } break; case '}': if ((contents.startsWith("}}}", tmpIndex)) && (level3CurlyBrackets > 0)) { level3CurlyBrackets--; tmpIndex += 2; } else if ((contents.startsWith("}}", tmpIndex)) && (level2CurlyBrackets > 0)) { level2CurlyBrackets--; tmpIndex++; } break; } tmpIndex++; } } if (endIndex < 0) { return null; } // Extract link elements String link = null; String anchor = null; String text = null; int textOffset = -1; if ((pipeIndex >= 0) && (pipeIndex < endIndex)) { if ((anchorIndex >= 0) && (anchorIndex < pipeIndex)) { link = contents.substring(beginIndex, anchorIndex); anchor = contents.substring(anchorIndex + 1, pipeIndex); } else { link = contents.substring(beginIndex, pipeIndex); } text = contents.substring(pipeIndex + 1, endIndex); textOffset = pipeIndex + 1 - index; } else if ((anchorIndex >= 0) && (anchorIndex < endIndex)) { link = contents.substring(beginIndex, anchorIndex); anchor = contents.substring(anchorIndex + 1, endIndex); } else { link = contents.substring(beginIndex, endIndex); } // Check that it is really an internal link String linkTrimmed = link.trim(); int colonIndex = linkTrimmed.indexOf(':'); if (colonIndex > 0) { String namespaceName = linkTrimmed.substring(0, colonIndex); // Is it a category ? Namespace category = wiki.getWikiConfiguration().getNamespace(Namespace.CATEGORY); if ((category != null) && (category.isPossibleName(namespaceName))) { return null; } // Is it a file / image ? Namespace image = wiki.getWikiConfiguration().getNamespace(Namespace.IMAGE); if ((image != null) && (image.isPossibleName(namespaceName))) { return null; } // Is it a language link ? if (Language.isLanguageCode( wiki.getWikiConfiguration().getLanguages(), namespaceName)) { return null; } } // Is it an interwiki ? if (colonIndex >= 0) { String namespaceName = null; if (colonIndex == 0) { if (linkTrimmed.length() > 1) { colonIndex = linkTrimmed.indexOf(':', 1); if (colonIndex > 1) { namespaceName = linkTrimmed.substring(1, colonIndex); } } } else { namespaceName = linkTrimmed.substring(0, colonIndex); } if ((namespaceName != null) && (wiki.getWikiConfiguration() != null) && (wiki.getWikiConfiguration().getInterwikis() != null)) { for (Interwiki iw : wiki.getWikiConfiguration().getInterwikis()) { if (iw.getPrefix().equals(namespaceName)) { return null; } } } } // Check that this is not an external link between double square brackets if (PageElementExternalLink.isPossibleProtocol(linkTrimmed, 0)) { return null; } // Create internal link return new PageElementInternalLink( wiki, index, endIndex + 2, link, anchor, text, textOffset); } public String getLink() { return link; } public String getLinkNotNormalized() { return (linkNotTrimmed != null) ? linkNotTrimmed.trim() : null; } public String getAnchor() { return anchor; } public String getFullLink() { if (anchor == null) { return link; } return link + "#" + anchor; } public String getText() { return text; } public int getTextOffset() { return textOffset; } public String getDisplayedText() { if (text != null) { return text; } if (anchor == null) { return linkNotTrimmed; } return linkNotTrimmed + "#" + anchorNotTrimmed; } public String getDisplayedTextNotTrimmed() { if (textNotTrimmed != null) { return textNotTrimmed; } if (anchor == null) { return linkNotTrimmed; } return linkNotTrimmed + "#" + anchorNotTrimmed; } private PageElementInternalLink( EnumWikipedia wikipedia, int beginIndex, int endIndex, String link, String anchor, String text, int textOffset) { super(beginIndex, endIndex); this.linkNotTrimmed = link; this.link = (link != null) ? wikipedia.normalizeTitle(link) : null; this.anchorNotTrimmed = anchor; this.anchor = (anchor != null) ? anchor.trim() : null; this.textNotTrimmed = text; this.text = (text != null) ? text.trim() : null; this.textOffset = textOffset; } /* (non-Javadoc) * @see java.lang.Object#toString() */ @Override public String toString() { return createInternalLink(linkNotTrimmed, anchorNotTrimmed, textNotTrimmed); } /** * Create an internal link. * * @param link Link. * @param text Displayed text. * @return Internal link. */ public static String createInternalLink(String link, String text) { return createInternalLink(link, null, text); } /** * Create an internal link. * * @param link Link. * @param anchor Anchor * @param text Displayed text. * @return Internal link. */ public static String createInternalLink(String link, String anchor, String text) { StringBuilder sb = new StringBuilder(); sb.append("[["); String fullLink = null; if ((link != null) || (anchor != null)) { fullLink = ((link != null) ? link.trim() : "") + ((anchor != null) ? ("#" + anchor.trim()) : ""); } if (text != null) { if ((fullLink != null) && (!Page.areSameTitle(fullLink, text))) { sb.append(fullLink); sb.append("|"); } sb.append(text.trim()); } else { sb.append(fullLink); } sb.append("]]"); return sb.toString(); } }