/* * WPCleaner: A tool to help on Wikipedia maintenance tasks. * Copyright (C) 2013 Nicolas Vervelle * * See README.txt file for licensing information. */ package org.wikipediacleaner.api.data; import java.util.List; import org.wikipediacleaner.api.constants.EnumWikipedia; /** * Class containing information about a complete interwiki link ([[iw:link#anchor|text]]). */ public class PageElementInterwikiLink extends PageElement { private final Interwiki interwiki; private final String interwikiTextNotTrimmed; private final String interwikiText; private final String linkNotTrimmed; private final String link; private final String anchorNotTrimmed; private final String anchor; private final String textNotTrimmed; private final String text; private final int textOffset; /** * Analyze contents to check if it matches an internal link. * * @param wikipedia Wikipedia. * @param contents Contents. * @param index Block start index. * @return Block details it there's a block. */ public static PageElementInterwikiLink analyzeBlock( EnumWikipedia wikipedia, String contents, int index) { // Verify arguments if (contents == null) { return null; } // Look for '[[' int tmpIndex = index; if ((tmpIndex >= contents.length()) || (!contents.startsWith("[[", tmpIndex))) { return null; } tmpIndex += 2; int beginIndex = tmpIndex; // Possible whitespace characters while ((tmpIndex < contents.length()) && (contents.charAt(tmpIndex) == ' ')) { tmpIndex++; } // Possible colon at the beginning boolean beginWithColon = false; if ((tmpIndex < contents.length()) && (contents.charAt(tmpIndex) == ':')) { beginWithColon = true; tmpIndex++; beginIndex = tmpIndex; while ((tmpIndex < contents.length()) && (contents.charAt(tmpIndex) == ' ')) { tmpIndex++; } } // Find elements of interwiki link if (tmpIndex >= contents.length()) { return null; } int endIndex = contents.indexOf("]]", tmpIndex); if (endIndex < 0) { return null; } int colonIndex = PageContents.findCharacter(contents, ':', tmpIndex, endIndex); if (colonIndex < 0) { return null; } String interwikiText = contents.substring(beginIndex, colonIndex); List<Interwiki> interwikis = wikipedia.getWikiConfiguration().getInterwikis(); if (interwikis == null) { return null; } Interwiki interwiki = null; for (Interwiki tmpInterwiki : interwikis) { if ((tmpInterwiki != null) && ((tmpInterwiki.getLanguage() == null) || (tmpInterwiki.getLanguage().length() == 0) || beginWithColon) && (interwikiText.equals(tmpInterwiki.getPrefix()))) { if (interwiki == null) { interwiki = tmpInterwiki; } else if (tmpInterwiki.getLanguage().length() < interwiki.getLanguage().length()) { interwiki = tmpInterwiki; } } } if (interwiki == null) { return null; } int anchorIndex = PageContents.findCharacter(contents, '#', tmpIndex, endIndex); int pipeIndex = PageContents.findCharacter(contents, '|', tmpIndex, endIndex); // Create interwiki link if ((pipeIndex >= 0) && (pipeIndex < endIndex)) { if ((anchorIndex >= 0) && (anchorIndex < pipeIndex)) { return new PageElementInterwikiLink( index, endIndex + 2, interwiki, interwikiText, contents.substring(colonIndex + 1, anchorIndex), contents.substring(anchorIndex + 1, pipeIndex), contents.substring(pipeIndex + 1, endIndex), pipeIndex + 1 - index); } return new PageElementInterwikiLink( index, endIndex + 2, interwiki, interwikiText, contents.substring(colonIndex + 1, pipeIndex), null, contents.substring(pipeIndex + 1, endIndex), pipeIndex + 1 - index); } if ((anchorIndex >= 0) && (anchorIndex < endIndex)) { return new PageElementInterwikiLink( index, endIndex + 2, interwiki, interwikiText, contents.substring(colonIndex + 1, anchorIndex), contents.substring(anchorIndex + 1, endIndex), null, -1); } return new PageElementInterwikiLink( index, endIndex + 2, interwiki, interwikiText, contents.substring(colonIndex + 1, endIndex), null, null, -1); } public Interwiki getInterwiki() { return interwiki; } public String getInterwikiText() { return interwikiText; } public String getLink() { return link; } public String getAnchor() { return anchor; } public String getFullLink() { if (anchor == null) { return link; } return link + "#" + anchor; } public String getText() { return text; } public int getTextOffset() { return textOffset; } /** * @param beginIndex Begin index. * @param endIndex End index. * @param interwiki Interwiki. * @param interwikiText Interwiki text. * @param link Link. * @param anchor Anchor. * @param text Text. * @param textOffset Offset of the text. */ private PageElementInterwikiLink( int beginIndex, int endIndex, Interwiki interwiki, String interwikiText, String link, String anchor, String text, int textOffset) { super(beginIndex, endIndex); this.interwiki = interwiki; this.interwikiTextNotTrimmed = interwikiText; this.interwikiText = (interwikiText != null) ? interwikiText.trim() : null; this.linkNotTrimmed = link; this.link = (link != null) ? link.trim() : null; this.anchorNotTrimmed = anchor; this.anchor = (anchor != null) ? anchor.trim() : null; this.textNotTrimmed = text; this.text = (text != null) ? text.trim() : null; this.textOffset = textOffset; } /* (non-Javadoc) * @see java.lang.Object#toString() */ @Override public String toString() { StringBuilder sb = new StringBuilder(); sb.append("[["); sb.append(interwikiTextNotTrimmed); sb.append(':'); sb.append(linkNotTrimmed); if (anchorNotTrimmed != null) { sb.append('#'); sb.append(anchorNotTrimmed); } if (textNotTrimmed != null) { sb.append('|'); sb.append(textNotTrimmed); } sb.append("]]"); return sb.toString(); } }