package org.wikipedia.page.linkpreview; import android.support.annotation.NonNull; import android.support.annotation.Nullable; import android.text.TextUtils; import org.wikipedia.dataclient.WikiSite; import org.wikipedia.dataclient.page.PageSummary; import org.wikipedia.page.Page; import org.wikipedia.page.PageTitle; import java.text.BreakIterator; import java.util.ArrayList; import java.util.List; import java.util.Locale; public class LinkPreviewContents { private static final int EXTRACT_MAX_SENTENCES = 2; private final PageTitle title; public PageTitle getTitle() { return title; } private final String extract; public String getExtract() { return extract; } public LinkPreviewContents(@NonNull PageSummary pageSummary, @NonNull WikiSite wiki) { title = new PageTitle(pageSummary.getTitle(), wiki); extract = makeStringFromSentences(getSentences(removeParens(pageSummary.getExtract()), title.getWikiSite()), EXTRACT_MAX_SENTENCES); title.setThumbUrl(pageSummary.getThumbnailUrl()); } public LinkPreviewContents(@NonNull Page page) { title = page.getTitle(); PageExtract pageExtract = new PageExtract(page); // Follow the same logic as if the computed string was retrieved from the API extract = makeStringFromSentences(getSentences(removeParens(pageExtract.getText()), title.getWikiSite()), EXTRACT_MAX_SENTENCES); } /** * Remove text contained in parentheses from a string. * @param text String to be processed. * @return New string that is the same as the original string, but without any * content in parentheses. */ public static String removeParens(@Nullable String text) { if (text == null) { return ""; } StringBuilder outStr = new StringBuilder(text.length()); char c; int level = 0; int i = 0; for (; i < text.length(); i++) { c = text.charAt(i); if (c == ')' && level == 0) { // abort if we have an imbalance of parentheses return text; } if (c == '(') { level++; continue; } else if (c == ')') { level--; continue; } if (level == 0) { // Remove leading spaces before parentheses if (c == ' ' && (i < text.length() - 1) && text.charAt(i + 1) == '(') { continue; } outStr.append(c); } } // fill in the rest of the string if (i + 1 < text.length()) { outStr.append(text.substring(i + 1, text.length())); } // if we had an imbalance of parentheses, then return the original string, // instead of the transformed one. return (level == 0) ? outStr.toString() : text; } /** * Split a block of text into sentences, taking into account the language in which * the text is assumed to be. * @param text Text to be transformed into sentences. * @param wiki WikiSite that will provide the language of the given text. * @return List of sentences. */ public static List<String> getSentences(String text, WikiSite wiki) { List<String> sentenceList = new ArrayList<>(); BreakIterator iterator = BreakIterator.getSentenceInstance(new Locale(wiki.languageCode())); // feed the text into the iterator, with line breaks removed: text = text.replaceAll("(\r|\n)", " "); iterator.setText(text); for (int start = iterator.first(), end = iterator.next(); end != BreakIterator.DONE; start = end, end = iterator.next()) { String sentence = text.substring(start, end).trim(); if (TextUtils.isGraphic(sentence)) { // if it's the first sentence, then remove parentheses from it. String formattedSentence = sentenceList.isEmpty() ? removeParens(sentence) : sentence; sentenceList.add(formattedSentence); } } // if we couldn't detect any sentences using the BreakIterator, then just return the // original text as a single sentence. if (sentenceList.isEmpty()) { sentenceList.add(text); } return sentenceList; } private String makeStringFromSentences(List<String> sentences, int maxSentences) { return TextUtils.join(" ", sentences.subList(0, Math.min(maxSentences, sentences.size()))); } }