package org.wikipedia.miner.comparison;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.wikipedia.miner.model.Article;
import org.wikipedia.miner.util.MarkupStripper;

/**
 * A single sentence taken from a source article, recorded together with the
 * two topic articles it is associated with.
 * <p>
 * The sentence markup is fetched from the source article by index. A leading
 * heading ({@code == ... ==}) and leading list markers ({@code *} / {@code #})
 * are removed from the markup, and their presence is remembered in
 * {@link #followsHeading()} and {@link #isListItem()}.
 * <p>
 * Snippets are ordered by weight (highest first), then by source article,
 * then by sentence index (highest first). The weight is optional and mutable;
 * changing it while the snippet is held in a sorted collection will leave that
 * collection in an inconsistent state.
 */
public class ConnectionSnippet implements Comparable<ConnectionSnippet> {

    /** Matches markup that consists of a heading followed by trailing text (group 2). */
    private static final Pattern _headingPattern = Pattern.compile("\\s*={2,}(.*?)={2,}(.*)");

    /** Matches markup that starts with one or more list markers; group 1 is the item text. */
    private static final Pattern _listPattern = Pattern.compile("\\s*[*#]+(.*)");

    private static final MarkupStripper _stripper = new MarkupStripper();

    private final String _markup;
    private final String _plainText;

    private final Article _source;
    private final Article _topic1;
    private final Article _topic2;
    private final int _sentenceIndex;

    private final boolean _followsHeading;
    private final boolean _isListItem;

    /** Optional weight; {@code null} until {@link #setWeight(double)} is called. */
    private Double _weight;

    /**
     * Builds a snippet from the sentence at the given index of the source article.
     *
     * @param sentenceIndex index of the sentence within {@code source}
     * @param source the article the sentence is read from
     * @param topic1 the first topic associated with this snippet
     * @param topic2 the second topic associated with this snippet
     */
    public ConnectionSnippet(int sentenceIndex, Article source, Article topic1, Article topic2) {
        _sentenceIndex = sentenceIndex;
        _source = source;
        _topic1 = topic1;
        _topic2 = topic2;

        String markup = source.getSentenceMarkup(sentenceIndex);

        // Drop a leading heading, keeping only the text that follows it.
        boolean followsHeading = false;
        Matcher m = _headingPattern.matcher(markup);
        if (m.matches()) {
            followsHeading = true;
            markup = m.group(2).trim();
        }

        // Drop leading list markers; this is checked after the heading has been removed.
        boolean isListItem = false;
        m = _listPattern.matcher(markup);
        if (m.matches()) {
            isListItem = true;
            markup = m.group(1).trim();
        }

        _followsHeading = followsHeading;
        _isListItem = isListItem;
        _markup = markup;
        _plainText = _stripper.stripToPlainText(markup, null);
    }

    /**
     * @return the sentence markup, with any leading heading and list markers removed
     */
    public String getMarkup() {
        return _markup;
    }

    /**
     * @return the result of stripping {@link #getMarkup()} to plain text
     */
    public String getPlainText() {
        return _plainText;
    }

    /**
     * @return the article the sentence was read from
     */
    public Article getSource() {
        return _source;
    }

    /**
     * @return the first topic associated with this snippet
     */
    public Article getTopic1() {
        return _topic1;
    }

    /**
     * @return the second topic associated with this snippet
     */
    public Article getTopic2() {
        return _topic2;
    }

    /**
     * @return the index of the sentence within the source article
     */
    public int getSentenceIndex() {
        return _sentenceIndex;
    }

    /**
     * @return {@code true} if the original sentence markup matched the heading pattern
     */
    public boolean followsHeading() {
        return _followsHeading;
    }

    /**
     * @return {@code true} if the markup (after heading removal) started with list markers
     */
    public boolean isListItem() {
        return _isListItem;
    }

    /**
     * @return the weight of this snippet, or {@code null} if none has been set
     */
    public Double getWeight() {
        return _weight;
    }

    /**
     * Sets the weight used as the primary sort key in {@link #compareTo(ConnectionSnippet)}.
     *
     * @param weight the new weight
     */
    public void setWeight(double weight) {
        _weight = weight;
    }

    /**
     * Orders snippets by weight (descending), then by source article, then by
     * sentence index (descending).
     * <p>
     * A snippet without a weight sorts after any snippet that has one, and two
     * unweighted snippets tie on weight. Handling the missing weight the same
     * way in every comparison keeps the ordering transitive when weighted and
     * unweighted snippets are mixed.
     *
     * @param s the snippet to compare against
     * @return a negative integer, zero, or a positive integer as this snippet
     *         sorts before, equal to, or after {@code s}
     */
    @Override
    public int compareTo(ConnectionSnippet s) {
        int cmp = compareWeights(_weight, s._weight);

        if (cmp == 0) {
            cmp = _source.compareTo(s._source);
        }

        if (cmp == 0) {
            // Reversed operands: a higher sentence index sorts first.
            cmp = Integer.valueOf(s._sentenceIndex).compareTo(Integer.valueOf(_sentenceIndex));
        }

        return cmp;
    }

    /**
     * Compares two optional weights so that larger weights come first and
     * missing weights come last.
     */
    private static int compareWeights(Double mine, Double theirs) {
        if (mine == null) {
            return (theirs == null) ? 0 : 1;
        }
        if (theirs == null) {
            return -1;
        }
        // Reversed operands give descending order by value.
        return theirs.compareTo(mine);
    }
}