package org.wikipedia.miner.web.service;
import java.util.ArrayList;
import java.util.Collections;
import java.util.SortedSet;
import java.util.TreeSet;
import javax.servlet.http.HttpServletRequest;
import org.simpleframework.xml.Attribute;
import org.simpleframework.xml.ElementList;
import org.wikipedia.miner.model.Label;
import org.wikipedia.miner.model.Wikipedia;
import org.dmilne.xjsf.UtilityMessages.ParameterMissingMessage;
import org.wikipedia.miner.util.text.TextProcessor;
import org.dmilne.xjsf.Service;
import org.dmilne.xjsf.param.IntParameter;
import org.dmilne.xjsf.param.StringParameter;
import com.google.gson.annotations.Expose;
public class CorrectService extends WMService {
/**
*
*/
private static final long serialVersionUID = 7243235547641000876L;
private StringParameter prmTerm ;
private IntParameter prmMax ;
public CorrectService() {
super("query","Provides alternatives for misspelt words",
"<p></p>", false);
prmTerm = new StringParameter("term", "The term or phrase to find spelling corrections for", null) ;
addGlobalParameter(prmTerm) ;
prmMax = new IntParameter("max", "The maximum number of suggestions to return", 10) ;
addGlobalParameter(prmMax) ;
}
public Service.Message buildWrappedResponse(HttpServletRequest request) {
String term = prmTerm.getValue(request) ;
if (term == null)
return new ParameterMissingMessage(request) ;
Wikipedia wikipedia = getWikipedia(request) ;
TextProcessor tp = wikipedia.getEnvironment().getConfiguration().getDefaultTextProcessor() ;
Message msg = new Message(request) ;
int max = prmMax.getValue(request) ;
int count = 0 ;
for (Suggestion s:getSuggestions(term, wikipedia, tp)) {
if (count++ > max) break ;
msg.addSuggestion(s) ;
}
return msg;
}
private TreeSet<Suggestion> getSuggestions(String term, Wikipedia wikipedia, TextProcessor tp) {
TreeSet<Suggestion> suggestions = new TreeSet<Suggestion>() ;
for (String s1:getWordsWithin1Edit(term)) {
Label l1 = new Label(wikipedia.getEnvironment(), s1, tp) ;
if (l1.exists()) {
suggestions.add(new Suggestion(s1, 1, l1.getOccCount())) ;
}
for (String s2:getWordsWithin1Edit(s1)) {
Label l2 = new Label(wikipedia.getEnvironment(), s2, tp) ;
if (l2.exists()) {
suggestions.add(new Suggestion(s2, 2, l2.getOccCount())) ;
}
}
}
return suggestions ;
}
private ArrayList<String> getWordsWithin1Edit(String word) {
ArrayList<String> result = new ArrayList<String>();
for(int i=0; i < word.length(); ++i) result.add(word.substring(0, i) + word.substring(i+1));
for(int i=0; i < word.length()-1; ++i) result.add(word.substring(0, i) + word.substring(i+1, i+2) + word.substring(i, i+1) + word.substring(i+2));
for(int i=0; i < word.length(); ++i) for(char c='a'; c <= 'z'; ++c) result.add(word.substring(0, i) + String.valueOf(c) + word.substring(i+1));
for(int i=0; i <= word.length(); ++i) for(char c='a'; c <= 'z'; ++c) result.add(word.substring(0, i) + String.valueOf(c) + word.substring(i));
return result;
}
public static class Message extends Service.Message {
@Expose
@ElementList(inline=true, entry="suggestion")
private TreeSet<Suggestion> suggestions = new TreeSet<Suggestion>() ;
private Message(HttpServletRequest request) {
super(request) ;
}
private void addSuggestion(Suggestion s) {
suggestions.add(s) ;
}
public SortedSet<Suggestion> getSuggestions() {
return Collections.unmodifiableSortedSet(suggestions) ;
}
}
public static class Suggestion implements Comparable<Suggestion> {
@Expose
@Attribute
private String text ;
@Expose
@Attribute
private Integer editDistance ;
@Expose
@Attribute
private Long occCount ;
private Suggestion(String text, int editDistance, long occCount) {
this.text = text ;
this.editDistance = editDistance ;
this.occCount = occCount ;
}
public int compareTo(Suggestion s) {
int c = editDistance.compareTo(s.editDistance) ;
if (c != 0)
return c ;
c = s.occCount.compareTo(occCount) ;
if (c != 0)
return c ;
return text.compareTo(s.text) ;
}
public String getText() {
return text;
}
public Integer getEditDistance() {
return editDistance;
}
public Long getOccCount() {
return occCount;
}
}
}