package er.extensions.components.javascript;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

import javax.xml.parsers.DocumentBuilderFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import er.extensions.foundation.ERXStringUtilities;
/**
* ERXGoogleSpell provides a simple API to spell checking with Google's web service.
*
* This code is based on the work from the blog post
* http://immike.net/blog/2007/04/07/hacking-google-spell-checker-for-fun-and-profit/.
*
* @author mschrag
*/
public class ERXGoogleSpell {
    /** Endpoint the spell request is posted to. */
    private static final String SPELL_SERVICE_URL = "https://www.google.com/tbproxy/spell";

    /** Separator between the alternative suggestions inside a single correction element. */
    private static final String SUGGESTION_SEPARATOR = "\t";

    /** Shared result for corrections that carry no suggestion text. */
    private static final String[] NO_SUGGESTIONS = new String[0];

    /**
     * Small command line demo: corrects a fixed misspelled string and prints the result.
     *
     * @param args
     *            ignored
     * @throws CorrectionException
     *             if there is a problem correcting the text
     */
    public static void main(String[] args) throws CorrectionException {
        String str = "gogle spel";
        System.out.println("ERXGoogleSpell.correct: " + ERXGoogleSpell.correct(str));
    }

    /**
     * Corrects the spelling of the given text (language = "en", escaping XML).
     *
     * @param text
     *            the misspelled text
     * @return the corrected text
     * @throws CorrectionException
     *             if there is a problem correcting the text
     */
    public static String correct(String text) throws CorrectionException {
        return ERXGoogleSpell.correct(text, "en");
    }

    /**
     * Corrects the spelling of the given text (escaping XML). The human interface
     * language is set to the same value as the text language.
     *
     * @param text
     *            the misspelled text
     * @param lang
     *            the language of the text
     * @return the corrected text
     * @throws CorrectionException
     *             if there is a problem correcting the text
     */
    public static String correct(String text, String lang) throws CorrectionException {
        return ERXGoogleSpell.correct(text, lang, lang);
    }

    /**
     * Corrects the spelling of the given text (escaping XML).
     *
     * @param text
     *            the misspelled text
     * @param lang
     *            the language of the text
     * @param hl
     *            the human interface language
     * @return the corrected text
     * @throws CorrectionException
     *             if there is a problem correcting the text
     */
    public static String correct(String text, String lang, String hl) throws CorrectionException {
        return ERXGoogleSpell.correct(text, lang, hl, true);
    }

    /**
     * Corrects the spelling of the given text by replacing every flagged word with the
     * first suggestion returned for it. Flagged words without any suggestion are left
     * untouched.
     *
     * @param text
     *            the misspelled text
     * @param lang
     *            the language of the text
     * @param hl
     *            the human interface language
     * @param escapeXml
     *            if true, xml characters in the text will be escaped
     * @return the corrected text
     * @throws CorrectionException
     *             if there is a problem correcting the text, including a correction
     *             whose offset/length does not fit the text or overlaps a previous one
     */
    public static String correct(String text, String lang, String hl, boolean escapeXml) throws CorrectionException {
        Correction[] corrections = ERXGoogleSpell.suggestions(text, lang, hl, escapeXml);
        StringBuilder buffer = new StringBuilder();
        // Index in text of the first character that has not been copied to the buffer yet.
        int lastOffset = 0;
        for (Correction correction : corrections) {
            if (correction == null) {
                continue;
            }
            String[] suggestions = correction.suggestions();
            if (suggestions == null || suggestions.length == 0) {
                // Nothing to substitute: keep the original word.
                continue;
            }
            int offset = correction.offset();
            int end = offset + correction.length();
            // Report inconsistent ranges through the documented exception type instead of
            // letting a StringIndexOutOfBoundsException escape.
            if (offset < lastOffset || end < offset || end > text.length()) {
                throw new CorrectionException("Correction at offset " + offset + " with length " + correction.length() + " does not fit '" + text + "'.");
            }
            buffer.append(text, lastOffset, offset);
            buffer.append(suggestions[0]);
            lastOffset = end;
        }
        buffer.append(text, lastOffset, text.length());
        return buffer.toString();
    }

    /**
     * Returns possible spelling corrections of the given text (language = "en", escaping XML).
     *
     * @param text
     *            the misspelled text
     * @return the list of suggested corrections
     * @throws CorrectionException
     *             if there is a problem correcting the text
     */
    public static Correction[] suggestions(String text) throws CorrectionException {
        return ERXGoogleSpell.suggestions(text, "en");
    }

    /**
     * Returns possible spelling corrections of the given text (escaping XML). The human
     * interface language is set to the same value as the text language.
     *
     * @param text
     *            the misspelled text
     * @param lang
     *            the language of the text
     * @return the list of suggested corrections
     * @throws CorrectionException
     *             if there is a problem correcting the text
     */
    public static Correction[] suggestions(String text, String lang) throws CorrectionException {
        return ERXGoogleSpell.suggestions(text, lang, lang);
    }

    /**
     * Returns possible spelling corrections of the given text (escaping XML).
     *
     * @param text
     *            the misspelled text
     * @param lang
     *            the language of the text
     * @param hl
     *            the human interface language
     * @return the list of suggested corrections
     * @throws CorrectionException
     *             if there is a problem correcting the text
     */
    public static Correction[] suggestions(String text, String lang, String hl) throws CorrectionException {
        return ERXGoogleSpell.suggestions(text, lang, hl, true);
    }

    /**
     * Returns possible spelling corrections of the given text. The text is wrapped in a
     * spell request document, posted to the spell service, and every <code>c</code>
     * element of the XML response is turned into a {@link Correction}.
     *
     * @param text
     *            the misspelled text
     * @param lang
     *            the language of the text
     * @param hl
     *            the human interface language
     * @param escapeXml
     *            if true, xml characters in the text will be escaped
     * @return the list of suggested corrections
     * @throws CorrectionException
     *             if there is a problem correcting the text (any failure while building
     *             the request, talking to the service or parsing the response)
     */
    public static Correction[] suggestions(String text, String lang, String hl, boolean escapeXml) throws CorrectionException {
        try {
            String request = buildRequest(text, escapeXml);
            URL url = new URL(SPELL_SERVICE_URL + "?lang=" + encodeQueryValue(lang) + "&hl=" + encodeQueryValue(hl));
            URLConnection connection = url.openConnection();
            connection.setDoOutput(true);
            // The request has no XML declaration, so it must be sent as UTF-8 rather than
            // in whatever the platform default charset happens to be.
            try (OutputStreamWriter out = new OutputStreamWriter(connection.getOutputStream(), StandardCharsets.UTF_8)) {
                out.write(request);
            }
            try (InputStream in = connection.getInputStream()) {
                // NOTE(review): the parser is used with its default configuration; consider
                // disabling DTDs/external entities if the endpoint is ever made configurable.
                Document responseDocument = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(in);
                responseDocument.normalize();
                return parseCorrections(responseDocument);
            }
        }
        catch (Exception e) {
            throw new CorrectionException("Failed to correct spelling of '" + text + "'.", e);
        }
    }

    /** Builds the XML spell request document for the given text. */
    private static String buildRequest(String text, boolean escapeXml) {
        StringBuilder request = new StringBuilder();
        request.append("<spellrequest textalreadyclipped=\"0\" ignoredups=\"1\" ignoredigits=\"1\" ignoreallcaps=\"0\"><text>");
        request.append(escapeXml ? ERXStringUtilities.escapeNonXMLChars(text) : text);
        request.append("</text></spellrequest>");
        return request.toString();
    }

    /** URL-encodes a query parameter value; a null value is sent as the string "null". */
    private static String encodeQueryValue(String value) throws UnsupportedEncodingException {
        return URLEncoder.encode(String.valueOf(value), StandardCharsets.UTF_8.name());
    }

    /** Converts every <code>c</code> element of the response document into a Correction. */
    private static Correction[] parseCorrections(Document responseDocument) {
        NodeList correctionNodes = responseDocument.getElementsByTagName("c");
        int correctionCount = correctionNodes.getLength();
        // Collect into a list so the returned array never contains null slots.
        List<Correction> corrections = new ArrayList<>(correctionCount);
        for (int correctionNum = 0; correctionNum < correctionCount; correctionNum++) {
            Node correctionNode = correctionNodes.item(correctionNum);
            if (correctionNode instanceof Element) {
                corrections.add(parseCorrection((Element) correctionNode));
            }
        }
        return corrections.toArray(new Correction[corrections.size()]);
    }

    /** Reads offset ("o"), length ("l"), confidence ("s") and the suggestion text of one element. */
    private static Correction parseCorrection(Element correctionElement) {
        Node suggestionsNode = correctionElement.getChildNodes().item(0);
        String suggestionsText = suggestionsNode == null ? null : suggestionsNode.getNodeValue();
        int offset = Integer.parseInt(correctionElement.getAttribute("o"));
        int length = Integer.parseInt(correctionElement.getAttribute("l"));
        int confidence = Integer.parseInt(correctionElement.getAttribute("s"));
        return new Correction(offset, length, confidence, splitSuggestions(suggestionsText));
    }

    /** Splits tab separated suggestions; missing or empty text yields an empty array. */
    private static String[] splitSuggestions(String suggestionsText) {
        // "".split("\t") would return a one-element array holding an empty string, which
        // correct() would then use to erase the misspelled word.
        if (suggestionsText == null || suggestionsText.isEmpty()) {
            return NO_SUGGESTIONS;
        }
        return suggestionsText.split(SUGGESTION_SEPARATOR);
    }

    /**
     * Correction encapsulates a suggested spelling correction for a word in a string of text.
     *
     * @author mschrag
     */
    public static class Correction {
        private final int _offset;
        private final int _length;
        private final int _confidence;
        private final String[] _suggestions;

        /**
         * Creates a new correction.
         *
         * @param offset
         *            the offset of the misspelled word in the original text
         * @param length
         *            the length of the misspelled word in the original text
         * @param confidence
         *            the confidence of correction (0 or 1)
         * @param suggestions
         *            the list of suggested corrections
         */
        public Correction(int offset, int length, int confidence, String[] suggestions) {
            _offset = offset;
            _length = length;
            _confidence = confidence;
            _suggestions = suggestions;
        }

        /**
         * Returns the offset of the misspelled word in the original text.
         *
         * @return the offset of the misspelled word
         */
        public int offset() {
            return _offset;
        }

        /**
         * Returns the length of the misspelled word in the original text.
         *
         * @return the length of the misspelled word
         */
        public int length() {
            return _length;
        }

        /**
         * Returns the confidence of the correction (0 or 1).
         *
         * @return the confidence of the correction
         */
        public int confidence() {
            return _confidence;
        }

        /**
         * Returns an ordered list of suggested spelling corrections. The array is the one
         * passed to the constructor, not a copy.
         *
         * @return the suggested corrections
         */
        public String[] suggestions() {
            return _suggestions;
        }
    }

    /**
     * CorrectionException is thrown if anything fails during the correction process.
     *
     * @author mschrag
     */
    public static class CorrectionException extends Exception {
        /**
         * Serialization version of this exception class.
         * See section 5.6 <cite>Type Changes Affecting Serialization</cite> on page 51 of the
         * <a href="http://java.sun.com/j2se/1.4/pdf/serial-spec.pdf">Java Object Serialization Spec</a>
         * for when it needs to be updated.
         */
        private static final long serialVersionUID = 1L;

        /**
         * Creates a new CorrectionException.
         *
         * @param message
         *            the exception message
         * @param cause
         *            the root cause
         */
        public CorrectionException(String message, Throwable cause) {
            super(message, cause);
        }

        /**
         * Creates a new CorrectionException.
         *
         * @param message
         *            the exception message
         */
        public CorrectionException(String message) {
            super(message);
        }
    }
}