/** AlphaNumericPreprocessor.java
*
* @author Sunita Sarawagi
* @version 1.3
*/
package iitb.Segment;
/**
*
* @author Sunita Sarawagi
*
*/
public class AlphaNumericPreprocessor extends Preprocessor {
public static String DIGIT = new String("DIGIT");
public int getCode() {
return 1;
}
public static String preprocess(String s) {
if (isNumber(s)) {
return DIGIT;
}
return s;
}
//TODO: Unnecessary constructor? Empty and everything else is static (except getCode()).
public AlphaNumericPreprocessor() {}
/**
* Checks, if the String encodes a natural number (int).
* @param s The String to check.
* @return true, if the String encodes a natural number, false otherwise.
*/
public static boolean isNumber(String s) {
try {
Integer.valueOf(s);
} catch (NumberFormatException e) {
return false;
}
return true;
}
public static TrainData preprocess(TrainData tokens, int numLabels) {
for (tokens.startScan(); tokens.hasMoreRecords();) {
TrainRecord tr = tokens.nextRecord();
for (int s = 0; s < tr.numSegments(); s++) {
String[] words = tr.tokens(s);
for (int j = 0; j < words.length; j++) {
words[j] = preprocess(words[j]);
}
}
}
return tokens;
}
};