package semanticMarkup.ling.learn.knowledge;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import semanticMarkup.ling.learn.dataholder.DataHolder;
import semanticMarkup.ling.learn.utility.LearnerUtility;
public class FiniteSetsLoader implements IModule {
private LearnerUtility myLearnerUtility;
public FiniteSetsLoader(LearnerUtility learnerUtility) {
this.myLearnerUtility = learnerUtility;
}
@Override
public void run(DataHolder dataholderHandler) {
this.addStopWords(dataholderHandler);
this.addCharacters(dataholderHandler);
this.addNumbers(dataholderHandler);
this.addClusterStrings(dataholderHandler);
this.addProperNouns(dataholderHandler);
}
public void addStopWords(DataHolder dataholderHandler) {
PropertyConfigurator.configure("conf/log4j.properties");
Logger myLogger = Logger.getLogger("learn.addStopWords");
myLogger.trace("Add stop words");
List<String> stops = new ArrayList<String>();
stops.addAll(Arrays.asList(this.myLearnerUtility.getConstant().STOP.split("\\|")));
stops.addAll(Arrays.asList(new String[] { "NUM", "(", "[", "{", ")",
"]", "}", "d+" }));
myLogger.trace("Stop Words: " + stops);
for (int i = 0; i < stops.size(); i++) {
String word = stops.get(i);
if (word.matches("\\b(" + this.myLearnerUtility.getConstant().FORBIDDEN + ")\\b")) {
continue;
}
dataholderHandler.updateDataHolder(word, "b", "*", "wordpos", 0);
myLogger.trace(String.format(
"(\"%s\", \"b\", \"*\", \"wordpos\", 0) added\n", word));
// this.getWordPOSHolder().put(new WordPOSKey(word, "b"), new
// WordPOSValue("*", 0, 0, null, null));
// System.out.println("Add Stop Word: " + word+"\n");
}
myLogger.trace("Quite\n");
}
public void addCharacters(DataHolder dataholderHandler) {
PropertyConfigurator.configure("conf/log4j.properties");
Logger myLogger = Logger.getLogger("learn.addCharacters");
myLogger.trace("Add characters");
List<String> chars = new ArrayList<String>();
chars.addAll(Arrays.asList(this.myLearnerUtility.getConstant().CHARACTER.split("\\|")));
//
// System.out.println(chars);
// System.out.println(this.myLearnerUtility.getConstant().CHARACTER);
for (int i = 0; i < chars.size(); i++) {
String word = chars.get(i);
// String reg="\\b("+this.myLearnerUtility.getConstant().FORBIDDEN+")\\b";
// boolean f = word.matches(reg);
if (word.matches("\\b(" + this.myLearnerUtility.getConstant().FORBIDDEN + ")\\b")) {
continue;
}
dataholderHandler.updateDataHolder(word, "b", "*", "wordpos", 0);
// this.getWordPOSHolder().put(new WordPOSKey(word, "b"), new
// WordPOSValue("", 0, 0, null, null));
// System.out.println("addCharacter word: " + word);
}
}
public void addNumbers(DataHolder dataholderHandler) {
PropertyConfigurator.configure("conf/log4j.properties");
Logger myLogger = Logger.getLogger("learn.addNumbers");
myLogger.trace("Add numbers");
List<String> nums = new ArrayList<String>();
nums.addAll(Arrays.asList(this.myLearnerUtility.getConstant().NUMBER.split("\\|")));
// System.out.println(nums);
// System.out.println(this.myLearnerUtility.getConstant().NUMBER);
for (int i = 0; i < nums.size(); i++) {
String word = nums.get(i);
// String reg="\\b("+this.myLearnerUtility.getConstant().FORBIDDEN+")\\b";
// boolean f = word.matches(reg);
if (word.matches("\\b(" + this.myLearnerUtility.getConstant().FORBIDDEN + ")\\b")) {
continue;
}
dataholderHandler.updateDataHolder(word, "b", "*", "wordpos", 0);
// this.getWordPOSHolder().put(new WordPOSKey(word, "b"), new
// WordPOSValue("*", 0, 0, null, null));
// System.out.println("add Number: " + word);
}
dataholderHandler.updateDataHolder("NUM", "b", "*", "wordpos", 0);
// this.getWordPOSHolder().put(new WordPOSKey("NUM", "b"), new
// WordPOSValue("*",0, 0, null, null));
}
public void addClusterStrings(DataHolder dataholderHandler) {
PropertyConfigurator.configure("conf/log4j.properties");
Logger myLogger = Logger.getLogger("learn.addClusterstrings");
myLogger.trace("Add clusterstrings");
List<String> cltstrs = new ArrayList<String>();
cltstrs.addAll(Arrays.asList(this.myLearnerUtility.getConstant().CLUSTERSTRING.split("\\|")));
// System.out.println(cltstrs);
// System.out.println(this.myLearnerUtility.getConstant().CLUSTERSTRING);
for (int i = 0; i < cltstrs.size(); i++) {
String word = cltstrs.get(i);
if (word.matches("\\b(" + this.myLearnerUtility.getConstant().FORBIDDEN + ")\\b")) {
continue;
}
dataholderHandler.updateDataHolder(word, "b", "*", "wordpos", 0);
// this.getWordPOSHolder().put(new WordPOSKey(word, "b"), new
// WordPOSValue("*", 1, 1, null, null));
// System.out.println("addClusterString: " + word);
}
}
public void addProperNouns(DataHolder dataholderHandler) {
PropertyConfigurator.configure("conf/log4j.properties");
Logger myLogger = Logger.getLogger("learn.addProperNouns");
myLogger.trace("Add proper nouns");
List<String> ppnouns = new ArrayList<String>();
ppnouns.addAll(Arrays.asList(Constant.PROPERNOUN.split("\\|")));
for (int i = 0; i < ppnouns.size(); i++) {
String word = ppnouns.get(i);
if (word.matches("\\b(" + this.myLearnerUtility.getConstant().FORBIDDEN + ")\\b")) {
continue;
}
dataholderHandler.updateDataHolder(word, "b", "*", "wordpos", 0);
// this.getWordPOSHolder().put(new WordPOSKey(word, "z"), new
// WordPOSValue("*", 0, 0, null, null));
// System.out.println("Add ProperNoun: " + word);
}
}
}