package semanticMarkup.ling.learn.knowledge;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import semanticMarkup.ling.learn.Configuration;
import semanticMarkup.ling.learn.dataholder.DataHolder;
import semanticMarkup.ling.learn.dataholder.SentenceStructure;
import semanticMarkup.ling.learn.utility.LearnerUtility;
import semanticMarkup.ling.learn.utility.StringUtility;
/**
 * Corrects markups that used an adjective as a singular noun, e.g. lateral,
 * adult, juvenile.
 *
 * @author Dongye
 */
public class AdjectiveVerifier implements IModule {

    /**
     * Matches a sentence that starts with a noun-tagged word (group 1),
     * followed by a space and a run of characters other than 'N', ',', ';'
     * and '.' that ends with a second noun-tagged word (group 2). Compiled
     * once because the expression is the same for every sentence.
     */
    private static final Pattern LEADING_NOUN_PATTERN = Pattern
            .compile("^<N>([a-z]+)</N> ([^N,;.]+ <N>[a-z]+</N>)");

    /** POS tags that are deleted from a word before it is stored as a modifier. */
    private static final String[] NOUN_POS_TAGS = { "s", "p", "n" };

    private final LearnerUtility myLearnerUtility;

    /**
     * @param learnerUtility
     *            utility used to look up the number of a word through its
     *            word form utility
     */
    public AdjectiveVerifier(LearnerUtility learnerUtility) {
        this.myLearnerUtility = learnerUtility;
    }

    /**
     * Runs the adjective verification on the given data holder.
     *
     * @param dataholderHandler
     *            DataHolder handler
     */
    @Override
    public void run(DataHolder dataholderHandler) {
        this.adjectivesVerification(dataholderHandler);
    }

    /**
     * Correct markups that used an adj as an s, e.g lateral, adult, juvenile.
     *
     * For every non-null sentence that matches {@link #LEADING_NOUN_PATTERN},
     * the leading tagged word is turned into a modifier (see
     * {@link #noun2Modifier(DataHolder, String)}) when both of the following
     * hold: the remainder captured by group 2 is (the start of) a sentence
     * tag, and the word form utility reports the number "p" for the leading
     * word.
     *
     * @param dataholderHandler
     *            DataHolder handler
     */
    public void adjectivesVerification(DataHolder dataholderHandler) {
        PropertyConfigurator.configure("conf/log4j.properties");
        Logger myLogger = Logger.getLogger("learn.adjectivesVerification");

        Iterator<SentenceStructure> iter = dataholderHandler
                .getSentenceHolderIterator();
        while (iter.hasNext()) {
            SentenceStructure sentenceItem = iter.next();
            String sentence = sentenceItem.getSentence();
            if (sentence == null) {
                continue;
            }

            Matcher m = LEADING_NOUN_PATTERN.matcher(sentence);
            if (!m.find()) {
                continue;
            }

            String part1 = m.group(1);
            String part2 = m.group(2);

            // Only pay for the message formatting when it will be logged.
            if (myLogger.isTraceEnabled()) {
                myLogger.trace(String.format("Sentence %s\n"
                        + "\tSentence: %s\n" + "\tPart1: %s\n"
                        + "\tPart2: %s", sentenceItem.getID(), sentence,
                        part1, part2));
            }

            // Both conditions are evaluated up front, as before, so the
            // calls happen regardless of the other condition's outcome.
            boolean condition1 = this.isSentenceTag(dataholderHandler, part2);
            boolean condition2 = StringUtils.equals(this.myLearnerUtility
                    .getWordFormUtility().getNumber(part1), "p");
            if (!(condition1 && condition2)) {
                continue;
            }

            String wrongWord = part1;
            myLogger.trace("\tWrong: " + wrongWord);

            if (!StringUtility.isMatchedNullSafe(wrongWord, "\\w")) {
                continue;
            }

            this.noun2Modifier(dataholderHandler, wrongWord);

            // Apply the same correction to every word returned by the
            // unknown-word lookup for this exact word.
            Set<String> words = dataholderHandler.getWordsFromUnknownWord(
                    null, false, String.format("^%s$", wrongWord), true);
            for (String word : words) {
                this.noun2Modifier(dataholderHandler, word);
            }
        }
    }

    /**
     * Check if a word is (part of) the tag of any sentence.
     *
     * The text is embedded unescaped into the pattern "^%s.*$" that is passed
     * to the data holder.
     * NOTE(review): if the data holder treats this as a regular expression,
     * metacharacters inside raw would be interpreted as such - confirm.
     *
     * @param dataholderHandler
     *            DataHolder handler
     * @param raw
     *            word to check
     * @return true if it is, false otherwise
     */
    public boolean isSentenceTag(DataHolder dataholderHandler, String raw) {
        return dataholderHandler.isExistSentence(false,
                String.format("^%s.*$", raw));
    }

    /**
     * Change the POS tag of a word from noun to modifier.
     *
     * Deletes the POS tags "s", "p" and "n" of the word, records the word
     * with POS "m" in the "modifiers" table, and updates (with a null new
     * tag) every sentence tag that is the word itself or that ends with a
     * space followed by the word.
     *
     * @param dataholderHandler
     *            dataholder handler
     *
     * @param word
     *            the word to change
     * @return always false in the current implementation: the results of the
     *         data holder calls are not inspected, so this method cannot
     *         tell whether an update was made. NOTE(review): callers should
     *         not rely on this value until that is wired up.
     */
    public boolean noun2Modifier(DataHolder dataholderHandler, String word) {
        for (String pos : NOUN_POS_TAGS) {
            dataholderHandler.deleteWordPOS(true, word, true, pos);
        }

        dataholderHandler.updateDataHolder(word, "m", "", "modifiers", 1);

        String oldPattern = String.format("(^%s$|^.* %s$)", word, word);
        dataholderHandler.updateSentenceTag(oldPattern, null);

        return false;
    }
}