package rainbownlp.machinelearning.featurecalculator.sentence; import rainbownlp.core.Artifact; import rainbownlp.core.Artifact.Type; import rainbownlp.core.FeatureValuePair; import rainbownlp.core.PhraseLink; import rainbownlp.machinelearning.IFeatureCalculator; import rainbownlp.machinelearning.MLExample; import rainbownlp.machinelearning.MLExampleFeature; import rainbownlp.util.StringUtil; public class SentenceNGram implements IFeatureCalculator { @Override public void calculateFeatures(MLExample exampleToProcess) { if(exampleToProcess.getRelatedPhrase() == null) { PhraseLink sentencesLink = exampleToProcess.getRelatedPhraseLink(); Artifact firstSentence = sentencesLink.getFirstPhrase().getStartArtifact(); Artifact secondSentence = sentencesLink.getSecondPhrase().getStartArtifact(); calculateSentenceNGram(1, firstSentence, exampleToProcess, "FirstSentence1Gram"); calculateSentenceNGram(1, secondSentence, exampleToProcess, "SecondSentence1Gram"); calculateSentenceNGram(2, firstSentence, exampleToProcess, "FirstSentence2Gram"); calculateSentenceNGram(2, secondSentence, exampleToProcess, "SecondSentence2Gram"); }else { Artifact sentence = exampleToProcess.getRelatedPhrase().getStartArtifact(); if(sentence.getArtifactType() == Type.Sentence) { calculateSentenceNGram(1, sentence, exampleToProcess, "Sentence1Gram"); calculateSentenceNGram(2, sentence, exampleToProcess, "Sentence2Gram"); } } } void calculateSentenceNGram(int n, Artifact sentence, MLExample example, String featureName) { String[] word_text = StringUtil.getTermByTermWordnet(sentence.getContent().toLowerCase()).split(" "); for(int i=0;i<word_text.length-n;i++) { String cur_content = ""; for(int j=0;j<n;j++) { int new_part_index = i+j; if(!word_text[new_part_index].trim().equals("")) { cur_content = cur_content.concat("_"+word_text[new_part_index].trim()); } } cur_content = cur_content.replaceAll("^_", ""); FeatureValuePair value_pair = FeatureValuePair.getInstance( featureName, cur_content, "1"); MLExampleFeature.setFeatureExample(example,value_pair); } } }