/**
*
*/
package rainbownlp.machinelearning.featurecalculator.link;
import java.util.ArrayList;
import java.util.List;
import rainbownlp.core.Artifact;
import rainbownlp.core.FeatureValuePair;
import rainbownlp.core.Phrase;
import rainbownlp.core.PhraseLink;
import rainbownlp.core.FeatureValuePair.FeatureName;
import rainbownlp.machinelearning.IFeatureCalculator;
import rainbownlp.machinelearning.MLExample;
import rainbownlp.machinelearning.MLExampleFeature;
/**
 * Feature calculator for link examples that describes what lies between the
 * two phrases of the example's {@link PhraseLink}.
 *
 * <p>For a given {@link MLExample} it attaches:
 * <ul>
 *   <li>one {@code LinkWordBetween} feature per artifact found while walking
 *       from the artifact after the first phrase's end artifact up to (but
 *       not including) the second phrase's start artifact;</li>
 *   <li>one {@code LinkBetweenWordCount} feature holding how many artifacts
 *       that walk visited;</li>
 *   <li>one {@code LinkBetweenPhraseCount} feature holding the size of the
 *       list returned by {@code Phrase.getPhrasesBetweenPhrases}.</li>
 * </ul>
 *
 * <p>2-gram features ({@code Link2GramBetween}, {@code Link2GramFrom},
 * {@code Link2GramTo}) were once computed here as well; they are disabled and
 * their code has been removed.
 *
 * @author ehsan
 */
public class ConceptsBetweenWords implements IFeatureCalculator {

    /**
     * Command-line entry point. Currently a no-op: the driver that used to run
     * this calculator over the training examples is disabled.
     *
     * @param args ignored
     * @throws Exception never thrown by the current body; kept so the
     *                   signature stays unchanged for existing callers
     */
    public static void main(String[] args) throws Exception {
        // Intentionally empty.
    }

    /**
     * Computes the between-words and between-phrases features for one example
     * and stores them through {@code MLExampleFeature.setFeatureExample}.
     *
     * @param exampleToProcess example whose related phrase link is inspected
     */
    @Override
    public void calculateFeatures(MLExample exampleToProcess) {
        PhraseLink phraseLink = exampleToProcess.getRelatedPhraseLink();
        Phrase phrase1 = phraseLink.getFirstPhrase();
        Phrase phrase2 = phraseLink.getSecondPhrase();

        // Number of words between the two phrases.
        int wordsBetween = addWordBetweenFeatures(exampleToProcess, phrase1, phrase2);
        FeatureValuePair wordCountFeature = FeatureValuePair.getInstance(
                FeatureName.LinkBetweenWordCount,
                String.valueOf(wordsBetween));
        MLExampleFeature.setFeatureExample(exampleToProcess, wordCountFeature);

        // Number of phrases between the two phrases.
        addPhraseCountFeature(exampleToProcess, phrase1, phrase2);
    }

    /**
     * Attaches one {@code LinkWordBetween} feature for every artifact between
     * the two phrases and reports how many artifacts were visited.
     *
     * @return the number of artifacts visited by the walk
     */
    private int addWordBetweenFeatures(MLExample exampleToProcess, Phrase phrase1, Phrase phrase2) {
        Artifact current = phrase1.getEndArtifact().getNextArtifact();
        Artifact stopAt = phrase2.getStartArtifact();

        // Primitive counter: a boxed Integer would be unboxed and re-boxed on
        // every increment.
        int visited = 0;

        // NOTE(review): the walk only stops at stopAt or at the end of the
        // artifact chain, so if phrase2 does not start after phrase1 every
        // remaining artifact is emitted as a "between" word - confirm that
        // callers always order the phrases.
        while (current != null && !current.equals(stopAt)) {
            FeatureValuePair wordBetweenFeature = FeatureValuePair.getInstance(
                    FeatureName.LinkWordBetween,
                    current.getContent(), "1");
            MLExampleFeature.setFeatureExample(exampleToProcess, wordBetweenFeature);

            current = current.getNextArtifact();
            visited++;
        }
        return visited;
    }

    /**
     * Attaches the {@code LinkBetweenPhraseCount} feature, whose value is the
     * size of the list returned by {@code Phrase.getPhrasesBetweenPhrases} for
     * the two phrases and the example's associated file path.
     */
    private void addPhraseCountFeature(MLExample exampleToProcess, Phrase phrase1, Phrase phrase2) {
        List<Phrase> phrasesBetween = Phrase.getPhrasesBetweenPhrases(
                phrase1, phrase2, exampleToProcess.getAssociatedFilePath());
        FeatureValuePair phraseCountFeature = FeatureValuePair.getInstance(
                FeatureName.LinkBetweenPhraseCount,
                String.valueOf(phrasesBetween.size()));
        MLExampleFeature.setFeatureExample(exampleToProcess, phraseCountFeature);
    }
}