package com.formulasearchengine.mathosphere.mlp.ml;
import com.beust.jcommander.internal.Lists;
import com.formulasearchengine.mathosphere.mlp.pojos.Relation;
import com.formulasearchengine.mathosphere.mlp.pojos.Sentence;
import com.formulasearchengine.mathosphere.mlp.pojos.WikiDocumentOutput;
import com.formulasearchengine.mathosphere.mlp.pojos.Word;
import com.formulasearchengine.mathosphere.mlp.text.MachineLearningPatternMatcher;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.parser.nndep.DependencyParser;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
import edu.stanford.nlp.trees.GrammaticalStructure;
import weka.core.*;
import java.util.*;
import java.util.stream.Collectors;
/**
* Created by Leo on 21.12.2016.
* Source [1]: Extracting Textual Descriptions of Mathematical Expressions in Scientific Papers;Giovanni Yoko Kristianto, Goran Topić, Akiko Aizawa
*/
public class WekaUtils {
public static final String MATCH = "match";
public static final String NO_MATCH = "no match";
public static final String DEFINIEN = "definiens";
public static final String IDENTIFIER = "identifier";
public static final String Q_ID = "qId";
public static final String TITLE = "title";
/**
* Feature 11 of [1]
* Relative distance identifier - definiens in words. Normalized by {@link WikiDocumentOutput#maxSentenceLength}
*/
public static final String WORD_DISTANCE = "wordDistance";
/**
* Feature 12 of [1]
* weather or not the definiens is before or after the identifier
*/
public static final String WORD_POSITIONING = "wordPositioning";
public static final String PATTERN_1 = "pattern 1 identifier, definition";
public static final String PATTERN_2 = "pattern 2 definition, identifier";
public static final String PATTERN_3 = "pattern 3 identifier, isOrAre, definition";
public static final String PATTERN_4 = "pattern 4 identifier, isOrAre, the, definition";
public static final String PATTERN_5 = "pattern 5 let, identifier, be, denoted, by, definition";
public static final String PATTERN_6 = "pattern 6 let, identifier, be, denoted, by, the_one_or_more, definition";
public static final String PATTERN_7 = "pattern 7 definition, isOrAre, denoted, by, identifier";
public static final String PATTERN_8 = "pattern 8 definition, isOrAre, denoted, by, the_one_or_more, identifier";
public static final String PATTERN_9 = "pattern 9 identifier, denotes, definition";
public static final String PATTERN_10 = "pattern 10 identifier, denotes, the_one_or_more, definition";
/**
* colon between
*/
public static final String COLON_BETWEEN = "colon between";
/**
* comma between
*/
public static final String COMMMA_BETWEEN = "commma between";
/**
* othermath or identifier between
*/
public static final String OTHER_MATH_BETWEEN = "other math between";
/**
* patentheses
*/
public static final String IDENTIFIER_IN_PARENTHESES = "identifier in parentheses in sentence";
public static final String DEFINIENS_IN_PARENTHESES = "definiens in parentheses in sentence";
//Feature 13 of [1]
public static final String SURFACE_TEXT_AND_POS_TAG_OF_TWO_PRECEDING_AND_FOLLOWING_TOKENS_AROUND_THE_DESC_CANDIDATE = "Surface text and POS tag of two preceding and following tokens around the desc candidate";
//Feature 15 of [1]
public static final String SURFACE_TEXT_AND_POS_TAG_OF_THREE_PRECEDING_AND_FOLLOWING_TOKENS_AROUND_THE_PAIRED_MATH_EXPR = "Surface text and POS tag of three preceding and following tokens around the paired math expr";
//Feature 17 of [1]
public static final String SURFACE_TEXT_OF_THE_FIRST_VERB_THAT_APPEARS_BETWEEN_THE_DESC_CANDIDATE_AND_THE_TARGET_MATH_EXPR = "Surface text of the first verb that appears between the desc candidate and the target math expr";
//Feature 18 of [1]
public static final String GRAPH_DISTANCE = "graphDistance";
//Feature 19 of [1]
public static final String SURFACE_TEXT_AND_POS_TAG_OF_DEPENDENCY_WITH_LENGTH_3_FROM_DEFINIEN = "dependency with length 3 from definien";
//Feature 21 in [1]
public static final String SURFACE_TEXT_AND_POS_TAG_OF_DEPENDENCY_WITH_LENGTH_3_FROM_IDENTIFIER = "dependency with length 3 from identifier";
//Feature 20 of [1]
public static final String INCOMING_TO_DEFINIEN = "direction of " + SURFACE_TEXT_AND_POS_TAG_OF_DEPENDENCY_WITH_LENGTH_3_FROM_DEFINIEN;
//Feature 22 of [1]
public static final String INCOMING_TO_IDENTIFIER = "direction of " + SURFACE_TEXT_AND_POS_TAG_OF_DEPENDENCY_WITH_LENGTH_3_FROM_IDENTIFIER;
public static final String DISTANCE_FROM_FIRST_OCCURRENCE = "distance_from_first_occurence";
public static final String RELATIVE_TERM_FREQUENCY = "relative_term_frequency";
//String constants
public static final String CLASSIFICATION = "classification";
private static final String DEFINIENS_TEXT = "definiens_candidate";
private static final String IDENTIFIER_TEXT = "identifier_candidate";
/**
* Assumed length of the longest possible word (30) times three to accommodate for noun phrases. For normalisation.
*/
public static final double LONGEST_NNP_IN_ENGLISH = 100d;
/**
* According to https://en.wikipedia.org/wiki/Longest_English_sentence
*/
public static final double LONGEST_SENTENCE_IN_ENGISH = 300d;
public static final List<String> nominal = Lists.newArrayList(MATCH, NO_MATCH);
//TODO: move hack to CLI
private static final boolean NO_STRING = false;
private static final boolean NO_DEP = false;
private static final boolean NO_STAT = false;
private static final boolean NO_PM = false;
private static final boolean NO_BASIC = false;
public Instances createInstances(String title) {
ArrayList<Attribute> atts = new ArrayList<>();
//meta information
atts.add(new Attribute(TITLE, (FastVector) null));
atts.add(new Attribute(Q_ID, (FastVector) null));
atts.add(new Attribute(IDENTIFIER, (FastVector) null));
atts.add(new Attribute(DEFINIEN, (FastVector) null));
atts.add(new Attribute(PATTERN_1));
atts.add(new Attribute(PATTERN_2));
atts.add(new Attribute(PATTERN_3));
atts.add(new Attribute(PATTERN_4));
atts.add(new Attribute(PATTERN_5));
atts.add(new Attribute(PATTERN_6));
atts.add(new Attribute(PATTERN_7));
atts.add(new Attribute(PATTERN_8));
atts.add(new Attribute(PATTERN_9));
atts.add(new Attribute(PATTERN_10));
atts.add(new Attribute(COLON_BETWEEN));
atts.add(new Attribute(COMMMA_BETWEEN));
atts.add(new Attribute(OTHER_MATH_BETWEEN));
atts.add(new Attribute(DEFINIENS_IN_PARENTHESES));
atts.add(new Attribute(IDENTIFIER_IN_PARENTHESES));
atts.add(new Attribute(WORD_DISTANCE));
atts.add(new Attribute(WORD_POSITIONING));
atts.add(new Attribute(SURFACE_TEXT_AND_POS_TAG_OF_TWO_PRECEDING_AND_FOLLOWING_TOKENS_AROUND_THE_DESC_CANDIDATE, (FastVector) null));
atts.add(new Attribute(SURFACE_TEXT_AND_POS_TAG_OF_THREE_PRECEDING_AND_FOLLOWING_TOKENS_AROUND_THE_PAIRED_MATH_EXPR, (FastVector) null));
atts.add(new Attribute(SURFACE_TEXT_OF_THE_FIRST_VERB_THAT_APPEARS_BETWEEN_THE_DESC_CANDIDATE_AND_THE_TARGET_MATH_EXPR, (FastVector) null));
atts.add(new Attribute(GRAPH_DISTANCE));
atts.add(new Attribute(SURFACE_TEXT_AND_POS_TAG_OF_DEPENDENCY_WITH_LENGTH_3_FROM_IDENTIFIER, (FastVector) null));
atts.add(new Attribute(INCOMING_TO_IDENTIFIER));
atts.add(new Attribute(SURFACE_TEXT_AND_POS_TAG_OF_DEPENDENCY_WITH_LENGTH_3_FROM_DEFINIEN, (FastVector) null));
atts.add(new Attribute(INCOMING_TO_DEFINIEN));
atts.add(new Attribute(DISTANCE_FROM_FIRST_OCCURRENCE));
atts.add(new Attribute(RELATIVE_TERM_FREQUENCY));
atts.add(new Attribute(CLASSIFICATION, nominal));
Instances result = new Instances(title, atts, 0);
result.setClassIndex(result.numAttributes() - 1);
return result;
}
/**
* Extract the features from the relations and add them to the instances.
*
* @param parser for dependency graph features.
* @param relations the relations to process
* @param title title of the document
* @param qId qid of the document
* @param instances where to add the relations see {@link #createInstances(String)} for the definition.
* @param maxSentenceLength length of the longest sentence in the document, for normalisation.
* @return Instances where all provided relations have been added
*/
public Instances addRelationsToInstances(DependencyParser parser, List<Relation> relations, String title, String qId, Instances instances, double maxSentenceLength) {
for (Relation relation : relations) {
addRelationToInstances(parser, getPrecomputedGraphStore(), title, qId, instances, maxSentenceLength, relation);
}
return instances;
}
public void addRelationToInstances(DependencyParser parser, Map<Sentence, GrammaticalStructure> precomputedGraphs, String title, String qId, Instances instances, double maxSentenceLength, Relation relation) {
double[] patternMatches = new MachineLearningPatternMatcher().match(relation.getSentence(), relation.getIdentifier(), relation.getDefinition(), relation.getIdentifierPosition(), relation.getWordPosition());
double[] values = new double[instances.numAttributes()];
addStringValue(values, instances, TITLE, title);
addStringValue(values, instances, Q_ID, qId);
addStringValue(values, instances, IDENTIFIER, relation.getIdentifier());
addStringValue(values, instances, DEFINIEN, relation.getDefinition());
//distance between identifier and definiens candidate
int wordDistance = relation.getIdentifierPosition() - relation.getWordPosition();
values[instances.attribute(WORD_DISTANCE).index()] = (double) Math.abs(wordDistance) / maxSentenceLength;
//weather or not the definiens is before or after the identifier
if (!NO_PM) {
for (int i = 0; i < patternMatches.length - 5; i++) {
values[instances.attribute(PATTERN_1).index() + i] = patternMatches[i];
}
}
if (!NO_BASIC) {
values[instances.attribute(WORD_POSITIONING).index()] = wordDistance > 0 ? 1 : 0;
values[instances.attribute(COLON_BETWEEN).index()] = patternMatches[10];
values[instances.attribute(COMMMA_BETWEEN).index()] = patternMatches[11];
values[instances.attribute(OTHER_MATH_BETWEEN).index()] = patternMatches[12];
values[instances.attribute(DEFINIENS_IN_PARENTHESES).index()] = patternMatches[13];
values[instances.attribute(IDENTIFIER_IN_PARENTHESES).index()] = patternMatches[14];
}
addStringFeatures(values, instances, relation);
addDependencyTreeFeatures(parser, precomputedGraphs, values, instances, relation, maxSentenceLength);
values[instances.attribute(DISTANCE_FROM_FIRST_OCCURRENCE).index()] = relation.getDistanceFromFirstIdentifierOccurence();
values[instances.attribute(RELATIVE_TERM_FREQUENCY).index()] = relation.getRelativeTermFrequency();
if (NO_STAT) {
values[instances.attribute(DISTANCE_FROM_FIRST_OCCURRENCE).index()] = 0;
values[instances.attribute(RELATIVE_TERM_FREQUENCY).index()] = 0;
values[instances.attribute(WORD_DISTANCE).index()] = 0;
}
values[values.length - 1] = relation.getRelevance() > 1 ? nominal.indexOf(MATCH) : nominal.indexOf(NO_MATCH);
DenseInstance instance = new DenseInstance(1.0, values);
instances.add(instance);
}
/**
* Adds {@link #SURFACE_TEXT_AND_POS_TAG_OF_TWO_PRECEDING_AND_FOLLOWING_TOKENS_AROUND_THE_DESC_CANDIDATE},
* {@link #SURFACE_TEXT_AND_POS_TAG_OF_THREE_PRECEDING_AND_FOLLOWING_TOKENS_AROUND_THE_PAIRED_MATH_EXPR} and
* {@link #SURFACE_TEXT_OF_THE_FIRST_VERB_THAT_APPEARS_BETWEEN_THE_DESC_CANDIDATE_AND_THE_TARGET_MATH_EXPR} to values.
*
* @param values values object.
* @param instances instances where the values will be added.
* @param relation the relation from whitch to extract the features.
*/
private void addStringFeatures(double[] values, Instances instances, Relation relation) {
if (NO_STRING) {
addStringValue(values, instances, SURFACE_TEXT_AND_POS_TAG_OF_TWO_PRECEDING_AND_FOLLOWING_TOKENS_AROUND_THE_DESC_CANDIDATE, "");
addStringValue(values, instances, SURFACE_TEXT_AND_POS_TAG_OF_THREE_PRECEDING_AND_FOLLOWING_TOKENS_AROUND_THE_PAIRED_MATH_EXPR, "");
addStringValue(values, instances, SURFACE_TEXT_OF_THE_FIRST_VERB_THAT_APPEARS_BETWEEN_THE_DESC_CANDIDATE_AND_THE_TARGET_MATH_EXPR, "");
} else {
int wordDistance = relation.getIdentifierPosition() - relation.getWordPosition();
//Surface text and POS tag of two preceding and following tokens around the desc candidate
List<Word> pre = Lists.newArrayList(relation.getSentence().getWords().subList(Math.max(0, relation.getWordPosition() - 2), relation.getWordPosition()));
List<Word> post = Lists.newArrayList(relation.getSentence().getWords().subList(relation.getWordPosition() + 1, Math.min(relation.getWordPosition() + 3, relation.getSentence().getWords().size())));
//replace the occurrences of the identifier
replaceWord(wordDistance, pre, post, IDENTIFIER_TEXT);
String twoBeforeAndAfter = wordListToSimpleString(pre) + " " + wordListToSimpleString(post);
addStringValue(values, instances, SURFACE_TEXT_AND_POS_TAG_OF_TWO_PRECEDING_AND_FOLLOWING_TOKENS_AROUND_THE_DESC_CANDIDATE, twoBeforeAndAfter);
//Surface text and POS tag of three preceding and following tokens around the paired math expr
pre = Lists.newArrayList(relation.getSentence().getWords().subList(Math.max(0, relation.getIdentifierPosition() - 3), relation.getIdentifierPosition()));
post = Lists.newArrayList(relation.getSentence().getWords().subList(relation.getIdentifierPosition() + 1, Math.min(relation.getIdentifierPosition() + 4, relation.getSentence().getWords().size())));
//replace the occurrences of the definiens
replaceWord(-wordDistance, pre, post, DEFINIENS_TEXT);
String threePrecedingAndFollowing = wordListToSimpleString(pre) + " " + wordListToSimpleString(post);
addStringValue(values, instances, SURFACE_TEXT_AND_POS_TAG_OF_THREE_PRECEDING_AND_FOLLOWING_TOKENS_AROUND_THE_PAIRED_MATH_EXPR, threePrecedingAndFollowing);
List<Word> wordsInbetween;
if (relation.getIdentifierPosition() > relation.getWordPosition()) {
wordsInbetween = relation.getSentence().getWords().
subList(relation.getWordPosition() + 1, relation.getIdentifierPosition());
} else {
wordsInbetween = relation.getSentence().getWords().
subList(relation.getIdentifierPosition() + 1, relation.getWordPosition());
}
//Surface text of the first verb that appears between the desc candidate and the target math expr
Optional<Word> firstVerb = wordsInbetween.stream().filter(w -> w.getPosTag().startsWith("VB")).findFirst();
if (firstVerb.isPresent()) {
addStringValue(values, instances, SURFACE_TEXT_OF_THE_FIRST_VERB_THAT_APPEARS_BETWEEN_THE_DESC_CANDIDATE_AND_THE_TARGET_MATH_EXPR, firstVerb.get().getWord());
} else {
addStringValue(values, instances, SURFACE_TEXT_OF_THE_FIRST_VERB_THAT_APPEARS_BETWEEN_THE_DESC_CANDIDATE_AND_THE_TARGET_MATH_EXPR, "");
}
}
}
/**
* Adds {@link #SURFACE_TEXT_AND_POS_TAG_OF_DEPENDENCY_WITH_LENGTH_3_FROM_DEFINIEN}, {@link #SURFACE_TEXT_AND_POS_TAG_OF_DEPENDENCY_WITH_LENGTH_3_FROM_IDENTIFIER}, {@link #INCOMING_TO_DEFINIEN} and
* {@link #INCOMING_TO_IDENTIFIER} to values.
*
* @param values values object.
* @param graphs map to store graphs that were computed previously. may be null.
* @param instances instances where the values will be added.
* @param relation the relation from which to extract the features.
*/
private void addDependencyTreeFeatures(DependencyParser parser, Map<Sentence, GrammaticalStructure> graphs, double[] values, Instances instances, Relation relation, double maxSentenceLength) {
if (NO_DEP) {
addStringValue(values, instances, SURFACE_TEXT_AND_POS_TAG_OF_DEPENDENCY_WITH_LENGTH_3_FROM_IDENTIFIER, "");
addStringValue(values, instances, SURFACE_TEXT_AND_POS_TAG_OF_DEPENDENCY_WITH_LENGTH_3_FROM_DEFINIEN, "");
values[instances.attribute(GRAPH_DISTANCE).index()] = 0;
values[instances.attribute(INCOMING_TO_IDENTIFIER).index()] = 0;
values[instances.attribute(INCOMING_TO_DEFINIEN).index()] = 0;
} else {
GrammaticalStructure dependencyTree;
if (graphs != null && graphs.containsKey(relation.getSentence())) {
dependencyTree = graphs.get(relation.getSentence());
} else {
List<TaggedWord> taggedSentence = new ArrayList<>();
for (Word word : relation.getSentence().getWords()) {
taggedSentence.add(new TaggedWord(word.getWord(), word.getPosTag()));
}
//create dependency graph
dependencyTree = parser.predict(taggedSentence);
//store to avoid recomputation
if (graphs != null)
graphs.put(relation.getSentence(), dependencyTree);
}
SemanticGraph semanticGraph = new SemanticGraph(dependencyTree.typedDependencies());
IndexedWord identifier = semanticGraph.getNodeByIndex(relation.getIdentifierPosition() + 1);
IndexedWord definiens = semanticGraph.getNodeByIndex(relation.getWordPosition() + 1);
//shortest edge path for distance
List<SemanticGraphEdge> edgesOnPath = semanticGraph.getShortestUndirectedPathEdges(identifier, definiens);
int distance = edgesOnPath.size();
values[instances.attribute(GRAPH_DISTANCE).index()] = (double) distance / maxSentenceLength;
//shortest node path for dependencies
List<IndexedWord> fromIdentifier = semanticGraph.getShortestUndirectedPathNodes(identifier, definiens);
removeUnwanted(fromIdentifier);
List<IndexedWord> fromDefinien = new ArrayList<>(fromIdentifier);
Collections.reverse(fromDefinien);
//we don't want the first, since that would be the identifier
replaceWord(definiens.index(), fromIdentifier, DEFINIENS_TEXT);
replaceWord(identifier.index(), fromDefinien, IDENTIFIER_TEXT);
List<Word> threeFromIdentifier = getDependencyWithLengthOfThree(fromIdentifier);
addStringValue(values, instances, SURFACE_TEXT_AND_POS_TAG_OF_DEPENDENCY_WITH_LENGTH_3_FROM_IDENTIFIER, wordListToSimpleString(threeFromIdentifier));
List<Word> threeFromDefinien = getDependencyWithLengthOfThree(fromDefinien);
addStringValue(values, instances, SURFACE_TEXT_AND_POS_TAG_OF_DEPENDENCY_WITH_LENGTH_3_FROM_DEFINIEN, wordListToSimpleString(threeFromDefinien));
values[instances.attribute(INCOMING_TO_IDENTIFIER).index()] = edgesOnPath.get(0).getDependent().equals(identifier) ? 1 : 0;
values[instances.attribute(INCOMING_TO_DEFINIEN).index()] = edgesOnPath.get(edgesOnPath.size() - 1).getDependent().equals(definiens) ? 1 : 0;
}
if (NO_STRING) {
addStringValue(values, instances, SURFACE_TEXT_AND_POS_TAG_OF_DEPENDENCY_WITH_LENGTH_3_FROM_IDENTIFIER, "");
addStringValue(values, instances, SURFACE_TEXT_AND_POS_TAG_OF_DEPENDENCY_WITH_LENGTH_3_FROM_DEFINIEN, "");
}
}
/**
* Get a list of three words, beginning from the second word. Converts {@link IndexedWord} to {@link Word}
*
* @param fromIdentifierOrDefiniens List of indexed words starting with the source (identifier or definiens).
* @return list with at most three words.
*/
public List<Word> getDependencyWithLengthOfThree(List<IndexedWord> fromIdentifierOrDefiniens) {
return fromIdentifierOrDefiniens.subList(
Math.min(1, fromIdentifierOrDefiniens.size()),
Math.min(4, fromIdentifierOrDefiniens.size())
).stream().map(iw -> new Word(iw.word(), iw.tag())).collect(Collectors.toList());
}
/**
* Replace the surface test of the word with the specific index in the sentence.
* I.e. to hide the surface text of the definiens and identifier from the machine learner.
*
* @param index index of the word in the sentence.
* @param words list of words.
* @param text the new surface text.
*/
public void replaceWord(int index, List<IndexedWord> words, String text) {
for (IndexedWord iw : words) {
if (iw.index() == index) {
iw.setWord(text);
break;
}
}
}
/**
* Replace teh surface test of the word with the specific index in the sentence. Pre is in reverse order and post is in order.
* I.e. to hide the surface text of the definiens and identifier from the machine learner.
*
* @param index index of the word in the sentence.
* @param pre list of words before the identifier or definiens.
* @param post list of words after the identifier or definiens.
* @param text the new surface text.
*/
public void replaceWord(int index, List<Word> pre, List<Word> post, String text) {
Word replacement;
if (index > 0 && index <= post.size()) {
replacement = new Word(text, post.get(index - 1).getPosTag());
post.remove(index - 1);
post.add(index - 1, replacement);
}
if (index < 0 && Math.abs(index) <= pre.size()) {
int size = pre.size();
replacement = new Word(text, pre.get(size + index).getPosTag());
pre.remove(size + index);
pre.add(size + index, replacement);
}
}
/**
* Convenience method to add a string value.
*
* @param data
* @param instances
* @param field
* @param string
*/
public void addStringValue(double[] data, Instances instances, String field, String string) {
data[instances.attribute(field).index()] = instances.attribute(field).addStringValue(string);
}
/**
* Gets a string from a list of words.
*
* @param words the words to convert.
* @return String containing surface text and pos tag of the words.
*/
public String wordListToSimpleString(List<Word> words) {
StringBuilder stringBuilder = new StringBuilder();
for (Word w : words) {
//do not include things that the tokenizer eats anyway
if (!tokenisationDelimiters.contains(w.getWord()) && !tokenisationDelimitersPOSTags.contains(w.getPosTag())) {
stringBuilder.append(w.getWord().replaceAll(" ", "_").replaceAll(tokenisationDelimitersRegex, ""));
stringBuilder.append("_");
stringBuilder.append(w.getPosTag());
stringBuilder.append(" ");
}
}
return stringBuilder.toString();
}
private void removeUnwanted(List<IndexedWord> words) {
Iterator i = words.iterator();
while (i.hasNext()) {
IndexedWord w = (IndexedWord) i.next();
if (tokenisationDelimitersPOSTags.contains(w.tag())) {
i.remove();
} else if (tokenisationDelimiters.contains(w.word())) {
i.remove();
}
}
}
/**
* {@link weka.core.tokenizers.NGramTokenizer#m_Delimiters}
*/
private static String tokenisationDelimiters = " \r\n\t\\.,;:'\"\\(\\)\\?\\!";
private static String tokenisationDelimitersRegex = "[ \r\n\t.,;:'\"()?!]";
private static List<String> tokenisationDelimitersPOSTags =
Arrays.asList(new String[]{"-LRB-", "-RRB-", "$", "#", ".", ",", ":", "\"", "(", ")", "``", "'", "`", "\'\'"});
public static double average(double[] doubles) {
return Arrays.stream(doubles).sum() / doubles.length;
}
public Map<Sentence, GrammaticalStructure> getPrecomputedGraphStore() {
return new HashMap<Sentence, GrammaticalStructure>();
}
}