/*******************************************************************************
* Copyright 2015-2016 - CNRS (Centre National de Recherche Scientifique)
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*******************************************************************************/
package eu.project.ttc.test.unit;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.assertj.core.util.Lists;
import org.mockito.Mockito;
import com.google.common.base.Preconditions;
import eu.project.ttc.models.Component;
import eu.project.ttc.models.CompoundType;
import eu.project.ttc.models.Term;
import eu.project.ttc.models.TermBuilder;
import eu.project.ttc.models.TermIndex;
import eu.project.ttc.models.VariationType;
import eu.project.ttc.models.Word;
/**
 * Test helper that builds {@link Term}s into a {@link TermIndex} from compact
 * textual specs, and declares variations and word compositions on them.
 *
 * <p>A term word spec has the form {@code label:lemma|stem}; the
 * {@code |stem} part may be omitted, in which case the lemma is used as stem.
 * A component spec has the form {@code substring|lemma}.
 */
public class TermFactory {

    /** Term word spec with an explicit stem: {@code label:lemma|stem}. */
    private static final Pattern TERM_WORD_PATTERN = Pattern.compile("(\\w+)\\:(\\S+)(?:\\|(\\S+))");

    /** Fallback term word spec without a stem: {@code label:lemma}. */
    private static final Pattern STEMLESS_TERM_WORD_PATTERN = Pattern.compile("(\\w+)\\:(\\S+)");

    /** Component spec: {@code substring|lemma}. */
    private static final Pattern COMPONENT_PATTERN = Pattern.compile("(\\S+)\\|(\\S+)");

    private final TermIndex termIndex;

    /**
     * @param termIndex the index in which terms are created and looked up
     */
    public TermFactory(TermIndex termIndex) {
        this.termIndex = termIndex;
    }

    /**
     * Builds a term made of one word per spec, in the given order, and
     * returns the result of {@link TermBuilder#createAndAddToIndex()}.
     *
     * @param termWordSpecs word specs, each {@code label:lemma|stem} or
     *        {@code label:lemma} (the stem then defaults to the lemma)
     * @return the term produced by the builder
     * @throws IllegalArgumentException if a spec matches neither form
     */
    public Term create(String... termWordSpecs) {
        TermBuilder builder = TermBuilder.start(termIndex);
        for (String termWordSpec : termWordSpecs) {
            // The stemmed pattern is tried first so that any spec containing
            // a '|' is split exactly as before; only specs it rejects are
            // re-examined as stemless.
            Matcher matcher = TERM_WORD_PATTERN.matcher(termWordSpec);
            boolean hasStem = matcher.find();
            if (!hasStem) {
                matcher = STEMLESS_TERM_WORD_PATTERN.matcher(termWordSpec);
                Preconditions.checkArgument(matcher.find(), "Bad term word spec: %s", termWordSpec);
            }
            String label = matcher.group(1);
            String lemma = matcher.group(2);
            // Matcher.groupCount() is a property of the pattern, not of the
            // match, so it cannot be used to detect a missing stem.
            String stem = hasStem ? matcher.group(3) : lemma;
            builder.addWord(lemma, stem, label);
        }
        return builder.createAndAddToIndex();
    }

    /**
     * Adds to {@code term1} a variation of type
     * {@link VariationType#IS_PREFIX_OF} targeting {@code term2}, with an
     * empty info string.
     *
     * @throws IllegalArgumentException if either term is not found in the index
     */
    public void addPrefix(Term term1, Term term2) {
        termsExist(term1, term2);
        term1.addTermVariation(term2, VariationType.IS_PREFIX_OF, "");
    }

    /**
     * Adds to {@code term1} a variation of type
     * {@link VariationType#DERIVES_INTO} targeting {@code term2}.
     *
     * @param type the info string passed along with the variation
     * @throws IllegalArgumentException if either term is not found in the index
     */
    public void addDerivesInto(String type, Term term1, Term term2) {
        termsExist(term1, term2);
        term1.addTermVariation(term2, VariationType.DERIVES_INTO, type);
    }

    /**
     * Checks that every given term can be retrieved from the index by its
     * grouping key.
     *
     * @throws IllegalArgumentException on the first term not found
     */
    private void termsExist(Term... terms) {
        for (Term t : terms) {
            Preconditions.checkArgument(
                    this.termIndex.getTermByGroupingKey(t.getGroupingKey()) != null,
                    "Term %s does not exists in term index",
                    t.getGroupingKey());
        }
    }

    /**
     * Sets the composition of the indexed word whose lemma is
     * {@code wordLemma}.
     *
     * <p>Each component spec is {@code substring|lemma}. The component's
     * offsets are those of the first occurrence of {@code substring} in
     * {@code wordLemma}.
     *
     * @param type the compound type passed to {@link Word#setComposition}
     * @param wordLemma lemma used to look the word up in the index
     * @param componentSpecs component specs, in the order they are added
     * @throws IllegalArgumentException if the word is not in the index, a spec
     *         is malformed, or a spec's substring does not occur in
     *         {@code wordLemma}
     */
    public void wordComposition(CompoundType type, String wordLemma, String... componentSpecs) {
        Word word = this.termIndex.getWord(wordLemma);
        Preconditions.checkArgument(
                word != null,
                "No such word: %s", wordLemma);
        List<Component> components = Lists.newArrayList();
        for (String componentSpec : componentSpecs) {
            Matcher matcher = COMPONENT_PATTERN.matcher(componentSpec);
            Preconditions.checkArgument(matcher.find(), "Bad component word spec: %s", componentSpec);
            String substring = matcher.group(1);
            String lemma = matcher.group(2);
            int start = wordLemma.indexOf(substring);
            // indexOf yields -1 for an absent substring, which would
            // otherwise produce a component with negative offsets.
            Preconditions.checkArgument(
                    start >= 0,
                    "Component substring %s not found in word %s", substring, wordLemma);
            components.add(new Component(lemma, start, start + substring.length()));
        }
        word.setComposition(type, components);
    }

    /**
     * Creates a Mockito mock of {@link Term} whose grouping key, frequency,
     * rank and specificity getters return the given values.
     */
    public static Term termMock(String groupingKey, int freq, int rank, double specificity) {
        Term term = Mockito.mock(Term.class);
        Mockito.when(term.getGroupingKey()).thenReturn(groupingKey);
        Mockito.when(term.getFrequency()).thenReturn(freq);
        Mockito.when(term.getRank()).thenReturn(rank);
        Mockito.when(term.getSpecificity()).thenReturn(specificity);
        return term;
    }
}