/******************************************************************************* * Copyright 2015-2016 - CNRS (Centre National de Recherche Scientifique) * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. * *******************************************************************************/ package eu.project.ttc.models.scored; import com.google.common.base.Objects; import com.google.common.primitives.Doubles; import eu.project.ttc.models.Term; import eu.project.ttc.models.TermVariation; import eu.project.ttc.models.TermWord; import eu.project.ttc.utils.StringUtils; import eu.project.ttc.utils.TermUtils; public class ScoredVariation extends ScoredTermOrVariant { private static final double THRESHOLD_EXTENSION_GAIN = 0.333333d; private TermVariation variation; private ScoredTerm extensionAffix; private boolean extensionAffixSet = false; public ScoredVariation(ScoredModel scoredModel, TermVariation tv) { super(scoredModel, tv.getVariant()); this.variation = tv; } public TermVariation getTermVariation() { return variation; } public ScoredTerm getExtensionAffix() { if(!extensionAffixSet) { Term rawTerm = null; try { rawTerm = TermUtils.getExtensionAffix( scoredModel.getTermIndex(), getBase().getTerm(), getVariant().getTerm()); } catch (IllegalStateException e) { rawTerm = null; } this.extensionAffix = rawTerm == null ? null : scoredModel.getAdapter(rawTerm); extensionAffixSet = true; // System.out.format("Ext affix for term (%s,%s) is %s\n", // getBase().getTerm(), // getVariant().getTerm(), // extensionAffix // ); } return this.extensionAffix; } public ScoredTerm getVariant() { return this.scoredModel.getAdapter(this.variation.getVariant()); } public ScoredTerm getBase() { return this.scoredModel.getAdapter(this.variation.getBase()); } private static final String LABEL_FORMAT = "S:%2.0f,E:%2.0f(G:%2.0f/WR:%2.0f/O:%2.0f),F:%2.0f,I:%2.0f,V:%2.0f"; public String getLabel() { return String.format(LABEL_FORMAT, 100*getStrictnessScore(), 100*getExtensionScore(), 100*getExtensionGainScore(), 100*getExtensionSpecScore(), 100*getExtensionOrthographicScore(), 100*getFrequencyScore(), 100*getVariantIndependanceScore(), 100*getVariationScore() ).trim(); // return String.format("%2.0f", // getVariationScore() // ).trim(); } /* ************************************* * Scores * ************************************** */ public double getVariantIndependanceScore() { int affixInclusion = 0; for(ScoredVariation sv:getBase().getVariations()) { if(sv == this) continue; if(TermUtils.isIncludedIn(getTerm(), sv.getTerm())) affixInclusion += sv.getFrequency(); } return 1 - Doubles.min(1d,((double)affixInclusion)/getFrequency()); } public double getStrictnessScore() { return TermUtils.getStrictness(variation.getVariant(), getBase().getTerm()); } public double getExtensionOrthographicScore() { if(getExtensionAffix() == null) return 0; else { double score = 1d; for(TermWord tw:getExtensionAffix().getTerm().getWords()) score = score * StringUtils.getOrthographicScore(tw.getWord().getLemma()); return score; } } public double getExtensionGainScore() { if(getExtensionAffix() == null || getExtensionAffix().getFrequency() == 0) return THRESHOLD_EXTENSION_GAIN; else return ((double)getFrequency())/getExtensionAffix().getFrequency(); } public double getExtensionSpecScore() { if(getExtensionAffix() == null) return 0; else return getExtensionAffix().getWRLog() / getBase().getMaxExtensionAffixWRLog(); } public double getExtensionScore() { double root = 2d; double w = 3d; // gain weight return getExtensionOrthographicScore() * Math.pow((w*Math.pow(getExtensionGainScore(), root) + Math.pow(getExtensionSpecScore(), root))/(1+w),1d/root); // return Math.pow((Math.pow(getExtensionSpec(), root) + Math.pow(getExtensionGain(), root))/2,1d/root); } public double getFrequencyScore() { return (double)getFrequency()/getBase().getMaxVariationFrequency(); } public double getVariationScore() { double score = getStrictnessScore() == 1d ? 0.9 + 0.1*getFrequencyScore() : 0.75*getExtensionScore() + 0.25*getFrequencyScore(); return score; } @Override public boolean equals(Object obj) { if (obj instanceof ScoredVariation) { ScoredVariation sv = (ScoredVariation) obj; return Objects.equal(this.getTerm(), sv.getTerm()) && Objects.equal(this.getVariant(),sv.getVariant()); } else return false; } }