/*******************************************************************************
* Copyright 2015-2016 - CNRS (Centre National de Recherche Scientifique)
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*******************************************************************************/
package eu.project.ttc.models.scored;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import com.google.common.base.MoreObjects;
import com.google.common.collect.Lists;
import eu.project.ttc.models.Term;
import eu.project.ttc.models.TermOccurrence;
import eu.project.ttc.utils.StringUtils;
import eu.project.ttc.utils.TermOccurrenceUtils;
/**
 * Common base for scored wrappers around a {@link Term}.
 *
 * <p>An instance keeps its own mutable copy of the term's occurrences, so
 * occurrences can be discarded here (see
 * {@link #removeOverlappingOccurrences(Iterable)}) without modifying the
 * collection returned by the term. Frequency-based scores are computed from
 * the size of that copy.
 */
public abstract class ScoredTermOrVariant {

    /** Private working copy of the term's occurrences; shrinks when overlaps are removed. */
    private List<TermOccurrence> occurrences;

    /**
     * Starts at {@link Integer#MIN_VALUE} and is put back to that value by
     * {@link #reset()}. It is never read inside this class;
     * {@link #getFrequency()} does not use it.
     */
    protected int frequency = Integer.MIN_VALUE;

    /** The wrapped term. */
    protected Term term;

    /** The model this scored element belongs to; supplies the term index used for normalization. */
    protected ScoredModel scoredModel;

    /**
     * Wraps the given term, copying its occurrences into a new linked list,
     * and then invokes {@link #reset()}.
     *
     * @param scoredModel the owning scored model
     * @param t           the term to wrap
     */
    public ScoredTermOrVariant(ScoredModel scoredModel, Term t) {
        this.term = t;
        this.scoredModel = scoredModel;
        this.occurrences = Lists.newLinkedList(t.getOccurrences());
        reset();
    }

    /**
     * Drops from this element's occurrences every occurrence that overlaps,
     * according to {@link TermOccurrenceUtils#areOverlapping}, with at least
     * one of the given occurrences.
     *
     * @param otherOccurrences the occurrences to test against
     * @return the number of occurrences that were removed
     */
    public int removeOverlappingOccurrences(Iterable<TermOccurrence> otherOccurrences) {
        int removed = 0;
        for (TermOccurrence other : otherOccurrences) {
            // Removal goes through the iterator so the list can shrink while being traversed.
            for (Iterator<TermOccurrence> own = occurrences.iterator(); own.hasNext();) {
                TermOccurrence candidate = own.next();
                if (!TermOccurrenceUtils.areOverlapping(other, candidate)) {
                    continue;
                }
                own.remove();
                removed++;
            }
        }
        return removed;
    }

    /**
     * @return the wrapped term
     */
    public Term getTerm() {
        return term;
    }

    /**
     * @return a read-only view of the occurrences currently kept by this element
     */
    public Collection<TermOccurrence> getOccurrences() {
        return Collections.unmodifiableCollection(occurrences);
    }

    /**
     * @return the number of occurrences currently kept by this element
     */
    public int getFrequency() {
        return occurrences.size();
    }

    /**
     * Computes the ratio between this element's normalized frequency and the
     * general frequency norm of its term.
     * NOTE(review): "WR" presumably stands for "weirdness ratio" - confirm.
     *
     * @return normalized frequency divided by general frequency
     */
    public double getWR() {
        double normalized = getNormalizedFrequency();
        double general = getGeneralFrequency();
        return normalized / general;
    }

    /**
     * @return 1000 times the current frequency, divided by the number of word
     *         annotations reported by the scored model's term index
     */
    private double getNormalizedFrequency() {
        double scaledFrequency = 1000 * (double) getFrequency();
        return scaledFrequency / scoredModel.getTermIndex().getWordAnnotationsNum();
    }

    /**
     * @return the general frequency norm reported by the wrapped term
     */
    private double getGeneralFrequency() {
        return getTerm().getGeneralFrequencyNorm();
    }

    /**
     * @return the natural logarithm of {@code 1 + getWR()}
     */
    public double getWRLog() {
        double shifted = 1 + getWR();
        return Math.log(shifted);
    }

    /**
     * Puts {@link #frequency} back to {@link Integer#MIN_VALUE}. Invoked by
     * the constructor.
     */
    public void reset() {
        this.frequency = Integer.MIN_VALUE;
    }

    /**
     * @return a string made of the runtime class and the wrapped term
     */
    @Override
    public String toString() {
        return MoreObjects
                .toStringHelper(getClass())
                .add("term", term)
                .toString();
    }

    /**
     * @return the orthographic score that {@link StringUtils} computes for the
     *         lemma of the wrapped term
     */
    public double getOrthographicScore() {
        return StringUtils.getOrthographicScore(getTerm().getLemma());
    }

    /**
     * @return {@code true} when this object is a {@link ScoredTerm}
     */
    public boolean isScoredTerm() {
        return (this instanceof ScoredTerm);
    }

    /**
     * @return this object cast to {@link ScoredTerm}
     * @throws ClassCastException if this object is not a {@link ScoredTerm}
     */
    public ScoredTerm asScoredTerm() {
        return ScoredTerm.class.cast(this);
    }

    /**
     * @return {@code true} when this object is a {@link ScoredVariation}
     */
    public boolean isScoredVariation() {
        return (this instanceof ScoredVariation);
    }

    /**
     * @return this object cast to {@link ScoredVariation}
     * @throws ClassCastException if this object is not a {@link ScoredVariation}
     */
    public ScoredVariation asScoredVariation() {
        return ScoredVariation.class.cast(this);
    }
}