/*******************************************************************************
* Copyright 2015-2016 - CNRS (Centre National de Recherche Scientifique)
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*******************************************************************************/
package eu.project.ttc.models.index;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ListMultimap;
import com.google.common.collect.Sets;
import eu.project.ttc.models.Term;
import eu.project.ttc.models.TermIndex;
/**
 * {@link CustomTermIndex} implementation backed by a Guava
 * {@link ListMultimap} that maps each String key to the list of
 * {@link Term}s indexed under it.
 *
 * <p>The keys of a term are the values returned by
 * {@link TermValueProvider#getClasses(TermIndex, Term)} for that term.
 * This class performs no synchronization of its own.</p>
 */
public class CustomTermIndexImpl implements CustomTermIndex {
    private static final Logger LOGGER = LoggerFactory.getLogger(CustomTermIndexImpl.class);

    /** Key -> terms indexed under that key. */
    private final ListMultimap<String, Term> index;

    /** Computes the keys under which a term is indexed. */
    private final TermValueProvider valueProvider;

    /**
     * Creates an empty index.
     *
     * @param valueProvider the provider used to compute the keys of each term
     */
    CustomTermIndexImpl(TermValueProvider valueProvider) {
        this.valueProvider = valueProvider;
        this.index = ArrayListMultimap.create();
    }

    /**
     * Returns the keys currently present in this index.
     *
     * <p>The returned collection is the live key view of the underlying
     * multimap, not a copy.</p>
     */
    @Override
    public Collection<String> keySet() {
        return this.index.keySet();
    }

    /**
     * Returns the terms indexed under the given key.
     *
     * <p>The returned list is the live view of the underlying multimap,
     * not a copy; it is empty when the key is absent.</p>
     *
     * @param key the key to look up
     */
    @Override
    public List<Term> getTerms(String key) {
        return this.index.get(key);
    }

    /**
     * Adds the term under every non-null key the value provider returns
     * for it. Nothing is indexed when the provider returns {@code null}.
     *
     * @param termIndex the term index passed through to the value provider
     * @param term the term to add
     */
    @Override
    public void indexTerm(TermIndex termIndex, Term term) {
        Collection<String> classes = valueProvider.getClasses(termIndex, term);
        if (classes == null) {
            return;
        }
        for (String cls : classes) {
            if (cls != null) {
                this.index.put(cls, term);
            }
        }
    }

    /**
     * Removes every key that has exactly one term indexed under it.
     */
    @Override
    public void cleanSingletonKeys() {
        // Removing through the key-view iterator drops the key together
        // with its values without invalidating the iteration.
        Iterator<String> it = this.index.keySet().iterator();
        while (it.hasNext()) {
            if (this.index.get(it.next()).size() == 1) {
                it.remove();
            }
        }
    }

    /**
     * Returns the number of key/term pairs held by the underlying
     * multimap (a term indexed under several keys is counted once per
     * key). This is not the number of distinct keys.
     */
    @Override
    public int size() {
        return this.index.size();
    }

    /**
     * Removes one occurrence of the term from every key the value
     * provider returns for it.
     *
     * @param termIndex the term index passed through to the value provider
     * @param t the term to remove
     */
    @Override
    public void removeTerm(TermIndex termIndex, Term t) {
        Collection<String> classes = valueProvider.getClasses(termIndex, t);
        // indexTerm() tolerates a null result from the provider; do the
        // same here instead of failing with a NullPointerException.
        if (classes == null) {
            return;
        }
        for (String k : classes) {
            this.index.remove(k, t);
        }
    }

    /**
     * Removes every key whose number of terms is greater than or equal
     * to the given threshold.
     *
     * @param threshholdSize entries of this size or more are removed
     * @param logWarning when {@code true}, a warning is logged for each
     *        removed key; when {@code false}, removal is silent
     */
    @Override
    public void dropBiggerEntries(int threshholdSize, boolean logWarning) {
        // First collect, then remove: the key view must not be modified
        // by removeAll() while it is being iterated.
        Set<String> toRemove = Sets.newHashSet();
        for (String key : index.keySet()) {
            if (index.get(key).size() >= threshholdSize) {
                toRemove.add(key);
            }
        }
        for (String rem : toRemove) {
            if (logWarning) {
                LOGGER.warn("Removing key {} from custom index {} because its size {} is bigger than the threshhold {}",
                        rem,
                        this.valueProvider.getName(),
                        this.index.get(rem).size(),
                        threshholdSize);
            }
            index.removeAll(rem);
        }
    }

    /**
     * Shrinks every entry holding more than {@code maxSize} terms.
     *
     * <p>For such an entry, a frequency threshold starting at 2 is raised
     * one step at a time; at each step the terms whose frequency is below
     * the threshold are removed, until the entry holds at most
     * {@code maxSize} terms. An entry may end up empty, in which case its
     * key disappears from the index. A warning is logged for every entry
     * that was filtered.</p>
     *
     * @param maxSize the maximum number of terms allowed per key
     */
    @Override
    public void cleanEntriesByMaxSize(int maxSize) {
        String msg = "Index entry {} had too many elements. Applied th={} filter. Before -> after filtering: {} -> {}";

        // Iterate over a snapshot of the keys: emptying an entry removes
        // its key from the multimap, which would otherwise break the
        // iteration over the live key view with a
        // ConcurrentModificationException.
        String[] keys = index.keySet().toArray(new String[0]);
        for (String key : keys) {
            List<Term> terms = index.get(key);
            int initialSize = terms.size();
            int th = 1;
            // The emptiness check avoids spinning on an already-empty
            // entry when maxSize is negative.
            while (terms.size() > maxSize && !terms.isEmpty()) {
                th++;
                Iterator<Term> it = terms.iterator();
                while (it.hasNext()) {
                    if (it.next().getFrequency() < th) {
                        it.remove();
                    }
                }
            }
            if (th > 1) {
                LOGGER.warn(msg,
                        key,
                        th,
                        initialSize,
                        terms.size());
            }
        }
    }

    /**
     * Tells whether at least one term is indexed under the given key.
     *
     * @param key the key to test
     * @return {@code true} if the key has at least one term
     */
    @Override
    public boolean containsKey(String key) {
        return index.containsKey(key);
    }
}