/*******************************************************************************
* Copyright 2015-2016 - CNRS (Centre National de Recherche Scientifique)
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*******************************************************************************/
package eu.project.ttc.utils;
import java.io.File;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.regex.Pattern;
import org.apache.commons.lang.mutable.MutableInt;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.ComparisonChain;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import eu.project.ttc.models.TermWord;
import eu.project.ttc.types.TermOccAnnotation;
import eu.project.ttc.types.WordAnnotation;
import fr.univnantes.lina.uima.tkregex.LabelledAnnotation;
import fr.univnantes.lina.uima.tkregex.RegexOccurrence;
/**
 * Static helpers for building term identifiers and grouping keys, counting
 * elements, normalizing text and inspecting the classpath.
 */
public class TermSuiteUtils {

    /** Format of a grouping key: the pattern, a colon and a space, then the lemmas. */
    private static final String GROUPING_KEY_FORMAT = "%s: %s";

    /** Indexes a string by its first character. */
    public static final IndexingKey<String, String> KEY_ONE_FIRST_LETTERS = getNFirstLetterIndexingKey(1);

    /** Indexes a string by its first two characters. */
    public static final IndexingKey<String, String> KEY_TWO_FIRST_LETTERS = getNFirstLetterIndexingKey(2);

    /** Indexes a string by its first three characters. */
    public static final IndexingKey<String, String> KEY_THREE_FIRST_LETTERS = getNFirstLetterIndexingKey(3);

    /**
     * Builds an indexing key that maps a string to its first {@code n} characters.
     * Strings of length {@code n} or less are returned unchanged.
     *
     * @param n
     *            the number of leading characters to keep, must be greater than 0
     * @return the indexing key
     * @throws IllegalArgumentException
     *             if {@code n} is not greater than 0
     */
    public static IndexingKey<String, String> getNFirstLetterIndexingKey(final int n) {
        Preconditions.checkArgument(n>0, "n must be greater than 0");
        return new IndexingKey<String, String>() {
            @Override
            public String getIndexKey(String fullString) {
                if(fullString.length()<=n)
                    return fullString;
                else
                    return fullString.substring(0, n);
            }
        };
    }

    /**
     * Returns the identifier of a single-word term, which is the lemma of the word.
     *
     * @param word
     *            the word annotation
     * @return the lemma of {@code word}
     */
    public static String getSingleWordTermId(WordAnnotation word) {
        /*
         * Single word terms are pre-gathered by their lemma
         */
        return word.getLemma();
    }

    /**
     * Counts the occurrences of each distinct element of an iterable.
     *
     * @param list
     *            the elements to count
     * @return a map from element to number of occurrences, iterated by decreasing
     *         count. Elements with equal counts keep the iteration order of the
     *         intermediate hash map, since the sort is stable.
     */
    public static <T> LinkedHashMap<T, Integer> getCounters(Iterable<T> list) {
        // First pass: one mutable counter per distinct element.
        Map<T, MutableInt> map = Maps.newHashMap();
        for(T e:list) {
            MutableInt counter = map.get(e);
            if(counter == null) {
                counter = new MutableInt(0);
                map.put(e, counter);
            }
            counter.increment();
        }

        // Second pass: order the entries by decreasing count (o2 compared to o1).
        Comparator<Entry<T, MutableInt>> comparator = new Comparator<Entry<T, MutableInt>>() {
            @Override
            public int compare(Entry<T,MutableInt> o1, Entry<T,MutableInt> o2) {
                return ComparisonChain.start()
                        .compare(o2.getValue(), o1.getValue())
                        .result();
            }
        };
        List<Entry<T, MutableInt>> entries = Lists.newArrayList(map.entrySet());
        Collections.sort(entries, comparator);

        // A LinkedHashMap preserves the sorted order for the caller.
        LinkedHashMap<T, Integer> counters = Maps.newLinkedHashMap();
        for(Entry<T, MutableInt> e:entries)
            counters.put(e.getKey(), e.getValue().intValue());
        return counters;
    }

    /**
     * Replaces every match of {@code TermSuiteConstants.WHITESPACE_PATTERN_STRING}
     * in the text with {@code TermSuiteConstants.WHITESPACE_STRING}, then trims
     * the result.
     *
     * @param coveredText
     *            the text to normalize
     * @return the normalized, trimmed text
     */
    public static String trimInside(String coveredText) {
        return coveredText.replaceAll(TermSuiteConstants.WHITESPACE_PATTERN_STRING, TermSuiteConstants.WHITESPACE_STRING).trim();
    }

    /**
     * Builds the grouping key of a term occurrence: the lower-cased pattern
     * labels concatenated without separator, followed by the lemmas of its words.
     *
     * @param annotation
     *            the term occurrence annotation
     * @return the grouping key
     */
    public static String getGroupingKey(TermOccAnnotation annotation) {
        int nbWords = annotation.getWords().size();
        StringBuilder patternSb = new StringBuilder();
        List<String> lemmas = Lists.newArrayListWithExpectedSize(nbWords);
        for(int i=0; i< nbWords; i++) {
            // Locale.ROOT keeps the key independent of the JVM default locale.
            patternSb.append(annotation.getPattern(i).toLowerCase(Locale.ROOT));
            lemmas.add(annotation.getWords(i).getLemma());
        }
        return toGroupingKey(patternSb, lemmas);
    }

    /**
     * Builds the grouping key of a regex occurrence: its labels concatenated
     * without separator, then {@code TermSuiteConstants.COLONS} and
     * {@code TermSuiteConstants.WHITESPACE}, then the lemmas of its labelled
     * annotations separated by {@code TermSuiteConstants.WHITESPACE}.
     * Unlike the other grouping key methods, the whole key is lower-cased,
     * lemmas included.
     *
     * @param occurrence
     *            the regex occurrence, whose labelled annotations must all be
     *            {@link WordAnnotation}s
     * @return the lower-cased grouping key
     */
    public static String toGroupingKey(RegexOccurrence occurrence) {
        StringBuilder builder = new StringBuilder();
        builder
            .append(Joiner.on("").join(occurrence.getLabels()))
            .append(TermSuiteConstants.COLONS)
            .append(TermSuiteConstants.WHITESPACE);
        int i = 0;
        for(LabelledAnnotation la:occurrence.getLabelledAnnotations()) {
            // Separator before every lemma but the first.
            if(i>= 1)
                builder.append(TermSuiteConstants.WHITESPACE);
            builder.append(((WordAnnotation) la.getAnnotation()).getLemma());
            i++;
        }
        // Locale.ROOT keeps the key independent of the JVM default locale.
        return builder.toString().toLowerCase(Locale.ROOT);
    }

    /**
     * Assembles a grouping key from an already built pattern and a list of lemmas.
     *
     * @param patternSb
     *            the concatenated pattern labels
     * @param lemmas
     *            the lemmas, joined with {@code TermSuiteConstants.WHITESPACE}
     * @return the key formatted with {@link #GROUPING_KEY_FORMAT}
     */
    private static String toGroupingKey(StringBuilder patternSb, List<String> lemmas) {
        return String.format(GROUPING_KEY_FORMAT,
                patternSb.toString(),
                Joiner.on(TermSuiteConstants.WHITESPACE).join(lemmas));
    }

    /**
     * Builds the grouping key of a sequence of term words.
     *
     * @param words
     *            the term words, in order
     * @return the grouping key
     * @see #getGroupingKey(Collection)
     */
    public static String getGroupingKey(TermWord... words) {
        return getGroupingKey(Lists.newArrayList(words));
    }

    /**
     * Builds the grouping key of a collection of term words: their lower-cased
     * syntactic labels concatenated without separator, followed by the lemmas
     * of their words.
     *
     * @param words
     *            the term words, in iteration order
     * @return the grouping key
     */
    public static String getGroupingKey(Collection<TermWord> words) {
        StringBuilder patternSb = new StringBuilder();
        List<String> lemmas = Lists.newArrayListWithExpectedSize(words.size());
        for(TermWord tw:words) {
            // Locale.ROOT keeps the key independent of the JVM default locale.
            patternSb.append(tw.getSyntacticLabel().toLowerCase(Locale.ROOT));
            lemmas.add(tw.getWord().getLemma());
        }
        return toGroupingKey(patternSb, lemmas);
    }

    /**
     * Prints the entries of the system classpath to standard output, one per line.
     * <p>
     * When the system class loader is a {@link URLClassLoader}, the file part
     * of each of its URLs is printed. Otherwise (the system class loader is not
     * a {@code URLClassLoader} on Java 9 and later) the entries of the
     * {@code java.class.path} system property are printed instead.
     */
    public static void listClasspath() {
        ClassLoader cl = ClassLoader.getSystemClassLoader();
        if(cl instanceof URLClassLoader) {
            for(URL url: ((URLClassLoader)cl).getURLs()) {
                System.out.println(url.getFile());
            }
            return;
        }
        // Fallback: an unconditional cast would throw ClassCastException here.
        String classpath = System.getProperty("java.class.path", "");
        if(classpath.isEmpty())
            return;
        for(String entry: classpath.split(Pattern.quote(File.pathSeparator))) {
            if(!entry.isEmpty())
                System.out.println(entry);
        }
    }

//    /**
//     * Adds a path (jar or directory) to classpath of default Class loader
//     * @param path
//     */
//    public static void addToClasspath(String path) {
//        URLClassLoader urlClassLoader = null;
//        try {
//            File f = new File(path);
//            Preconditions.checkArgument(f.exists(), "No such file: %s", path);
//            if(f.isFile()) {
//                ZipInputStream zipInputStream = new ZipInputStream(new FileInputStream(f));
//                boolean isZipped = zipInputStream.getNextEntry() != null;
//                Preconditions.checkArgument(isZipped, "No such file: %s", path);
//                zipInputStream.close();
//            } else
//                Preconditions.checkArgument(f.isDirectory(), "Should be a directory or a jar : %s", f.getAbsolutePath());
//            URI u = f.toURI();
//            urlClassLoader = (URLClassLoader) ClassLoader.getSystemClassLoader();
//            Class<URLClassLoader> urlClass = URLClassLoader.class;
//            Method method;
//            method = urlClass.getDeclaredMethod("addURL", new Class[]{URL.class});
//            method.setAccessible(true);
//            LOGGER.info("Adding {} to system class loader");
//            method.invoke(urlClassLoader, new Object[]{u.toURL()});
//        } catch (Exception e) {
//            throw new RuntimeException("Could not add "+path+" to classpath", e);
//        }
//    }
}