/**
* Copyright (c) 2014, the LESK-WSD-DSM AUTHORS.
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* Neither the name of the University of Bari nor the names of its contributors
* may be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* GNU GENERAL PUBLIC LICENSE - Version 3, 29 June 2007
*
*/
package di.uniba.it.wsd;
import it.uniroma1.lcl.babelnet.BabelSense;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
 * Loads and serves information about sense distributions.
 *
 * <p>Two plain-text files are read by {@link #init()}:
 * <ul>
 * <li>a <em>frequencies</em> file, one entry per line, made of whitespace
 * separated tokens: {@code key id1 freq1 id2 freq2 ...};</li>
 * <li>an optional <em>occurrences</em> file, one entry per line, made of tab
 * separated tokens: {@code offset<TAB>count}.</li>
 * </ul>
 *
 * @author pierpaolo
 */
public class SenseFreqAPI {

    /**
     * Maps the first token of each frequencies line to its list of
     * {@link SenseFreq} entries, sorted by decreasing frequency.
     */
    public Map<String, List<SenseFreq>> map = new HashMap<>();

    /**
     * Maps the first column of each occurrences line to the integer parsed
     * from the second column.
     */
    public Map<String, Integer> occs = new HashMap<>();

    private final File freqsFile;

    private final File occsFile;

    private static final Logger logger = Logger.getLogger(SenseFreqAPI.class.getName());

    /**
     * Creates an API that only loads sense frequencies; no occurrences file
     * is used.
     *
     * @param freqsFile the frequencies file
     */
    public SenseFreqAPI(File freqsFile) {
        this(freqsFile, null);
    }

    /**
     * Creates an API that loads both sense frequencies and occurrences.
     *
     * @param freqsFile the frequencies file
     * @param occsFile the occurrences file, or {@code null} to skip loading
     * occurrences
     */
    public SenseFreqAPI(File freqsFile, File occsFile) {
        this.freqsFile = freqsFile;
        this.occsFile = occsFile;
    }

    /**
     * Reads the frequencies file into {@link #map}. Lines with fewer than
     * three tokens are skipped.
     */
    private void initFreqs() throws IOException {
        // try-with-resources: the reader is closed even if parsing fails.
        try (BufferedReader in = new BufferedReader(new FileReader(freqsFile))) {
            String line;
            while ((line = in.readLine()) != null) {
                String[] values = line.split("\\s+");
                if (values.length < 3) {
                    continue;
                }
                List<SenseFreq> list = new ArrayList<>();
                // Consume only complete (id, freq) pairs so that a dangling
                // trailing token cannot index past the end of the array.
                for (int i = 1; i + 1 < values.length; i += 2) {
                    list.add(new SenseFreq(values[i], Float.parseFloat(values[i + 1])));
                }
                if ((values.length - 1) % 2 != 0) {
                    logger.log(Level.WARNING, "Ignoring unpaired trailing token for key {0}", values[0]);
                }
                Collections.sort(list);
                map.put(values[0], list);
            }
        }
        logger.log(Level.INFO, "Loaded {0} sense freq info", map.size());
    }

    /**
     * Reads the occurrences file into {@link #occs}. Lines with fewer than
     * two tab separated columns are skipped.
     */
    private void initOccs() throws IOException {
        // try-with-resources: the reader is closed even if parsing fails.
        try (BufferedReader in = new BufferedReader(new FileReader(occsFile))) {
            String line;
            while ((line = in.readLine()) != null) {
                String[] values = line.split("\t");
                if (values.length > 1) {
                    occs.put(values[0], Integer.parseInt(values[1]));
                }
            }
        }
        logger.log(Level.INFO, "Loaded {0} sense occ info", occs.size());
    }

    /**
     * Clears any previously loaded data and (re)loads the frequencies file
     * and, when one was supplied, the occurrences file.
     *
     * @throws IOException if a file cannot be opened or read
     * @throws NumberFormatException if a frequency or an occurrence count
     * cannot be parsed as a number
     */
    public void init() throws IOException {
        destroy();
        initFreqs();
        // The single-argument constructor leaves occsFile null; opening it
        // would throw a NullPointerException, so occurrences are skipped.
        if (occsFile != null) {
            initOccs();
        }
    }

    /**
     * Discards all loaded frequency and occurrence data.
     */
    public void destroy() {
        map.clear();
        occs.clear();
    }

    /**
     * Returns the sense frequencies stored for a key.
     *
     * @param key the key, i.e. the first token of a frequencies line
     * @return the list stored in {@link #map} for {@code key} (not a copy),
     * or a new empty list when the key is unknown
     */
    public List<SenseFreq> getSenseProbabilityList(String key) {
        List<SenseFreq> senses = map.get(key);
        return senses != null ? senses : new ArrayList<SenseFreq>();
    }

    /**
     * Returns the frequency stored for the given key and sense id.
     *
     * @param key the key used to select the sense list
     * @param offset the sense id to look for in that list
     * @param synsetSize used for the fallback value {@code 1 / synsetSize}
     * (infinite when {@code synsetSize} is 0)
     * @return the stored frequency of the first entry whose id equals
     * {@code offset}, or {@code 1 / synsetSize} when there is none
     */
    public float getSynsetProbability(String key, String offset, int synsetSize) {
        for (SenseFreq elem : getSenseProbabilityList(key)) {
            if (elem.getId().equals(offset)) {
                return elem.getFreq();
            }
        }
        return 1 / (float) synsetSize;
    }

    /**
     * Returns the number of occurrences stored for an offset.
     *
     * @param offset the offset, i.e. the first column of an occurrences line
     * @return the stored count as a float, or {@code null} when the offset is
     * unknown
     */
    public Float getSynsetOccurrences(String offset) {
        Integer count = occs.get(offset);
        return count == null ? null : Float.valueOf(count.floatValue());
    }

    /**
     * Returns the largest occurrence count among the WordNet offsets of the
     * synset of a sense.
     *
     * @param sense the sense whose synset offsets are looked up
     * @return the maximum stored count, or 0 when no offset has a positive
     * stored count
     */
    public float getMaxSenseOccurrences(BabelSense sense) {
        float maxOcc = 0;
        for (String offset : sense.getSynset().getWordNetOffsets()) {
            Float occ = getSynsetOccurrences(offset);
            if (occ != null && occ > maxOcc) {
                maxOcc = occ;
            }
        }
        return maxOcc;
    }

    /**
     * Returns the largest value of
     * {@link #getSynsetProbability(String, String, int)} among the WordNet
     * offsets of the synset of a sense.
     *
     * @param key the key used to select the sense list
     * @param sense the sense whose synset offsets are looked up
     * @param maxSize passed as {@code synsetSize} to
     * {@link #getSynsetProbability(String, String, int)}
     * @return the maximum value found, or 0 when the synset has no offsets
     * or no value is positive
     */
    public float getMaxSenseProbability(String key, BabelSense sense, int maxSize) {
        float maxProb = 0;
        for (String offset : sense.getSynset().getWordNetOffsets()) {
            float prob = getSynsetProbability(key, offset, maxSize);
            if (prob > maxProb) {
                maxProb = prob;
            }
        }
        return maxProb;
    }

    /**
     * Builds an add-one smoothed distribution over the given senses:
     * element {@code i} is {@code (occ_i + 1) / (sum(occ) + n)}, where
     * {@code occ_i} is {@link #getMaxSenseOccurrences(BabelSense)} of the
     * i-th sense and {@code n} is the number of senses.
     *
     * @param senses the senses to score
     * @return an array with one value per sense, in the same order; empty
     * when {@code senses} is empty
     */
    public float[] getOccurrencesArray(List<BabelSense> senses) {
        float[] a = new float[senses.size()];
        float norma = 0;
        for (int i = 0; i < a.length; i++) {
            a[i] = getMaxSenseOccurrences(senses.get(i));
            norma += a[i];
        }
        float denominator = norma + (float) a.length;
        for (int i = 0; i < a.length; i++) {
            a[i] = (a[i] + 1) / denominator;
        }
        return a;
    }

    /**
     * A sense id paired with its frequency.
     *
     * <p>Equality and hash code are based on the id only, while the natural
     * ordering is by decreasing frequency; the ordering is therefore not
     * consistent with {@code equals}.
     */
    public class SenseFreq implements Comparable<SenseFreq> {

        private String id;

        private float freq;

        /**
         * Creates a new entry.
         *
         * @param id the sense id
         * @param freq the frequency associated with the id
         */
        public SenseFreq(String id, float freq) {
            this.id = id;
            this.freq = freq;
        }

        /**
         * Returns the sense id.
         *
         * @return the sense id
         */
        public String getId() {
            return id;
        }

        /**
         * Sets the sense id.
         *
         * @param id the new sense id
         */
        public void setId(String id) {
            this.id = id;
        }

        /**
         * Returns the frequency.
         *
         * @return the frequency
         */
        public float getFreq() {
            return freq;
        }

        /**
         * Sets the frequency.
         *
         * @param freq the new frequency
         */
        public void setFreq(float freq) {
            this.freq = freq;
        }

        @Override
        public int hashCode() {
            int hash = 7;
            hash = 71 * hash + Objects.hashCode(this.id);
            return hash;
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (obj == null || getClass() != obj.getClass()) {
                return false;
            }
            final SenseFreq other = (SenseFreq) obj;
            return Objects.equals(this.id, other.id);
        }

        /**
         * Orders entries by decreasing frequency.
         */
        @Override
        public int compareTo(SenseFreq o) {
            // Arguments are swapped on purpose to obtain a descending order.
            return Float.compare(o.freq, freq);
        }
    }
}