/**
* Copyright (c) 2014, the LESK-WSD-DSM AUTHORS.
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* Neither the name of the University of Bari nor the names of its contributors
* may be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* GNU GENERAL PUBLIC LICENSE - Version 3, 29 June 2007
*
*/
package di.uniba.it.wsd.tool.wn;
import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Iterator;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
* Build synsets occurrences information exploiting the index.sense WordNet file
* @author pierpaolo
*/
public class BuildOccSense {
/**
* @param args the command line arguments
*/
public static void main(String[] args) {
try {
BufferedReader in = new BufferedReader(new FileReader(new File(args[0])));
Multiset<String> synset = HashMultiset.create();
while (in.ready()) {
String[] values = in.readLine().split("\\s+");
String[] keys = values[0].split("%");
String[] poss = keys[1].split(":");
String offset = null;
int occ = Integer.parseInt(values[3]);
if (poss[0].equals("1")) {
offset = values[1] + "n";
} else if (poss[0].equals("2")) {
offset = values[1] + "v";
} else if (poss[0].equals("3") || poss[0].equals("5")) {
offset = values[1] + "a";
} else if (poss[0].equals("4")) {
offset = values[1] + "r";
}
for (int i = 0; i < occ; i++) {
synset.add(offset);
}
}
in.close();
BufferedWriter out = new BufferedWriter(new FileWriter(new File(args[1])));
Iterator<Multiset.Entry<String>> iterator = synset.entrySet().iterator();
while (iterator.hasNext()) {
Multiset.Entry<String> entry = iterator.next();
out.append(entry.getElement()).append("\t").append(String.valueOf(entry.getCount()));
out.newLine();
}
out.close();
} catch (IOException | NumberFormatException ioex) {
Logger.getLogger(BuildOccSense.class.getName()).log(Level.SEVERE, "IO Error", ioex);
}
}
}