package ivory.ffg.util;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.List;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import tl.lin.data.map.HMapIV;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.io.ByteSource;
import com.google.common.io.Files;
/**
 * Provides auxiliary functions for parsing qrel files.
 *
 * <p>Every loader groups the document ids found in its input by query id and
 * returns a map from query id to an array of document ids, kept in the order
 * in which they appear in the input.
 *
 * @author Nima Asadi
 */
public class QrelUtility {
  /**
   * Reads an XML qrel file from the given path.
   *
   * @param qrelPath Path to the XML qrel file; must not be null
   * @return A map of query id to an array of document ids
   * @throws Exception if the file cannot be read or parsed
   * @see #loadQrelsFromXML(ByteSource)
   */
  public static HMapIV<int[]> parseQrelsFromXML(String qrelPath)
      throws Exception {
    Preconditions.checkNotNull(qrelPath);
    return QrelUtility.loadQrelsFromXML(Files.asByteSource(new File(qrelPath)));
  }

  /**
   * Reads a whitespace-delimited qrel file (integer document ids) from the
   * given path.
   *
   * @param qrelPath Path to the qrel file; must not be null
   * @return A map of query id to an array of document ids
   * @throws Exception if the file cannot be read
   * @see #loadQrelsFromTabDelimited(ByteSource)
   */
  public static HMapIV<int[]> parseQrelsFromTabDelimited(String qrelPath)
      throws Exception {
    Preconditions.checkNotNull(qrelPath);
    return QrelUtility.loadQrelsFromTabDelimited(Files.asByteSource(new File(qrelPath)));
  }

  /**
   * Reads a whitespace-delimited qrel file (long document ids) from the
   * given path.
   *
   * @param qrelPath Path to the qrel file; must not be null
   * @return A map of query id to an array of document ids
   * @throws Exception if the file cannot be read
   * @see #loadLongQrelsFromTabDelimited(ByteSource)
   */
  public static HMapIV<long[]> parseLongQrelsFromTabDelimited(String qrelPath)
      throws Exception {
    Preconditions.checkNotNull(qrelPath);
    return QrelUtility.loadLongQrelsFromTabDelimited(Files.asByteSource(new File(qrelPath)));
  }

  /**
   * Reads a qrel set in XML format as follows:
   * <pre>{@code
   * <parameters>
   *   <judgment qid="Query_ID" doc="Document_ID" />
   * </parameters>
   * }</pre>
   * Both attributes are parsed as integers.
   *
   * @param source A byte source that provides the qrels; must not be null
   * @return A map of query id to an array of document ids
   * @throws ParserConfigurationException if an XML parser cannot be created
   * @throws SAXException if the input is not well-formed XML
   * @throws IOException if the source cannot be opened or read
   * @throws NumberFormatException if a {@code qid} or {@code doc} attribute
   *     is missing or is not an integer
   */
  public static HMapIV<int[]> loadQrelsFromXML(ByteSource source)
      throws ParserConfigurationException, SAXException, IOException {
    Preconditions.checkNotNull(source);

    Document dom;
    // The stream is opened here, so it is closed here as well, whether or
    // not parsing succeeds.
    InputStream in = source.openStream();
    try {
      dom = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(in);
    } finally {
      in.close();
    }

    // getElementsByTagName yields an empty list (not null) when nothing
    // matches, in which case the returned map is simply empty.
    NodeList nodeList = dom.getDocumentElement().getElementsByTagName("judgment");

    HMapIV<List<Integer>> tempQrels = new HMapIV<List<Integer>>();
    for (int i = 0; i < nodeList.getLength(); i++) {
      Element element = (Element) nodeList.item(i);
      int qid = Integer.parseInt(element.getAttribute("qid"));
      int docid = Integer.parseInt(element.getAttribute("doc"));
      append(tempQrels, qid, docid);
    }
    return toIntArrays(tempQrels);
  }

  /**
   * Reads a qrel set in which every line holds a query id followed by a
   * document id, separated by whitespace (tabs or spaces). Any further fields
   * on a line are ignored, and blank lines are skipped.
   *
   * @param source A byte source that provides the qrels; must not be null
   * @return A map of query id to an array of document ids
   * @throws IOException if the source cannot be opened or read
   * @throws NumberFormatException if either of the first two fields of a
   *     line is not an integer
   */
  public static HMapIV<int[]> loadQrelsFromTabDelimited(ByteSource source)
      throws IOException {
    Preconditions.checkNotNull(source);

    HMapIV<List<Integer>> tempQrels = new HMapIV<List<Integer>>();
    BufferedReader reader = new BufferedReader(new InputStreamReader(source.openStream()));
    try {
      String line;
      while ((line = reader.readLine()) != null) {
        // Trimming keeps a leading separator from producing an empty first
        // field, and lets blank lines be recognised and skipped.
        line = line.trim();
        if (line.isEmpty()) {
          continue;
        }
        String[] parts = line.split("\\s+");
        int qid = Integer.parseInt(parts[0]);
        int docid = Integer.parseInt(parts[1]);
        append(tempQrels, qid, docid);
      }
    } finally {
      reader.close();
    }
    return toIntArrays(tempQrels);
  }

  /**
   * Same as {@link #loadQrelsFromTabDelimited(ByteSource)}, except that
   * document ids are parsed as {@code long} values.
   *
   * @param source A byte source that provides the qrels; must not be null
   * @return A map of query id to an array of document ids
   * @throws IOException if the source cannot be opened or read
   * @throws NumberFormatException if the first field of a line is not an
   *     integer or the second field is not a long
   */
  public static HMapIV<long[]> loadLongQrelsFromTabDelimited(ByteSource source)
      throws IOException {
    Preconditions.checkNotNull(source);

    HMapIV<List<Long>> tempQrels = new HMapIV<List<Long>>();
    BufferedReader reader = new BufferedReader(new InputStreamReader(source.openStream()));
    try {
      String line;
      while ((line = reader.readLine()) != null) {
        // Trimming keeps a leading separator from producing an empty first
        // field, and lets blank lines be recognised and skipped.
        line = line.trim();
        if (line.isEmpty()) {
          continue;
        }
        String[] parts = line.split("\\s+");
        int qid = Integer.parseInt(parts[0]);
        long docid = Long.parseLong(parts[1]);
        append(tempQrels, qid, docid);
      }
    } finally {
      reader.close();
    }
    return toLongArrays(tempQrels);
  }

  /** Adds a document id to the list kept for a query id, creating the list on first use. */
  private static <T> void append(HMapIV<List<T>> qrels, int qid, T docid) {
    if (!qrels.containsKey(qid)) {
      List<T> list = Lists.newArrayList();
      qrels.put(qid, list);
    }
    qrels.get(qid).add(docid);
  }

  /** Copies every per-query list of document ids into an int array. */
  private static HMapIV<int[]> toIntArrays(HMapIV<List<Integer>> tempQrels) {
    HMapIV<int[]> qrels = new HMapIV<int[]>();
    for (int key : tempQrels.keySet()) {
      List<Integer> list = tempQrels.get(key);
      int[] value = new int[list.size()];
      for (int i = 0; i < value.length; i++) {
        value[i] = list.get(i);
      }
      qrels.put(key, value);
    }
    return qrels;
  }

  /** Copies every per-query list of document ids into a long array. */
  private static HMapIV<long[]> toLongArrays(HMapIV<List<Long>> tempQrels) {
    HMapIV<long[]> qrels = new HMapIV<long[]>();
    for (int key : tempQrels.keySet()) {
      List<Long> list = tempQrels.get(key);
      long[] value = new long[list.size()];
      for (int i = 0; i < value.length; i++) {
        value[i] = list.get(i);
      }
      qrels.put(key, value);
    }
    return qrels;
  }
}