/* * Ivory: A Hadoop toolkit for web-scale information retrieval * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or * implied. See the License for the specific language governing * permissions and limitations under the License. */ package ivory.core.eval; import ivory.core.util.DelimitedValuesFileReader; import java.util.HashMap; import java.util.Map; import java.util.Set; import java.util.SortedMap; import java.util.TreeSet; import java.util.Map.Entry; import com.google.common.collect.Maps; /** * <p> * Representation of relevance judgments. In TREC parlance, qrels are judgments made by humans as to * whether a document is relevant to an information need (i.e., topic). Typically, qrels are created * by a process known as "pooling" in large-scale system evaluations such as those at TREC. * </p> * * @author Jimmy Lin */ public class Qrels { private SortedMap<String, Map<String, Boolean>> data = Maps.newTreeMap(); private float topics = 0; /** * Creates a {@code Qrels} object from a file * * @param file file containing qrels */ public Qrels(String file) { DelimitedValuesFileReader iter = new DelimitedValuesFileReader(file, " "); String[] arr; while ((arr = iter.nextValues()) != null) { String qno = arr[0]; String docno = arr[2]; boolean rel = arr[3].equals("0") ? false : true; if (data.containsKey(qno)) { data.get(qno).put(docno, rel); } else { Map<String, Boolean> t = new HashMap<String, Boolean>(); t.put(docno, rel); data.put(qno, t); } } } /** * Determines if a document is relevant for a topic. * * @param qid topic id * @param docid id of the document to test * @return {@code true} if the document is relevant */ public boolean isRelevant(String qid, String docid) { if (!data.containsKey(qid)) return false; if (!data.get(qid).containsKey(docid)) return false; return data.get(qid).get(docid); } /** * Returns the set of relevant documents for a topic. * * @param qid topic id * @return the set of relevant documents */ public Set<String> getReldocsForQid(String qid) { Set<String> set = new TreeSet<String>(); if (!data.containsKey(qid)) { return set; } topics++; for (Entry<String, Boolean> e : data.get(qid).entrySet()) { if (e.getValue()) { set.add(e.getKey()); } } return set; } /** * Returns a set containing the topic ids. * * @return a set containing the topic ids */ public Set<String> getQids() { return data.keySet(); } /** * Used with RunQueryHDFSTrainWSD class * * @return number of topics needed by RunQueryHDFSTrainWSD to compute effectiveness scores */ public float helperHDFSTrainWSDTopics() { return topics; } }