/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.lucene.benchmark.quality.trec; import java.io.BufferedReader; import java.io.IOException; import java.io.PrintWriter; import java.util.ArrayList; import java.util.HashMap; import java.util.StringTokenizer; import org.apache.lucene.benchmark.quality.Judge; import org.apache.lucene.benchmark.quality.QualityQuery; /** * Judge if given document is relevant to given quality query, based on Trec format for judgements. */ public class TrecJudge implements Judge { HashMap<String,QRelJudgement> judgements; /** * Constructor from a reader. * <p> * Expected input format: * <pre> * qnum 0 doc-name is-relevant * </pre> * Two sample lines: * <pre> * 19 0 doc303 1 * 19 0 doc7295 0 * </pre> * @param reader where judgments are read from. * @throws IOException */ public TrecJudge (BufferedReader reader) throws IOException { judgements = new HashMap<String,QRelJudgement>(); QRelJudgement curr = null; String zero = "0"; String line; try { while (null!=(line=reader.readLine())) { line = line.trim(); if (line.length()==0 || '#'==line.charAt(0)) { continue; } StringTokenizer st = new StringTokenizer(line); String queryID = st.nextToken(); st.nextToken(); String docName = st.nextToken(); boolean relevant = !zero.equals(st.nextToken()); assert !st.hasMoreTokens() : "wrong format: "+line+" next: "+st.nextToken(); if (relevant) { // only keep relevant docs if (curr==null || !curr.queryID.equals(queryID)) { curr = judgements.get(queryID); if (curr==null) { curr = new QRelJudgement(queryID); judgements.put(queryID,curr); } } curr.addRelevandDoc(docName); } } } finally { reader.close(); } } // inherit javadocs public boolean isRelevant(String docName, QualityQuery query) { QRelJudgement qrj = judgements.get(query.getQueryID()); return qrj!=null && qrj.isRelevant(docName); } /** single Judgement of a trec quality query */ private static class QRelJudgement { private String queryID; private HashMap<String,String> relevantDocs; QRelJudgement(String queryID) { this.queryID = queryID; relevantDocs = new HashMap<String,String>(); } public void addRelevandDoc(String docName) { relevantDocs.put(docName,docName); } boolean isRelevant(String docName) { return relevantDocs.containsKey(docName); } public int maxRecall() { return relevantDocs.size(); } } // inherit javadocs public boolean validateData(QualityQuery[] qq, PrintWriter logger) { HashMap<String,QRelJudgement> missingQueries = (HashMap<String, QRelJudgement>) judgements.clone(); ArrayList<String> missingJudgements = new ArrayList<String>(); for (int i=0; i<qq.length; i++) { String id = qq[i].getQueryID(); if (missingQueries.containsKey(id)) { missingQueries.remove(id); } else { missingJudgements.add(id); } } boolean isValid = true; if (missingJudgements.size()>0) { isValid = false; if (logger!=null) { logger.println("WARNING: "+missingJudgements.size()+" queries have no judgments! - "); for (int i=0; i<missingJudgements.size(); i++) { logger.println(" "+ missingJudgements.get(i)); } } } if (missingQueries.size()>0) { isValid = false; if (logger!=null) { logger.println("WARNING: "+missingQueries.size()+" judgments match no query! - "); for (final String id : missingQueries.keySet()) { logger.println(" "+id); } } } return isValid; } // inherit javadocs public int maxRecall(QualityQuery query) { QRelJudgement qrj = judgements.get(query.getQueryID()); if (qrj!=null) { return qrj.maxRecall(); } return 0; } }