TestData.java example

Explorer
solrcene-master
package org.apache.lucene.benchmark.stats;
/**
 * Copyright 2005 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


import java.io.File;
import java.text.NumberFormat;
import java.util.ArrayList;

import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Vector;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.benchmark.Constants;
import org.apache.lucene.store.Directory;


/**
 * This class holds together all parameters related to a test. Single test is
 * performed several times, and all results are averaged.
 *
 */
public class TestData
{
    public static int[] MAX_BUFFERED_DOCS_COUNTS = new int[]{10, 20, 50, 100, 200, 500};
    public static int[] MERGEFACTOR_COUNTS = new int[]{10, 20, 50, 100, 200, 500};

    /**
     * ID of this test data.
     */
    private String id;
    /**
     * Heap size.
     */
    private long heap;
    /**
     * List of results for each test run with these parameters.
     */
    private Vector<TestRunData> runData = new Vector<TestRunData>();
    private int maxBufferedDocs, mergeFactor;
    /**
     * Directory containing source files.
     */
    private File source;
    /**
     * Lucene Directory implementation for creating an index.
     */
    private Directory directory;
    /**
     * Analyzer to use when adding documents.
     */
    private Analyzer analyzer;
    /**
     * If true, use compound file format.
     */
    private boolean compound;
    /**
     * If true, optimize index when finished adding documents.
     */
    private boolean optimize;
    /**
     * Data for search benchmarks.
     */
    private QueryData[] queries;

    public TestData()
    {
        heap = Runtime.getRuntime().maxMemory();
    }

    private static class DCounter
    {
        double total;
        int count, recordCount;
    }

    private static class LCounter
    {
        long total;
        int count;
    }

    private static class LDCounter
    {
      double Dtotal;
      int Dcount, DrecordCount;
      long Ltotal0;
      int Lcount0;
      long Ltotal1;
      int Lcount1;
    }

    /**
     * Get a textual summary of the benchmark results, average from all test runs.
     */
    static final String ID =      "# testData id     ";
    static final String OP =      "operation      ";
    static final String RUNCNT =  "     runCnt";
    static final String RECCNT =  "     recCnt";
    static final String RECSEC =  "          rec/s";
    static final String FREEMEM = "       avgFreeMem";
    static final String TOTMEM =  "      avgTotalMem";
    static final String COLS[] = {
        ID,
        OP,
        RUNCNT,
        RECCNT,
        RECSEC,
        FREEMEM,
        TOTMEM
    };
    public String showRunData(String prefix)
    {
        if (runData.size() == 0)
        {
            return "# [NO RUN DATA]";
        }
        HashMap<String,LDCounter> resByTask = new HashMap<String,LDCounter>(); 
        StringBuffer sb = new StringBuffer();
        String lineSep = System.getProperty("line.separator");
        sb.append("warm = Warm Index Reader").append(lineSep).append("srch = Search Index").append(lineSep).append("trav = Traverse Hits list, optionally retrieving document").append(lineSep).append(lineSep);
        for (int i = 0; i < COLS.length; i++) {
          sb.append(COLS[i]);
        }
        sb.append("\n");
        LinkedHashMap<String,TestData.LCounter[]> mapMem = new LinkedHashMap<String,TestData.LCounter[]>();
        LinkedHashMap<String,DCounter> mapSpeed = new LinkedHashMap<String,DCounter>();
        for (int i = 0; i < runData.size(); i++)
        {
            TestRunData trd = runData.get(i);
            for (final String label : trd.getLabels()) 
            {
                MemUsage mem = trd.getMemUsage(label);
                if (mem != null)
                {
                    TestData.LCounter[] tm = mapMem.get(label);
                    if (tm == null)
                    {
                        tm = new TestData.LCounter[2];
                        tm[0] = new TestData.LCounter();
                        tm[1] = new TestData.LCounter();
                        mapMem.put(label, tm);
                    }
                    tm[0].total += mem.avgFree;
                    tm[0].count++;
                    tm[1].total += mem.avgTotal;
                    tm[1].count++;
                }
                TimeData td = trd.getTotals(label);
                if (td != null)
                {
                    TestData.DCounter dc = mapSpeed.get(label);
                    if (dc == null)
                    {
                        dc = new TestData.DCounter();
                        mapSpeed.put(label, dc);
                    }
                    dc.count++;
                    //dc.total += td.getRate();
                    dc.total += (td.count>0 && td.elapsed<=0 ? 1 : td.elapsed); // assume at least 1ms for any countable op
                    dc.recordCount += td.count;
                }
            }
        }
        LinkedHashMap<String,String> res = new LinkedHashMap<String,String>();
        Iterator<String> it = mapSpeed.keySet().iterator();
        while (it.hasNext())
        {
            String label = it.next();
            TestData.DCounter dc = mapSpeed.get(label);
            res.put(label, 
                format(dc.count, RUNCNT) + 
                format(dc.recordCount / dc.count, RECCNT) +
                format(1,(float) (dc.recordCount * 1000.0 / (dc.total>0 ? dc.total : 1.0)), RECSEC)
                //format((float) (dc.total / (double) dc.count), RECSEC)
                );
            
            // also sum by task
            String task = label.substring(label.lastIndexOf("-")+1);
            LDCounter ldc = resByTask.get(task);
            if (ldc==null) {
              ldc = new LDCounter();
              resByTask.put(task,ldc);
            }
            ldc.Dcount += dc.count;
            ldc.DrecordCount += dc.recordCount;
            ldc.Dtotal += (dc.count>0 && dc.total<=0 ? 1 : dc.total); // assume at least 1ms for any countable op 
        }
        it = mapMem.keySet().iterator();
        while (it.hasNext())
        {
            String label = it.next();
            TestData.LCounter[] lc =  mapMem.get(label);
            String speed = res.get(label);
            boolean makeSpeed = false;
            if (speed == null)
            {
                makeSpeed = true;
                speed =  
                  format(lc[0].count, RUNCNT) + 
                  format(0, RECCNT) + 
                  format(0,(float)0.0, RECSEC);
            }
            res.put(label, speed + 
                format(0, lc[0].total / lc[0].count, FREEMEM) + 
                format(0, lc[1].total / lc[1].count, TOTMEM));
            
            // also sum by task
            String task = label.substring(label.lastIndexOf("-")+1);
            LDCounter ldc = resByTask.get(task);
            if (ldc==null) {
              ldc = new LDCounter();
              resByTask.put(task,ldc);
              makeSpeed = true;
            }
            if (makeSpeed) {
              ldc.Dcount += lc[0].count;
            }
            ldc.Lcount0 += lc[0].count;
            ldc.Lcount1 += lc[1].count;
            ldc.Ltotal0 += lc[0].total;
            ldc.Ltotal1 += lc[1].total;
        }
        it = res.keySet().iterator();
        while (it.hasNext())
        {
            String label = it.next();
            sb.append(format(prefix, ID));
            sb.append(format(label, OP));
            sb.append(res.get(label)).append("\n");
        }
        // show results by task (srch, optimize, etc.) 
        sb.append("\n");
        for (int i = 0; i < COLS.length; i++) {
          sb.append(COLS[i]);
        }
        sb.append("\n");
        it = resByTask.keySet().iterator();
        while (it.hasNext())
        {
            String task = it.next();
            LDCounter ldc = resByTask.get(task);
            sb.append(format("    ", ID));
            sb.append(format(task, OP));
            sb.append(format(ldc.Dcount, RUNCNT)); 
            sb.append(format(ldc.DrecordCount / ldc.Dcount, RECCNT));
            sb.append(format(1,(float) (ldc.DrecordCount * 1000.0 / (ldc.Dtotal>0 ? ldc.Dtotal : 1.0)), RECSEC));
            sb.append(format(0, ldc.Ltotal0 / ldc.Lcount0, FREEMEM)); 
            sb.append(format(0, ldc.Ltotal1 / ldc.Lcount1, TOTMEM));
            sb.append("\n");
        }
        return sb.toString();
    }

    private static NumberFormat numFormat [] = { NumberFormat.getInstance(), NumberFormat.getInstance()};
    private static final String padd = "                                  ";
    static {
      numFormat[0].setMaximumFractionDigits(0);
      numFormat[0].setMinimumFractionDigits(0);
      numFormat[1].setMaximumFractionDigits(1);
      numFormat[1].setMinimumFractionDigits(1);
    }

    // pad number from left
    // numFracDigits must be 0 or 1.
    static String format(int numFracDigits, float f, String col) {
      String res = padd + numFormat[numFracDigits].format(f);
      return res.substring(res.length() - col.length());
    }

    // pad number from left
    static String format(int n, String col) {
      String res = padd + n;
      return res.substring(res.length() - col.length());
    }

    // pad string from right
    static String format(String s, String col) {
      return (s + padd).substring(0,col.length());
    }

    /**
     * Prepare a list of benchmark data, using all possible combinations of
     * benchmark parameters.
     *
     * @param sources   list of directories containing different source document
     *                  collections
     * @param analyzers of analyzers to use.
     */
    public static TestData[] getAll(File[] sources, Analyzer[] analyzers)
    {
        List<TestData> res = new ArrayList<TestData>(50);
        TestData ref = new TestData();
        for (int q = 0; q < analyzers.length; q++)
        {
            for (int m = 0; m < sources.length; m++)
            {
                for (int i = 0; i < MAX_BUFFERED_DOCS_COUNTS.length; i++)
                {
                    for (int k = 0; k < MERGEFACTOR_COUNTS.length; k++)
                    {
                        for (int n = 0; n < Constants.BOOLEANS.length; n++)
                        {
                            for (int p = 0; p < Constants.BOOLEANS.length; p++)
                            {
                                ref.id = "td-" + q + m + i + k + n + p;
                                ref.source = sources[m];
                                ref.analyzer = analyzers[q];
                                ref.maxBufferedDocs = MAX_BUFFERED_DOCS_COUNTS[i];
                                ref.mergeFactor = MERGEFACTOR_COUNTS[k];
                                ref.compound = Constants.BOOLEANS[n].booleanValue();
                                ref.optimize = Constants.BOOLEANS[p].booleanValue();
                                try
                                {
                                    res.add((TestData)ref.clone());
                                }
                                catch (Exception e)
                                {
                                    e.printStackTrace();
                                }
                            }
                        }
                    }
                }
            }
        }
        return res.toArray(new TestData[0]);
    }

    /**
     * Similar to {@link #getAll(java.io.File[], org.apache.lucene.analysis.Analyzer[])} but only uses
     * maxBufferedDocs of 10 and 100 and same for mergeFactor, thus reducing the number of permutations significantly.
     * It also only uses compound file and optimize is always true.
     *
     * @param sources
     * @param analyzers
     * @return An Array of {@link TestData}
     */
    public static TestData[] getTestDataMinMaxMergeAndMaxBuffered(File[] sources, Analyzer[] analyzers)
    {
        List<TestData> res = new ArrayList<TestData>(50);
        TestData ref = new TestData();
        for (int q = 0; q < analyzers.length; q++)
        {
            for (int m = 0; m < sources.length; m++)
            {
                ref.id = "td-" + q + m + "_" + 10 + "_" + 10;
                ref.source = sources[m];
                ref.analyzer = analyzers[q];
                ref.maxBufferedDocs = 10;
                ref.mergeFactor = 10;//MERGEFACTOR_COUNTS[k];
                ref.compound = true;
                ref.optimize = true;
                try
                {
                    res.add((TestData)ref.clone());
                }
                catch (Exception e)
                {
                    e.printStackTrace();
                }
                ref.id = "td-" + q + m  + "_" + 10 + "_" + 100;
                ref.source = sources[m];
                ref.analyzer = analyzers[q];
                ref.maxBufferedDocs = 10;
                ref.mergeFactor = 100;//MERGEFACTOR_COUNTS[k];
                ref.compound = true;
                ref.optimize = true;
                try
                {
                    res.add((TestData)ref.clone());
                }
                catch (Exception e)
                {
                    e.printStackTrace();
                }
                ref.id = "td-" + q + m + "_" + 100 + "_" + 10;
                ref.source = sources[m];
                ref.analyzer = analyzers[q];
                ref.maxBufferedDocs = 100;
                ref.mergeFactor = 10;//MERGEFACTOR_COUNTS[k];
                ref.compound = true;
                ref.optimize = true;
                try
                {
                    res.add((TestData)ref.clone());
                }
                catch (Exception e)
                {
                    e.printStackTrace();
                }
                ref.id = "td-" + q + m + "_" + 100 + "_" + 100;
                ref.source = sources[m];
                ref.analyzer = analyzers[q];
                ref.maxBufferedDocs = 100;
                ref.mergeFactor = 100;//MERGEFACTOR_COUNTS[k];
                ref.compound = true;
                ref.optimize = true;
                try
                {
                    res.add((TestData)ref.clone());
                }
                catch (Exception e)
                {
                    e.printStackTrace();
                }
            }
        }
        return res.toArray(new TestData[0]);
    }

    @Override
    protected Object clone()
    {
        TestData cl = new TestData();
        cl.id = id;
        cl.compound = compound;
        cl.heap = heap;
        cl.mergeFactor = mergeFactor;
        cl.maxBufferedDocs = maxBufferedDocs;
        cl.optimize = optimize;
        cl.source = source;
        cl.directory = directory;
        cl.analyzer = analyzer;
        // don't clone runData
        return cl;
    }

    @Override
    public String toString()
    {
        StringBuffer res = new StringBuffer();
        res.append("#-- ID: ").append(id).append(", ").append(new Date()).append(", heap=").append(heap).append(" --\n");
        res.append("# source=").append(source).append(", directory=").append(directory).append("\n");
        res.append("# maxBufferedDocs=").append(maxBufferedDocs).append(", mergeFactor=").append(mergeFactor);
        res.append(", compound=").append(compound).append(", optimize=").append(optimize).append("\n");
        if (queries != null)
        {
            res.append(QueryData.getLabels()).append("\n");
            for (int i = 0; i < queries.length; i++)
            {
                res.append("# ").append(queries[i].toString()).append("\n");
            }
        }
        return res.toString();
    }

    public Analyzer getAnalyzer()
    {
        return analyzer;
    }

    public void setAnalyzer(Analyzer analyzer)
    {
        this.analyzer = analyzer;
    }

    public boolean isCompound()
    {
        return compound;
    }

    public void setCompound(boolean compound)
    {
        this.compound = compound;
    }

    public Directory getDirectory()
    {
        return directory;
    }

    public void setDirectory(Directory directory)
    {
        this.directory = directory;
    }

    public long getHeap()
    {
        return heap;
    }

    public void setHeap(long heap)
    {
        this.heap = heap;
    }

    public String getId()
    {
        return id;
    }

    public void setId(String id)
    {
        this.id = id;
    }

    public int getMaxBufferedDocs()
    {
        return maxBufferedDocs;
    }

    public void setMaxBufferedDocs(int maxBufferedDocs)
    {
        this.maxBufferedDocs = maxBufferedDocs;
    }

    public int getMergeFactor()
    {
        return mergeFactor;
    }

    public void setMergeFactor(int mergeFactor)
    {
        this.mergeFactor = mergeFactor;
    }

    public boolean isOptimize()
    {
        return optimize;
    }

    public void setOptimize(boolean optimize)
    {
        this.optimize = optimize;
    }

    public QueryData[] getQueries()
    {
        return queries;
    }

    public void setQueries(QueryData[] queries)
    {
        this.queries = queries;
    }

    public Vector<TestRunData> getRunData()
    {
        return runData;
    }

    public void setRunData(Vector<TestRunData> runData)
    {
        this.runData = runData;
    }

    public File getSource()
    {
        return source;
    }

    public void setSource(File source)
    {
        this.source = source;
    }
}