/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.benchmark.byTask.feeds;

import java.io.Closeable;
import java.io.IOException;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;

import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.benchmark.byTask.utils.Format;

/**
 * Base class for sources of benchmark data.
 * <p>
 * Tracks statistics about the generated data: the number of items and bytes
 * produced since the last {@link #resetInputs()} call, and the running totals
 * over the lifetime of the source.
 * <p>
 * The following configuration parameters are read in {@link #setConfig(Config)}:
 * <ul>
 * <li><b>content.source.forever</b> - whether items should be generated
 * forever (<b>default=true</b>).
 * <li><b>content.source.verbose</b> - whether the content source should output
 * messages (<b>default=false</b>).
 * <li><b>content.source.encoding</b> - the encoding to use when reading the
 * files of the content source. Implementations may define their own default
 * when this parameter is not specified (<b>default=null</b>).
 * <li><b>content.source.log.step</b> - the number of items between logged
 * messages; 0 means no logging. <b>NOTE:</b> when verbose is false no logging
 * occurs even if logStep is not 0 (<b>default=0</b>).
 * </ul>
 * <p>
 * <b>NOTE:</b> {@link #addBytes(long)} and {@link #addItem()} are synchronized;
 * the getters, {@link #shouldLog()} and {@link #resetInputs()} are not.
 */
public abstract class ContentItemsSource implements Closeable {

  // Counters covering the period since the last resetInputs() call.
  private long bytesCount;
  private int itemCount;

  // Counters covering the whole lifetime of this source; resetInputs() leaves them alone.
  private long totalBytesCount;
  private int totalItemCount;

  private Config config;

  // Bookkeeping for printStatistics(): the totals shown by the previous
  // report and the number of reports printed so far.
  private int lastPrintedNumUniqueTexts = 0;
  private long lastPrintedNumUniqueBytes = 0;
  private int printNum = 0;

  protected boolean forever;
  protected int logStep;
  protected boolean verbose;
  protected String encoding;

  /** Adds <code>numBytes</code> to both the since-reset and the total byte counters. */
  protected final synchronized void addBytes(long numBytes) {
    totalBytesCount += numBytes;
    bytesCount += numBytes;
  }

  /** Increments both the since-reset and the total item counters by one. */
  protected final synchronized void addItem() {
    itemCount++;
    totalItemCount++;
  }

  /**
   * Convenience method that walks the file tree rooted at <code>dir</code> and
   * appends the real path of every readable file it visits to
   * <code>files</code>. Unreadable files are skipped.
   */
  protected final void collectFiles(Path dir, final ArrayList<Path> files) throws IOException {
    final SimpleFileVisitor<Path> collector = new SimpleFileVisitor<Path>() {
      @Override
      public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
        if (!Files.isReadable(file)) {
          return FileVisitResult.CONTINUE;
        }
        files.add(file.toRealPath());
        return FileVisitResult.CONTINUE;
      }
    };
    Files.walkFileTree(dir, collector);
  }

  /**
   * Returns true if it is time to log a message: verbose must be on, logStep
   * must be positive, and the number of items generated since the last reset
   * must be a multiple of logStep.
   */
  protected final boolean shouldLog() {
    if (!verbose || logStep <= 0) {
      return false;
    }
    return itemCount % logStep == 0;
  }

  /** Called when reading from this content source is no longer required. */
  @Override
  public abstract void close() throws IOException;

  /** Returns the number of bytes generated since the last reset. */
  public final long getBytesCount() {
    return bytesCount;
  }

  /** Returns the number of items generated since the last reset. */
  public final int getItemsCount() {
    return itemCount;
  }

  /** Returns the {@link Config} last passed to {@link #setConfig(Config)}. */
  public final Config getConfig() {
    return config;
  }

  /** Returns the total number of bytes generated by this source. */
  public final long getTotalBytesCount() {
    return totalBytesCount;
  }

  /** Returns the total number of items generated by this source. */
  public final int getTotalItemsCount() {
    return totalItemCount;
  }

  /**
   * Resets the input for this content source, so that the test behaves,
   * input-wise, as if it had just started.
   * <p>
   * <b>NOTE:</b> this default implementation zeroes the since-reset byte and
   * item counters (the totals are untouched), so overriding methods must call
   * super.resetInputs.
   */
  public void resetInputs() throws IOException {
    itemCount = 0;
    bytesCount = 0;
  }

  /**
   * Sets the {@link Config} for this content source and reads the
   * <code>content.source.*</code> parameters from it. Overriding methods must
   * call super.setConfig.
   */
  public void setConfig(Config config) {
    this.config = config;
    // Same lookup order as always; each key falls back to its documented default.
    forever = config.get("content.source.forever", true);
    logStep = config.get("content.source.log.step", 0);
    verbose = config.get("content.source.verbose", false);
    encoding = config.get("content.source.encoding", null);
  }

  /**
   * Prints a statistics report to <code>System.out</code>.
   * <p>
   * Nothing happens unless verbose is on. The report is printed only when at
   * least one of these holds: the total item count or the total byte count
   * grew since the previous report, or items were generated since the last
   * reset.
   *
   * @param itemsName label used for the items in the report text
   */
  public void printStatistics(String itemsName) {
    if (!verbose) {
      return;
    }

    final String pad = " ";
    final StringBuilder report = new StringBuilder();
    final String eol = System.getProperty("line.separator");
    boolean somethingToReport = false;

    // Header line; it is discarded together with the builder when nothing is reported.
    report.append("------------> ");
    report.append(getClass().getSimpleName());
    report.append(" statistics (");
    report.append(printNum);
    report.append("): ");
    report.append(eol);

    final int totalItems = getTotalItemsCount();
    if (totalItems > lastPrintedNumUniqueTexts) {
      somethingToReport = true;
      report.append("total count of ").append(itemsName).append(": ");
      report.append(Format.format(0, totalItems, pad));
      report.append(eol);
      lastPrintedNumUniqueTexts = totalItems;
    }

    final long totalBytes = getTotalBytesCount();
    if (totalBytes > lastPrintedNumUniqueBytes) {
      somethingToReport = true;
      report.append("total bytes of ").append(itemsName).append(": ");
      report.append(Format.format(0, totalBytes, pad));
      report.append(eol);
      lastPrintedNumUniqueBytes = totalBytes;
    }

    if (getItemsCount() > 0) {
      somethingToReport = true;
      report.append("num ").append(itemsName).append(" added since last inputs reset: ");
      report.append(Format.format(0, getItemsCount(), pad));
      report.append(eol);
      report.append("total bytes added for ").append(itemsName).append(" since last inputs reset: ");
      report.append(Format.format(0, getBytesCount(), pad));
      report.append(eol);
    }

    if (!somethingToReport) {
      return;
    }

    report.append(eol);
    System.out.println(report.toString());
    printNum++;
  }

}