package org.dcache.services.billing.text;
import com.google.common.base.Charsets;
import com.google.common.base.Function;
import com.google.common.base.Predicate;
import com.google.common.base.Strings;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import com.google.common.collect.Ordering;
import com.google.common.collect.Sets;
import com.google.common.collect.TreeTraverser;
import com.google.common.hash.BloomFilter;
import com.google.common.hash.Funnels;
import com.google.common.io.ByteSource;
import com.google.common.io.CharSource;
import com.google.common.io.CharStreams;
import com.google.common.io.Files;
import com.google.common.io.LineProcessor;
import com.google.gson.stream.JsonWriter;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.bridge.SLF4JBridgeHandler;
import java.io.BufferedInputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PipedReader;
import java.io.PipedWriter;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.io.Reader;
import java.net.URISyntaxException;
import java.nio.charset.Charset;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.LocalTime;
import java.time.MonthDay;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.time.format.FormatStyle;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.logging.LogManager;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.dcache.boot.LayoutBuilder;
import org.dcache.util.Args;
import org.dcache.util.ConfigurationProperties;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.io.Files.isFile;
import static java.util.Arrays.asList;
import static java.util.stream.Collectors.toList;
public class Indexer
{
// Logger; main() uses it to report uncaught exceptions.
private static final Logger LOGGER = LoggerFactory.getLogger(Indexer.class);
// Matches billing file names: "billing-", a date, and an optional ".bz2" suffix.
// Group 1 captures the date part.
// NOTE(review): the separators between the digit groups are unescaped dots and so match
// any character, while FILE_DATE_FORMAT requires literal dots when the captured text is
// parsed - confirm whether this is intended.
private static final Pattern BILLING_NAME_PATTERN =
Pattern.compile("^billing-(\\d\\d\\d\\d.\\d\\d.\\d\\d)(\\.bz2)?$");
// Configuration property supplying the default for the -flat option.
private static final String BILLING_TEXT_FLAT_DIR = "billing.text.flat-dir";
// Configuration property supplying the default for the -dir option.
private static final String BILLING_TEXT_DIR = "billing.text.dir";
// Prefix of the configuration properties collected by getBillingFormats().
private static final String BILLING_TEXT_FORMAT_PREFIX = "billing.parser.format!";
// File extension that marks a bzip2 compressed file.
private static final String BZ2 = "bz2";
// Size passed to each PipedReader created by find().
private static final int PIPE_SIZE = 2048;
// Format of the dates given with the -since and -until options (locale dependent).
private static final DateTimeFormatter CLI_DATE_FORMAT =
DateTimeFormatter.ofLocalizedDate(FormatStyle.MEDIUM);
// Month and day portion of the default billing timestamp (see parseDefaultTimestamp).
private static final DateTimeFormatter DEFAULT_DATE_FORMAT =
DateTimeFormatter.ofPattern("MM.dd");
// Time-of-day portion of the default billing timestamp (see parseDefaultTimestamp).
private static final DateTimeFormatter DEFAULT_TIME_FORMAT =
DateTimeFormatter.ofPattern("HH:mm:ss");
// Format of the date embedded in billing, error and index file names.
private static final DateTimeFormatter FILE_DATE_FORMAT =
DateTimeFormatter.ofPattern("uuuu.MM.dd");
// Relative year/month sub-directory used by getDirectory() for the non-flat layout.
private static final DateTimeFormatter DIRECTORY_DATE_FORMAT =
DateTimeFormatter.ofPattern("uuuu" + File.separator + "MM");
// Format in which fixDate() rewrites the "date" attribute.
private static final DateTimeFormatter ISO8601_FORMAT =
DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ssX");
/**
 * File tree traverser modelled on the one provided by Guava, with the difference
 * that the entries of each directory are visited in lexicographic order.
 */
private static final TreeTraverser<File> SORTED_FILE_TREE_TRAVERSER = new TreeTraverser<File>() {
    @Override
    public Iterable<File> children(File file) {
        // Asking isDirectory() first may be cheaper than calling listFiles() on a plain file.
        if (!file.isDirectory()) {
            return Collections.emptyList();
        }
        File[] entries = file.listFiles();
        if (entries == null) {
            // listFiles() returns null when the directory cannot be listed.
            return Collections.emptyList();
        }
        return Ordering.natural().sortedCopy(asList(entries));
    }
};
// True if all billing files live directly in dir; otherwise getDirectory() appends
// a year/month sub-directory.
private final boolean isFlat;
// Base directory of the billing files.
private final File dir;
// Billing format strings taken from the configuration by getBillingFormats().
private final ImmutableMap<String, String> formats;
/**
 * Parses the command line and immediately executes the selected command.
 *
 * <p>Exactly one of the options -find, -all, -yesterday, -index, -compress,
 * -decompress or -help selects the command; they are tested in that order and the
 * first one present wins. The base directory, directory layout and billing formats
 * are read from the dCache configuration; -dir and -flat override the first two.
 *
 * @param args the parsed command line
 * @throws IllegalArgumentException if no command option is present or a file name
 *         given to -index does not look like a billing file name
 * @throws DateTimeParseException if -since or -until cannot be parsed
 */
private Indexer(Args args) throws IOException, URISyntaxException, ClassNotFoundException
{
// False positive probability used when creating bloom filters.
double fpp = args.getDoubleOption("fpp", 0.01);
ConfigurationProperties configuration = new LayoutBuilder().build().properties();
isFlat = Boolean.valueOf(args.getOption("flat", configuration.getValue(BILLING_TEXT_FLAT_DIR)));
dir = new File(args.getOption("dir", configuration.getValue(BILLING_TEXT_DIR)));
formats = getBillingFormats(configuration);
if (args.hasOption("find")) {
// Search terms come from the file given with -f, else from the positional
// arguments; with neither, the empty term stands for "match everything".
Collection<String> searchTerms;
if (args.hasOption("f")) {
searchTerms = Files.readLines(new File(args.getOption("f")), Charsets.UTF_8);
} else if (args.argc() > 0) {
searchTerms = args.getArguments();
} else {
searchTerms = ImmutableList.of("");
}
FluentIterable<File> filesWithPossibleMatch =
SORTED_FILE_TREE_TRAVERSER
.preOrderTraversal(dir);
if (args.hasOption("since") || args.hasOption("until")) {
// A missing bound defaults to the epoch (since) or to tomorrow (until).
LocalDate since = args.hasOption("since")
? LocalDate.parse(args.getOption("since"), CLI_DATE_FORMAT) : LocalDate.ofEpochDay(0);
LocalDate until = args.hasOption("until")
? LocalDate.parse(args.getOption("until"), CLI_DATE_FORMAT) : LocalDate.now().plusDays(1);
filesWithPossibleMatch =
filesWithPossibleMatch.filter(file -> isInRange(file, since, until));
}
if (searchTerms.contains("")) {
// The empty term matches every line, so the index cannot rule out any file.
filesWithPossibleMatch =
filesWithPossibleMatch.filter(file -> isBillingFile(file));
} else {
filesWithPossibleMatch =
filesWithPossibleMatch.filter(isBillingFileAndMightContain(searchTerms));
}
// Output mode: candidate file names only, YAML, JSON, or the plain text lines.
if (args.hasOption("files")) {
for (File file : filesWithPossibleMatch) {
System.out.println(file);
}
} else if (args.hasOption("yaml")) {
try (OutputWriter out = toYaml(System.out)) {
find(searchTerms, filesWithPossibleMatch, out);
}
} else if (args.hasOption("json")) {
try (OutputWriter out = toJson(System.out)) {
find(searchTerms, filesWithPossibleMatch, out);
}
} else {
try (OutputWriter out = toText(System.out)) {
find(searchTerms, filesWithPossibleMatch, out);
}
}
} else if (args.hasOption("all")) {
// (Re)index every billing file found below the base directory.
for (File file : SORTED_FILE_TREE_TRAVERSER.preOrderTraversal(dir).filter(isFile())) {
Matcher matcher = BILLING_NAME_PATTERN.matcher(file.getName());
if (matcher.matches()) {
System.out.println("Indexing " + file);
index(fpp, file, getIndexFile(file.getParentFile(), matcher.group(1)));
}
}
} else if (args.hasOption("yesterday")) {
// Index yesterday's billing file and, with -compress, compress the billing
// file and the error file afterwards.
LocalDate yesterday = LocalDate.now().minusDays(1);
File billingFile = getBillingFile(yesterday);
File errorFile = getErrorFile(yesterday);
File indexFile = getIndexFile(yesterday);
if (billingFile.exists()) {
index(fpp, billingFile, indexFile);
if (args.hasOption("compress")) {
compress(billingFile);
}
}
if (errorFile.exists() && args.hasOption("compress")) {
compress(errorFile);
}
} else if (args.hasOption("index")) {
// Index the explicitly named files; the index is written next to each file.
for (String name : args.getArguments()) {
File file = new File(name);
Matcher matcher = BILLING_NAME_PATTERN.matcher(file.getName());
if (!matcher.matches()) {
throw new IllegalArgumentException("File name does not follow the format of billing files: " + name);
}
index(fpp, file, getIndexFile(file.getParentFile(), matcher.group(1)));
}
} else if (args.hasOption("compress")) {
for (String name : args.getArguments()) {
compress(new File(name));
}
} else if (args.hasOption("decompress")) {
for (String name : args.getArguments()) {
decompress(new File(name));
}
} else if (args.hasOption("help")) {
help(System.err);
} else {
throw new IllegalArgumentException("Invalid arguments.");
}
}
/**
 * Creates an output writer that prints matching billing lines verbatim to {@code out}.
 * Empty lines and lines starting with '#' are dropped. Lines that begin with a
 * timestamp in the default format get the year of the billing file prepended.
 */
private OutputWriter toText(final PrintStream out)
{
    return new OutputWriter()
    {
        @Override
        public void write(LocalDate date, String line) throws IOException
        {
            if (line.isEmpty() || line.charAt(0) == '#') {
                return;
            }
            int year = date.getYear();
            boolean usesDefaultTimestamp;
            try {
                parseDefaultTimestamp(year, line);
                usesDefaultTimestamp = true;
            } catch (DateTimeParseException ignored) {
                // The line does not start with a default timestamp; print it unchanged.
                usesDefaultTimestamp = false;
            }
            if (usesDefaultTimestamp) {
                out.print(String.valueOf(year));
                out.print('.');
            }
            out.println(line);
        }

        @Override
        public void close()
        {
            // Nothing to release; the stream belongs to the caller.
        }
    };
}
/**
 * Creates an output writer that renders billing entries as a JSON array on
 * {@code out}, one JSON object per entry with one member per parsed attribute.
 *
 * <p>The opening bracket is written when the writer is created and the closing
 * bracket when it is closed, so the writer must be closed to obtain complete JSON.
 */
private OutputWriter toJson(final PrintStream out) throws IOException, URISyntaxException
{
return new OutputWriter()
{
// NOTE(review): OutputStreamWriter is created without an explicit charset and
// therefore uses the platform default - confirm this is intended.
final JsonWriter writer = new JsonWriter(new OutputStreamWriter(out));
final BillingParserBuilder builder =
new BillingParserBuilder(formats).addAllAttributes();
// Parser for the configured formats; replaced whenever a "##" line is seen.
Function<String, Map<String, String>> parser = builder.buildToMap();
{
writer.setIndent(" ");
writer.beginArray();
}
@Override
public void write(LocalDate date, String line) throws IOException
{
if (line.startsWith("##")) {
// A "##" line is handed to the builder as the format for the lines that follow.
parser = builder.withFormat(line).addAllAttributes().buildToMap();
} else {
Map<String, String> attributes = parser.apply(line);
// Lines for which the parser yields no attributes are skipped.
if (!attributes.isEmpty()) {
fixDate(date.getYear(), attributes);
writer.beginObject();
for (Map.Entry<String, String> entry : attributes.entrySet()) {
writer.name(entry.getKey()).value(entry.getValue());
}
writer.endObject();
}
}
}
@Override
public void close() throws IOException
{
// Terminate the array and flush; the underlying stream is left open.
writer.endArray();
writer.flush();
out.println();
}
};
}
/**
 * Creates an output writer that renders billing entries as a YAML sequence on
 * {@code out}.
 *
 * <p>Each entry is preceded by a comment holding the original billing line. Lines
 * for which the parser yields no attributes are emitted only as an
 * {@code # Unknown:} comment. Lines starting with "##" are not printed; they are
 * handed to the parser builder as the format for the lines that follow.
 *
 * @param out the stream to print to; it is not closed by the returned writer
 */
private OutputWriter toYaml(final PrintStream out) throws IOException, URISyntaxException
{
    return new OutputWriter()
    {
        final BillingParserBuilder builder =
                new BillingParserBuilder(formats).addAllAttributes();
        // Parser for the configured formats; replaced whenever a "##" line is seen.
        Function<String, Map<String, String>> parser = builder.buildToMap();

        @Override
        public void write(LocalDate date, String line) throws IOException
        {
            if (line.startsWith("##")) {
                parser = builder.withFormat(line).addAllAttributes().buildToMap();
            } else {
                Map<String, String> attributes = parser.apply(line);
                if (attributes.isEmpty()) {
                    out.append("# Unknown: ").println(line);
                } else {
                    fixDate(date.getYear(), attributes);
                    out.append("# ").println(line);
                    // The first key follows the "- " sequence indicator and thus starts
                    // two columns in. All other keys of the same mapping must start at
                    // that column too, or the output is not valid YAML.
                    String format = "- %-21s %s\n";
                    for (Map.Entry<String, String> entry : attributes.entrySet()) {
                        out.printf(format, entry.getKey() + ':', entry.getValue());
                        format = "  %-21s %s\n";
                    }
                }
            }
        }

        @Override
        public void close()
        {
            // Nothing to release; the stream belongs to the caller.
        }
    };
}
/**
 * Searches the given billing files for lines containing any of the search terms
 * and writes the matching lines to {@code out}.
 *
 * <p>Each file is scanned by a task on a fixed-size thread pool, which streams its
 * matches through a pipe. The pipes are drained on the calling thread in the
 * iteration order of {@code files}, so the output order follows the file order
 * even though files are scanned concurrently. Files whose name does not match the
 * billing file name pattern are ignored.
 *
 * @param searchTerms terms to look for; a line matches if it contains any of them
 * @param files the candidate files
 * @param out receives every matching line together with the date of its file
 * @throws IOException if a file cannot be read or {@code out} fails
 */
private static void find(final Collection<String> searchTerms, FluentIterable<File> files, final OutputWriter out)
        throws IOException
{
    int threads = Runtime.getRuntime().availableProcessors();
    ExecutorService executor = Executors.newFixedThreadPool(threads);
    try {
        List<Map.Entry<LocalDate, Reader>> readers = new ArrayList<>();
        List<java.util.concurrent.Future<Void>> scans = new ArrayList<>();
        for (File file : files) {
            Matcher matcher = BILLING_NAME_PATTERN.matcher(file.getName());
            if (matcher.matches()) {
                LocalDate date = LocalDate.parse(matcher.group(1), FILE_DATE_FORMAT);
                PipedReader reader = new PipedReader(PIPE_SIZE);
                PipedWriter writer = new PipedWriter(reader);
                java.util.concurrent.Callable<Void> scan = () -> {
                    try {
                        grep(searchTerms, file, new PrintWriter(writer));
                    } finally {
                        // Closing the write end signals end-of-stream to the reader.
                        writer.close();
                    }
                    return null;
                };
                scans.add(executor.submit(scan));
                readers.add(Maps.immutableEntry(date, reader));
            }
        }
        for (int i = 0; i < readers.size(); i++) {
            final LocalDate date = readers.get(i).getKey();
            CharStreams.readLines(readers.get(i).getValue(), new LineProcessor<Void>()
            {
                @Override
                public boolean processLine(String line) throws IOException
                {
                    out.write(date, line);
                    return true;
                }

                @Override
                public Void getResult()
                {
                    return null;
                }
            });
            // The pipe reports end-of-stream even when the scan failed, so the
            // task's outcome must be checked or a read error would go unnoticed.
            awaitScan(scans.get(i));
        }
    } finally {
        // On success every task has already finished. On failure this interrupts
        // scans that would otherwise stay blocked on a pipe nobody reads any more.
        executor.shutdownNow();
    }
}

/**
 * Waits for a scan task to finish and rethrows its failure, if any, as an IOException.
 */
private static void awaitScan(java.util.concurrent.Future<Void> scan) throws IOException
{
    try {
        scan.get();
    } catch (ExecutionException e) {
        Throwable cause = e.getCause();
        if (cause instanceof IOException) {
            throw (IOException) cause;
        }
        throw new IOException("Failed to search billing file: " + cause, cause);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new java.io.InterruptedIOException("Interrupted while searching billing files");
    }
}
/**
 * Copies to {@code out} every billing entry of {@code file} that contains at least
 * one of the search terms. Lines starting with "##" are always copied. Empty lines
 * and other lines starting with '#' are never copied.
 */
private static void grep(final Collection<String> searchTerms, File file, PrintWriter out)
        throws IOException
{
    LineProcessor<Void> copier = new LineProcessor<Void>()
    {
        @Override
        public boolean processLine(String line) throws IOException
        {
            boolean isEntry = !line.isEmpty() && line.charAt(0) != '#';
            boolean wanted = isEntry
                    ? searchTerms.stream().anyMatch(line::contains)
                    : line.startsWith("##");
            if (wanted) {
                out.println(line);
            }
            return true;
        }

        @Override
        public Void getResult()
        {
            return null;
        }
    };
    asCharSource(file, Charsets.UTF_8).readLines(copier);
}
/**
 * Builds the index of {@code billingFile} and stores it, as a bloom filter with
 * false positive probability {@code fpp}, in {@code indexFile}.
 */
private void index(double fpp, File billingFile, File indexFile) throws IOException
{
    Set<String> terms = produceIndex(billingFile, Runtime.getRuntime().availableProcessors());
    writeToFile(indexFile, produceBloomFilter(fpp, terms));
}
/**
 * Decompresses a bzip2 compressed file into a sibling file with the same name
 * minus the {@code .bz2} extension, then deletes the compressed file.
 *
 * @param compressedFile the file to decompress; must have the {@code bz2} extension
 * @throws IllegalArgumentException if the file does not have the {@code bz2} extension
 * @throws IOException if reading, writing or deleting fails
 */
private static void decompress(File compressedFile) throws IOException
{
    String path = compressedFile.getPath();
    checkArgument(Files.getFileExtension(path).equals(BZ2), "File must have " + BZ2 + " extension.");
    File file = new File(compressedFile.getParent(), Files.getNameWithoutExtension(path));
    // Read from the compressed file, not from the target being written. The raw
    // stream is a resource of its own so that it is closed even if the bzip2
    // header check in the decompressor's constructor fails.
    try (InputStream raw = new BufferedInputStream(new FileInputStream(compressedFile));
         InputStream in = new BZip2CompressorInputStream(raw)) {
        Files.asByteSink(file).writeFrom(in);
    }
    java.nio.file.Files.delete(compressedFile.toPath());
}
/**
 * Compresses {@code file} with bzip2 into a sibling file with the {@code .bz2}
 * extension appended, then deletes the uncompressed file.
 */
private static void compress(File file) throws IOException
{
    File target = new File(file.getPath() + "." + BZ2);
    ByteSource plain = Files.asByteSource(file);
    try (OutputStream sink = new BZip2CompressorOutputStream(Files.asByteSink(target).openBufferedStream())) {
        plain.copyTo(sink);
    }
    java.nio.file.Files.delete(file.toPath());
}
/**
 * Prints the command line usage to {@code out}.
 */
private static void help(PrintStream out)
{
    String[] usage = {
        "COMMANDS:",
        " -all [-fpp=PROP] [-dir=BASE]",
        " (Re)index all billing files.",
        " -compress FILE...",
        " Compress FILE.",
        " -decompress FILE...",
        " Decompress FILE.",
        " -find [-files|-json|-yaml] [-dir=BASE] [-since=DATE] [-until=DATE] [-f=FILE] [SEARCHTERM]...",
        " Output billing entries that contain SEARCHTERM. Valid search terms are",
        " path, pnfsid, dn and path prefixes of those. Optionally output names",
        " of billing files that might contain the search term. If no search term",
        " is provided, all entries are output.",
        " -index [-fpp=PROP] FILE...",
        " Create index for FILE.",
        " -yesterday [-compress] [-fpp=PROP] [-dir=BASE] [-flat=BOOL]",
        " Index yesterday's billing file. Optionally compresses the billing file",
        " after indexing it.",
        "",
        "OPTIONS:",
        " -dir=BASE",
        " Base directory for billing files. Default is taken from dCache",
        " configuration.",
        " -flat=BOOLEAN",
        " Chooses between flat or hierarchical directory layout. Default is",
        " taken from dCache configuration.",
        " -fpp=PROP",
        " The false positive probability expressed as a value in (0;1]. The",
        " default is 0.01.",
    };
    for (String line : usage) {
        out.println(line);
    }
}
/**
 * Parses the timestamp at the start of a billing line written in the default
 * format: characters 0-4 hold month and day ("MM.dd") and characters 6-13 hold
 * the time of day ("HH:mm:ss"). The default format carries no year, so the
 * caller supplies it.
 *
 * @param year the year to combine with the parsed month and day
 * @param s the text that starts with the timestamp
 * @return the parsed timestamp in the given year
 * @throws DateTimeParseException if {@code s} is too short to hold a timestamp or
 *         does not start with one in the default format
 */
private static LocalDateTime parseDefaultTimestamp(int year, String s)
        throws DateTimeParseException
{
    // Without this check substring() would fail with StringIndexOutOfBoundsException,
    // which callers that only handle DateTimeParseException would not catch.
    if (s.length() < 14) {
        throw new DateTimeParseException("Text is too short to contain a timestamp", s, s.length());
    }
    MonthDay monthDay = MonthDay.parse(s.substring(0, 5), DEFAULT_DATE_FORMAT);
    LocalTime time = LocalTime.parse(s.substring(6, 14), DEFAULT_TIME_FORMAT);
    return monthDay.atYear(year).atTime(time);
}
/**
 * Completes the "date" attribute of a billing entry: a value in the default
 * timestamp format is replaced by an ISO 8601 timestamp in the given year and
 * the system time zone. A missing or differently formatted value is left as is.
 */
private static void fixDate(int year, Map<String,String> attributes)
{
    String raw = attributes.get("date");
    if (raw == null) {
        return;
    }
    LocalDateTime timestamp;
    try {
        timestamp = parseDefaultTimestamp(year, raw);
    } catch (DateTimeParseException ignored) {
        // Not in the default format; keep the original value.
        return;
    }
    attributes.put("date", ISO8601_FORMAT.format(timestamp.atZone(ZoneId.systemDefault())));
}
/**
 * Returns the directory holding the files of the given date: the base directory
 * for the flat layout, otherwise its year/month sub-directory.
 */
private File getDirectory(LocalDate date)
{
    if (isFlat) {
        return dir;
    }
    return new File(dir, DIRECTORY_DATE_FORMAT.format(date));
}
/**
 * Returns the (uncompressed) billing file of the given date.
 */
private File getBillingFile(LocalDate date)
{
    String name = "billing-" + FILE_DATE_FORMAT.format(date);
    return new File(getDirectory(date), name);
}
/**
 * Returns the (uncompressed) billing error file of the given date.
 */
private File getErrorFile(LocalDate date)
{
    String name = "billing-error-" + FILE_DATE_FORMAT.format(date);
    return new File(getDirectory(date), name);
}
/**
 * Returns the index file belonging to the billing file of the given date.
 */
private File getIndexFile(LocalDate date)
{
    File directory = getDirectory(date);
    String formattedDate = FILE_DATE_FORMAT.format(date);
    return getIndexFile(directory, formattedDate);
}
/**
 * Returns the file named "index-" followed by {@code date} inside {@code dir}.
 */
private static File getIndexFile(File dir, String date)
{
    String name = "index-" + date;
    return new File(dir, name);
}
/**
 * Reads a billing file and returns the set of strings to index, as collected by
 * an {@link IndexProcessor} run through a {@link ParallelizingLineProcessor}.
 *
 * @param file the billing file, optionally bzip2 compressed
 * @param threads passed to the parallelizing line processor
 * @throws IOException if reading fails or the parser cannot be set up
 */
private Set<String> produceIndex(final File file, int threads)
        throws IOException
{
    try {
        IndexProcessor collector = new IndexProcessor(formats);
        try (ParallelizingLineProcessor<Set<String>> parallelizer =
                     new ParallelizingLineProcessor<>(threads, collector)) {
            return asCharSource(file, Charsets.UTF_8).readLines(parallelizer);
        }
    } catch (IOException e) {
        throw new IOException("I/O failure while reading " + file + ":" + e.getMessage(), e);
    } catch (URISyntaxException e) {
        throw new IOException("Invalid dCache configuration: " + e.getMessage(), e);
    }
}
/**
 * Returns a character source for {@code file} that decompresses on the fly if
 * the file has the {@code bz2} extension.
 */
private static CharSource asCharSource(final File file, Charset charset)
{
    boolean compressed = Files.getFileExtension(file.getPath()).equals(BZ2);
    if (!compressed) {
        return Files.asByteSource(file).asCharSource(charset);
    }
    ByteSource decompressing = new ByteSource() {
        @Override
        public InputStream openStream() throws IOException
        {
            return new BZip2CompressorInputStream(new BufferedInputStream(new FileInputStream(file)));
        }
    };
    return decompressing.asCharSource(charset);
}
/**
 * Creates a bloom filter sized for {@code index} with false positive probability
 * {@code fpp} and adds every element of {@code index} to it.
 */
private static BloomFilter<CharSequence> produceBloomFilter(double fpp, Set<String> index)
{
    BloomFilter<CharSequence> filter =
            BloomFilter.create(Funnels.stringFunnel(Charsets.UTF_8), index.size(), fpp);
    for (String element : index) {
        filter.put(element);
    }
    return filter;
}
/**
 * Serializes {@code object} into {@code outFile} using Java serialization,
 * replacing any existing content.
 *
 * @throws IOException if the file cannot be written or the object cannot be serialized
 */
private static void writeToFile(File outFile, Object object)
        throws IOException
{
    // The file stream is a resource of its own so that it is closed even if the
    // ObjectOutputStream constructor, which writes the stream header, fails.
    try (FileOutputStream file = new FileOutputStream(outFile);
         ObjectOutputStream out = new ObjectOutputStream(file)) {
        out.writeObject(object);
    }
}
/**
 * Reads a single object from {@code outFile} using Java serialization.
 *
 * @return the deserialized object
 * @throws IOException if the file cannot be read or is not a valid object stream
 * @throws ClassNotFoundException if the class of the stored object is unavailable
 */
private static Object readFromFile(File outFile)
        throws IOException, ClassNotFoundException
{
    // The ObjectInputStream constructor reads the stream header and throws for an
    // empty or corrupt file. Keeping the file stream as a separate resource
    // ensures it is closed in that case as well.
    try (FileInputStream file = new FileInputStream(outFile);
         ObjectInputStream in = new ObjectInputStream(file)) {
        return in.readObject();
    }
}
/**
 * Returns true if {@code file} is a regular file whose name matches the billing
 * file name pattern.
 */
private static boolean isBillingFile(File file)
{
    return file.isFile() && BILLING_NAME_PATTERN.matcher(file.getName()).matches();
}
/**
 * Collects all billing format strings from the configuration. Only properties
 * whose name starts with the billing format prefix are included; the prefix is
 * stripped from the keys of the returned map.
 */
private static ImmutableMap<String,String> getBillingFormats(ConfigurationProperties configuration)
{
    int prefixLength = BILLING_TEXT_FORMAT_PREFIX.length();
    ImmutableMap.Builder<String,String> collected = ImmutableMap.builder();
    for (String property : configuration.stringPropertyNames()) {
        if (!property.startsWith(BILLING_TEXT_FORMAT_PREFIX)) {
            continue;
        }
        collected.put(property.substring(prefixLength), configuration.getValue(property));
    }
    return collected.build();
}
/**
 * Returns a predicate accepting regular files that are named like billing files
 * and whose index does not rule out all of the given terms. A billing file
 * without an index file is always accepted. One trailing '/' is removed from
 * each term before it is looked up in the index.
 *
 * <p>The predicate throws a RuntimeException if an index file cannot be read.
 */
private static Predicate<File> isBillingFileAndMightContain(Collection<String> terms)
{
    final List<String> searchTerms = new ArrayList<>(terms.size());
    for (String term : terms) {
        searchTerms.add(term.endsWith("/") ? term.substring(0, term.length() - 1) : term);
    }
    return candidate -> {
        if (!candidate.isFile()) {
            return false;
        }
        Matcher name = BILLING_NAME_PATTERN.matcher(candidate.getName());
        if (!name.matches()) {
            return false;
        }
        File index = getIndexFile(candidate.getParentFile(), name.group(1));
        if (!index.exists()) {
            // Without an index nothing can be ruled out.
            return true;
        }
        try {
            @SuppressWarnings("unchecked")
            BloomFilter<CharSequence> filter = (BloomFilter<CharSequence>) readFromFile(index);
            return searchTerms.stream().anyMatch(filter::mightContain);
        } catch (ClassNotFoundException | IOException e) {
            throw new RuntimeException("Failed to read index", e);
        }
    };
}
/**
 * Returns true if {@code file} is named like a billing file and the date in its
 * name is on or after {@code since} and before {@code until}.
 */
private static boolean isInRange(File file, LocalDate since, LocalDate until)
{
    Matcher name = BILLING_NAME_PATTERN.matcher(file.getName());
    if (!name.matches()) {
        return false;
    }
    LocalDate date = LocalDate.parse(name.group(1), FILE_DATE_FORMAT);
    return !date.isBefore(since) && date.isBefore(until);
}
/**
 * Line processor for billing files that collects the strings to index: the
 * "pnfsid" attribute as is, and the "path" and "owner" attributes together with
 * their '/'-separated prefixes.
 */
private static class IndexProcessor implements LineProcessor<Set<String>>
{
    private final Set<String> result = Sets.newConcurrentHashSet();
    private final BillingParserBuilder builder;
    // Current parser; replaced whenever a "##" line is encountered.
    private Function<String, String[]> parser;

    private IndexProcessor(ImmutableMap<String, String> formats)
            throws IOException, URISyntaxException
    {
        // The attribute order fixes the positions in the array returned by the parser.
        builder = new BillingParserBuilder(formats)
                .addAttribute("path")
                .addAttribute("pnfsid")
                .addAttribute("owner");
        parser = builder.buildToArray();
    }

    @Override
    public boolean processLine(String line) throws IOException
    {
        if (line.startsWith("##")) {
            parser = builder.withFormat(line).buildToArray();
            return true;
        }
        if (line.isEmpty() || line.charAt(0) == '#') {
            return true;
        }
        String[] value = parser.apply(line);
        String path = value[0];
        String pnfsid = value[1];
        String owner = value[2];
        if (!Strings.isNullOrEmpty(path)) {
            addAllPathPrefixes(path, result);
        }
        if (!Strings.isNullOrEmpty(pnfsid)) {
            result.add(pnfsid);
        }
        if (!Strings.isNullOrEmpty(owner)) {
            addAllPathPrefixes(owner, result);
        }
        return true;
    }

    @Override
    public Set<String> getResult()
    {
        return result;
    }

    /**
     * Adds {@code path} and each prefix obtained by repeatedly cutting at the last
     * '/' to {@code paths}. Stops at the first string that is already present or
     * that has no '/' after its first character.
     */
    private static void addAllPathPrefixes(String path, Set<String> paths)
    {
        String prefix = path;
        while (paths.add(prefix)) {
            int slash = prefix.lastIndexOf('/');
            if (slash <= 0) {
                break;
            }
            prefix = prefix.substring(0, slash);
        }
    }
}
/**
 * Command line entry point. All work happens in the Indexer constructor.
 *
 * <p>Exit codes set here: 1 for invalid arguments (usage is printed as well) or an
 * unparsable date, 2 for I/O, configuration URI or class loading failures.
 *
 * @param arguments the command line arguments
 */
public static void main(String[] arguments)
throws URISyntaxException, ExecutionException, InterruptedException,
ClassNotFoundException
{
// Route java.util.logging output through SLF4J.
LogManager.getLogManager().reset();
SLF4JBridgeHandler.install();
Thread.setDefaultUncaughtExceptionHandler((t, e) -> LOGGER.error("Uncaught exception", e));
try {
new Indexer(new Args(arguments));
} catch (IllegalArgumentException e) {
// Bad command line: show the problem followed by the usage.
System.err.println(e.getMessage());
System.err.println();
help(System.err);
System.exit(1);
} catch (DateTimeParseException e) {
System.err.println(e.getMessage());
System.err.println();
System.exit(1);
} catch (IOException | URISyntaxException | ClassNotFoundException e) {
System.err.println(e);
System.exit(2);
}
}
/**
 * Sink for the lines produced by find(). Implementations render them in a
 * particular output format.
 */
private interface OutputWriter extends Closeable
{
/**
 * Writes one line found in a billing file.
 *
 * @param date the date taken from the name of the billing file the line came from
 * @param line the line as read from the billing file
 */
void write(LocalDate date, String line) throws IOException;
}
}