/*
 * Copyright 1998-2015 University Corporation for Atmospheric Research/Unidata
 *
 * Portions of this software were developed by the Unidata Program at the
 * University Corporation for Atmospheric Research.
 *
 * Access and use of this software shall impose the following obligations
 * and understandings on the user. The user is granted the right, without
 * any fee or cost, to use, copy, modify, alter, enhance and distribute
 * this software, and any derivative works thereof, and its supporting
 * documentation for any purpose whatsoever, provided that this entire
 * notice appears in all copies of the software, derivative works and
 * supporting documentation. Further, UCAR requests that the user credit
 * UCAR/Unidata in any publications that result from the use of this
 * software or in any product that includes this software. The names UCAR
 * and/or Unidata, however, may not be used in any advertising or publicity
 * to endorse or promote any products or commercial entity unless specific
 * written permission is obtained from UCAR/Unidata. The user also
 * understands that UCAR/Unidata is not obligated to provide the user with
 * any support, consulting, training or assistance of any kind with regard
 * to the use, operation and performance of this software nor to provide
 * the user with any updates, revisions, new versions or "bug fixes."
 *
 * THIS SOFTWARE IS PROVIDED BY UCAR/UNIDATA "AS IS" AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL UCAR/UNIDATA BE LIABLE FOR ANY SPECIAL,
 * INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
 * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
 * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
 * WITH THE ACCESS, USE OR PERFORMANCE OF THIS SOFTWARE.
 */
package thredds.client.catalog.writer;

import com.google.common.base.MoreObjects;

import thredds.client.catalog.Catalog;
import thredds.client.catalog.CatalogRef;
import thredds.client.catalog.Dataset;
import thredds.client.catalog.builder.CatalogBuilder;

import ucar.nc2.util.CancelTask;
import ucar.nc2.util.Indent;

import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;

/**
 * Crawl client catalogs.
 *
 * @author caron
 * @since 1/11/2015
 */
public class CatalogCrawler {

  public enum Type {
    all,                   // return all datasets
    all_direct,            // return all direct datasets, i.e. those that have an access URL
    first_direct,          // return the first dataset in each collection of direct datasets
    random_direct,         // return one random dataset in each collection of direct datasets
    random_direct_middle,  // return one random dataset (excluding the first and last) in each collection of direct datasets
    random_direct_max      // return max random datasets in the entire catalog
  }

  private Filter filter = null;
  private int max = -1;
  private Type type = Type.all;
  private Listener listen;

  private Random random;
  private int countCatrefs = 0;
  private int numReadFailures = 0;

  /**
   * Constructor.
   *
   * @param type   CatalogCrawler.Type
   * @param max    return max (random_direct_max only)
   * @param filter don't process this dataset or its descendants; may be null
   * @param listen each dataset gets passed to the listener
   */
  public CatalogCrawler(Type type, int max, Filter filter, Listener listen) {
    this.type = type;
    this.max = max;
    this.filter = filter;
    this.listen = listen;

    if (type == Type.random_direct || type == Type.random_direct_middle || type == Type.random_direct_max)
      this.random = new Random(System.currentTimeMillis());
  }
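  /*
   * The Listener is the main extension point: every dataset the crawler visits is handed to it.
   * A minimal sketch (not part of the original class; the printing behavior is just an
   * illustrative assumption):
   *
   *   CatalogCrawler.Listener printer = new CatalogCrawler.Listener() {
   *     public void getDataset(Dataset dd, Object context) {
   *       System.out.printf("dataset %s hasAccess=%s%n", dd.getName(), dd.hasAccess());
   *     }
   *     public boolean getCatalogRef(CatalogRef dd, Object context) {
   *       return true;  // return true to follow the catalogRef, false to skip it
   *     }
   *   };
   */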
  /**
   * Open a catalog and crawl (depth first) all the datasets in it.
   * Close catalogs and release their resources as you go.
   *
   * @param catUrl  url of catalog to open
   * @param task    user can cancel the task (may be null)
   * @param out     send status messages to here (may be null)
   * @param context caller can pass this object in (used for thread safety)
   * @return number of catalogs (this + catrefs) opened and crawled
   */
  public int crawl(String catUrl, CancelTask task, PrintWriter out, Object context) throws IOException {
    CatalogBuilder catFactory = new CatalogBuilder();
    Catalog cat = catFactory.buildFromLocation(catUrl);
    boolean isValid = !catFactory.hasFatalError();

    if (out != null) {
      out.println("catalog <" + cat.getName() + "> " + (isValid ? "is" : "is not") + " valid");
      out.println(" validation output=\n" + catFactory.getErrorMessage());
    }
    if (out != null)
      out.println("***CATALOG " + cat.getBaseURI());

    if (isValid)
      return crawl(cat, task, out, context, new Indent(2));
    return 0;
  }

  /**
   * Crawl a catalog that's already been opened.
   * When you get to a dataset containing leaf datasets, do all, only the first, or a randomly chosen one.
   *
   * @param cat     the catalog
   * @param task    user can cancel the task (may be null)
   * @param out     send status messages to here (may be null)
   * @param context caller can pass this object in (used for thread safety)
   * @param indent  print indentation
   * @return number of catalog references opened and crawled
   */
  public int crawl(Catalog cat, CancelTask task, PrintWriter out, Object context, Indent indent) throws IOException {
    for (Dataset ds : cat.getDatasets()) {
      crawlDataset(ds, true, task, out, context, indent);
      if ((task != null) && task.isCancel()) break;
    }
    return 1 + countCatrefs;
  }
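  /*
   * A minimal usage sketch (not part of the original class). The catalog URL is a placeholder,
   * "printer" is the example Listener sketched above, and status output goes to stdout;
   * crawl() throws IOException, so callers must handle or declare it:
   *
   *   CatalogCrawler crawler = new CatalogCrawler(CatalogCrawler.Type.all, -1, null, printer);
   *   PrintWriter out = new PrintWriter(System.out, true);
   *   int count = crawler.crawl("http://server/thredds/catalog.xml", null, out, null);
   *   out.printf("crawled %d catalogs, %d catref read failures%n", count, crawler.getNumReadFailures());
   */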
  /**
   * Crawl this dataset recursively.
   *
   * @param ds      the dataset
   * @param isTop   is the top dataset
   * @param task    user can cancel the task (may be null)
   * @param out     send status messages to here (may be null)
   * @param context caller can pass this object in (used for thread safety)
   * @param indent  print indentation
   */
  private void crawlDataset(Dataset ds, boolean isTop, CancelTask task, PrintWriter out, Object context, Indent indent) throws IOException {
    if (filter != null && filter.skipAll(ds))
      return;

    if (ds instanceof CatalogRef) {
      CatalogRef catref = (CatalogRef) ds;
      if (out != null)
        out.printf("%s**CATREF %s (%s)%n", indent, catref.getURI(), ds.getName());
      countCatrefs++;

      if (!listen.getCatalogRef(catref, context))
        return;

      Catalog cat = readCatref(catref, out, indent);
      if (cat == null) {
        numReadFailures++;
        return;
      }

      crawl(cat, task, out, context, indent.incr());
      indent.decr();
      return;
    }

    if (isTop) {
      if (type == Type.all || ds.hasAccess())
        listen.getDataset(ds, context);
    }

    if (type == Type.all) {
      for (Dataset dds : ds.getDatasets()) {
        listen.getDataset(dds, context);
        crawlDataset(dds, false, task, out, context, indent.incr());
        indent.decr();
        if ((task != null) && task.isCancel()) break;
      }

    } else { // get datasets with data access ("leaves")
      List<Dataset> dlist = ds.getDatasets();
      List<Dataset> leaves = new ArrayList<>();
      for (Dataset dds : dlist) {
        if (dds.hasAccess())
          leaves.add(dds);
      }

      if (leaves.size() > 0) {
        if (type == Type.first_direct) {
          Dataset dds = leaves.get(0);
          listen.getDataset(dds, context);

        } else if (type == Type.random_direct) {
          listen.getDataset(chooseRandom(leaves), context);

        } else if (type == Type.random_direct_middle) {
          listen.getDataset(chooseRandomNotFirstOrLast(leaves), context);

        } else { // do all of them
          for (Dataset dds : leaves) {
            listen.getDataset(dds, context);
            if ((task != null) && task.isCancel()) break;
          }
        }
      }
    }

    // recurse
    for (Dataset dds : ds.getDatasets()) {
      if (dds.hasNestedDatasets() || (dds instanceof CatalogRef)) {
        crawlDataset(dds, false, task, out, context, indent.incr());
        indent.decr();
      }
      if ((task != null) && task.isCancel()) break;
    }
  }

  private Catalog readCatref(CatalogRef catref, PrintWriter out, Indent indent) {
    CatalogBuilder builder = new CatalogBuilder();
    try {
      Catalog cat = builder.buildFromCatref(catref);
      if (builder.hasFatalError() || cat == null) {
        if (out != null)
          out.printf("%sError reading catref %s err=%s%n", indent, catref.getName(), builder.getErrorMessage());
        return null;
      }
      return cat;

    } catch (IOException e) {
      if (out != null)
        out.printf("%sError reading catref %s err=%s%n", indent, catref.getName(), e.getMessage());
    }
    return null;
  }

  private Dataset chooseRandom(List<Dataset> datasets) {
    int index = random.nextInt(datasets.size());
    return datasets.get(index);
  }

  private Dataset chooseRandomNotFirstOrLast(List<Dataset> datasets) {
    int index = random.nextInt(datasets.size());
    if (index == 0 && datasets.size() > 1)
      index++;
    else if (index == datasets.size() - 1 && datasets.size() > 1)
      index--;
    return datasets.get(index);
  }

  @Override
  public String toString() {
    return MoreObjects.toStringHelper(this)
        .add("filter", filter)
        .add("max", max)
        .add("type", type)
        .add("listen", listen)
        .add("random", random)
        .add("countCatrefs", countCatrefs)
        .toString();
  }

  public int getNumReadFailures() {
    return numReadFailures;
  }

  //////////////////////////////////////////////////////////////////////////////
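  /*
   * Filter is the pruning hook declared below: returning true from skipAll() drops that dataset
   * and all of its descendants from the crawl. A sketch of a custom Filter (illustrative only;
   * the name test is an arbitrary assumption, and "printer" is the example Listener sketched earlier):
   *
   *   CatalogCrawler.Filter skipGrib = new CatalogCrawler.Filter() {
   *     public boolean skipAll(Dataset ds) {
   *       return ds.getName() != null && ds.getName().contains("GRIB");
   *     }
   *   };
   *   CatalogCrawler crawler = new CatalogCrawler(CatalogCrawler.Type.all_direct, -1, skipGrib, printer);
   */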
  static public interface Listener {
    /**
     * Gets called for each dataset found.
     *
     * @param dd      the dataset
     * @param context caller can pass this object in (used for thread safety)
     */
    public void getDataset(Dataset dd, Object context);

    /**
     * Gets called for each catalogRef found.
     *
     * @param dd      the dataset
     * @param context caller can pass this object in (used for thread safety)
     * @return true to process, false to skip
     */
    public boolean getCatalogRef(CatalogRef dd, Object context);
  }

  static public interface Filter {
    public boolean skipAll(Dataset ds);
  }

  private static class FilterDatasetScan implements Filter {
    boolean skipDatasetScan;

    private FilterDatasetScan(boolean skipDatasetScan) {
      this.skipDatasetScan = skipDatasetScan;
    }

    @Override
    public boolean skipAll(Dataset ds) {
      return skipDatasetScan && (ds instanceof CatalogRef) && (ds.findProperty("DatasetScan") != null);
    }
  }

}