/* * Copyright (c) 1998 - 2014. University Corporation for Atmospheric Research/Unidata * Portions of this software were developed by the Unidata Program at the * University Corporation for Atmospheric Research. * * Access and use of this software shall impose the following obligations * and understandings on the user. The user is granted the right, without * any fee or cost, to use, copy, modify, alter, enhance and distribute * this software, and any derivative works thereof, and its supporting * documentation for any purpose whatsoever, provided that this entire * notice appears in all copies of the software, derivative works and * supporting documentation. Further, UCAR requests that the user credit * UCAR/Unidata in any publications that result from the use of this * software or in any product that includes this software. The names UCAR * and/or Unidata, however, may not be used in any advertising or publicity * to endorse or promote any products or commercial entity unless specific * written permission is obtained from UCAR/Unidata. The user also * understands that UCAR/Unidata is not obligated to provide the user with * any support, consulting, training or assistance of any kind with regard * to the use, operation and performance of this software nor to provide * the user with any updates, revisions, new versions or "bug fixes." * * THIS SOFTWARE IS PROVIDED BY UCAR/UNIDATA "AS IS" AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL UCAR/UNIDATA BE LIABLE FOR ANY SPECIAL, * INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION * WITH THE ACCESS, USE OR PERFORMANCE OF THIS SOFTWARE. */ package thredds.server.catalog; import net.jcip.annotations.Immutable; import thredds.client.catalog.*; import thredds.client.catalog.builder.AccessBuilder; import thredds.client.catalog.builder.CatalogBuilder; import thredds.client.catalog.builder.CatalogRefBuilder; import thredds.client.catalog.builder.DatasetBuilder; import thredds.filesystem.MFileOS7; import thredds.inventory.MFile; import thredds.inventory.MFileFilter; import thredds.inventory.filter.*; import ucar.nc2.time.CalendarDate; import ucar.nc2.units.DateRange; import ucar.nc2.units.DateType; import ucar.nc2.units.TimeDuration; import ucar.nc2.util.CloseableIterator; import ucar.unidata.util.StringUtil2; import java.io.FileNotFoundException; import java.io.IOException; import java.net.URI; import java.nio.file.DirectoryStream; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.nio.file.attribute.BasicFileAttributes; import java.util.*; /** * DatasetScan * * @author John * @since 1/12/2015 */ @Immutable public class DatasetScan extends CatalogRef { static private org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger(DatasetScan.class); //////////////////////////////////////////////////////////////////////////////////////////////////////////////// private final DatasetScanConfig config; private final AddTimeCoverageEnhancer addTimeCoverage; private final List<RegExpNamer> namers; private final CompositeMFileFilter fileFilters; private final CompositeMFileFilter dirFilters; public DatasetScan(DatasetNode parent, String name, String xlink, Map<String, Object> flds, List<AccessBuilder> accessBuilders, List<DatasetBuilder> datasetBuilders, DatasetScanConfig config) { super(parent, name, xlink, flds, accessBuilders, datasetBuilders); this.config = config; addTimeCoverage = (config.addTimeCoverage != null) ? new AddTimeCoverageEnhancer(config.addTimeCoverage) : null; // namers if (config.namers != null && config.namers.size() > 0) { namers = new ArrayList<>(); for (DatasetScanConfig.Namer cname : config.namers) namers.add(new RegExpNamer(cname)); } else { namers = null; } // filters if (config.filters != null && config.filters.size() > 0) { fileFilters = new CompositeMFileFilter(); dirFilters = new CompositeMFileFilter(); for (DatasetScanConfig.Filter cfilter : config.filters) { makeFilter(cfilter); } } else { fileFilters = null; dirFilters = null; } } private void makeFilter(DatasetScanConfig.Filter cfilter) { MFileFilter filter; if (cfilter.wildcardAttVal != null) { filter = new WildcardMatchOnName(cfilter.wildcardAttVal); // always on name, not path } else if (cfilter.regExpAttVal != null) { filter = new RegExpMatchOnName(cfilter.regExpAttVal); } else if (cfilter.lastModLimitAttVal > 0) { filter = new LastModifiedLimit(cfilter.lastModLimitAttVal); } else { return; } if (!cfilter.includer) // excluder filter = new FilterNegate(filter); if (cfilter.collection) dirFilters.addFilter(filter); if (cfilter.atomic) fileFilters.addFilter(filter); } public String getPath() { return config.path; } public String getScanLocation() { return config.scanDir; } DatasetScanConfig getConfig() { return config; } ///////////////////////////////////////////////////////// /** * Called from DataRootHandler.makeDynamicCatalog(), called from LocalCatalogServiceController ... * <p/> * Build a catalog for the given path by scanning the location * associated with this DatasetScan. The given path must start with the path of this DatasetScan. * * @param orgPath the part of the baseURI that is the path * @param catURI the base URL for the catalog, used to resolve relative URLs. * @return the catalog for this path or null if build unsuccessful. */ public Catalog makeCatalogForDirectory(String orgPath, URI catURI) throws IOException { // Get the dataset location. String dataDirReletive = translatePathToLocation(orgPath); if (dataDirReletive == null) { String tmpMsg = "makeCatalogForDirectory(): Requesting path <" + orgPath + "> must start with \"" + config.path + "\"."; log.error(tmpMsg); return null; } String parentPath = (dataDirReletive.length() > 1) ? config.path + "/" + dataDirReletive : config.path + "/"; String parentId = (dataDirReletive.length() > 1) ? this.getId() + "/" + dataDirReletive : this.getId() + "/"; // translate any properties String scanDir = ConfigCatalog.translateAlias(config.scanDir); String dataDirComplete = (dataDirReletive.length() > 1) ? scanDir + "/" + dataDirReletive : scanDir; // Setup and create catalog builder. CatalogBuilder catBuilder = new CatalogBuilder(); catBuilder.setBaseURI(catURI); assert this.getParentCatalog() != null; for (Service s : this.getParentCatalog().getServices()) catBuilder.addService(s); DatasetBuilder top = new DatasetBuilder(null); String name = (dataDirReletive.length() > 1) ? dataDirReletive : getName(); top.transferMetadata(this, true); top.setName(name); top.put(Dataset.Id, parentId); catBuilder.addDataset(top); Path p = Paths.get(dataDirComplete); if (!Files.exists(p)) throw new FileNotFoundException("Directory does not exist =" + dataDirComplete); if (!Files.isDirectory(p)) throw new FileNotFoundException("Not a directory =" + dataDirComplete); // scan and sort the directory List<MFile> mfiles = getSortedFiles(p, config.isSortIncreasing); if (config.addLatest != null && config.addLatest.latestOnTop) top.addDataset(makeLatestProxy(top, parentId)); // create Datasets for (MFile mfile : mfiles) { DatasetBuilder ds; if (mfile.isDirectory()) { CatalogRefBuilder catref = new CatalogRefBuilder(top); catref.setTitle(makeName(mfile)); catref.setHref(mfile.getName() + "/catalog.xml"); top.addDataset(catref); ds = catref; } else { ds = new DatasetBuilder(top); ds.setName( makeName(mfile)); String urlPath = parentPath + mfile.getName(); ds.put(Dataset.UrlPath, urlPath); ds.put(Dataset.DataSize, mfile.getLength()); // <dataSize units="Kbytes">54.73</dataSize> CalendarDate date = CalendarDate.of(mfile.getLastModified()); ds.put(Dataset.Dates, new DateType(date).setType("modified")); // <date type="modified">2011-09-02T20:50:58.288Z</date> if (addTimeCoverage != null) addTimeCoverage.addMetadata(ds, mfile); top.addDataset(ds); } ds.put(Dataset.Id, parentId + mfile.getName()); } if (config.addLatest != null && !config.addLatest.latestOnTop) top.addDataset(makeLatestProxy(top, parentId)); // make the catalog return catBuilder.makeCatalog(); } private String translatePathToLocation(String dsPath) { if (dsPath == null) return null; if (dsPath.length() == 0) return null; if (dsPath.startsWith("/")) dsPath = dsPath.substring(1); if (!dsPath.startsWith(config.path)) return null; // remove the matching part, the rest is the "data directory" String dataDir = dsPath.substring(config.path.length()); if (dataDir.startsWith("/")) dataDir = dataDir.substring(1); if (!dataDir.endsWith("/")) dataDir = dataDir + "/"; return dataDir; } /////////////////////// // Scan and sort private List<MFile> getSortedFiles(Path p, final boolean isSortIncreasing) throws IOException { // scan the directory List<MFile> mfiles = new ArrayList<>(); try (MFileIterator iter = new MFileIterator(p)) { while (iter.hasNext()) mfiles.add(iter.next()); } // sort them Collections.sort(mfiles, new Comparator<MFile>() { public int compare(MFile o1, MFile o2) { if (o1.isDirectory() != o2.isDirectory()) return o1.isDirectory() ? 1 : -1; if (isSortIncreasing) return o1.getName().compareTo(o2.getName()); else return o2.getName().compareTo(o1.getName()); } }); return mfiles; } private class MFileIterator implements CloseableIterator<MFile> { DirectoryStream<Path> dirStream; Iterator<Path> dirStreamIterator; MFile nextMFile; long now; MFileIterator(Path p) throws IOException { dirStream = Files.newDirectoryStream(p); dirStreamIterator = dirStream.iterator(); now = System.currentTimeMillis(); } public boolean hasNext() { while (true) { if (!dirStreamIterator.hasNext()) { nextMFile = null; return false; } try { Path nextPath = dirStreamIterator.next(); BasicFileAttributes attr = Files.readAttributes(nextPath, BasicFileAttributes.class); nextMFile = new MFileOS7(nextPath, attr); if (accept(nextMFile)) return true; } catch (IOException e) { throw new RuntimeException(e); } } } private boolean accept(MFile mfile) { if (mfile.isDirectory()) return dirFilters == null || dirFilters.accept(mfile); return fileFilters == null || fileFilters.accept(mfile); } public MFile next() { if (nextMFile == null) throw new NoSuchElementException(); return nextMFile; } public void remove() { throw new UnsupportedOperationException(); } // better alternative is for caller to send in callback (Visitor pattern) // then we could use the try-with-resource public void close() throws IOException { dirStream.close(); } } //////////////////////////////////////////////// // Naming private String makeName(MFile mfile) { if (namers == null) return mfile.getName(); for (RegExpNamer namer : namers) { String result = namer.rename(mfile); if (result != null) return result; } return mfile.getName(); } private static class RegExpNamer { private java.util.regex.Pattern pattern; DatasetScanConfig.Namer namer; RegExpNamer(DatasetScanConfig.Namer namer) { this.pattern = java.util.regex.Pattern.compile(namer.regExp); this.namer = namer; } public String rename(MFile mfile) { String name = namer.onName ? mfile.getName() : mfile.getPath(); java.util.regex.Matcher matcher = this.pattern.matcher(name); if (!matcher.find()) return null; StringBuffer startTime = new StringBuffer(); matcher.appendReplacement(startTime, namer.replaceString); startTime.delete(0, matcher.start()); if (startTime.length() == 0) return null; return startTime.toString(); } } ////////////////////////////////////////////////////////// // add TimeCovergae private static class AddTimeCoverageEnhancer { private DatasetScanConfig.AddTimeCoverage atc; private boolean matchOnName; private String matchPattern; private java.util.regex.Pattern pattern; AddTimeCoverageEnhancer(DatasetScanConfig.AddTimeCoverage atc) { this.atc = atc; this.matchOnName = (atc.matchName != null); this.matchPattern = (atc.matchName != null) ? atc.matchName : atc.matchPath; try { this.pattern = java.util.regex.Pattern.compile(this.matchPattern); } catch (java.util.regex.PatternSyntaxException e) { log.error("ctor(): bad match pattern <" + this.matchPattern + ">, failed to compile: " + e.getMessage()); this.pattern = null; } } boolean addMetadata(DatasetBuilder dataset, MFile crDataset) { if (this.pattern == null) return false; String matchTargetString = (this.matchOnName) ? crDataset.getName() : crDataset.getPath(); java.util.regex.Matcher matcher = this.pattern.matcher(matchTargetString); if (!matcher.find()) { return (false); // Pattern not found. } StringBuffer startTime = new StringBuffer(); try { matcher.appendReplacement(startTime, atc.subst); } catch (IndexOutOfBoundsException e) { log.error("addMetadata(): capture group mismatch between match pattern <" + this.matchPattern + "> and substitution pattern <" + atc.subst + ">: " + e.getMessage()); return (false); } startTime.delete(0, matcher.start()); try { DateRange dateRange = new DateRange(new DateType(startTime.toString(), null, null), null, new TimeDuration(atc.duration), null); dataset.put(Dataset.TimeCoverage, dateRange); } catch (Exception e) { log.warn("addMetadata(): Start time <" + startTime.toString() + "> or duration <" + atc.duration + "> not parsable" + " (crDataset.getName() <" + crDataset.getName() + ">, this.matchPattern() <" + this.matchPattern + ">, this.substitutionPattern() <" + atc.subst + ">): " + e.getMessage()); return (false); } return (true); } } ////////////////// // Latest /* <dataset name="latest.xml" ID="testGridScan/latest.xml" urlPath="latest.xml"> <serviceName>latest</serviceName> </dataset> */ private DatasetBuilder makeLatestProxy(DatasetBuilder parent, String parentId) { DatasetBuilder proxy = new DatasetBuilder(parent); proxy.setName(config.addLatest.latestName); proxy.put(Dataset.UrlPath, config.addLatest.latestName); proxy.put(Dataset.Id, parentId + config.addLatest.latestName); proxy.put(Dataset.ServiceName, config.addLatest.latestServiceName); return proxy; } /** * Build a catalog for the given resolver path by scanning the * location associated with this InvDatasetScan. The given path must start * with the path of this DatasetScan and refer to a resolver * ProxyDatasetHandler that is part of this InvDatasetScan. * * @param orgPath the part of the baseURI that is the path * @param baseURI the base URL for the catalog, used to resolve relative URLs. * @return the resolver catalog for this path (uses version 1.1) or null if build unsuccessful. */ public Catalog makeLatestResolvedCatalog(String orgPath, URI baseURI) throws IOException { // Get the dataset path. String dataDirReletive = translatePathToLocation(orgPath); if (dataDirReletive == null) { String tmpMsg = "makeCatalogForDirectory(): Requesting path <" + orgPath + "> must start with \"" + config.path + "\"."; log.error(tmpMsg); return null; } String parentPath = (dataDirReletive.length() > 1) ? config.path + "/" + dataDirReletive : config.path + "/"; String parentId = (dataDirReletive.length() > 1) ? this.getId() + "/" + dataDirReletive : this.getId() + "/"; // translate any properties String scanDir = ConfigCatalog.translateAlias(config.scanDir); String dataDirComplete = (dataDirReletive.length() > 1) ? scanDir + "/" + dataDirReletive : scanDir; // Setup and create catalog builder. CatalogBuilder catBuilder = new CatalogBuilder(); catBuilder.setBaseURI(baseURI); for (Service s : this.getParentCatalog().getServices()) catBuilder.addService(s); Path p = Paths.get(dataDirComplete); if (!Files.exists(p)) throw new FileNotFoundException("Directory does not exist =" + dataDirComplete); if (!Files.isDirectory(p)) throw new FileNotFoundException("Not a directory =" + dataDirComplete); // scan and sort the directory List<MFile> mfiles = getSortedFiles(p, false); // latest on top long now = System.currentTimeMillis(); for (MFile mfile : mfiles) { if (mfile.isDirectory()) continue; if (config.addLatest.lastModLimit > 0) { if (now - mfile.getLastModified() < config.addLatest.lastModLimit) continue; } // this is the one we want DatasetBuilder ds = new DatasetBuilder(null); ds.transferMetadata(this, true); ds.setName( makeName(mfile)); String urlPath = parentPath + mfile.getName(); ds.put(Dataset.UrlPath, urlPath); ds.put(Dataset.DataSize, mfile.getLength()); // <dataSize units="Kbytes">54.73</dataSize> CalendarDate date = CalendarDate.of(mfile.getLastModified()); ds.put(Dataset.Dates, new DateType(date).setType("modified")); // <date type="modified">2011-09-02T20:50:58.288Z</date> ds.put(Dataset.Id, parentId + mfile.getName()); if (addTimeCoverage != null) addTimeCoverage.addMetadata(ds, mfile); catBuilder.addDataset(ds); break; // only the one } // make the catalog return catBuilder.makeCatalog(); } }