/* * Copyright (c) 2014 Data Harmonisation Panel * * All rights reserved. This program and the accompanying materials are made * available under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation, either version 3 of the License, * or (at your option) any later version. * * You should have received a copy of the GNU Lesser General Public License * along with this distribution. If not, see <http://www.gnu.org/licenses/>. * * Contributors: * Data Harmonisation Panel <http://www.dhpanel.eu> */ package eu.esdihumboldt.hale.app.transform; import static eu.esdihumboldt.hale.app.transform.ExecUtil.fail; import static eu.esdihumboldt.hale.app.transform.ExecUtil.info; import static eu.esdihumboldt.hale.app.transform.ExecUtil.status; import static eu.esdihumboldt.hale.app.transform.ExecUtil.warn; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.net.URI; import java.nio.file.FileVisitResult; import java.nio.file.FileVisitor; import java.nio.file.Path; import java.nio.file.PathMatcher; import java.nio.file.Paths; import java.nio.file.attribute.BasicFileAttributes; import java.text.MessageFormat; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.UUID; import java.util.concurrent.ExecutionException; import org.eclipse.core.runtime.content.IContentType; import com.google.common.util.concurrent.ListenableFuture; import eu.esdihumboldt.hale.common.core.io.HaleIO; import eu.esdihumboldt.hale.common.core.io.extension.IOProviderDescriptor; import eu.esdihumboldt.hale.common.core.io.project.model.IOConfiguration; import eu.esdihumboldt.hale.common.core.io.supplier.DefaultInputSupplier; import eu.esdihumboldt.hale.common.core.io.supplier.FileIOSupplier; import eu.esdihumboldt.hale.common.core.io.supplier.Locatable; import 
eu.esdihumboldt.hale.common.core.io.supplier.LocatableInputSupplier;
import eu.esdihumboldt.hale.common.core.io.supplier.LocatableOutputSupplier;
import eu.esdihumboldt.hale.common.core.report.Report;
import eu.esdihumboldt.hale.common.core.report.ReportHandler;
import eu.esdihumboldt.hale.common.headless.impl.ProjectTransformationEnvironment;
import eu.esdihumboldt.hale.common.headless.report.ReportFile;
import eu.esdihumboldt.hale.common.headless.transform.Transformation;
import eu.esdihumboldt.hale.common.instance.io.InstanceReader;
import eu.esdihumboldt.hale.common.instance.io.InstanceValidator;
import eu.esdihumboldt.hale.common.instance.io.InstanceWriter;
import eu.esdihumboldt.util.groovy.sandbox.DefaultGroovyService;
import eu.esdihumboldt.util.groovy.sandbox.GroovyService;

/**
 * Executes a transformation.
 *
 * Sets up a report handler, loads the project, creates one instance reader
 * per source (expanding directories into the files they contain), creates the
 * instance writer and optional validators, and finally triggers the
 * transformation.
 *
 * @author Simon Templer
 */
public class ExecTransformation implements ConsoleConstants {

    /**
     * Visitor that collects files to be included.
     *
     * A file is collected if its path relative to the parent directory
     * matches at least one include pattern and no exclude pattern.
     */
    private static final class DirVisitor implements FileVisitor<Path> {

        private final List<Path> collectedFiles = new ArrayList<>();

        private final Path parentDir;

        private final List<PathMatcher> includes;

        private final List<PathMatcher> excludes;

        /**
         * Constructor.
         *
         * @param parentDir the parent directory
         * @param includes the include patterns (glob syntax)
         * @param excludes the exclude patterns (glob syntax)
         */
        public DirVisitor(Path parentDir, List<String> includes, List<String> excludes) {
            this.parentDir = parentDir;
            this.includes = createMatchers(parentDir, includes);
            this.excludes = createMatchers(parentDir, excludes);
        }

        /**
         * Create glob matchers for the given patterns, using the file system
         * of the parent directory.
         *
         * @param parentDir the parent directory
         * @param patterns the glob patterns
         * @return the matchers, in the order of the patterns
         */
        private static List<PathMatcher> createMatchers(Path parentDir, List<String> patterns) {
            List<PathMatcher> matchers = new ArrayList<>(patterns.size());
            for (String pattern : patterns) {
                matchers.add(parentDir.getFileSystem().getPathMatcher("glob:" + pattern));
            }
            return matchers;
        }

        /**
         * Test if any of the given matchers matches the path.
         *
         * @param matchers the matchers to test
         * @param path the path to match
         * @return <code>true</code> if at least one matcher matches
         */
        private static boolean matchesAny(List<PathMatcher> matchers, Path path) {
            for (PathMatcher matcher : matchers) {
                if (matcher.matches(path)) {
                    return true;
                }
            }
            return false;
        }

        @Override
        public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs)
                throws IOException {
            /*
             * XXX currently cannot determine from the patterns if a directory
             * should be inspected or not
             */
            return FileVisitResult.CONTINUE;
        }

        @Override
        public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
                throws IOException {
            if (accept(file)) {
                collectedFiles.add(file);
            }
            return FileVisitResult.CONTINUE;
        }

        /**
         * Determine if a file should be collected.
         *
         * @param file the visited file
         * @return <code>true</code> if the file's path relative to the parent
         *         directory matches an include and no exclude pattern
         */
        private boolean accept(Path file) {
            // patterns are evaluated against the path relative to the parent
            Path relative = parentDir.relativize(file);
            return matchesAny(includes, relative) && !matchesAny(excludes, relative);
        }

        @Override
        public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException {
            // ignore, but log
            warn("Could not access file " + file);
            return FileVisitResult.CONTINUE;
        }

        @Override
        public FileVisitResult postVisitDirectory(Path dir, IOException exc)
                throws IOException {
            return FileVisitResult.CONTINUE;
        }

        /**
         * @return the list of files collected from the directory
         */
        public List<Path> getCollectedFiles() {
            return Collections.unmodifiableList(collectedFiles);
        }

    }

    /**
     * ID for the transformation
     */
    private final String id = UUID.randomUUID().toString();

    private ReportHandler reportHandler = null;

    private ProjectTransformationEnvironment env;

    private ExecContext context;

    private InstanceWriter target;

    private final List<InstanceReader> sources = new ArrayList<InstanceReader>();

    private final Collection<InstanceValidator> validators = new ArrayList<>();

    /**
     * Run the transformation described by the given context.
     *
     * @param context the execution context holding project, sources, target
     *            and related settings
     * @return <code>0</code> if the transformation was triggered and
     *         completed, <code>1</code> if no instance writer is available
     * @throws Exception if setting up or executing the transformation fails
     */
    @SuppressWarnings("javadoc")
    public int run(ExecContext context) throws Exception {
        this.context = context;

        // NOTE(review): instance is not retained; presumably the constructor
        // has a side effect such as registering itself - confirm
        new ConsoleProgressManager();

        // set up report handler
        setupReportHandler();

        // set up transformation environment
        loadProject();

        // set up reader(s) for each source
        int index = 0;
        for (URI uri : context.getSources()) {
            Path path = toLocalPath(uri);
            if (path != null && java.nio.file.Files.isDirectory(path)) {
                // directory handling
                List<Path> files = getIncludedFiles(path, index);
                info(MessageFormat.format("{0} files identified for source {1}", files.size(),
                        path));
                for (Path file : files) {
                    setupReader(file.toUri(), index);
                }
            }
            else {
                // file / URI handling
                setupReader(uri, index);
            }

            // source related settings are addressed by the source index
            index++;
        }

        // set up writer for target
        setupWriter();

        if (target == null) {
            // writer could not be created
            // (checked before the validators are set up, as they need it)
            // return error code
            return 1;
        }

        // set up the target validators (if any)
        setupValidators();

        // trigger transformation
        transform();

        // exit OK
        return 0;
    }

    /**
     * Try to convert a source URI to a path in a file system.
     *
     * @param uri the source URI
     * @return the path or <code>null</code> if the URI cannot be converted
     */
    private static Path toLocalPath(URI uri) {
        try {
            return Paths.get(uri);
        } catch (RuntimeException ignored) {
            // not a URI that can be represented as path (e.g. no installed
            // file system provider for the scheme) - treat it as plain URI
            return null;
        }
    }

    /**
     * Get the files to load from a directory.
     *
     * @param parentDir the directory
     * @param index the source index
     * @return the list of files
     * @throws IllegalStateException if browsing the directory fails
     */
    private List<Path> getIncludedFiles(Path parentDir, int index) {
        // don't modify the lists owned by the context, use local defaults
        List<String> configuredIncludes = context.getSourceIncludes().get(index);
        List<String> includes;
        if (configuredIncludes == null || configuredIncludes.isEmpty()) {
            // default include - all files
            includes = Collections.singletonList("**");
        }
        else {
            includes = configuredIncludes;
        }

        List<String> configuredExcludes = context.getSourceExcludes().get(index);
        List<String> excludes = (configuredExcludes != null) ? configuredExcludes
                : Collections.<String> emptyList();

        DirVisitor visitor = new DirVisitor(parentDir, includes, excludes);
        try {
            java.nio.file.Files.walkFileTree(parentDir, visitor);
        } catch (IOException e) {
            throw new IllegalStateException("Error browsing source directory " + parentDir, e);
        }
        return visitor.getCollectedFiles();
    }

    /**
     * Create the report handler. It prints a summary for each report and, if
     * a reports output is configured in the context, additionally passes the
     * report on to a {@link ReportFile}.
     */
    private void setupReportHandler() {
        final ReportHandler delegateTo;
        if (context.getReportsOut() != null) {
            delegateTo = new ReportFile(context.getReportsOut());
        }
        else {
            delegateTo = null;
        }

        /*
         * The report handler writes a summary to std out
         */
        reportHandler = new ReportHandler() {

            @Override
            public void publishReport(Report<?> report) {
                ExecUtil.printSummary(report);
                if (delegateTo != null) {
                    delegateTo.publishReport(report);
                }
            }
        };
    }

    /**
     * Create the transformation environment from the project configured in
     * the context.
     *
     * @throws IOException if creating the environment fails
     */
    private void loadProject() throws IOException {
        status("Loading HALE project...");

        env = new ProjectTransformationEnvironment(id,
                new DefaultInputSupplier(context.getProject()), reportHandler);
    }

    /**
     * Create and configure an instance reader for a source location and add
     * it to the list of sources.
     *
     * @param uri the location of the source data
     * @param index the source index, used to look up the provider ID and
     *            settings configured for the source
     */
    private void setupReader(URI uri, int index) {
        LocatableInputSupplier<? extends InputStream> sourceIn = new DefaultInputSupplier(uri);

        // create I/O provider
        InstanceReader source = null;
        String customProvider = context.getSourceProviderIds().get(index);
        if (customProvider != null) {
            // use specified provider
            source = HaleIO.createIOProvider(InstanceReader.class, null, customProvider);
            if (source == null) {
                // NOTE(review): result of fail is not thrown here, in contrast
                // to other usages - if fail only returns the exception, the
                // reader is auto-detected below instead; confirm this is
                // intended
                fail("Could not find instance reader with ID " + customProvider);
            }
        }
        if (source == null) {
            // find applicable reader
            source = HaleIO.findIOProvider(InstanceReader.class, sourceIn, uri.getPath());
        }
        if (source == null) {
            throw fail("Could not determine instance reader to use for source data");
        }

        // apply custom settings
        source.loadConfiguration(context.getSourcesSettings().get(index));

        source.setSource(sourceIn);

        // source schema is set in Transformation.transform
        // CRS provider is set in headless transformation

        sources.add(source);
    }

    /**
     * Create and configure the instance writer for the target, based on an
     * export preset of the project and/or an explicitly specified provider ID.
     */
    private void setupWriter() {
        String preset = context.getPreset();
        String customProvider = context.getTargetProviderId();
        if (preset == null && customProvider == null) {
            // nothing that could identify the writer - abort right away
            throw fail(
                    "Please specify the name of a data export configuration preset or provide a specific provider ID for the instance writer");
        }

        // create I/O configuration
        IOConfiguration conf = null;
        if (preset != null) {
            conf = env.getExportPresets().get(preset);
        }
        if (conf == null) {
            if (customProvider == null) {
                throw fail("Data export configration preset not found: " + preset
                        + " (please make sure you created it and saved it as part of the project)");
            }
            else {
                conf = new IOConfiguration();
            }
        }
        if (customProvider != null) {
            // explicitly specified provider overrides the preset's provider
            conf.setProviderId(customProvider);
        }

        // apply custom settings to configuration
        conf.getProviderConfiguration().putAll(context.getTargetSettings());

        // create I/O provider
        String writerId = conf.getProviderId();
        target = HaleIO.createIOProvider(InstanceWriter.class, null, writerId);
        if (target == null) {
            // fail with a clear message before the writer is accessed
            throw fail("Instance writer with ID " + writerId + " not found");
        }
        target.setTarget(createTargetSupplier(context.getTarget()));
        target.setTargetSchema(env.getTargetSchema());

        // determine content type to use based on file extension
        IOProviderDescriptor factory = HaleIO.findIOProviderFactory(InstanceWriter.class, null,
                writerId);
        if (factory == null) {
            throw fail("Instance writer with ID " + writerId + " not found");
        }
        String path = context.getTarget().getPath();
        List<IContentType> cts;
        if (path != null) {
            cts = HaleIO.findContentTypesFor(factory.getSupportedTypes(), null, path);
        }
        else {
            cts = new ArrayList<>(factory.getSupportedTypes());
        }
        if (!cts.isEmpty()) {
            target.setContentType(cts.get(0));
        }

        // apply configuration (may override content type)
        target.loadConfiguration(conf.getProviderConfiguration());
    }

    /**
     * Create and configure the validators for the target, one for each
     * non-blank validator provider ID specified in the context. Requires the
     * instance writer to be set up.
     */
    private void setupValidators() {
        if (context.getValidateProviderIds() == null
                || context.getValidateProviderIds().isEmpty()) {
            // no validation requested
            return;
        }

        for (int i = 0; i < context.getValidateProviderIds().size(); i++) {
            String validateProviderId = context.getValidateProviderIds().get(i);
            if (validateProviderId == null || validateProviderId.trim().isEmpty()) {
                // skip missing/blank IDs
                continue;
            }

            final InstanceValidator validator = HaleIO.createIOProvider(InstanceValidator.class,
                    null, validateProviderId);
            if (validator == null) {
                throw fail("Instance validator with ID " + validateProviderId + " not found");
            }

            // load validator settings (same index as the provider ID)
            validator.loadConfiguration(context.getValidateSettings().get(i));

            // set schemas
            List<? extends Locatable> schemas = target.getValidationSchemas();
            validator.setSchemas(schemas.toArray(new Locatable[schemas.size()]));

            // set source - the validator reads what is written to the target
            validator.setSource(new DefaultInputSupplier(context.getTarget()));

            // apply target content type
            validator.setContentType(target.getContentType());

            this.validators.add(validator);
        }
    }

    /**
     * Create the output supplier for the target location.
     *
     * @param uri the target location
     * @return a file based supplier if the URI can be converted to a file,
     *         otherwise a supplier that only provides the location
     */
    private LocatableOutputSupplier<? extends OutputStream> createTargetSupplier(final URI uri) {
        try {
            File file = new File(uri);
            return new FileIOSupplier(file);
        } catch (IllegalArgumentException e) {
            // URI does not represent a file
            // TODO check for other supported URI types, e.g. FTP?

            // create dummy output supplier
            // e.g. for JDBC URIs
            return new LocatableOutputSupplier<OutputStream>() {

                @Override
                public OutputStream getOutput() throws IOException {
                    throw new UnsupportedOperationException();
                }

                @Override
                public URI getLocation() {
                    return uri;
                }
            };
        }
    }

    /**
     * Execute the transformation and wait for its completion.
     *
     * @throws InterruptedException if waiting for the transformation result
     *             is interrupted
     * @throws ExecutionException if the transformation terminates with an
     *             exception
     */
    private void transform() throws InterruptedException, ExecutionException {
        status("Running HALE transformation...");

        // configure transformation environment

        // override/set Groovy service
        GroovyService gs = new DefaultGroovyService();
        gs.setRestrictionActive(context.isRestrictGroovy());
        env.addService(GroovyService.class, gs);

        // run transformation
        ListenableFuture<Boolean> res = Transformation.transform(sources, target, env,
                reportHandler, id, validators, context.getFilters());

        if (Boolean.TRUE.equals(res.get())) {
            info("Transformation completed. Please check the reports for more details.");
            return;
        }

        // Job threads might still be active, wait a moment to allow them to
        // complete and file their report (otherwise error may get lost)
        try {
            Thread.sleep(3000);
        } catch (InterruptedException e) {
            // don't wait any longer, but keep the interrupt status
            Thread.currentThread().interrupt();
        }

        // report the failure only after the wait, and make sure it is
        // propagated to the caller instead of ending with exit code OK
        throw fail("Transformation failed, please check the reports for details.");
    }

}