/** * DataCleaner (community edition) * Copyright (C) 2014 Neopost - Customer Information Management * * This copyrighted material is made available to anyone wishing to use, modify, * copy, or redistribute it subject to the terms and conditions of the GNU * Lesser General Public License, as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this distribution; if not, write to: * Free Software Foundation, Inc. * 51 Franklin Street, Fifth Floor * Boston, MA 02110-1301 USA */ package org.datacleaner.util.convert; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.metamodel.util.HdfsResource; import org.apache.metamodel.util.Resource; import org.datacleaner.api.Converter; import org.datacleaner.configuration.DataCleanerConfiguration; import org.datacleaner.configuration.DataCleanerConfigurationImpl; import org.datacleaner.util.ReflectionUtils; import org.datacleaner.util.SystemProperties; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * A converter for {@link Resource}s. Because of different {@link Resource} * implementations, this converter delegates to a number of 'handlers' which * implement part of the conversion for a specific type of resource. */ public class ResourceConverter implements Converter<Resource> { /** * Represents a component capable of handling the parsing and serializing of * a single type of resource. */ public interface ResourceTypeHandler<E extends Resource> { boolean isParserFor(Class<? extends Resource> resourceType); String getScheme(); E parsePath(String path); String createPath(Resource resource); } /** * Represents the parsed structure of a serialized resource */ public static class ResourceStructure { private final String scheme; private final String path; public ResourceStructure(final String scheme, final String path) { this.scheme = scheme; this.path = path; } public String getPath() { return path; } public String getScheme() { return scheme; } } /** * Represents the default "default scheme", for representations that does * not have a scheme in the path. This default scheme is "file". * * @deprecated use {@link #getConfiguredDefaultScheme()} by as a way to * access this */ @Deprecated public static final String DEFAULT_DEFAULT_SCHEME = FileResourceTypeHandler.DEFAULT_SCHEME; private static final Logger logger = LoggerFactory.getLogger(ResourceConverter.class); private static final Pattern RESOURCE_PATTERN = Pattern.compile("\\b([a-zA-Z]+)://(.+)"); private final DataCleanerConfiguration _configuration; private final Map<String, ResourceTypeHandler<?>> _parsers; private final String _defaultScheme; public ResourceConverter(final DataCleanerConfiguration configuration) { this(configuration, getConfiguredDefaultScheme()); } public ResourceConverter(final DataCleanerConfiguration configuration, final String defaultScheme) { this(configuration, createDefaultHandlers(configuration), defaultScheme); } public ResourceConverter(final DataCleanerConfiguration configuration, final Collection<? extends ResourceTypeHandler<?>> handlers) { this(configuration, handlers, getConfiguredDefaultScheme()); } public ResourceConverter(final Collection<? extends ResourceTypeHandler<?>> handlers, final String defaultScheme) { this(new DataCleanerConfigurationImpl(), handlers, defaultScheme); } /** * Constructs a {@link ResourceConverter} using a set of handlers. * * @param configuration * @param handlers * @param defaultScheme */ public ResourceConverter(final DataCleanerConfiguration configuration, final Collection<? extends ResourceTypeHandler<?>> handlers, final String defaultScheme) { _defaultScheme = defaultScheme; _parsers = new ConcurrentHashMap<>(); _configuration = configuration; for (final ResourceTypeHandler<?> handler : handlers) { final String scheme = handler.getScheme(); _parsers.put(scheme, handler); } } public ResourceConverter(final ResourceTypeHandler<?>... handlers) { this(new DataCleanerConfigurationImpl(), Arrays.asList(handlers), getConfiguredDefaultScheme()); } /** * Gets the "default scheme" (see {@link #DEFAULT_DEFAULT_SCHEME}) while * taking into account that this may have been configured via * {@link SystemProperties#DEFAULT_RESOURCE_SCHEME}. * * @return */ public static String getConfiguredDefaultScheme() { return SystemProperties.getString(SystemProperties.DEFAULT_RESOURCE_SCHEME, DEFAULT_DEFAULT_SCHEME); } private static List<ResourceTypeHandler<?>> createDefaultHandlers(final DataCleanerConfiguration configuration) { final List<ResourceTypeHandler<?>> result = new ArrayList<>(); result.add(new FileResourceTypeHandler(configuration.getHomeFolder())); result.add(new UrlResourceTypeHandler()); result.add(new HdfsResourceTypeHandler(HdfsResource.SCHEME_HDFS, configuration)); result.add(new HdfsResourceTypeHandler(HdfsResource.SCHEME_EMRFS, configuration)); result.add(new HdfsResourceTypeHandler(HdfsResource.SCHEME_MAPRFS, configuration)); result.add(new HdfsResourceTypeHandler(HdfsResource.SCHEME_S3, configuration)); result.add(new HdfsResourceTypeHandler(HdfsResource.SCHEME_SWIFT, configuration)); result.add(new ClasspathResourceTypeHandler()); result.add(new VfsResourceTypeHandler()); return result; } public ResourceConverter withExtraHandlers(final Collection<? extends ResourceTypeHandler<?>> extraHandlers) { final List<ResourceTypeHandler<?>> handlers = new ArrayList<>(_parsers.values()); handlers.addAll(extraHandlers); return new ResourceConverter(_configuration, handlers); } @Override public Resource fromString(final Class<?> type, final String serializedForm) { final ResourceStructure structure = parseStructure(serializedForm); if (structure == null) { throw new IllegalStateException("Invalid resource format: " + serializedForm); } final String scheme = structure.getScheme(); final ResourceTypeHandler<?> handler = _parsers.get(scheme); if (handler == null) { throw new IllegalStateException("No handler found for scheme of resource: " + serializedForm); } return handler.parsePath(structure.getPath()); } @Override public String toString(final Resource resource) { final Class<? extends Resource> resourceType = resource.getClass(); final Collection<ResourceTypeHandler<?>> values = _parsers.values(); for (final ResourceTypeHandler<?> handler : values) { if (handler.isParserFor(resourceType)) { final String path = handler.createPath(resource); final String scheme = handler.getScheme(); return scheme + "://" + path; } } throw new IllegalStateException("Could not find a resource handler for resource: " + resource); } public Collection<ResourceTypeHandler<?>> getResourceTypeHandlers() { return Collections.unmodifiableCollection(_parsers.values()); } @Override public boolean isConvertable(final Class<?> type) { return ReflectionUtils.is(type, Resource.class); } /** * Parses a string in order to produce a {@link ResourceStructure} object * * @param str * @return */ public ResourceStructure parseStructure(final String str) { final Matcher matcher = RESOURCE_PATTERN.matcher(str); if (!matcher.find()) { logger.info("Did not find any scheme definition in resource path: {}. Using default scheme: {}.", str, _defaultScheme); return new ResourceStructure(_defaultScheme, str); } final String scheme = matcher.group(1); final String path = matcher.group(2); return new ResourceStructure(scheme, path); } }