/** * DataCleaner (community edition) * Copyright (C) 2014 Neopost - Customer Information Management * * This copyrighted material is made available to anyone wishing to use, modify, * copy, or redistribute it subject to the terms and conditions of the GNU * Lesser General Public License, as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this distribution; if not, write to: * Free Software Foundation, Inc. * 51 Franklin Street, Fifth Floor * Boston, MA 02110-1301 USA */ package org.datacleaner.util.convert; import java.net.URI; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.hadoop.conf.Configuration; import org.datacleaner.configuration.ServerInformationCatalog; import org.datacleaner.server.HadoopClusterInformation; import org.datacleaner.util.HadoopResource; public class HadoopResourceBuilder { /** * A regular expression {@link Pattern} that matches resource URIs * containing template items for the server definition, for instance: * * hdfs://{myserver}/foo/bar.txt * * <ul> * <li>Group 1: The scheme (example 'hdfs')</li> * <li>Group 2: The template name (example 'myserver')</li> * <li>Group 3: The path (example '/foo/bar.txt')</li> * </ul> */ public static final Pattern RESOURCE_SCHEME_PATTERN = Pattern.compile("([\\w\\+\\-\\.]+)://\\{([\\w\\.\\W\\s]*)\\}(.*)"); private final URI _uri; private final String _clusterReferenceName; private final Configuration _configuration; public HadoopResourceBuilder(final ServerInformationCatalog catalog, final String templatedUri) { final Matcher matcher = RESOURCE_SCHEME_PATTERN.matcher(templatedUri); if (!matcher.matches()) { _clusterReferenceName = null; final String fixedUri = templatedUri.replace(" ", "%20"); final HadoopClusterInformation hadoopClusterInformation = (HadoopClusterInformation) catalog.getServer(HadoopResource.DEFAULT_CLUSTERREFERENCE); if (hadoopClusterInformation != null) { _configuration = hadoopClusterInformation.getConfiguration(); } else { _configuration = new Configuration(); } _configuration.set("fs.defaultFS", fixedUri); _uri = URI.create(fixedUri); } else { _clusterReferenceName = matcher.group(2); final HadoopClusterInformation hadoopClusterInformation = (HadoopClusterInformation) catalog.getServer(_clusterReferenceName); _configuration = hadoopClusterInformation.getConfiguration(); _uri = URI.create(matcher.group(3).replace(" ", "%20")); } } public HadoopResource build() { return new HadoopResource(_uri, _configuration, _clusterReferenceName); } }