/*
* Copyright 2015 Coursera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.linkedin.pegasus.generator;
import com.linkedin.data.schema.DataSchema;
import com.linkedin.data.schema.DataSchemaLocation;
import com.linkedin.data.schema.DataSchemaParserFactory;
import com.linkedin.data.schema.DataSchemaResolver;
import com.linkedin.data.schema.resolver.MultiFormatDataSchemaResolver;
import com.linkedin.util.FileUtil;
import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.io.FilenameUtils;
/**
 * Front end that fans schema source files out to one format-specific parser per file extension
 * (".pdsc" and ".pdl" files) and merges the per-format results into a single {@link ParseResult}.
 *
 * <p>Every {@link FileFormatDataSchemaParser} created by an instance is handed the same
 * {@link MultiFormatDataSchemaResolver}, which is also what {@link #getSchemaResolver()} returns.
 *
 * @author Joe Betz
 */
public class DataSchemaParser
{
  private final String _resolverPath;
  private final Map<String, FileFormatDataSchemaParser> _parserByFileExtension;
  private final MultiFormatDataSchemaResolver _resolver;

  /**
   * Creates a parser that uses {@link MultiFormatDataSchemaResolver#BUILTIN_FORMAT_PARSER_FACTORIES}.
   *
   * @param resolverPath provides the search paths separated by the system file separator, or null for no
   *                     search paths. NOTE(review): the separator is decided by the resolver, which is not
   *                     visible here; confirm whether this is really the path separator.
   */
  public DataSchemaParser(String resolverPath)
  {
    this(resolverPath, MultiFormatDataSchemaResolver.BUILTIN_FORMAT_PARSER_FACTORIES);
  }

  /**
   * Creates a parser with one {@link FileFormatDataSchemaParser} per supplied factory, registered under the
   * factory's {@link DataSchemaParserFactory#getLanguageExtension() language extension}.
   *
   * @param resolverPath search paths forwarded to the resolver and to every per-format parser.
   * @param parserFactoriesForFromats factories for the file formats this parser should understand.
   */
  public DataSchemaParser(String resolverPath, List<DataSchemaParserFactory> parserFactoriesForFromats)
  {
    _resolverPath = resolverPath;
    _resolver = new MultiFormatDataSchemaResolver(resolverPath, parserFactoriesForFromats);
    _parserByFileExtension = new HashMap<>();
    for (DataSchemaParserFactory factory : parserFactoriesForFromats)
    {
      final FileFormatDataSchemaParser formatParser =
          new FileFormatDataSchemaParser(resolverPath, _resolver, factory);
      _parserByFileExtension.put(factory.getLanguageExtension(), formatParser);
    }
  }

  /**
   * @return the resolver path this parser was constructed with (may be null).
   */
  public String getResolverPath()
  {
    return _resolverPath;
  }

  /**
   * @return the resolver shared by all per-format parsers of this instance.
   */
  public DataSchemaResolver getSchemaResolver()
  {
    return _resolver;
  }

  /**
   * Parses the given sources with the parser registered for each file's extension and merges the results.
   *
   * <p>Each source is treated as a file system path. A directory contributes the files returned by
   * {@link FileUtil#listFiles} for the registered extensions; a plain file contributes itself. Sources that
   * do not exist, and files whose extension has no registered parser, are skipped without any message.
   * Every registered format parser is invoked exactly once, even when no file was found for it.
   *
   * @param sources paths of schema files and/or directories containing schema files.
   * @return the merged result of all per-format parsers.
   * @throws IOException propagated from directory listing or from a per-format parser.
   */
  public DataSchemaParser.ParseResult parseSources(String[] sources) throws IOException
  {
    final Set<String> knownExtensions = _parserByFileExtension.keySet();

    // Pre-populate one bucket per registered extension so that every format parser runs below.
    final Map<String, List<String>> pathsByExtension = new HashMap<>(knownExtensions.size());
    for (String extension : knownExtensions)
    {
      pathsByExtension.put(extension, new ArrayList<>());
    }

    for (String source : sources)
    {
      final File candidate = new File(source);
      if (!candidate.exists())
      {
        continue;
      }

      if (candidate.isDirectory())
      {
        final FileExtensionFilter onlyKnownExtensions = new FileExtensionFilter(knownExtensions);
        final List<File> listed = FileUtil.listFiles(candidate, onlyKnownExtensions);
        for (File listedFile : listed)
        {
          addIfSupported(listedFile, pathsByExtension);
        }
      }
      else
      {
        addIfSupported(candidate, pathsByExtension);
      }
    }

    // Run every format parser first, then merge; nothing is merged if any parser throws.
    final List<ParseResult> perFormatResults = new ArrayList<>();
    for (Map.Entry<String, List<String>> group : pathsByExtension.entrySet())
    {
      final FileFormatDataSchemaParser formatParser = _parserByFileExtension.get(group.getKey());
      final List<String> paths = group.getValue();
      perFormatResults.add(formatParser.parseSources(paths.toArray(new String[0])));
    }
    return combine(perFormatResults);
  }

  /**
   * Appends the absolute path of {@code file} to the bucket for its extension, if such a bucket exists.
   */
  private static void addIfSupported(File file, Map<String, List<String>> pathsByExtension)
  {
    final List<String> bucket = pathsByExtension.get(FilenameUtils.getExtension(file.getName()));
    if (bucket != null)
    {
      bucket.add(file.getAbsolutePath());
    }
  }

  /**
   * Merges schemas, source files and messages of the given results into one new {@link ParseResult}.
   * Messages are concatenated in iteration order without any separator.
   */
  private static ParseResult combine(Collection<ParseResult> parseResults)
  {
    final ParseResult merged = new ParseResult();
    final Map<DataSchema, DataSchemaLocation> mergedSchemas = merged.getSchemaAndLocations();
    final Set<File> mergedSourceFiles = merged.getSourceFiles();
    for (ParseResult partial : parseResults)
    {
      mergedSchemas.putAll(partial.getSchemaAndLocations());
      mergedSourceFiles.addAll(partial.getSourceFiles());
      merged.addMessage(partial.getMessage());
    }
    return merged;
  }

  /**
   * Accepts a file when the extension of its name is contained in the configured set.
   */
  private static class FileExtensionFilter implements FileFilter
  {
    private final Set<String> _acceptedExtensions;

    public FileExtensionFilter(Set<String> extensions)
    {
      _acceptedExtensions = extensions;
    }

    @Override
    public boolean accept(File file)
    {
      final String extension = FilenameUtils.getExtension(file.getName());
      return _acceptedExtensions.contains(extension);
    }
  }

  /**
   * Represents the result of schema parsing. It consists of two parts: schemas from file paths and schemas
   * from schema names, based on user input. The two parts are mutually exclusive, and their union consists
   * of all schemas resolved.
   *
   * <p>The result contains all resolved data schemas, both those directly defined by the source files and
   * those transitively referenced by them. Both top-level and embedded named schemas are included. Only
   * top-level unnamed schemas are included.
   *
   * <p>The collections returned by the getters are the live, mutable backing collections.
   */
  public static class ParseResult
  {
    private final Map<DataSchema, DataSchemaLocation> _schemaAndLocations = new HashMap<>();
    private final Set<File> _sourceFiles = new HashSet<>();
    protected final StringBuilder _messageBuilder = new StringBuilder();

    /**
     * @return the mutable map from each schema to its location.
     */
    public Map<DataSchema, DataSchemaLocation> getSchemaAndLocations()
    {
      return _schemaAndLocations;
    }

    /**
     * @return the mutable set of source files recorded in this result.
     */
    public Set<File> getSourceFiles()
    {
      return _sourceFiles;
    }

    /**
     * @return everything appended so far via {@link #addMessage(String)}, as one string.
     */
    public String getMessage()
    {
      return _messageBuilder.toString();
    }

    /**
     * Appends {@code message} to the accumulated message text.
     *
     * @param message text to append.
     * @return this result, to allow chaining.
     */
    public ParseResult addMessage(String message)
    {
      _messageBuilder.append(message);
      return this;
    }
  }
}