/*
* Copyright 2013 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kitesdk.data.spi;
import com.google.common.io.Closeables;
import com.google.common.io.Resources;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.avro.AvroSchemaConverter;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
public class Schemas {
// used to match resource:schema.avsc URIs
private static final String RESOURCE_URI_SCHEME = "resource";
public static Schema fromAvsc(InputStream in) throws IOException {
// the parser has state, so use a new one each time
return new Schema.Parser().parse(in);
}
public static Schema fromAvsc(File location) throws IOException {
return fromAvsc(
FileSystem.getLocal(DefaultConfiguration.get()),
new Path(location.getPath()));
}
public static Schema fromAvsc(FileSystem fs, Path path) throws IOException {
InputStream in = null;
boolean threw = true;
try {
in = fs.open(path);
Schema schema = new Schema.Parser().parse(in);
threw = false;
return schema;
} finally {
Closeables.close(in, threw);
}
}
public static Schema fromAvsc(Configuration conf, URI location)
throws IOException {
InputStream in = null;
boolean threw = true;
try {
in = open(conf, location);
Schema schema = fromAvsc(in);
threw = false;
return schema;
} finally {
Closeables.close(in, threw);
}
}
public static Schema fromAvro(InputStream in) throws IOException {
GenericDatumReader<GenericRecord> datumReader =
new GenericDatumReader<GenericRecord>();
DataFileStream<GenericRecord> stream = null;
boolean threw = true;
try {
stream = new DataFileStream<GenericRecord>(in, datumReader);
Schema schema = stream.getSchema();
threw = false;
return schema;
} finally {
Closeables.close(stream, threw);
}
}
public static Schema fromAvro(File location) throws IOException {
return fromAvro(
FileSystem.getLocal(DefaultConfiguration.get()),
new Path(location.getPath()));
}
public static Schema fromAvro(FileSystem fs, Path location)
throws IOException {
InputStream in = null;
boolean threw = true;
try {
in = fs.open(location);
Schema schema = fromAvro(in);
threw = false;
return schema;
} finally {
Closeables.close(in, threw);
}
}
public static Schema fromAvro(Configuration conf, URI location)
throws IOException {
InputStream in = null;
boolean threw = true;
try {
in = open(conf, location);
Schema schema = fromAvro(in);
threw = false;
return schema;
} finally {
Closeables.close(in, threw);
}
}
public static Schema fromParquet(File location) throws IOException {
return fromParquet(
FileSystem.getLocal(DefaultConfiguration.get()),
new Path(location.getPath()));
}
public static Schema fromParquet(FileSystem fs, Path location) throws IOException {
ParquetMetadata footer = ParquetFileReader.readFooter(fs.getConf(), location);
String schemaString = footer.getFileMetaData()
.getKeyValueMetaData().get("parquet.avro.schema");
if (schemaString == null) {
// try the older property
schemaString = footer.getFileMetaData()
.getKeyValueMetaData().get("avro.schema");
}
if (schemaString != null) {
return new Schema.Parser().parse(schemaString);
} else {
return new AvroSchemaConverter()
.convert(footer.getFileMetaData().getSchema());
}
}
public static Schema fromParquet(Configuration conf, URI location)
throws IOException {
Path path = new Path(location);
return fromParquet(path.getFileSystem(conf), path);
}
public static Schema fromJSON(String name, InputStream in) throws IOException {
return JsonUtil.inferSchema(in, name, 20);
}
public static Schema fromJSON(String name, File location) throws IOException {
return fromJSON(name,
FileSystem.getLocal(DefaultConfiguration.get()),
new Path(location.getPath()));
}
public static Schema fromJSON(String name, FileSystem fs, Path location)
throws IOException {
InputStream in = null;
boolean threw = true;
try {
in = fs.open(location);
Schema schema = fromJSON(name, in);
threw = false;
return schema;
} finally {
Closeables.close(in, threw);
}
}
public static Schema fromJSON(String name, Configuration conf, URI location)
throws IOException {
InputStream in = null;
boolean threw = true;
try {
in = open(conf, location);
Schema schema = fromJSON(name, in);
threw = false;
return schema;
} finally {
Closeables.close(in, threw);
}
}
private static InputStream open(Configuration conf, URI location)
throws IOException {
if (RESOURCE_URI_SCHEME.equals(location.getScheme())) {
return Resources.getResource(
location.getRawSchemeSpecificPart()).openStream();
} else {
Path path = new Path(location);
return path.getFileSystem(conf).open(path);
}
}
}