/*
 * Copyright 2013 Cloudera Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.cloudera.cdk.data.filesystem;

import com.cloudera.cdk.data.DatasetDescriptor;
import com.cloudera.cdk.data.DatasetWriter;
import com.cloudera.cdk.data.DatasetWriterException;
import com.cloudera.cdk.data.Format;
import com.cloudera.cdk.data.Formats;
import com.cloudera.cdk.data.UnknownFormatException;
import com.google.common.base.Joiner;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;
import java.util.UUID;

/**
 * Static factory methods for creating {@link DatasetWriter}s that write a
 * single, uniquely named file inside a directory of a {@link FileSystem}.
 */
abstract class FileSystemWriters {

  /** Joins the components of a generated file name with dashes. */
  private static final Joiner DASH = Joiner.on('-');

  /**
   * Creates a writer for a new, uniquely named file under {@code path}.
   *
   * <p>The directory {@code path} is created first (via
   * {@link FileSystem#mkdirs(Path)}), before the format is inspected. The
   * concrete writer is then selected from the descriptor's format.
   *
   * @param fs the file system to write to
   * @param path the directory in which the new file is created
   * @param descriptor supplies the format and schema of the data to write
   * @param <E> the type of entity accepted by the returned writer
   * @return a writer targeting a newly named file inside {@code path}
   * @throws DatasetWriterException if creating {@code path} fails with an
   *     {@link IOException}
   * @throws UnknownFormatException if the descriptor's format is neither
   *     {@link Formats#PARQUET} nor {@link Formats#AVRO}
   */
  @SuppressWarnings("unchecked")
  // See https://github.com/Parquet/parquet-mr/issues/106
  public static <E> DatasetWriter<E> newFileWriter(
      FileSystem fs, Path path, DatasetDescriptor descriptor) {
    // ensure the path exists
    try {
      fs.mkdirs(path);
    } catch (IOException ex) {
      throw new DatasetWriterException("Could not create path:" + path, ex);
    }

    final Format format = descriptor.getFormat();
    final Path file = new Path(path, uniqueFilename(format));

    if (Formats.PARQUET.equals(format)) {
      return new ParquetFileSystemDatasetWriter(fs, file, descriptor.getSchema());
    } else if (Formats.AVRO.equals(format)) {
      return new FileSystemDatasetWriter.Builder()
          .fileSystem(fs)
          .path(file)
          .schema(descriptor.getSchema())
          .build();
    } else {
      throw new UnknownFormatException("Unknown format:" + format);
    }
  }

  /**
   * Builds a file name of the form
   * {@code <millis>-<threadId>-<uuid>.<extension>}.
   *
   * <p>The timestamp and thread id prefix is kept from the earlier naming
   * scheme. The random UUID is what makes collisions practically impossible:
   * timestamp and thread id alone can coincide across separate processes
   * writing into the same directory.
   *
   * @param format the format whose extension is appended to the name
   * @return the generated file name
   */
  private static String uniqueFilename(Format format) {
    return DASH.join(
        System.currentTimeMillis(),
        Thread.currentThread().getId(),
        UUID.randomUUID())
        + "." + format.getExtension();
  }
}