/* * Copyright 2015-2016 OpenCB * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.opencb.opencga.catalog.utils; import org.opencb.opencga.catalog.models.File; import java.net.URI; import java.util.HashMap; import java.util.Map; import java.util.regex.Pattern; /** * @author Jacobo Coll <jacobo167@gmail.com> */ public class FormatDetector { private static final Map<File.Format, Pattern> FORMAT_MAP = new HashMap<>(); static { FORMAT_MAP.put(File.Format.IMAGE, Pattern.compile(".*\\.(png|jpg|bmp|svg|gif|jpeg|tfg)(\\.[\\w]+)*", Pattern.CASE_INSENSITIVE)); } /** * * @param uri Existing file uri to the file * @return File.Format. UNKNOWN if can't detect any format. */ public static File.Format detect(URI uri) { for (Map.Entry<File.Format, Pattern> entry : FORMAT_MAP.entrySet()) { if (entry.getValue().matcher(uri.getPath()).matches()) { return entry.getKey(); } } String path = uri.getPath(); String extension = com.google.common.io.Files.getFileExtension(path); if (CompressionDetector.getCompression(extension) != File.Compression.NONE) { path = com.google.common.io.Files.getNameWithoutExtension(path); extension = com.google.common.io.Files.getFileExtension(path); } switch (extension.toLowerCase()) { case "vcf": return File.Format.VCF; case "bcf": return File.Format.BCF; case "bam": return File.Format.BAM; case "bai": return File.Format.BAI; case "sam": return File.Format.SAM; case "cram": return File.Format.CRAM; case "ped": return File.Format.PED; case "fastq": return File.Format.FASTQ; case "tsv": return File.Format.TAB_SEPARATED_VALUES; case "csv": return File.Format.COMMA_SEPARATED_VALUES; case "txt": case "log": return File.Format.PLAIN; case "xml": return File.Format.XML; case "json": return File.Format.JSON; case "proto": return File.Format.PROTOCOL_BUFFER; case "avro": return File.Format.AVRO; case "parquet": return File.Format.PARQUET; case "png": case "bmp": case "svg": case "gif": case "jpeg": case "tif": return File.Format.IMAGE; default: break; } //PLAIN return File.Format.UNKNOWN; } }