/*
* Eoulsan development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public License version 2.1 or
* later and CeCILL-C. This should be distributed with the code.
* If you do not have a copy, see:
*
* http://www.gnu.org/licenses/lgpl-2.1.txt
* http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.txt
*
* Copyright for this code is held jointly by the Genomic platform
* of the Institut de Biologie de l'École normale supérieure and
* the individual authors. These should be listed in @author doc
* comments.
*
* For more information on the Eoulsan project and its aims,
* or to join the Eoulsan Google group, visit the home page
* at:
*
* http://outils.genomique.biologie.ens.fr/eoulsan
*
*/
package fr.ens.biologie.genomique.eoulsan.core;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static fr.ens.biologie.genomique.eoulsan.core.Naming.ASCII_LETTER_OR_DIGIT;
import static fr.ens.biologie.genomique.eoulsan.util.StringUtils.toLetter;
import java.io.File;
import java.util.Objects;
import fr.ens.biologie.genomique.eoulsan.data.Data;
import fr.ens.biologie.genomique.eoulsan.data.DataFile;
import fr.ens.biologie.genomique.eoulsan.data.DataFormat;
import fr.ens.biologie.genomique.eoulsan.data.DataFormatRegistry;
import fr.ens.biologie.genomique.eoulsan.io.CompressionType;
/**
 * This class contains methods to create and parse workflow data file names.
 * <p>
 * A filename is the concatenation of three parts:
 * <ul>
 * <li>a prefix: <code>stepId_portName_formatPrefix_</code>,</li>
 * <li>a middle: <code>dataName[_fileN][_partN]</code>,</li>
 * <li>a suffix: the format extension followed by the compression extension
 * (if any).</li>
 * </ul>
 * Step id, port name, format prefix and data name can only contain ASCII
 * letters or digits, so that the separator character is never ambiguous when
 * parsing a filename.
 * @author Laurent Jourdren
 * @since 2.0
 */
public class FileNaming {

  private static final char SEPARATOR = '_';
  private static final String FILE_INDEX_PREFIX = "file";
  private static final String PART_INDEX_PREFIX = "part";

  private String stepId;
  private String portName;
  private String dataName;
  private int sampleNumber = -1;
  private DataFormat format;
  // -1 means "no file index" / "no part"
  private int fileIndex = -1;
  private int part = -1;
  private CompressionType compression = CompressionType.NONE;

  //
  // Getters
  //

  /**
   * Get Step Id.
   * @return the step Id or null if not set
   */
  public String getStepId() {
    return this.stepId;
  }

  /**
   * Get the port name.
   * @return the port name or null if not set
   */
  public String getPortName() {
    return this.portName;
  }

  /**
   * Get the data name.
   * @return the data name or null if not set
   */
  public String getDataName() {
    return this.dataName;
  }

  /**
   * Get the sample number related to the data. This value is only used when
   * generating compatible filenames.
   * @return the number of the sample related to the file or -1 if not known
   */
  public int getSampleNumber() {
    return this.sampleNumber;
  }

  /**
   * Get the format.
   * @return the format or null if not set
   */
  public DataFormat getFormat() {
    return this.format;
  }

  /**
   * Get the file index.
   * @return the file index or -1 if there is no file index
   */
  public int getFileIndex() {
    return this.fileIndex;
  }

  /**
   * Get the file part.
   * @return the file part or -1 if there is no part
   */
  public int getPart() {
    return this.part;
  }

  /**
   * Get the compression.
   * @return the compression
   */
  public CompressionType getCompression() {
    return this.compression;
  }

  //
  // Setters
  //

  /**
   * Set the step id.
   * @param stepId the step id
   * @throws NullPointerException if the step id is null
   * @throws IllegalArgumentException if the step id is empty or contains
   *           other characters than letters or digits
   */
  public void setStepId(final String stepId) {
    checkStepId(stepId);
    this.stepId = stepId;
  }

  /**
   * Set the port name.
   * @param portName the port name
   * @throws NullPointerException if the port name is null
   * @throws IllegalArgumentException if the port name is empty or contains
   *           other characters than letters or digits
   */
  public void setPortName(final String portName) {
    checkPortName(portName);
    this.portName = portName;
  }

  /**
   * Set the data name.
   * @param dataName the data name
   * @throws NullPointerException if the data name is null
   * @throws IllegalArgumentException if the data name is empty or contains
   *           other characters than letters or digits
   */
  public void setDataName(final String dataName) {
    checkDataName(dataName);
    this.dataName = dataName;
  }

  /**
   * Set the sample number related to the data. This value is only used when
   * generating compatible filenames.
   * @param sampleNumber the number of the sample related to the file or -1 if
   *          not known
   */
  public void setSampleNumber(final int sampleNumber) {
    this.sampleNumber = sampleNumber;
  }

  /**
   * Set the format.
   * @param format the format
   * @throws NullPointerException if the format is null
   */
  public void setFormat(final DataFormat format) {
    checkNotNull(format, "format argument cannot be null");
    this.format = format;
  }

  /**
   * Set the file index. Any negative value is stored as -1 (no file index).
   * @param fileIndex the file index
   */
  public void setFileIndex(final int fileIndex) {
    this.fileIndex = fileIndex < 0 ? -1 : fileIndex;
  }

  /**
   * Set the part number. Any negative value is stored as -1 (no part).
   * @param part the part number
   */
  public void setPart(final int part) {
    this.part = part < 0 ? -1 : part;
  }

  /**
   * Set the compression.
   * @param compression the compression type
   * @throws NullPointerException if the compression is null
   */
  public void setCompression(final CompressionType compression) {
    checkNotNull(compression, "compression argument cannot be null");
    this.compression = compression;
  }

  /**
   * Set several fields of the object (data name and part) from a Data object.
   * @param data the data object
   * @throws NullPointerException if the data is null
   */
  protected void set(final Data data) {
    checkNotNull(data, "data argument cannot be null");
    setDataName(data.getName());
    setPart(data.getPart());
  }

  //
  // Other methods
  //

  /**
   * Return the file prefix.
   * @return a string with the file prefix
   * @throws NullPointerException if the step id, the port name or the format
   *           has not been set
   */
  public String filePrefix() {
    checkNotNull(this.stepId, "stepId has not been set");
    checkNotNull(this.portName, "portName has not been set");
    checkNotNull(this.format, "format has not been set");

    return filePrefix(this.stepId, this.portName, this.format.getPrefix());
  }

  /**
   * Return the file suffix.
   * @return a string with the file suffix
   * @throws NullPointerException if the format or the compression has not
   *           been set
   */
  public String fileSuffix() {
    checkNotNull(this.format, "format has not been set");
    checkNotNull(this.compression, "compression has not been set");

    return fileSuffix(this.format.getDefaultExtension(),
        this.compression.getExtension());
  }

  /**
   * Return the middle string of the filename.
   * @return a string with the middle string of the filename
   * @throws NullPointerException if the data name or the format has not been
   *           set
   * @throws IllegalArgumentException if the file index is not valid for the
   *           format
   */
  public String fileMiddle() {
    checkNotNull(this.dataName, "dataName has not been set");
    checkFormatAndFileIndex();

    return fileMiddle(this.dataName, this.fileIndex, this.part);
  }

  /**
   * Return the filename.
   * @return a string with the filename
   * @throws NullPointerException if one of the mandatory fields has not been
   *           set
   * @throws IllegalArgumentException if the file index is not valid for the
   *           format
   */
  public String filename() {
    checkNotNull(this.stepId, "stepId has not been set");
    checkNotNull(this.portName, "portName has not been set");
    checkNotNull(this.format, "format has not been set");
    checkNotNull(this.dataName, "dataName has not been set");
    checkNotNull(this.compression, "compression has not been set");
    checkFormatAndFileIndex();

    return filename(this.stepId, this.portName, this.format, this.dataName,
        this.fileIndex, this.part, this.compression);
  }

  /**
   * Get a glob for the filename: the prefix and the suffix of the file
   * separated by a wildcard.
   * @return a glob in a string
   */
  public String glob() {
    return filePrefix() + '*' + fileSuffix();
  }

  /**
   * Return the filename using Eoulsan 1.x naming.
   * @return a string with the filename using Eoulsan 1.x naming
   * @throws NullPointerException if one of the mandatory fields has not been
   *           set
   * @throws IllegalArgumentException if the file index is not valid for the
   *           format
   */
  public String compatibilityFilename() {
    checkNotNull(this.stepId, "stepId has not been set");
    checkNotNull(this.portName, "portName has not been set");
    checkNotNull(this.format, "format has not been set");
    checkNotNull(this.dataName, "dataName has not been set");
    checkNotNull(this.compression, "compression has not been set");
    checkFormatAndFileIndex();

    // Set the prefix against step name
    final String prefix;
    switch (this.stepId) {
    case "filterreads":
      prefix = "filtered_reads";
      break;
    case "mapreads":
      prefix = "mapper_results";
      break;
    case "filtersam":
    case "filterandmap":
      prefix = "filtered_mapper_results";
      break;
    default:
      prefix = this.stepId;
      break;
    }

    final StringBuilder sb = new StringBuilder();
    sb.append(prefix);
    sb.append(SEPARATOR);

    // Set the id of the sample
    if (this.format.isOneFilePerAnalysis()) {
      sb.append('1');
    } else {
      sb.append(this.sampleNumber);
    }

    // Set the file index if needed
    if (this.fileIndex >= 0) {
      sb.append(toLetter(this.fileIndex));
    }

    // Set the extension
    sb.append(this.format.getDefaultExtension());

    // Set the compression extension, as in fileSuffix(). Without it a
    // compressed file would get the same name as its uncompressed version.
    sb.append(this.compression.getExtension());

    return sb.toString();
  }

  /**
   * Check if the file index is valid for the current format.
   * @throws NullPointerException if the format has not been set
   * @throws IllegalArgumentException if the file index is not valid for the
   *           format
   */
  private void checkFormatAndFileIndex() {
    // Guard here so that every caller gets an explicit message instead of a
    // bare NullPointerException when dereferencing the format
    checkNotNull(this.format, "format has not been set");

    final int maxFilesCount = this.format.getMaxFilesCount();
    final String errorMessage = "Invalid fileIndex argument for format "
        + this.format.getName() + ": " + this.fileIndex;

    if (maxFilesCount == 1) {
      // Single file formats never carry a file index
      checkArgument(this.fileIndex == -1, errorMessage);
    } else {
      checkArgument(this.fileIndex < maxFilesCount, errorMessage);
    }
  }

  //
  // Static methods
  //

  /**
   * Create the prefix of a filename.
   * @param stepId step id
   * @param portName port name
   * @param format format of the file
   * @return a String with the prefix of the file
   */
  public static String filePrefix(final String stepId, final String portName,
      final DataFormat format) {
    final FileNaming f = new FileNaming();
    f.setStepId(stepId);
    f.setPortName(portName);
    f.setFormat(format);

    return f.filePrefix();
  }

  /**
   * Create the prefix of a filename.
   * @param stepId step id
   * @param portName port name
   * @param formatPrefix format prefix of the file
   * @return a String with the prefix of the file
   * @throws NullPointerException if one of the arguments is null
   * @throws IllegalArgumentException if one of the arguments is empty or
   *           contains other characters than letters or digits
   */
  public static String filePrefix(final String stepId, final String portName,
      final String formatPrefix) {
    checkStepId(stepId);
    checkPortName(portName);
    checkFormatPrefix(formatPrefix);

    return stepId + SEPARATOR + portName + SEPARATOR + formatPrefix + SEPARATOR;
  }

  //
  // Middle creation
  //

  /**
   * Create the middle of a filename.
   * @param dataName data name
   * @param fileIndex file index, ignored if negative
   * @param part file part, ignored if negative
   * @return a String with the middle of the filename
   * @throws NullPointerException if the data name is null
   * @throws IllegalArgumentException if the data name is empty or contains
   *           other characters than letters or digits
   */
  public static String fileMiddle(final String dataName, final int fileIndex,
      final int part) {
    checkDataName(dataName);

    final StringBuilder sb = new StringBuilder();

    // Set the name of the data
    sb.append(dataName);

    // Set the file index if needed
    if (fileIndex >= 0) {
      sb.append(SEPARATOR);
      sb.append(FILE_INDEX_PREFIX);
      sb.append(fileIndex);
    }

    // Set the part if needed
    if (part >= 0) {
      sb.append(SEPARATOR);
      sb.append(PART_INDEX_PREFIX);
      sb.append(part);
    }

    return sb.toString();
  }

  //
  // Suffix creation
  //

  /**
   * Create the suffix of a file.
   * @param format format of the file
   * @param compression file compression
   * @return a String with the suffix of a file
   */
  public static String fileSuffix(final DataFormat format,
      final CompressionType compression) {
    final FileNaming f = new FileNaming();
    f.setFormat(format);
    f.setCompression(compression);

    return f.fileSuffix();
  }

  /**
   * Create the suffix of a file.
   * @param extension file extension, must start with a dot
   * @param compression file compression extension, either empty or starting
   *          with a dot
   * @return a String with the suffix of a file
   * @throws NullPointerException if one of the arguments is null
   * @throws IllegalArgumentException if one of the arguments is not a valid
   *           extension
   */
  public static String fileSuffix(final String extension,
      final String compression) {
    checkExtension(extension);
    // Guarantees that compression is not null (it may be empty)
    checkCompression(compression);

    return extension + compression;
  }

  //
  // File name
  //

  /**
   * Create the filename from several parameters.
   * @param stepId the step id
   * @param portName the port name
   * @param format the format
   * @param dataName the data name
   * @param fileIndex the file index
   * @param part the part
   * @param compression the compression type
   * @return a string with the filename
   */
  public static String filename(final String stepId, final String portName,
      final DataFormat format, final String dataName, final int fileIndex,
      final int part, final CompressionType compression) {
    return filePrefix(stepId, portName, format)
        + fileMiddle(dataName, fileIndex, part)
        + fileSuffix(format, compression);
  }

  //
  // File name parsing
  //

  /**
   * Create a FileNaming object from a File object.
   * @param file the file
   * @return a new FileNaming object
   * @throws FileNamingParsingRuntimeException if the name of the file is not
   *           a valid workflow filename
   */
  public static FileNaming parse(final File file) {
    checkNotNull(file, "file argument cannot be null");

    return parse(file.getName());
  }

  /**
   * Create a FileNaming object from a DataFile object.
   * @param file the file
   * @return a new FileNaming object
   * @throws FileNamingParsingRuntimeException if the name of the file is not
   *           a valid workflow filename
   */
  public static FileNaming parse(final DataFile file) {
    checkNotNull(file, "file argument cannot be null");

    return parse(file.getName());
  }

  /**
   * Create a FileNaming object from a filename.
   * @param filename the filename
   * @return a new FileNaming object
   * @throws NullPointerException if the filename is null
   * @throws FileNamingParsingRuntimeException if the filename is not a valid
   *           workflow filename
   */
  public static FileNaming parse(final String filename) {
    checkNotNull(filename, "filename argument cannot be null");

    final FileNaming result = new FileNaming();

    // Expected layout: <fields>.<format extension>[.<compression extension>]
    final String[] extensions = filename.split("\\.");
    if (extensions.length < 2 || extensions.length > 3) {
      throw invalidFilename(filename);
    }

    // Get format extension
    final String formatExtension = '.' + extensions[1];

    // Get compression
    if (extensions.length == 3) {
      final CompressionType compressionType =
          CompressionType.getCompressionTypeByExtension('.' + extensions[2]);

      // Defensive: report an unresolved compression as a parsing error
      // rather than letting setCompression() throw a NullPointerException
      if (compressionType == null) {
        throw invalidFilename(filename);
      }
      result.setCompression(compressionType);
    }

    // Expected fields: stepId, portName, formatPrefix, dataName and then the
    // optional file index and part
    final String[] fields = extensions[0].split("_");
    if (fields.length < 4) {
      throw invalidFilename(filename);
    }

    // Each field is validated before calling its setter so that an invalid
    // field is reported as a parsing error, not as an illegal argument
    if (!isStepIdValid(fields[0])) {
      throw invalidFilename(filename);
    }
    result.setStepId(fields[0]);

    if (!isPortNameValid(fields[1])) {
      throw invalidFilename(filename);
    }
    result.setPortName(fields[1]);

    final DataFormat format = DataFormatRegistry.getInstance()
        .getDataFormatFromFilename(fields[2], formatExtension);
    if (format == null) {
      throw invalidFilename(filename);
    }
    result.setFormat(format);

    if (!isDataNameValid(fields[3])) {
      throw invalidFilename(filename);
    }
    result.setDataName(fields[3]);

    for (int i = 4; i < fields.length; i++) {

      final String field = fields[i];

      if (field.startsWith(FILE_INDEX_PREFIX)) {

        // The file index cannot be defined twice
        if (result.getFileIndex() != -1) {
          throw invalidFilename(filename);
        }
        result.setFileIndex(parseIndex(field, FILE_INDEX_PREFIX, filename));

      } else if (field.startsWith(PART_INDEX_PREFIX)) {

        // The part cannot be defined twice
        if (result.getPart() != -1) {
          throw invalidFilename(filename);
        }
        result.setPart(parseIndex(field, PART_INDEX_PREFIX, filename));

      } else {
        // Unknown trailing field: the filename does not follow the naming
        throw invalidFilename(filename);
      }
    }

    // A file index is mandatory for multi-files formats
    if (format.getMaxFilesCount() > 1 && result.getFileIndex() == -1) {
      throw invalidFilename(filename);
    }

    return result;
  }

  /**
   * Parse the numeric value of a "fileN" or "partN" field.
   * @param field the field to parse
   * @param prefix the prefix of the field that precedes the number
   * @param filename the full filename, only used for the error message
   * @return the parsed value, always positive or zero
   * @throws FileNamingParsingRuntimeException if the value is not a positive
   *           or zero integer
   */
  private static int parseIndex(final String field, final String prefix,
      final String filename) {

    final int value;
    try {
      value = Integer.parseInt(field.substring(prefix.length()));
    } catch (NumberFormatException e) {
      // Same message-only exception as every other parsing error of this
      // class
      throw invalidFilename(filename);
    }

    // A negative value would be silently stored as "not set" by the setters
    // and would defeat the detection of duplicated fields
    if (value < 0) {
      throw invalidFilename(filename);
    }

    return value;
  }

  /**
   * Create the exception to throw when a filename cannot be parsed.
   * @param filename the invalid filename
   * @return a new exception, to be thrown by the caller
   */
  private static FileNamingParsingRuntimeException invalidFilename(
      final String filename) {
    return new FileNamingParsingRuntimeException(
        "Invalid filename: " + filename);
  }

  //
  // Validation names methods
  //

  /**
   * Test if a step id is valid.
   * @param stepId the step id to check
   * @return true if the step id is valid
   */
  public static final boolean isStepIdValid(final String stepId) {
    return isNameValid(stepId);
  }

  /**
   * Test if a format prefix is valid.
   * @param formatPrefix the format prefix to check
   * @return true if the format prefix is valid
   */
  public static final boolean isFormatPrefixValid(final String formatPrefix) {
    return isNameValid(formatPrefix);
  }

  /**
   * Test if a port name is valid.
   * @param portName port name to check
   * @return true if the port name is valid
   */
  public static final boolean isPortNameValid(final String portName) {
    return isNameValid(portName);
  }

  /**
   * Test if a data name is valid.
   * @param dataName data name to check
   * @return true if the data name is valid
   */
  public static final boolean isDataNameValid(final String dataName) {
    return isNameValid(dataName);
  }

  /**
   * Test if a name is valid: not null, not empty and only made of characters
   * matched by ASCII_LETTER_OR_DIGIT.
   * @param name the name to test
   * @return true if the name is valid
   */
  private static boolean isNameValid(final String name) {
    return name != null
        && !name.isEmpty() && ASCII_LETTER_OR_DIGIT.matchesAllOf(name);
  }

  /**
   * Test if a filename is valid.
   * @param file the file to test.
   * @return true if the filename is valid
   */
  public static final boolean isFilenameValid(final DataFile file) {
    checkNotNull(file, "file argument cannot be null");

    return isFilenameValid(file.getName());
  }

  /**
   * Test if a filename is valid.
   * @param file the file to test.
   * @return true if the filename is valid
   */
  public static final boolean isFilenameValid(final File file) {
    checkNotNull(file, "file argument cannot be null");

    return isFilenameValid(file.getName());
  }

  /**
   * Test if a filename is valid, that is to say if it can be parsed.
   * @param filename the file to test.
   * @return true if the filename is valid
   * @throws NullPointerException if the filename is null
   */
  public static final boolean isFilenameValid(final String filename) {
    try {
      FileNaming.parse(filename);
    } catch (FileNamingParsingRuntimeException e) {
      return false;
    }

    return true;
  }

  /**
   * Test if two files are related to the same data.
   * @param file1 the first file
   * @param file2 the second file
   * @return true if the two files are related to the same data
   */
  public static boolean dataEquals(final File file1, final File file2) {
    checkNotNull(file1, "file1 argument cannot be null");
    checkNotNull(file2, "file2 argument cannot be null");

    return dataEquals(file1.getName(), file2.getName());
  }

  /**
   * Test if two files are related to the same data.
   * @param file1 the first file
   * @param file2 the second file
   * @return true if the two files are related to the same data
   */
  public static boolean dataEquals(final DataFile file1, final DataFile file2) {
    checkNotNull(file1, "file1 argument cannot be null");
    checkNotNull(file2, "file2 argument cannot be null");

    return dataEquals(file1.getName(), file2.getName());
  }

  /**
   * Test if two filenames are related to the same data. The step id, the port
   * name, the format, the data name and the part are compared; the file index
   * and the compression are not.
   * @param filename1 the first filename
   * @param filename2 the second filename
   * @return true if the two files are related to the same data, false if they
   *         are not or if one of the filenames cannot be parsed
   */
  public static boolean dataEquals(final String filename1,
      final String filename2) {
    checkNotNull(filename1, "filename1 argument cannot be null");
    checkNotNull(filename2, "filename2 argument cannot be null");

    final FileNaming fn1;
    final FileNaming fn2;

    try {
      fn1 = parse(filename1);
      fn2 = parse(filename2);
    } catch (FileNamingParsingRuntimeException e) {
      // Filenames that do not follow the naming cannot be compared
      return false;
    }

    return Objects.equals(fn1.stepId, fn2.stepId)
        && Objects.equals(fn1.portName, fn2.portName)
        && Objects.equals(fn1.format, fn2.format)
        && Objects.equals(fn1.dataName, fn2.dataName) && fn1.part == fn2.part;
  }

  /**
   * Check a step Id.
   * @param stepId the step id to check
   * @throws NullPointerException if the step id is null
   * @throws IllegalArgumentException if the step id is not valid
   */
  private static void checkStepId(final String stepId) {
    checkNotNull(stepId, "stepId argument cannot be null");
    checkArgument(!stepId.isEmpty(), "stepId is empty");
    checkArgument(isStepIdValid(stepId),
        "The step id of the file name can only contains letters or digit: "
            + stepId);
  }

  /**
   * Check a port name.
   * @param portName the port to check
   * @throws NullPointerException if the port name is null
   * @throws IllegalArgumentException if the port name is not valid
   */
  private static void checkPortName(final String portName) {
    checkNotNull(portName, "portName argument cannot be null");
    checkArgument(!portName.isEmpty(), "portName argument is empty");
    checkArgument(isPortNameValid(portName),
        "The port name of the file name can only contains letters or digit: "
            + portName);
  }

  /**
   * Check a format prefix.
   * @param formatPrefix the format prefix
   * @throws NullPointerException if the format prefix is null
   * @throws IllegalArgumentException if the format prefix is not valid
   */
  private static void checkFormatPrefix(final String formatPrefix) {
    checkNotNull(formatPrefix, "formatPrefix argument cannot be null");
    checkArgument(!formatPrefix.isEmpty(), "formatPrefix is empty");
    checkArgument(isFormatPrefixValid(formatPrefix),
        "The format prefix of the file name can only contains letters or digit: "
            + formatPrefix);
  }

  /**
   * Check a data name.
   * @param dataName the data name to check
   * @throws NullPointerException if the data name is null
   * @throws IllegalArgumentException if the data name is not valid
   */
  private static void checkDataName(final String dataName) {
    checkNotNull(dataName, "dataName argument cannot be null");
    checkArgument(!dataName.isEmpty(), "dataName is empty");
    checkArgument(isDataNameValid(dataName),
        "The data name of the file name can only contains letters or digit: "
            + dataName);
  }

  /**
   * Check an extension: it must start with a dot and the dot can only be
   * followed by letters or digits.
   * @param extension the extension to check
   * @throws NullPointerException if the extension is null
   * @throws IllegalArgumentException if the extension is not valid
   */
  private static void checkExtension(final String extension) {
    checkNotNull(extension, "extension argument cannot be null");
    checkArgument(!extension.isEmpty(), "A part of the file name is empty");
    checkArgument(extension.charAt(0) == '.',
        "The extension do not starts with a dot: " + extension);
    checkArgument(ASCII_LETTER_OR_DIGIT.matchesAllOf(extension.substring(1)),
        "The extension of the file name can only contains letters or digit: "
            + extension);
  }

  /**
   * Check a compression extension: it must be either empty or start with a
   * dot that can only be followed by letters or digits.
   * @param compression the compression extension to check
   * @throws NullPointerException if the compression is null
   * @throws IllegalArgumentException if the compression is not valid
   */
  private static void checkCompression(final String compression) {
    checkNotNull(compression, "compression argument cannot be null");

    // Empty compression string is allowed
    if (compression.isEmpty()) {
      return;
    }

    checkArgument(compression.charAt(0) == '.',
        "The compression do not starts with a dot: " + compression);
    checkArgument(ASCII_LETTER_OR_DIGIT.matchesAllOf(compression.substring(1)),
        "The compression of the file name can only contains letters or digit: "
            + compression);
  }

  //
  // Constructor
  //

  /**
   * Protected constructor: instances are created by the static methods of
   * this class or by subclasses.
   */
  protected FileNaming() {
  }
}