/*
* Eoulsan development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public License version 2.1 or
* later and CeCILL-C. This should be distributed with the code.
* If you do not have a copy, see:
*
* http://www.gnu.org/licenses/lgpl-2.1.txt
* http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.txt
*
* Copyright for this code is held jointly by the Genomic platform
* of the Institut de Biologie de l'École normale supérieure and
* the individual authors. These should be listed in @author doc
* comments.
*
* For more information on the Eoulsan project and its aims,
* or to join the Eoulsan Google group, visit the home page
* at:
*
* http://outils.genomique.biologie.ens.fr/eoulsan
*
*/
package fr.ens.biologie.genomique.eoulsan.io.comparators;
import static fr.ens.biologie.genomique.eoulsan.io.CompressionType.getCompressionTypeByFilename;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import fr.ens.biologie.genomique.eoulsan.Globals;
import fr.ens.biologie.genomique.eoulsan.io.CompressionType;
import fr.ens.biologie.genomique.eoulsan.util.EnhancedBloomFilter;
/**
 * This abstract class defines methods to compare files using a Bloom filter.
 * <p>
 * The first file (or stream) is read line by line and every line is put in an
 * {@link EnhancedBloomFilter}. The concrete subclass then decides, in
 * {@link #compareFiles(EnhancedBloomFilter, InputStream)}, how the second
 * stream is checked against that filter.
 * @since 2.0
 * @author Sandrine Perrin
 */
public abstract class AbstractComparatorWithBloomFilter
    extends AbstractComparator {

  // A serialized Bloom filter file is only created for uncompressed files
  // larger than this threshold: with default parameters the serialized filter
  // itself is 27369839 bytes, so smaller source files are not worth caching.
  private static final long SIZE_MINIMAL_CREATE_SERIALIZE_FILE = 40_000_000L;

  // NOTE(review): this value is stored and reported by toString() but is never
  // passed to initBloomFilter() in this class -- confirm whether
  // EnhancedBloomFilter should receive it.
  private double falsePositiveProbability = 0.1;
  private int expectedNumberOfElements = 30_000_000;
  private boolean useSerializeFile = false;

  /**
   * Compare two files using a Bloom filter built from the first one.
   * @param fileA first file, used to build (or reload) the Bloom filter
   * @param fileB second file, its content is checked against the filter
   * @return true if the files are considered the same
   * @throws FileNotFoundException if a file cannot be opened
   * @throws IOException if an error occurs while reading the files
   */
  @Override
  public boolean compareFiles(final File fileA, final File fileB)
      throws FileNotFoundException, IOException {

    // Check input files (both checks are inherited from the parent class)
    if (!checkFiles(fileA, fileB) && checkFileSize()) {
      return false;
    }

    // Check path file (abstract and symbolic) is the same
    if (fileA.getCanonicalFile().equals(fileB.getCanonicalFile())) {
      return true;
    }

    try (InputStream rawB = new FileInputStream(fileB)) {

      // Built after opening fileB, so a missing fileB fails before the
      // (potentially expensive) filter creation
      final EnhancedBloomFilter filterA = getBloomFilter(fileA);

      // The decompression wrapper is a managed resource too, so it is always
      // closed and not only the underlying file stream
      try (InputStream isB =
          getCompressionTypeByFilename(fileB.getAbsolutePath())
              .createInputStream(rawB)) {

        return compareFiles(filterA, isB);
      }
    }
  }

  /**
   * Compare two streams using a Bloom filter built from the first one. The
   * first stream is fully read and closed by this method.
   * @param isA first stream, used to build the Bloom filter
   * @param isB second stream, its content is checked against the filter
   * @return true if the streams are considered the same
   * @throws IOException if an error occurs while reading the streams
   */
  @Override
  public boolean compareFiles(final InputStream isA, final InputStream isB)
      throws IOException {

    return compareFiles(buildBloomFilter(isA), isB);
  }

  /**
   * Compare the content of a stream against a Bloom filter representing
   * another file, without taking line order into account.
   * @param filter Bloom filter representing the content of the first file
   * @param is input stream of the second file
   * @return true if the contents are considered the same
   * @throws IOException if an error occurs while comparing the files
   */
  public abstract boolean compareFiles(EnhancedBloomFilter filter,
      InputStream is) throws IOException;

  /**
   * Initialize a Bloom filter with the expected number of elements.
   * @param expectedNumberOfElements expected number of elements
   * @return a new empty Bloom filter
   */
  protected static EnhancedBloomFilter initBloomFilter(
      final int expectedNumberOfElements) {

    return new EnhancedBloomFilter(expectedNumberOfElements);
  }

  /**
   * Get the Bloom filter of a file. If serialization is enabled and the
   * "file.ser" file exists, the filter is loaded from it. Otherwise the filter
   * is built from the file content and, when
   * {@link #isCreateSerializeFile(File, CompressionType)} allows it, saved in
   * "file.ser".
   * @param file source to create the Bloom filter
   * @return the Bloom filter filled with the lines of the file
   * @throws IOException if an error occurs while reading the file or while
   *           reading/writing the serialized filter
   */
  public EnhancedBloomFilter getBloomFilter(final File file)
      throws IOException {

    final File bloomFilterSer = new File(file.getAbsolutePath() + ".ser");

    if (this.useSerializeFile && bloomFilterSer.exists()) {
      // Retrieve the previously serialized Bloom filter
      // NOTE(review): the ".ser" file is reused without checking that it is
      // newer than the source file -- confirm stale caches cannot occur.
      return EnhancedBloomFilter.deserializationBloomFilter(bloomFilterSer);
    }

    final CompressionType zType =
        getCompressionTypeByFilename(file.getAbsolutePath());

    // Create new filter
    try (InputStream is = new FileInputStream(file)) {

      final EnhancedBloomFilter bloomFilter =
          buildBloomFilter(zType.createInputStream(is));

      // Serialize the Bloom filter only when it is worth it for this file
      if (isCreateSerializeFile(file, zType)) {
        EnhancedBloomFilter.serializationBloomFilter(bloomFilterSer,
            bloomFilter);
      }

      return bloomFilter;
    }
  }

  /**
   * Build a Bloom filter representing the content of an input stream: each
   * line of the stream is put in the filter. The stream is always closed by
   * this method, even if reading fails.
   * @param is the input stream source
   * @return Bloom filter corresponding to the input stream
   * @throws IOException if bloom filter creation fails
   */
  protected EnhancedBloomFilter buildBloomFilter(final InputStream is)
      throws IOException {

    final EnhancedBloomFilter filter =
        initBloomFilter(getExpectedNumberOfElements());

    // try-with-resources: the reader (and the wrapped stream) must be released
    // also when readLine() or put() throws
    try (BufferedReader reader = new BufferedReader(
        new InputStreamReader(is, Globals.DEFAULT_CHARSET))) {

      String line;

      // Read the first file and store each line in the filter
      while ((line = reader.readLine()) != null) {
        filter.put(line);
      }
    }

    return filter;
  }

  /**
   * Describe this comparator and its Bloom filter parameters.
   * @return a description of the comparator
   */
  @Override
  public String toString() {

    return getName()
        + " compares files with extensions " + getExtensions()
        + " use Bloom filter with parameters: expected numbers elements "
        + getExpectedNumberOfElements() + " and false positive probability "
        + getFalsePositiveProbability();
  }

  /**
   * Define if a serialized Bloom filter file must be created, according to the
   * useSerializeFile parameter, the compression of the file and its size.
   * @param file source file used to build the Bloom filter
   * @param zType compression type of the file
   * @return true if creating the serialization file is necessary
   */
  private boolean isCreateSerializeFile(final File file,
      final CompressionType zType) {

    // No serialized file required
    if (!this.useSerializeFile) {
      return false;
    }

    // Compressed file: always serialize, whatever the file size
    if (zType != CompressionType.NONE) {
      return true;
    }

    // Uncompressed file: serialize only if the file (size in bytes) is larger
    // than the threshold
    return file.length() > SIZE_MINIMAL_CREATE_SERIALIZE_FILE;
  }

  //
  // Getters & setters
  //

  /**
   * Test if the Bloom filter can be saved in / loaded from a '.ser' file.
   * @return true if serialization files are used
   */
  public boolean isUseSerializeFile() {
    return this.useSerializeFile;
  }

  /**
   * Set if the Bloom filter can be saved in / loaded from a '.ser' file.
   * @param useSerializeFile true to use serialization files
   */
  public void setUseSerializeFile(final boolean useSerializeFile) {
    this.useSerializeFile = useSerializeFile;
  }

  /**
   * Get the expected number of elements used to initialize the Bloom filter.
   * @return the expected number of elements
   */
  protected int getExpectedNumberOfElements() {
    return this.expectedNumberOfElements;
  }

  /**
   * Set the expected number of elements used to initialize the Bloom filter.
   * @param expectedNumberOfElements the expected number of elements
   */
  protected void setExpectedNumberOfElements(
      final int expectedNumberOfElements) {
    this.expectedNumberOfElements = expectedNumberOfElements;
  }

  /**
   * Set the false positive probability.
   * @param falsePositiveProbability the false positive probability
   */
  protected void setFalsePositiveProbability(
      final double falsePositiveProbability) {
    this.falsePositiveProbability = falsePositiveProbability;
  }

  /**
   * Get the false positive probability.
   * @return the false positive probability
   */
  protected double getFalsePositiveProbability() {
    return this.falsePositiveProbability;
  }

  //
  // Constructor
  //

  /**
   * Public constructor.
   * @param useSerializeFile true if it is needed to save the Bloom filter in a
   *          file with extension '.ser'
   */
  public AbstractComparatorWithBloomFilter(final boolean useSerializeFile) {
    this.useSerializeFile = useSerializeFile;
  }
}