/*
* ARX: Powerful Data Anonymization
* Copyright 2012 - 2016 Fabian Prasser, Florian Kohlmayer and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.deidentifier.arx.io;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.security.DigestOutputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Iterator;
import com.univocity.parsers.csv.CsvFormat;
import com.univocity.parsers.csv.CsvWriter;
import com.univocity.parsers.csv.CsvWriterSettings;
/**
 * Provides methods for creating checksums of CSV-encoded data.
 *
 * @author Fabian Prasser
 */
public class CSVDataChecksum {

    /** CSV writer settings (delimiter, quote, escape, line break) used to serialize rows before hashing. */
    private final CsvWriterSettings settings;

    /**
     * Instantiate using the default delimiter, quote, escape and line break
     * defined by {@link CSVSyntax}.
     */
    public CSVDataChecksum() {
        this(CSVSyntax.DEFAULT_DELIMITER);
    }

    /**
     * Instantiate using the given delimiter and the default quote, escape and
     * line break defined by {@link CSVSyntax}.
     *
     * @param delimiter the delimiter
     */
    public CSVDataChecksum(final char delimiter) {
        this(delimiter, CSVSyntax.DEFAULT_QUOTE);
    }

    /**
     * Instantiate using the given delimiter and quote and the default escape
     * and line break defined by {@link CSVSyntax}.
     *
     * @param delimiter the delimiter
     * @param quote the quote
     */
    public CSVDataChecksum(final char delimiter, final char quote) {
        this(delimiter, quote, CSVSyntax.DEFAULT_ESCAPE);
    }

    /**
     * Instantiate using the given delimiter, quote and escape and the default
     * line break defined by {@link CSVSyntax}.
     *
     * @param delimiter the delimiter
     * @param quote the quote
     * @param escape the escape
     */
    public CSVDataChecksum(final char delimiter, final char quote, final char escape) {
        this(delimiter, quote, escape, CSVSyntax.DEFAULT_LINEBREAK);
    }

    /**
     * Instantiate with a fully specified syntax.
     *
     * @param delimiter the delimiter
     * @param quote the quote
     * @param escape the escape
     * @param linebreak the linebreak
     */
    public CSVDataChecksum(final char delimiter, final char quote, final char escape, final char[] linebreak) {
        settings = createSettings(delimiter, quote, escape, linebreak);
    }

    /**
     * Instantiate using the delimiter, quote, escape and line break of the
     * given syntax configuration.
     *
     * @param config the config
     */
    public CSVDataChecksum(final CSVSyntax config) {
        this(config.getDelimiter(), config.getQuote(), config.getEscape(), config.getLinebreak());
    }

    /**
     * Returns a hex-encoded SHA-256 checksum for the given data.
     * <p>
     * Each row is written as CSV using the syntax this instance was created
     * with, and the resulting character stream is encoded as UTF-8 and fed
     * into the digest. The serialized output itself is discarded. UTF-8 is
     * used explicitly so that the checksum does not depend on the default
     * charset of the platform the code happens to run on.
     *
     * @param iterator the rows to hash; it is consumed completely
     * @return the SHA-256 digest of the CSV serialization as a lower-case
     *         hexadecimal string
     * @throws NoSuchAlgorithmException if no SHA-256 implementation is available
     */
    public String getSHA256Checksum(final Iterator<String[]> iterator) throws NoSuchAlgorithmException {

        // Initialize message digest
        MessageDigest md = MessageDigest.getInstance("SHA-256");

        // Sink that discards everything; only the digest side effect is of interest
        DigestOutputStream dis = new DigestOutputStream(new OutputStream() {
            @Override
            public void write(int b) throws IOException {
                // Simply drop the data
            }

            @Override
            public void write(byte[] b, int off, int len) throws IOException {
                // Drop bulk writes as well, instead of falling back to the
                // inherited implementation that loops over write(int)
            }
        }, md);

        // Fixed charset: the bytes that are hashed must be identical on every platform
        CsvWriter csvwriter = new CsvWriter(new OutputStreamWriter(dis, StandardCharsets.UTF_8), settings);

        // Write
        while (iterator.hasNext()) {
            csvwriter.writeRow((Object[]) iterator.next());
        }

        // Closing flushes all buffered characters through the digest stream
        csvwriter.close();

        // Obtain digest and convert to hex
        return toHex(md.digest());
    }

    /**
     * Creates the settings.
     *
     * @param delimiter the delimiter
     * @param quote the quote
     * @param escape the escape
     * @param linebreak the linebreak
     * @return the csv writer settings
     */
    private CsvWriterSettings createSettings(final char delimiter, final char quote, final char escape, final char[] linebreak) {
        CsvFormat format = new CsvFormat();
        format.setDelimiter(delimiter);
        format.setQuote(quote);
        format.setQuoteEscape(escape);
        format.setLineSeparator(linebreak);
        format.setNormalizedNewline(CSVSyntax.getNormalizedLinebreak(linebreak));

        CsvWriterSettings settings = new CsvWriterSettings();
        // Empty and null values are both written as the empty string
        settings.setEmptyValue("");
        settings.setNullValue("");
        settings.setFormat(format);
        return settings;
    }

    /**
     * Converts the given bytes into a lower-case hexadecimal string with two
     * characters per byte.
     *
     * @param bytes the bytes to encode
     * @return the hexadecimal representation
     */
    private static String toHex(final byte[] bytes) {
        char[] hex = new char[bytes.length * 2];
        for (int i = 0; i < bytes.length; i++) {
            int value = bytes[i] & 0xff;
            hex[2 * i] = Character.forDigit(value >>> 4, 16);
            hex[2 * i + 1] = Character.forDigit(value & 0x0f, 16);
        }
        return new String(hex);
    }
}