/*
* ARX: Powerful Data Anonymization
* Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.deidentifier.arx.gui.worker;
import java.io.BufferedOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.lang.reflect.InvocationTargetException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.zip.Deflater;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import org.deidentifier.arx.ARXLattice;
import org.deidentifier.arx.ARXLattice.ARXNode;
import org.deidentifier.arx.AttributeType;
import org.deidentifier.arx.AttributeType.Hierarchy;
import org.deidentifier.arx.DataDefinition;
import org.deidentifier.arx.DataHandle;
import org.deidentifier.arx.DataType;
import org.deidentifier.arx.DataType.DataTypeWithFormat;
import org.deidentifier.arx.criteria.PrivacyCriterion;
import org.deidentifier.arx.gui.Controller;
import org.deidentifier.arx.gui.model.Model;
import org.deidentifier.arx.gui.model.ModelConfiguration;
import org.deidentifier.arx.gui.resources.Resources;
import org.deidentifier.arx.gui.worker.io.FileBuilder;
import org.deidentifier.arx.gui.worker.io.Vocabulary;
import org.deidentifier.arx.gui.worker.io.Vocabulary_V2;
import org.deidentifier.arx.gui.worker.io.XMLWriter;
import org.deidentifier.arx.io.CSVDataOutput;
import org.deidentifier.arx.metric.InformationLoss;
import org.eclipse.core.runtime.IProgressMonitor;
/**
* This worker saves a project file to disk.
*
* @author Fabian Prasser
*/
public class WorkerSave extends Worker<Model> {
/** Vocabulary that supplies the element and attribute names used when writing XML. */
private final Vocabulary vocabulary = new Vocabulary_V2();
/** Location of the project file that is written by {@code run}. */
private final String path;
/** The model that is saved. */
private final Model model;
/**
 * Creates a new worker that saves the given model to the given path.
 *
 * @param path location of the file to write
 * @param controller not used by this constructor
 * @param model the model to save
 */
public WorkerSave(final String path,
                  final Controller controller,
                  final Model model) {
    this.model = model;
    this.path = path;
}
/**
 * Saves the model as a zip archive at the configured path.
 * <p>
 * The archive is written in ten steps (metadata, model, input, input subset,
 * output, output subset, configuration, lattice, clipboard, filter), each of
 * which is reported to the progress monitor. Any exception raised while
 * writing is stored in the {@code error} field (declared outside this class)
 * instead of being thrown, and the monitor is marked as done.
 * <p>
 * Both the file stream and the zip stream are closed in every case, so that
 * a failed save does not leave an open handle on the target file.
 *
 * @param arg0 the progress monitor to report to
 * @throws InvocationTargetException declared by the overridden method; not thrown here
 * @throws InterruptedException declared by the overridden method; not thrown here
 */
@Override
public void run(final IProgressMonitor arg0) throws InvocationTargetException,
                                                    InterruptedException {
    arg0.beginTask(Resources.getMessage("WorkerSave.0"), 10); //$NON-NLS-1$
    // try-with-resources closes the zip stream (which finishes the archive) and
    // then the underlying file, also when one of the write steps fails
    try (FileOutputStream file = new FileOutputStream(path);
         ZipOutputStream zip = new ZipOutputStream(new BufferedOutputStream(file))) {
        zip.setLevel(Deflater.BEST_SPEED);
        model.createConfig();
        writeMetadata(model, zip);
        arg0.worked(1);
        writeModel(model, zip);
        arg0.worked(1);
        writeInput(model, zip);
        arg0.worked(1);
        writeInputSubset(model, zip);
        arg0.worked(1);
        writeOutput(model, zip);
        arg0.worked(1);
        writeOutputSubset(model, zip);
        arg0.worked(1);
        writeConfiguration(model, zip);
        arg0.worked(1);
        // The mapping from transformations to identifiers is shared with the clipboard
        final Map<String, Integer> map = writeLattice(model, zip);
        arg0.worked(1);
        writeClipboard(model, map, zip);
        arg0.worked(1);
        writeFilter(model, zip);
    } catch (final Exception e) {
        // Report the failure via the error field; this also covers failures on close
        error = e;
        arg0.done();
        return;
    }
    // Final step: the archive has been closed successfully
    arg0.worked(1);
    arg0.worked(100);
    arg0.done();
}
/**
* Converts an attribute name to a file name. The current implementation
* returns the attribute name unchanged.
*
* @param a the attribute name
* @return the file name, which is identical to the given attribute name
*/
private String toFileName(final String a) {
return a;
}
/**
 * Writes an XML representation of the lattice directly to the given stream.
 * <p>
 * Before writing, every transformation of the lattice that is not yet
 * contained in the given map is assigned the next consecutive identifier,
 * keyed by the string form of its transformation array.
 *
 * @param map mapping from transformations to identifiers; extended by this method
 * @param l the lattice to write
 * @param zip the stream to write to
 * @throws IOException if writing fails
 */
private void toXML(final Map<String, Integer> map,
                   final ARXLattice l,
                   final ZipOutputStream zip) throws IOException {
    final ARXNode[][] levels = l.getLevels();

    // Assign identifiers to all transformations that do not have one yet
    int nextId = 0;
    for (final ARXNode[] level : levels) {
        for (final ARXNode node : level) {
            final String transformation = Arrays.toString(node.getTransformation());
            if (!map.containsKey(transformation)) {
                map.put(transformation, nextId++);
            }
        }
    }

    // The lattice may be large, so it is streamed instead of built in memory
    final FileBuilder builder = new FileBuilder(new OutputStreamWriter(zip));
    final XMLWriter xml = new XMLWriter(builder);
    xml.write(vocabulary.getHeader());

    xml.indent(vocabulary.getLattice());
    for (int depth = 0; depth < levels.length; depth++) {
        xml.indent(vocabulary.getLevel(), vocabulary.getDepth(), depth);
        for (final ARXNode node : levels[depth]) {
            final int nodeId = map.get(Arrays.toString(node.getTransformation()));
            xml.indent(vocabulary.getNode2(), vocabulary.getId(), nodeId);
            xml.write(vocabulary.getTransformation(), node.getTransformation());
            xml.write(vocabulary.getAnonymity(), node.getAnonymity());
            xml.write(vocabulary.getChecked(), node.isChecked());
            // Neighbors are only written if there are any
            if (node.getPredecessors().length != 0) {
                xml.write(vocabulary.getPredecessors(), node.getPredecessors(), map);
            }
            if (node.getSuccessors().length != 0) {
                xml.write(vocabulary.getSuccessors(), node.getSuccessors(), map);
            }
            // Highest and lowest score
            xml.indent(vocabulary.getInfoloss());
            xml.write(vocabulary.getMax2(), node.getHighestScore().toString());
            xml.write(vocabulary.getMin2(), node.getLowestScore().toString());
            xml.unindent(); // infoloss
            xml.unindent(); // node
        }
        xml.unindent(); // level
    }
    xml.unindent(); // lattice
    builder.flush();
}
/**
 * Returns an XML representation of the clipboard. Each entry is written as
 * the string form of its transformation array.
 *
 * @param map not used by this method
 * @param clipboard the clipboard entries
 * @return the XML document as a string
 * @throws IOException if the XML writer fails
 */
private String toXML(final Map<String, Integer> map,
                     final List<ARXNode> clipboard) throws IOException {
    final XMLWriter xml = new XMLWriter();
    xml.indent(vocabulary.getClipboard());
    for (final ARXNode entry : clipboard) {
        final String transformation = Arrays.toString(entry.getTransformation());
        xml.write(vocabulary.getNode(), transformation);
    }
    xml.unindent();
    return xml.toString();
}
/**
 * Converts the project-level properties of a model to XML.
 * <p>
 * The line break of the CSV syntax is written symbolically: {@code "MAC"} for
 * a single carriage return, {@code "WINDOWS"} for any two-character line
 * break and {@code "UNIX"} otherwise. The language of the model's locale is
 * written in upper case, using locale-independent case mapping so that the
 * result does not depend on the default locale of the machine saving the
 * project (e.g. a Turkish default locale would otherwise turn "i" into a
 * dotted capital "I").
 *
 * @param model the model to convert
 * @return the XML document as a string
 * @throws IOException if the XML writer fails
 */
private String toXML(final Model model) throws IOException {
    XMLWriter writer = new XMLWriter();
    writer.indent(vocabulary.getProject());
    writer.write(vocabulary.getName(), model.getName());
    writer.write(vocabulary.getSeparator(), model.getCSVSyntax().getDelimiter());
    writer.write(vocabulary.getEscape(), model.getCSVSyntax().getEscape());
    writer.write(vocabulary.getQuote(), model.getCSVSyntax().getQuote());

    // Map the line break characters to a symbolic name
    final char[] chars = model.getCSVSyntax().getLinebreak();
    String linebreak = "UNIX"; //$NON-NLS-1$
    if (chars.length == 1 && chars[0] == '\r') {
        linebreak = "MAC"; //$NON-NLS-1$
    } else if (chars.length == 2) {
        linebreak = "WINDOWS"; //$NON-NLS-1$
    }
    writer.write(vocabulary.getLinebreak(), linebreak);

    writer.write(vocabulary.getDescription(), model.getDescription());
    // Locale.ROOT keeps the case mapping independent of the default locale
    writer.write(vocabulary.getLocale(),
                 model.getLocale().getLanguage().toUpperCase(java.util.Locale.ROOT));
    writer.write(vocabulary.getHistorySize(), model.getHistorySize());
    writer.write(vocabulary.getSnapshotSizeDataset(), model.getSnapshotSizeDataset());
    writer.write(vocabulary.getSnapshotSizeSnapshot(), model.getSnapshotSizeSnapshot());
    writer.write(vocabulary.getInitialNodesInViewer(), model.getInitialNodesInViewer());
    writer.write(vocabulary.getMaxNodesInViewer(), model.getMaxNodesInViewer());
    writer.write(vocabulary.getSelectedAttribute(), model.getSelectedAttribute());
    writer.write(vocabulary.getInputBytes(), model.getInputBytes());
    writer.unindent();
    return writer.toString();
}
/**
 * Converts a configuration to XML. The document contains the suppression
 * settings, the practical monotonicity flag, the allowed outliers, the
 * metric, the attribute weights and the string form of every non-null
 * privacy criterion.
 *
 * @param config the configuration to convert
 * @return the XML document as a string
 * @throws IOException if the XML writer fails
 */
private String toXML(final ModelConfiguration config) throws IOException {
    final XMLWriter xml = new XMLWriter();
    xml.indent(vocabulary.getConfig());
    xml.write(vocabulary.getSuppressionAlwaysEnabled(), config.isSuppressionAlwaysEnabled());

    // Attribute types for which suppression is enabled
    final AttributeType[] candidates = {
        AttributeType.QUASI_IDENTIFYING_ATTRIBUTE,
        AttributeType.SENSITIVE_ATTRIBUTE,
        AttributeType.INSENSITIVE_ATTRIBUTE
    };
    xml.indent(vocabulary.getSuppressedAttributeTypes());
    for (final AttributeType candidate : candidates) {
        if (!config.isAttributeTypeSuppressed(candidate)) {
            continue;
        }
        xml.write(vocabulary.getType(), candidate.toString());
    }
    xml.unindent();

    xml.write(vocabulary.getPracticalMonotonicity(), config.isPracticalMonotonicity());
    xml.write(vocabulary.getRelativeMaxOutliers(), config.getAllowedOutliers());
    xml.write(vocabulary.getMetric(), config.getMetric().toString());

    // One element per attribute weight
    xml.indent(vocabulary.getAttributeWeights());
    for (final Entry<String, Double> weight : config.getAttributeWeights().entrySet()) {
        xml.indent(vocabulary.getAttributeWeight());
        xml.write(vocabulary.getAttribute(), weight.getKey());
        xml.write(vocabulary.getWeight(), weight.getValue());
        xml.unindent();
    }
    xml.unindent();

    // Privacy criteria, skipping empty slots
    xml.indent(vocabulary.getCriteria());
    for (final PrivacyCriterion criterion : config.getCriteria()) {
        if (criterion == null) {
            continue;
        }
        xml.write(vocabulary.getCriterion(), criterion.toString());
    }
    xml.unindent();

    xml.unindent();
    return xml.toString();
}
/**
 * Returns an XML representation of the data definition.
 * <p>
 * One assignment is written per column of the handle. Attributes without a
 * type in the definition are written as identifying, attributes without a
 * data type as strings. If the definition contains a non-empty hierarchy for
 * an attribute, a reference to the hierarchy file and the generalization
 * bounds from the configuration are added, where a missing bound is written
 * as "All". Micro-aggregation settings are only written when both the
 * definition and the configuration know a function for the attribute; the
 * label is read from the configuration, so without the second check a
 * missing entry there would abort saving with a NullPointerException.
 *
 * @param config the configuration providing generalization bounds and micro-aggregation settings
 * @param handle the handle providing the attribute names
 * @param definition the data definition to convert
 * @return the XML document as a string
 * @throws IOException if the XML writer fails
 */
private String toXML(final ModelConfiguration config,
                     final DataHandle handle,
                     final DataDefinition definition) throws IOException {
    XMLWriter writer = new XMLWriter();
    writer.indent(vocabulary.getDefinition());
    for (int i = 0; i < handle.getNumColumns(); i++) {
        final String attr = handle.getAttributeName(i);

        // Fall back to defaults for attributes that are not specified
        AttributeType t = definition.getAttributeType(attr);
        DataType<?> dt = definition.getDataType(attr);
        if (t == null) t = AttributeType.IDENTIFYING_ATTRIBUTE;
        if (dt == null) dt = DataType.STRING;

        writer.indent(vocabulary.getAssigment());
        writer.write(vocabulary.getName(), attr);
        writer.write(vocabulary.getType(), t.toString());
        writer.write(vocabulary.getDatatype(), dt.getDescription().getLabel());
        if (dt.getDescription().hasFormat()) {
            String format = ((DataTypeWithFormat) dt).getFormat();
            if (format != null) {
                writer.write(vocabulary.getFormat(), format);
            }
        }

        // Do we have a hierarchy
        if (definition.getHierarchy(attr) != null && definition.getHierarchy(attr).length != 0 &&
            definition.getHierarchy(attr)[0].length != 0) {
            writer.write(vocabulary.getRef(), "hierarchies/" + toFileName(attr) + ".csv"); //$NON-NLS-1$ //$NON-NLS-2$
            Integer min = config.getMinimumGeneralization(attr);
            Integer max = config.getMaximumGeneralization(attr);
            writer.write(vocabulary.getMin(), min == null ? "All" : String.valueOf(min)); //$NON-NLS-1$
            writer.write(vocabulary.getMax(), max == null ? "All" : String.valueOf(max)); //$NON-NLS-1$
        }

        // Do we have a microaggregate function. The label comes from the
        // configuration, so the configuration must be checked as well
        if (definition.getMicroAggregationFunction(attr) != null &&
            config.getMicroAggregationFunction(attr) != null) {
            writer.write(vocabulary.getMicroAggregationFunction(), config.getMicroAggregationFunction(attr).getLabel());
            writer.write(vocabulary.getMicroAggregationIgnoreMissingData(), config.getMicroAggregationIgnoreMissingData(attr));
        }
        writer.unindent();
    }
    writer.unindent();
    return writer.toString();
}
/**
 * Writes the clipboard to the entry "clipboard.xml". Nothing is written if
 * the clipboard is empty.
 *
 * @param model the model providing the clipboard
 * @param map mapping from transformations to identifiers, passed on to the XML conversion
 * @param zip the stream to write to
 * @throws IOException if writing fails
 */
private void writeClipboard(final Model model,
                            final Map<String, Integer> map,
                            final ZipOutputStream zip) throws IOException {
    final List<ARXNode> entries = model.getClipboard().getClipboardEntries();
    if (entries.isEmpty()) {
        return;
    }
    zip.putNextEntry(new ZipEntry("clipboard.xml")); //$NON-NLS-1$
    final Writer out = new OutputStreamWriter(zip);
    final String xml = toXML(map, entries);
    out.write(xml);
    out.flush();
}
/**
 * Writes the input and the output configuration of the model, if present,
 * below the prefixes "input/" and "output/" respectively.
 *
 * @param model the model providing the configurations
 * @param zip the stream to write to
 * @throws IOException if writing fails
 */
private void writeConfiguration(final Model model, final ZipOutputStream zip) throws IOException {
    final ModelConfiguration input = model.getInputConfig();
    if (input != null) {
        writeConfiguration(input, "input/", zip); //$NON-NLS-1$
    }
    final ModelConfiguration output = model.getOutputConfig();
    if (output != null) {
        writeConfiguration(output, "output/", zip); //$NON-NLS-1$
    }
}
/**
 * Writes a single configuration below the given prefix: the serialized
 * object ("config.dat"), its XML representation ("config.xml"), followed by
 * the data definition and the hierarchies.
 *
 * @param config the configuration to write
 * @param prefix the prefix of all entry names
 * @param zip the stream to write to
 * @throws IOException if writing fails
 */
private void writeConfiguration(final ModelConfiguration config,
                                final String prefix,
                                final ZipOutputStream zip) throws IOException {
    // Binary form; the object stream is created after the entry has been opened
    zip.putNextEntry(new ZipEntry(prefix + "config.dat")); //$NON-NLS-1$
    final ObjectOutputStream binary = new ObjectOutputStream(zip);
    binary.writeObject(config);
    binary.flush();

    // XML form
    zip.putNextEntry(new ZipEntry(prefix + "config.xml")); //$NON-NLS-1$
    final Writer text = new OutputStreamWriter(zip);
    final String xml = toXML(config);
    text.write(xml);
    text.flush();

    // Dependent entries
    writeDefinition(config, prefix, zip);
    writeHierarchies(config, prefix, zip);
}
/**
 * Writes the data definition belonging to the given configuration to the
 * entry "definition.xml" below the given prefix. The input definition of the
 * model is used if the configuration is the model's input configuration,
 * the output definition otherwise. Nothing is written if there is no
 * definition.
 *
 * @param config the configuration
 * @param prefix the prefix of the entry name
 * @param zip the stream to write to
 * @throws IOException if writing fails
 */
private void writeDefinition(final ModelConfiguration config,
                             final String prefix,
                             final ZipOutputStream zip) throws IOException {
    // Select the definition that matches the configuration
    final DataDefinition definition = (config == model.getInputConfig())
                                      ? model.getInputDefinition()
                                      : model.getOutputDefinition();
    if (definition == null) {
        return;
    }
    zip.putNextEntry(new ZipEntry(prefix + "definition.xml")); //$NON-NLS-1$
    final Writer out = new OutputStreamWriter(zip);
    final String xml = toXML(config, config.getInput().getHandle(), definition);
    out.write(xml);
    out.flush();
}
/**
 * Writes the current node filter as a serialized object to the entry
 * "filter.dat". Nothing is written unless the model has both an anonymizer
 * and a result.
 *
 * @param model the model providing the filter
 * @param zip the stream to write to
 * @throws IOException if writing fails
 */
private void writeFilter(final Model model, final ZipOutputStream zip) throws IOException {
    final boolean available = (model.getAnonymizer() != null) && (model.getResult() != null);
    if (!available) {
        return;
    }
    zip.putNextEntry(new ZipEntry("filter.dat")); //$NON-NLS-1$
    final ObjectOutputStream out = new ObjectOutputStream(zip);
    out.writeObject(model.getNodeFilter());
    out.flush();
}
/**
 * Writes the hierarchies to the file, one CSV entry per attribute below
 * {@code prefix + "hierarchies/"}.
 * <p>
 * First, all hierarchies stored in the configuration are written. Then, all
 * non-empty hierarchies from the matching data definition that have not been
 * written yet are added. The second step is skipped when the configuration
 * has no input or when no definition is available; the definition may be
 * missing (the sibling method writing the definition checks for this as
 * well), and dereferencing it unconditionally would abort saving with a
 * NullPointerException.
 *
 * @param config the configuration providing the hierarchies
 * @param prefix the prefix of all entry names
 * @param zip the stream to write to
 * @throws IOException if writing fails
 */
private void writeHierarchies(final ModelConfiguration config,
                              final String prefix,
                              final ZipOutputStream zip) throws IOException {
    // Store all from config
    Set<String> saved = new HashSet<>();
    for (Entry<String, Hierarchy> entry : config.getHierarchies().entrySet()) {
        // Store this hierarchy
        zip.putNextEntry(new ZipEntry(prefix + "hierarchies/" + toFileName(entry.getKey()) + ".csv")); //$NON-NLS-1$ //$NON-NLS-2$
        CSVDataOutput out = new CSVDataOutput(zip, model.getCSVSyntax().getDelimiter());
        out.write(entry.getValue().getHierarchy());
        saved.add(entry.getKey());
    }

    // This additional code implements a bugfix. ARX automatically creates hierarchies
    // implementing the identity function when the user does not specify one but defines the attribute
    // to be a quasi-identifier. These hierarchies were not serialized into project files in ARX 3.4.1,
    // leading to inconsistent files which could not be loaded any more. We now do our best to save
    // every relevant hierarchy:

    // Obtain definition
    final DataDefinition definition;
    if (config == model.getInputConfig()) {
        definition = model.getInputDefinition();
    } else {
        definition = model.getOutputDefinition();
    }

    // Nothing more to store without input data or without a definition
    if (config.getInput() == null || definition == null) {
        return;
    }

    // Store all from definition that have not yet been stored
    DataHandle handle = config.getInput().getHandle();
    for (int i = 0; i < handle.getNumColumns(); i++) {
        final String attr = handle.getAttributeName(i);
        // Do we have a hierarchy
        if (!saved.contains(attr) && definition.getHierarchy(attr) != null &&
            definition.getHierarchy(attr).length != 0 &&
            definition.getHierarchy(attr)[0].length != 0) {
            // Store this hierarchy
            zip.putNextEntry(new ZipEntry(prefix + "hierarchies/" + toFileName(attr) + ".csv")); //$NON-NLS-1$ //$NON-NLS-2$
            CSVDataOutput out = new CSVDataOutput(zip, model.getCSVSyntax().getDelimiter());
            out.write(definition.getHierarchy(attr));
            saved.add(attr);
        }
    }
}
/**
 * Writes the input data to the entry "data/input.csv", using the delimiter
 * of the model's CSV syntax. Nothing is written if the input configuration
 * has no input or the input has no handle.
 *
 * @param model the model providing the input
 * @param zip the stream to write to
 * @throws IOException if writing fails
 */
private void writeInput(final Model model, final ZipOutputStream zip) throws IOException {
    if (model.getInputConfig().getInput() == null) {
        return;
    }
    final DataHandle handle = model.getInputConfig().getInput().getHandle();
    if (handle == null) {
        return;
    }
    zip.putNextEntry(new ZipEntry("data/input.csv")); //$NON-NLS-1$
    final CSVDataOutput csv = new CSVDataOutput(zip, model.getCSVSyntax().getDelimiter());
    csv.write(handle.iterator());
}
/**
 * Writes the view of the input data to the entry "data/input_subset.csv",
 * using the delimiter of the model's CSV syntax. Nothing is written if the
 * input configuration has no input or the input has no handle.
 *
 * @param model the model providing the input
 * @param zip the stream to write to
 * @throws IOException if writing fails
 */
private void writeInputSubset(final Model model, final ZipOutputStream zip) throws IOException {
    if (model.getInputConfig().getInput() == null) {
        return;
    }
    final DataHandle handle = model.getInputConfig().getInput().getHandle();
    if (handle == null) {
        return;
    }
    zip.putNextEntry(new ZipEntry("data/input_subset.csv")); //$NON-NLS-1$
    final CSVDataOutput csv = new CSVDataOutput(zip, model.getCSVSyntax().getDelimiter());
    csv.write(handle.getView().iterator());
}
/**
 * Writes the lattice of the current result to the file and returns the
 * mapping from transformations to identifiers that was built on the way.
 * <p>
 * Four entries are created: "lattice.xml" (XML representation),
 * "lattice.dat" (serialized lattice and its attribute map), "infoloss.dat"
 * (lowest and highest score per node identifier) and "attributes.dat"
 * (attributes per node identifier). If the model has no result or the result
 * has no lattice, nothing is written and an empty map is returned.
 *
 * @param model the model providing the result
 * @param zip the stream to write to
 * @return mapping from the string form of each transformation to its identifier
 * @throws IOException if writing fails
 */
private Map<String, Integer> writeLattice(final Model model, final ZipOutputStream zip) throws IOException {
    final Map<String, Integer> ids = new HashMap<String, Integer>();
    if (model.getResult() == null || model.getResult().getLattice() == null) {
        return ids;
    }
    final ARXLattice lattice = model.getResult().getLattice();

    // XML representation; this also populates the identifier mapping
    zip.putNextEntry(new ZipEntry("lattice.xml")); //$NON-NLS-1$
    toXML(ids, lattice, zip);

    // Serialized lattice, followed by its attribute map
    zip.putNextEntry(new ZipEntry("lattice.dat")); //$NON-NLS-1$
    ObjectOutputStream out = new ObjectOutputStream(zip);
    out.writeObject(model.getResult().getLattice());
    out.writeObject(model.getResult().getLattice().access().getAttributeMap());
    out.flush();

    // Lowest and highest score per node
    zip.putNextEntry(new ZipEntry("infoloss.dat")); //$NON-NLS-1$
    final Map<Integer, InformationLoss<?>> highest = new HashMap<Integer, InformationLoss<?>>();
    final Map<Integer, InformationLoss<?>> lowest = new HashMap<Integer, InformationLoss<?>>();
    for (final ARXNode[] level : lattice.getLevels()) {
        for (final ARXNode node : level) {
            final Integer id = ids.get(Arrays.toString(node.getTransformation()));
            lowest.put(id, node.getLowestScore());
            highest.put(id, node.getHighestScore());
        }
    }
    // A fresh object stream is started for each entry, after the entry has been opened
    out = new ObjectOutputStream(zip);
    out.writeObject(lowest);
    out.writeObject(highest);
    out.flush();
    lowest.clear();
    highest.clear();

    // Attributes per node
    zip.putNextEntry(new ZipEntry("attributes.dat")); //$NON-NLS-1$
    final Map<Integer, Map<Integer, Object>> attributes = new HashMap<Integer, Map<Integer, Object>>();
    for (final ARXNode[] level : lattice.getLevels()) {
        for (final ARXNode node : level) {
            final Integer id = ids.get(Arrays.toString(node.getTransformation()));
            attributes.put(id, node.getAttributes());
        }
    }
    out = new ObjectOutputStream(zip);
    out.writeObject(attributes);
    out.flush();
    attributes.clear();

    return ids;
}
/**
 * Writes the meta data to the entry "metadata.xml": the version reported by
 * the resources and the version of the vocabulary in use.
 *
 * @param model not used by this method
 * @param zip the stream to write to
 * @throws IOException if writing fails
 */
private void writeMetadata(final Model model, final ZipOutputStream zip) throws IOException {
    zip.putNextEntry(new ZipEntry("metadata.xml")); //$NON-NLS-1$
    final OutputStreamWriter target = new OutputStreamWriter(zip);
    final FileBuilder builder = new FileBuilder(target);
    final XMLWriter xml = new XMLWriter(builder);
    xml.indent(vocabulary.getMetadata());
    xml.write(vocabulary.getVersion(), Resources.getVersion());
    xml.write(vocabulary.getVocabulary(), vocabulary.getVocabularyVersion());
    xml.unindent();
    target.flush();
}
/**
 * Writes the project to the file, both as a serialized object
 * ("project.dat") and as XML ("project.xml").
 *
 * @param model the model to write
 * @param zip the stream to write to
 * @throws IOException if writing fails
 */
private void writeModel(final Model model, final ZipOutputStream zip) throws IOException {
    // Binary form; the object stream is created after the entry has been opened
    zip.putNextEntry(new ZipEntry("project.dat")); //$NON-NLS-1$
    final ObjectOutputStream binary = new ObjectOutputStream(zip);
    binary.writeObject(model);
    binary.flush();

    // XML form
    zip.putNextEntry(new ZipEntry("project.xml")); //$NON-NLS-1$
    final Writer text = new OutputStreamWriter(zip);
    final String xml = toXML(model);
    text.write(xml);
    text.flush();
}
/**
 * Writes the output data to the entry "data/output.csv", using the delimiter
 * of the model's CSV syntax. Nothing is written if the model has no output.
 *
 * @param model the model providing the output
 * @param zip the stream to write to
 * @throws IOException if writing fails
 */
private void writeOutput(final Model model, final ZipOutputStream zip) throws IOException {
    if (model.getOutput() == null) {
        return;
    }
    zip.putNextEntry(new ZipEntry("data/output.csv")); //$NON-NLS-1$
    final CSVDataOutput csv = new CSVDataOutput(zip, model.getCSVSyntax().getDelimiter());
    csv.write(model.getOutput().iterator());
}
/**
 * Writes the view of the output data to the entry "data/output_subset.csv",
 * using the delimiter of the model's CSV syntax. Nothing is written if the
 * model has no output.
 *
 * @param model the model providing the output
 * @param zip the stream to write to
 * @throws IOException if writing fails
 */
private void writeOutputSubset(final Model model, final ZipOutputStream zip) throws IOException {
    if (model.getOutput() == null) {
        return;
    }
    zip.putNextEntry(new ZipEntry("data/output_subset.csv")); //$NON-NLS-1$
    final CSVDataOutput csv = new CSVDataOutput(zip, model.getCSVSyntax().getDelimiter());
    csv.write(model.getOutput().getView().iterator());
}
}