/* * ARX: Powerful Data Anonymization * Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.deidentifier.arx; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.NoSuchElementException; import org.apache.poi.ss.formula.functions.T; import org.deidentifier.arx.io.CSVDataInput; import org.deidentifier.arx.io.CSVSyntax; import org.deidentifier.arx.io.ImportAdapter; import org.deidentifier.arx.io.ImportConfiguration; /** * Represents input data for the ARX framework. * * @author Fabian Prasser * @author Florian Kohlmayer */ public abstract class Data { /** * The default implementation of a data object. It allows the user to * programmatically define its content. * * @author Fabian Prasser * @author Florian Kohlmayer */ public static class DefaultData extends Data { /** List of tuples. */ private final List<String[]> data = new ArrayList<String[]>(); /** * Adds a row to this data object. * * @param row the row */ public void add(final String... row) { data.add(row); } @Override protected Iterator<String[]> iterator() { return data.iterator(); } } /** * A data object for arrays. * * @author Fabian Prasser * @author Florian Kohlmayer */ static class ArrayData extends Data { /** The array. */ private final String[][] array; /** * Creates a new instance. * * @param array the array */ private ArrayData(final String[][] array) { this.array = array; } @Override protected Iterator<String[]> iterator() { return new Iterator<String[]>() { private int pos = 0; @Override public boolean hasNext() { return pos < array.length; } @Override public String[] next() throws NoSuchElementException { if (hasNext()) { return array[pos++]; } else { throw new NoSuchElementException(); } } @Override public void remove() { throw new UnsupportedOperationException(); } }; } } /** * A data object for iterators. * * @author Fabian Prasser * @author Florian Kohlmayer */ static class IterableData extends Data { /** Iterator over tuples. */ private Iterator<String[]> iterator = null; /** * Creates a new instance. * * @param iterator the iterator */ private IterableData(final Iterator<String[]> iterator) { this.iterator = iterator; } @Override protected Iterator<String[]> iterator() { return iterator; } } /** * Creates a new default data object. * * @return A Data object */ public static DefaultData create() { return new DefaultData(); } /** * Creates a new data object from the given data source specification. * * @param source The source that should be used to import data * @return Data object as described by the data source * @throws IOException Signals that an I/O exception has occurred. */ public static Data create(final DataSource source) throws IOException { ImportConfiguration config = source.getConfiguration(); ImportAdapter adapter = ImportAdapter.create(config); return create(adapter); } /** * Creates a new data object from a CSV file. * * @param file the file * @return the data * @throws IOException Signals that an I/O exception has occurred. */ public static Data create(final File file, final Charset charset) throws IOException { return new IterableData(new CSVDataInput(file, charset).iterator()); } /** * Creates a new data object from a CSV file. * * @param file A file * @param delimiter The utilized separator character * @return A Data object * @throws IOException Signals that an I/O exception has occurred. */ public static Data create(final File file, final Charset charset, final char delimiter) throws IOException { return new IterableData(new CSVDataInput(file, charset, delimiter).iterator()); } /** * Creates a new data object from a CSV file. * * @param file A file * @param delimiter The utilized separator character * @param quote The delimiter for strings * @return A Data object * @throws IOException Signals that an I/O exception has occurred. */ public static Data create(final File file, final Charset charset, final char delimiter, final char quote) throws IOException { return new IterableData(new CSVDataInput(file, charset, delimiter, quote).iterator()); } /** * Creates a new data object from a CSV file. * * @param file the file * @param delimiter the delimiter * @param quote the quote * @param escape the escape * @return the data * @throws IOException Signals that an I/O exception has occurred. */ public static Data create(final File file, final Charset charset, final char delimiter, final char quote, final char escape) throws IOException { return new IterableData(new CSVDataInput(file, charset, delimiter, quote, escape).iterator()); } /** * Creates a new data object from a CSV file. * * @param file the file * @param delimiter the delimiter * @param quote the quote * @param escape the escape * @param linebreak the linebreak * @return the data * @throws IOException Signals that an I/O exception has occurred. */ public static Data create(final File file, final Charset charset, final char delimiter, final char quote, final char escape, final char[] linebreak) throws IOException { return new IterableData(new CSVDataInput(file, charset, delimiter, quote, escape, linebreak).iterator()); } /** * Creates a new data object from a CSV file. * * @param file the file * @param config the config * @return the data * @throws IOException Signals that an I/O exception has occurred. */ public static Data create(final File file, final Charset charset, final CSVSyntax config) throws IOException { return new IterableData(new CSVDataInput(file, charset, config).iterator()); } /** * Creates a new data object from a CSV file. * * @param file the file * @param config the config * @param datatypes the datatypes * @return the data * @throws IOException Signals that an I/O exception has occurred. */ public static Data create(final File file, final Charset charset, final CSVSyntax config, final DataType<T>[] datatypes) throws IOException { return new IterableData(new CSVDataInput(file, charset, config, datatypes).iterator()); } /** * Creates a new data object from a CSV file. * * @param stream the stream * @return the data * @throws IOException Signals that an I/O exception has occurred. */ public static Data create(final InputStream stream, final Charset charset) throws IOException { return new IterableData(new CSVDataInput(stream, charset).iterator()); } /** * Creates a new data object from a CSV file. * * @param stream An input stream * @param delimiter The utilized separator character * @return A Data object * @throws IOException Signals that an I/O exception has occurred. */ public static Data create(final InputStream stream, final Charset charset, final char delimiter) throws IOException { return new IterableData(new CSVDataInput(stream, charset, delimiter).iterator()); } /** * Creates a new data object from a CSV file. * * @param stream An input stream * @param delimiter The utilized separator character * @param quote The delimiter for strings * @return A Data object * @throws IOException Signals that an I/O exception has occurred. */ public static Data create(final InputStream stream, final Charset charset, final char delimiter, final char quote) throws IOException { return new IterableData(new CSVDataInput(stream, charset, delimiter, quote).iterator()); } /** * Creates a new data object from a CSV file. * * @param stream the stream * @param delimiter the delimiter * @param quote the quote * @param escape the escape * @return the data * @throws IOException Signals that an I/O exception has occurred. */ public static Data create(final InputStream stream, final Charset charset, final char delimiter, final char quote, final char escape) throws IOException { return new IterableData(new CSVDataInput(stream, charset, delimiter, quote, escape).iterator()); } /** * Creates a new data object from a CSV file. * * @param stream the stream * @param delimiter the delimiter * @param quote the quote * @param escape the escape * @param linebreak the linebreak * @return the data * @throws IOException Signals that an I/O exception has occurred. */ public static Data create(final InputStream stream, final Charset charset, final char delimiter, final char quote, final char escape, final char[] linebreak) throws IOException { return new IterableData(new CSVDataInput(stream, charset, delimiter, quote, escape, linebreak).iterator()); } /** * Creates a new data object from a CSV file. * * @param stream the stream * @param config the config * @return the data * @throws IOException Signals that an I/O exception has occurred. */ public static Data create(final InputStream stream, final Charset charset, final CSVSyntax config) throws IOException { return new IterableData(new CSVDataInput(stream, charset, config).iterator()); } /** * Creates a new data object from a CSV file. * * @param stream the stream * @param config the config * @param datatypes the datatypes * @return the data * @throws IOException Signals that an I/O exception has occurred. */ public static Data create(final InputStream stream, final Charset charset, final CSVSyntax config, final DataType<T>[] datatypes) throws IOException { return new IterableData(new CSVDataInput(stream, charset, config, datatypes).iterator()); } /** * Creates a new data object from an iterator over tuples. * * @param iterator An iterator * @return A Data object */ public static Data create(final Iterator<String[]> iterator) { // Obtain data IterableData result = new IterableData(iterator); // Update definition, if needed if (iterator instanceof ImportAdapter) { result.getDefinition().parse((ImportAdapter) iterator); } // Return return result; } /** * Creates a new data object from a list. * * @param list The list * @return A Data object */ public static Data create(final List<String[]> list) { return new IterableData(list.iterator()); } /** * Creates a new data object from a CSV file. * * @param path the path * @return the data * @throws IOException Signals that an I/O exception has occurred. */ public static Data create(final String path, final Charset charset) throws IOException { return new IterableData(new CSVDataInput(path, charset).iterator()); } /** * Creates a new data object from a CSV file. * * @param path A path to the file * @param delimiter The utilized separator character * @return A Data object * @throws IOException Signals that an I/O exception has occurred. */ public static Data create(final String path, final Charset charset, final char delimiter) throws IOException { return new IterableData(new CSVDataInput(path, charset, delimiter).iterator()); } /** * Creates a new data object from a CSV file. * * @param path A path to the file * @param delimiter The utilized separator character * @param quote The delimiter for strings * @return A Data object * @throws IOException Signals that an I/O exception has occurred. */ public static Data create(final String path, final Charset charset, final char delimiter, final char quote) throws IOException { return new IterableData(new CSVDataInput(path, charset, delimiter, quote).iterator()); } /** * Creates a new data object from a CSV file. * * @param path the path * @param delimiter the delimiter * @param quote the quote * @param escape the escape * @return the data * @throws IOException Signals that an I/O exception has occurred. */ public static Data create(final String path, final Charset charset, final char delimiter, final char quote, final char escape) throws IOException { return new IterableData(new CSVDataInput(path, charset, delimiter, quote, escape).iterator()); } /** * Creates a new data object from a CSV file. * * @param path the path * @param delimiter the delimiter * @param quote the quote * @param escape the escape * @param linebreak the linebreak * @return the data * @throws IOException Signals that an I/O exception has occurred. */ public static Data create(final String path, final Charset charset, final char delimiter, final char quote, final char escape, final char[] linebreak) throws IOException { return new IterableData(new CSVDataInput(path, charset, delimiter, quote, escape, linebreak).iterator()); } /** * Creates a new data object from a CSV file. * * @param path the path * @param config the config * @return the data * @throws IOException Signals that an I/O exception has occurred. */ public static Data create(final String path, final Charset charset, final CSVSyntax config) throws IOException { return new IterableData(new CSVDataInput(path, charset, config).iterator()); } /** * Creates a new data object from a CSV file. * * @param path the path * @param config the config * @param datatypes the datatypes * @return the data * @throws IOException Signals that an I/O exception has occurred. */ public static Data create(final String path, final Charset charset, final CSVSyntax config, final DataType<T>[] datatypes) throws IOException { return new IterableData(new CSVDataInput(path, charset, config, datatypes).iterator()); } /** * Creates a new data object from a two-dimensional string array. * * @param array The array * @return A Data object */ public static Data create(final String[][] array) { return new ArrayData(array); } /** The data handle. */ private DataHandleInput handle; /** The data definition. */ private DataDefinition definition = new DataDefinition(); /** * Returns the data definition. * * @return the definition */ public DataDefinition getDefinition() { return definition; } /** * Returns a data handle. * * @return the handle */ public DataHandle getHandle() { if (handle == null) { handle = new DataHandleInput(this); } else { handle.update(this); } return handle; } /** * Iterator. * * @return the iterator */ protected abstract Iterator<String[]> iterator(); }