/*
* ARX: Powerful Data Anonymization
* Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.deidentifier.arx.examples;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.sql.SQLException;
import org.deidentifier.arx.Data;
import org.deidentifier.arx.DataSource;
import org.deidentifier.arx.DataType;
/**
* This class demonstrates the use of the data import facilities provided by the
* ARX framework. Data can be imported from various types of sources, e.g. CSV
* files, Excel files and databases (using JDBC). The API is mostly the same for
* all of these sources, although not all options might be available in each
* case. Refer to the comments further down below for details about particular
* sources.
*
* @author Karol Babioch
* @author Fabian Prasser
*/
public class Example21 extends Example {

    /**
     * Runs the three import demonstrations one after another: CSV, Excel and
     * JDBC.
     *
     * @param args
     *            command line arguments (not used)
     * @throws IOException
     *             In case of IO errors with one of the example inputs
     * @throws SQLException
     *             In case of SQL errors with the example database
     * @throws ClassNotFoundException
     *             In case there is no JDBC driver
     */
    public static void main(String[] args) throws IOException, SQLException, ClassNotFoundException {
        exampleCSV();
        exampleExcel();
        exampleJDBC();
    }

    /**
     * Shows how to import data from a simple CSV file using more advanced
     * features than {@code Example2}: a column is renamed, only selected
     * columns are imported, and every imported column is given a data type
     * describing the format of its values.
     *
     * @throws IOException
     *             In case of IO errors with the given file
     */
    private static void exampleCSV() throws IOException {

        // Configuration of the CSV input. The last argument is the most
        // interesting one: it states whether the file starts with a header
        // that assigns a name to each column. Those names can then be used
        // to address the columns below.
        final DataSource csvSource = DataSource.createCSVSource("data/test.csv", StandardCharsets.UTF_8, ';', true);

        // Select the columns to import.
        // A column can be specified in different ways. The first call below
        // uses "index based" addressing, which is currently supported by all
        // source types and is therefore guaranteed to work. If the source
        // does not provide a name for such a column, one is assigned
        // automatically in the style "Column #x", where x is the number of
        // the column.
        // The other two calls address their column by name. Name based
        // addressing only works for types that implement the
        // IImportColumnNamed interface; CSV and Excel files need to contain
        // a header for it. A column may also be given an alias, which is a
        // convenient way to influence the output.
        csvSource.addColumn(2, DataType.STRING); // zipcode (by index)
        csvSource.addColumn("gender", DataType.STRING); // gender (by name)
        csvSource.addColumn("age", "renamed", DataType.INTEGER); // age (by name, with alias)

        // The output dataset lists the columns in the order in which
        // addColumn() was called.

        // Build the data object from the source and print it to the console
        print(Data.create(csvSource).getHandle());
        System.out.println("\n");
    }

    /**
     * Shows how to import data from an Excel file using more advanced
     * features than {@code Example2}: a column is renamed, only selected
     * columns are imported, and every imported column is given a data type
     * describing the format of its values.
     * <p>
     * Internally this makes use of <a href="https://poi.apache.org/">POI</a>.
     * <p>
     * See {@link #exampleCSV()} for detailed comments on the individual
     * parameters; basically everything mentioned there applies here as well.
     *
     * @throws IOException
     *             In case of IO errors with the given file
     */
    private static void exampleExcel() throws IOException {

        // Configuration of the Excel input
        final DataSource excelSource = DataSource.createExcelSource("data/test.xls", 0, true);

        // Select the columns to import
        excelSource.addColumn(2, DataType.STRING); // zipcode (by index)
        excelSource.addColumn("gender", DataType.STRING); // gender (by name)
        excelSource.addColumn("age", "renamed", DataType.INTEGER); // age (by name, with alias)

        // Build the data object from the source and print it to the console
        print(Data.create(excelSource).getHandle());
        System.out.println("\n");
    }

    /**
     * Shows how to import data from a JDBC data source. Columns can be
     * renamed or selected individually, and every imported column is given a
     * data type describing the format of its values.
     * <p>
     * The example uses SQLite together with the example database contained in
     * the {@code data} directory. In principle, however, every JDBC
     * connection can be used here.
     * <p>
     * See {@link #exampleCSV()} for detailed comments on the individual
     * parameters; basically everything mentioned there applies here as well.
     * Columns can always be addressed by name in this scenario.
     *
     * @throws IOException
     *             In case of IO errors with the given file
     * @throws SQLException
     *             In case of SQL errors with given database
     * @throws ClassNotFoundException
     *             In case there is no JDBC driver
     */
    private static void exampleJDBC() throws IOException, SQLException, ClassNotFoundException {

        // Make sure the SQLite JDBC driver class is loaded
        Class.forName("org.sqlite.JDBC");

        // Configuration of the JDBC input
        final DataSource jdbcSource = DataSource.createJDBCSource("jdbc:sqlite:data/test.db", "test");

        // Select the columns to import
        jdbcSource.addColumn(2, DataType.STRING); // zipcode (by index)
        jdbcSource.addColumn("gender", DataType.STRING); // gender (by name)
        jdbcSource.addColumn("age", "renamed", DataType.INTEGER); // age (by name, with alias)

        // Build the data object from the source and print it to the console
        print(Data.create(jdbcSource).getHandle());
        System.out.println("\n");
    }
}