/* * ARX: Powerful Data Anonymization * Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.deidentifier.arx.risk; import java.util.ArrayList; import java.util.List; import org.deidentifier.arx.DataHandleInternal; import org.deidentifier.arx.common.WrappedBoolean; import org.deidentifier.arx.exceptions.ComputationInterruptedException; import org.deidentifier.arx.risk.HIPAAIdentifierMatch.HIPAAIdentifier; import org.deidentifier.arx.risk.HIPAAIdentifierMatch.MatchType; import org.deidentifier.arx.risk.HIPAAMatcherAttributeValue.HIPAAMatcherAge; import org.deidentifier.arx.risk.HIPAAMatcherAttributeValue.HIPAAMatcherCity; import org.deidentifier.arx.risk.HIPAAMatcherAttributeValue.HIPAAMatcherDate; import org.deidentifier.arx.risk.HIPAAMatcherAttributeValue.HIPAAMatcherEMail; import org.deidentifier.arx.risk.HIPAAMatcherAttributeValue.HIPAAMatcherFirstName; import org.deidentifier.arx.risk.HIPAAMatcherAttributeValue.HIPAAMatcherIP; import org.deidentifier.arx.risk.HIPAAMatcherAttributeValue.HIPAAMatcherLastName; import org.deidentifier.arx.risk.HIPAAMatcherAttributeValue.HIPAAMatcherSSN; import org.deidentifier.arx.risk.HIPAAMatcherAttributeValue.HIPAAMatcherState; import org.deidentifier.arx.risk.HIPAAMatcherAttributeValue.HIPAAMatcherURL; import org.deidentifier.arx.risk.HIPAAMatcherAttributeValue.HIPAAMatcherZIP; /** * Encapsulates the validation process for the safe harbor method. * @author David Gassmann * @author Florian Kohlmayer * */ class RiskModelHIPAASafeHarbor { /** All configurations*/ private final List<HIPAAIdentifierConfig> configurations; /** * Constructor */ RiskModelHIPAASafeHarbor() { this.configurations = getConfigurations(); } /** * Returns a list of matches with HIPAA identifiers * * @param handle * @param threshold * @param stop * @return An array of warnings */ public HIPAAIdentifierMatch[] getMatches(DataHandleInternal handle, double threshold, WrappedBoolean stop) { // Prepare List<HIPAAIdentifierMatch> results = new ArrayList<HIPAAIdentifierMatch>(); // Check each attribute for (int column = 0; column < handle.getNumColumns(); column++) { // Init String attribute = handle.getAttributeName(column); String[] values = handle.getDistinctValues(column); // Match attribute name for (HIPAAIdentifierConfig config : configurations) { if (stop.value) { throw new ComputationInterruptedException(); } String match = config.getMatchingAttributeName(attribute); if (match != null) { results.add(new HIPAAIdentifierMatch(attribute, config.getIdentifier(), config.getInstance(), MatchType.ATTRIBUTE_NAME, match)); } } // Match attribute values for (HIPAAIdentifierConfig config : configurations) { int matches = 0; int nonmatches = 0; for (String value : values) { if (stop.value) { throw new ComputationInterruptedException(); } // Count matching values if (config.getMatchingAttributeValue(value) != null) { matches++; } else { // Break if too many non-matching values nonmatches++; double nonpercentage = (double)nonmatches / (double)values.length; if (nonpercentage > 1d - threshold) { break; } } } double percentage = (double)matches / (double)values.length; if (percentage > threshold) { results.add(new HIPAAIdentifierMatch(attribute, config.getIdentifier(), config.getInstance(), MatchType.ATTRIBUTE_VALUE, String.valueOf(percentage))); } } } // Return return results.toArray(new HIPAAIdentifierMatch[results.size()]); } /** * Creates the list of attributes */ private List<HIPAAIdentifierConfig> getConfigurations() { HIPAAConstants constants = HIPAAConstants.getUSData(); List<HIPAAIdentifierConfig> configurations = new ArrayList<HIPAAIdentifierConfig>(); configurations.add(new HIPAAIdentifierConfig(HIPAAIdentifier.NAME, "First name", new HIPAAMatcherFirstName(constants))); configurations.add(new HIPAAIdentifierConfig(HIPAAIdentifier.NAME, "Last name", new HIPAAMatcherLastName(constants))); configurations.add(new HIPAAIdentifierConfig(HIPAAIdentifier.GEOGRAPHIC_SUBDIVISION, "City", new HIPAAMatcherCity(constants))); configurations.add(new HIPAAIdentifierConfig(HIPAAIdentifier.GEOGRAPHIC_SUBDIVISION, "ZIP Code", new HIPAAMatcherZIP(constants))); configurations.add(new HIPAAIdentifierConfig(HIPAAIdentifier.GEOGRAPHIC_SUBDIVISION, "State", new HIPAAMatcherState(constants))); configurations.add(new HIPAAIdentifierConfig(HIPAAIdentifier.GEOGRAPHIC_SUBDIVISION, "Generic geographic subdivision", new HIPAAMatcherState(constants))); configurations.add(new HIPAAIdentifierConfig(HIPAAIdentifier.DATE, "Date/Time", new HIPAAMatcherDate(constants))); configurations.add(new HIPAAIdentifierConfig(HIPAAIdentifier.DATE, "Age", new HIPAAMatcherAge(constants))); configurations.add(new HIPAAIdentifierConfig(HIPAAIdentifier.EMAIL_ADDRESS, "Email address", new HIPAAMatcherEMail(constants))); configurations.add(new HIPAAIdentifierConfig(HIPAAIdentifier.SOCIAL_SECURITY_NUMBER, "Social security number", new HIPAAMatcherSSN())); configurations.add(new HIPAAIdentifierConfig(HIPAAIdentifier.URL, "URL", new HIPAAMatcherURL(constants))); configurations.add(new HIPAAIdentifierConfig(HIPAAIdentifier.IP, "IP Address", new HIPAAMatcherIP(constants))); configurations.add(new HIPAAIdentifierConfig(HIPAAIdentifier.NAME, "Generic name", constants.getNameMatchers("Generic name"))); configurations.add(new HIPAAIdentifierConfig(HIPAAIdentifier.NAME, "First name", constants.getNameMatchers("First name"))); configurations.add(new HIPAAIdentifierConfig(HIPAAIdentifier.NAME, "Last name", constants.getNameMatchers("Last name"))); configurations.add(new HIPAAIdentifierConfig(HIPAAIdentifier.GEOGRAPHIC_SUBDIVISION, "City", constants.getNameMatchers("City"))); configurations.add(new HIPAAIdentifierConfig(HIPAAIdentifier.GEOGRAPHIC_SUBDIVISION, "ZIP Code", constants.getNameMatchers("ZIP Code"))); configurations.add(new HIPAAIdentifierConfig(HIPAAIdentifier.GEOGRAPHIC_SUBDIVISION, "State", constants.getNameMatchers("State"))); configurations.add(new HIPAAIdentifierConfig(HIPAAIdentifier.GEOGRAPHIC_SUBDIVISION, "Generic geographic subdivision", constants.getNameMatchers("Generic geographic subdivision"))); configurations.add(new HIPAAIdentifierConfig(HIPAAIdentifier.DATE, "Date/Time", constants.getNameMatchers("Date/Time"))); configurations.add(new HIPAAIdentifierConfig(HIPAAIdentifier.TELEPHONE_NUMBER, "Phone number", constants.getNameMatchers("Phone number"))); configurations.add(new HIPAAIdentifierConfig(HIPAAIdentifier.FAX_NUMBER, "Fax number", constants.getNameMatchers("Phone number"))); configurations.add(new HIPAAIdentifierConfig(HIPAAIdentifier.EMAIL_ADDRESS, "Email address", constants.getNameMatchers("Email address"))); configurations.add(new HIPAAIdentifierConfig(HIPAAIdentifier.SOCIAL_SECURITY_NUMBER, "Social security number", constants.getNameMatchers("Social security number"))); configurations.add(new HIPAAIdentifierConfig(HIPAAIdentifier.CERTIFICATE_NUMBER, "Certificate number", constants.getNameMatchers("Certificate number"))); configurations.add(new HIPAAIdentifierConfig(HIPAAIdentifier.VEHICLE_IDENTIFIER, "Vehicle identifier", constants.getNameMatchers("Vehicle identifier"))); configurations.add(new HIPAAIdentifierConfig(HIPAAIdentifier.DEVICE_IDENTIFIER, "Device identifier", constants.getNameMatchers("Device identifier"))); configurations.add(new HIPAAIdentifierConfig(HIPAAIdentifier.URL, "URL", constants.getNameMatchers("URL"))); configurations.add(new HIPAAIdentifierConfig(HIPAAIdentifier.IP, "IP Address", constants.getNameMatchers("IP Address"))); return configurations; } }