/* * ARX: Powerful Data Anonymization * Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.deidentifier.arx.risk; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import org.deidentifier.arx.risk.resources.us.HIPAAConstantsUS; /** * Utility class providing access to important constants for finding HIPAA identifiers. * * @author Fabian Prasser */ public abstract class HIPAAConstants { /** US data*/ private static final HIPAAConstants dataUS = new HIPAAConstantsUS(); /** * Returns constants for the US * @return */ public static HIPAAConstants getUSData() { return dataUS; } /** Cities */ private Set<String> cities = null; /** First names */ private Set<String> firstnames = null; /** Last names */ private Set<String> lastnames = null; /** States */ private Set<String> states = null; /** Zip codes */ private Set<String> zipcodes = null; /** Labels */ private Map<String, Map<String, Integer>> labels = null; /** Default charset */ private static final Charset CHARSET = StandardCharsets.UTF_8; /** * Returns all matchers for the given category * @param category * @return */ public List<HIPAAMatcherAttributeName> getNameMatchers(String category) { // Check if (!getNameConfigurations().containsKey(category)) { return new ArrayList<HIPAAMatcherAttributeName>(); } // Collect each matcher List<HIPAAMatcherAttributeName> result = new ArrayList<HIPAAMatcherAttributeName>(); for (Entry<String, Integer> entry2 : getNameConfigurations().get(category).entrySet()) { result.add(new HIPAAMatcherAttributeName(entry2.getKey(), entry2.getValue())); } // Return return result; } /** * Cities * @param value * @return */ public boolean isCity(String value) { return getCities().contains(value); } /** * First names * @param value * @return */ public boolean isFirstname(String value) { return getFirstnames().contains(value); } /** * Last names * * @param value * @return */ public boolean isLastname(String value) { return getLastnames().contains(value); } /** * States * * @param value * @return */ public boolean isState(String value) { return getStates().contains(value); } /** * Zip codes * * @param value * @return */ public boolean isZipcode(String value) { return getZipcodes().contains(value); } /** Cities */ private Set<String> getCities() { if (cities == null) { cities = load("cities.csv"); } return cities; } /** First names */ private Set<String> getFirstnames() { if (firstnames == null) { firstnames = load("firstnames.csv"); } return firstnames; } /** Last names */ private Set<String> getLastnames() { if (lastnames == null) { lastnames = load("lastnames.csv"); } return lastnames; } /** * Returns all name configurations * @return */ private Map<String, Map<String, Integer>> getNameConfigurations() { if (this.labels == null) { InputStream stream = getInputStream("labels.properties"); BufferedReader br = new BufferedReader(new InputStreamReader(stream, CHARSET)); this.labels = new HashMap<String, Map<String, Integer>>(); try { String line = br.readLine(); while (line != null) { String[] parts = line.split("="); String label = parts[0]; parts = parts[1].split(","); Map<String, Integer> map = new HashMap<String, Integer>(); for (int i = 0; i < parts.length; i += 2) { map.put(parts[i], Integer.valueOf(parts[i+1])); } labels.put(label, map); line = br.readLine(); } } catch (Exception e) { e.printStackTrace(); } finally { try { br.close(); } catch (IOException e) { e.printStackTrace(); } } } return this.labels; } /** States */ private Set<String> getStates() { if (states == null) { states = load("states.csv"); } return states; } /** Zip codes */ private Set<String> getZipcodes() { if (zipcodes == null) { zipcodes = load("zipcodes.csv"); } return zipcodes; } /** * Loads the given set of resources * @param file * @return */ private Set<String> load(String file) { InputStream stream = getInputStream(file); BufferedReader br = new BufferedReader(new InputStreamReader(stream, CHARSET)); Set<String> set = new HashSet<String>(); try { String line = br.readLine(); while (line != null) { set.add(line); line = br.readLine(); } } catch (IOException e) { e.printStackTrace(); } finally { try { br.close(); } catch (IOException e) { e.printStackTrace(); } } return set; } /** * Implement this to load the according file * @param file * @return */ protected abstract InputStream getInputStream(String file); }