/*
* ARX: Powerful Data Anonymization
* Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.deidentifier.arx.risk;
import java.util.Calendar;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.validator.routines.CalendarValidator;
import org.apache.commons.validator.routines.DateValidator;
import org.apache.commons.validator.routines.EmailValidator;
import org.apache.commons.validator.routines.InetAddressValidator;
import org.apache.commons.validator.routines.UrlValidator;
import org.apache.commons.validator.routines.checkdigit.IBANCheckDigit;
import org.deidentifier.arx.DataType;
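/**
* Base class for matchers which decide whether a single attribute value matches
* a specific pattern. The concrete matchers are declared as nested classes.
* @author Florian Kohlmayer, Fabian Prasser, David Gassmann
*/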
abstract class HIPAAMatcherAttributeValue {
/**
 * Matcher which accepts integer values that lie in a plausible range for ages
 * @author Florian Kohlmayer, Fabian Prasser, David Gassmann
 */
static class HIPAAMatcherAge extends HIPAAMatcherAttributeValue {

    /**
     * Creates a new instance
     * @param constants Constants handed to the base class; not consulted by this matcher
     */
    HIPAAMatcherAge(HIPAAConstants constants) {
        super(constants);
    }

    /**
     * Checks whether the value is an integer in the range [0, 130].
     * Leading and trailing whitespace is ignored, in line with the other
     * matchers of this file which trim or strip whitespace before matching.
     * @param value Cell content
     * @return True if the trimmed value parses as an integer between 0 and 130 (inclusive)
     */
    @Override
    public boolean matches(String value) {
        String candidate = value.trim();
        if (candidate.isEmpty()) {
            return false;
        }
        try {
            // parseInt avoids the boxing performed by Integer.valueOf
            int number = Integer.parseInt(candidate);
            return number >= 0 && number <= 130;
        } catch (NumberFormatException e) {
            // Not an integer, hence not an age
            return false;
        }
    }
}
/**
 * Matcher which looks the value up among the cities known to the constants
 * @author Florian Kohlmayer, Fabian Prasser, David Gassmann
 */
static class HIPAAMatcherCity extends HIPAAMatcherAttributeValue {

    /**
     * Creates a new instance
     * @param constants Constants used for the city lookup
     */
    HIPAAMatcherCity(HIPAAConstants constants) {
        super(constants);
    }

    /**
     * Trims and lower-cases the value, then asks the constants whether it is a city.
     * @param value Cell content
     * @return The result of the city lookup for the normalized value
     */
    @Override
    public boolean matches(String value) {
        final String normalized = value.trim().toLowerCase();
        return constants.isCity(normalized);
    }
}
/**
 * Matcher which accepts dates as well as four-digit years that lie more than
 * 89 (and fewer than 130) years before the current year
 * @author Florian Kohlmayer, Fabian Prasser, David Gassmann
 */
static class HIPAAMatcherDate extends HIPAAMatcherAttributeValue {

    /**
     * Creates a new instance
     * @param constants Constants handed to the base class; not consulted by this matcher
     */
    HIPAAMatcherDate(HIPAAConstants constants) {
        super(constants);
    }

    /**
     * Normalizes the value (lower case, trimmed) and checks whether it is a
     * date or a year older than 89.
     * @param value Cell content
     * @return True if the normalized value is a date or a year older than 89
     */
    @Override
    public boolean matches(String value) {
        String candidate = value.toLowerCase().trim();
        // Check emptiness after trimming, so that blank input is rejected right away
        if (candidate.isEmpty()) {
            return false;
        }
        return isDate(candidate) || isYearOlderThan89(candidate);
    }

    /**
     * Tries every example format listed in the description of DataType.DATE.
     * @param value Cell content
     * @return True if input is a valid date in at least one of the formats
     */
    private boolean isDate(String value) {
        DateValidator validator = DateValidator.getInstance();
        for (String format : DataType.DATE.getDescription().getExampleFormats()) {
            if (validator.isValid(value, format)) {
                return true;
            }
        }
        return false;
    }

    /**
     * @param value Cell content
     * @return True if input is a four-character year which lies more than 89
     *         and fewer than 130 years before the current year
     */
    private boolean isYearOlderThan89(String value) {
        if (value.length() != 4) {
            return false;
        }
        // Parse once and reuse the result for the year extraction
        Calendar date = CalendarValidator.getInstance().validate(value, "yyyy");
        if (date == null) {
            return false;
        }
        int age = Calendar.getInstance().get(Calendar.YEAR) - date.get(Calendar.YEAR);
        // Filter out differences of 130 and above, as humans do not get older than that
        return age > 89 && age < 130;
    }
}
/**
 * Matcher which accepts e-mail addresses
 * @author Florian Kohlmayer, Fabian Prasser, David Gassmann
 */
static class HIPAAMatcherEMail extends HIPAAMatcherAttributeValue {

    /**
     * Creates a new instance
     * @param constants Constants handed to the base class; not consulted by this matcher
     */
    HIPAAMatcherEMail(HIPAAConstants constants) {
        super(constants);
    }

    /**
     * Delegates the decision to the shared EmailValidator instance.
     * @param value Cell content
     * @return True if the validator considers the value a valid e-mail address
     */
    @Override
    public boolean matches(String value) {
        return EmailValidator.getInstance().isValid(value);
    }
}
/**
 * Matcher which looks the value up among the first names known to the constants
 * @author Florian Kohlmayer, Fabian Prasser, David Gassmann
 */
static class HIPAAMatcherFirstName extends HIPAAMatcherAttributeValue {

    /**
     * Creates a new instance
     * @param constants Constants used for the first name lookup
     */
    HIPAAMatcherFirstName(HIPAAConstants constants) {
        super(constants);
    }

    /**
     * Trims and lower-cases the value, then asks the constants whether it is a first name.
     * @param value Cell content
     * @return The result of the first name lookup for the normalized value
     */
    @Override
    public boolean matches(String value) {
        final String normalized = value.trim().toLowerCase();
        return constants.isFirstname(normalized);
    }
}
/**
* Pattern which matches IBAN account numbers
* @author Florian Kohlmayer, Fabian Prasser, David Gassmann
*/
static class HIPAAMatcherIBAN extends HIPAAMatcherString {
HIPAAMatcherIBAN() {
super("[a-zA-Z]{2}[0-9]{2}[a-zA-Z0-9]{4}[0-9]{7}([a-zA-Z0-9]?){0,16}");
}
@Override
public boolean matches(String value) {
if (!super.matches(value)) {
return false;
}
value = value.replaceAll("\\s+", "");
IBANCheckDigit validator = new IBANCheckDigit();
return validator.isValid(value);
}
}
/**
 * Matcher which accepts IP addresses
 * @author Florian Kohlmayer, Fabian Prasser, David Gassmann
 */
static class HIPAAMatcherIP extends HIPAAMatcherAttributeValue {

    /**
     * Creates a new instance
     * @param constants Constants handed to the base class; not consulted by this matcher
     */
    HIPAAMatcherIP(HIPAAConstants constants) {
        super(constants);
    }

    /**
     * Delegates the decision to the shared InetAddressValidator instance.
     * @param value Cell content
     * @return True if the validator considers the value a valid address
     */
    @Override
    public boolean matches(String value) {
        return InetAddressValidator.getInstance().isValid(value);
    }
}
/**
 * Matcher which looks the value up among the last names known to the constants
 * @author Florian Kohlmayer, Fabian Prasser, David Gassmann
 */
static class HIPAAMatcherLastName extends HIPAAMatcherAttributeValue {

    /**
     * Creates a new instance
     * @param constants Constants used for the last name lookup
     */
    HIPAAMatcherLastName(HIPAAConstants constants) {
        super(constants);
    }

    /**
     * Trims and lower-cases the value, then asks the constants whether it is a last name.
     * @param value Cell content
     * @return The result of the last name lookup for the normalized value
     */
    @Override
    public boolean matches(String value) {
        final String normalized = value.trim().toLowerCase();
        return constants.isLastname(normalized);
    }
}
/**
 * Matcher which accepts social security numbers
 * @author Florian Kohlmayer, Fabian Prasser, David Gassmann
 */
static class HIPAAMatcherSSN extends HIPAAMatcherString {

    /** Either three, two and four digits separated by dashes, or nine consecutive digits */
    private static final String SSN_REGEX = "[0-9]{3}-[0-9]{2}-[0-9]{4}|[0-9]{9}";

    /**
     * Creates a new instance
     */
    HIPAAMatcherSSN() {
        super(SSN_REGEX);
    }
}
/**
 * Matcher which looks the value up among the states known to the constants
 * @author Florian Kohlmayer, Fabian Prasser, David Gassmann
 */
static class HIPAAMatcherState extends HIPAAMatcherAttributeValue {

    /**
     * Creates a new instance
     * @param constants Constants used for the state lookup
     */
    HIPAAMatcherState(HIPAAConstants constants) {
        super(constants);
    }

    /**
     * Trims and lower-cases the value, then asks the constants whether it is a state.
     * @param value Cell content
     * @return The result of the state lookup for the normalized value
     */
    @Override
    public boolean matches(String value) {
        final String normalized = value.trim().toLowerCase();
        return constants.isState(normalized);
    }
}
/**
 * Base class for matchers which test the complete value against a regular
 * expression. No constants are available to subclasses, as null is handed to
 * the base class.
 * @author Florian Kohlmayer, Fabian Prasser, David Gassmann
 */
static abstract class HIPAAMatcherString extends HIPAAMatcherAttributeValue {

    /**
     * Matcher for the compiled expression. The same object is re-targeted at
     * every input, so an instance of this class must not be shared between
     * threads (java.util.regex.Matcher is not safe for concurrent use).
     */
    Matcher matcher;

    /**
     * Creates a new instance
     * @param regex Regular expression which the complete value has to match
     */
    HIPAAMatcherString(String regex) {
        super(null);
        this.matcher = Pattern.compile(regex).matcher("");
    }

    /**
     * Tests the complete value against the regular expression.
     * @param value Cell content
     * @return True if the entire value matches the expression
     */
    @Override
    public boolean matches(String value) {
        matcher.reset(value);
        return matcher.matches();
    }
}
/**
 * Matcher which accepts URLs
 * @author Florian Kohlmayer, Fabian Prasser, David Gassmann
 */
static class HIPAAMatcherURL extends HIPAAMatcherAttributeValue {

    /**
     * Creates a new instance
     * @param constants Constants handed to the base class; not consulted by this matcher
     */
    HIPAAMatcherURL(HIPAAConstants constants) {
        super(constants);
    }

    /**
     * Delegates the decision to the shared UrlValidator instance.
     * @param value Cell content
     * @return True if the validator considers the value a valid URL
     */
    @Override
    public boolean matches(String value) {
        return UrlValidator.getInstance().isValid(value);
    }
}
/**
 * Matcher which accepts vehicle identification numbers
 * @author Florian Kohlmayer, Fabian Prasser, David Gassmann
 */
static class HIPAAMatcherVIN extends HIPAAMatcherString {

    /** Exactly 17 digits or upper-case letters */
    private static final String VIN_REGEX = "[0-9A-Z]{17}";

    /**
     * Creates a new instance
     */
    HIPAAMatcherVIN() {
        super(VIN_REGEX);
    }

    /**
     * Removes all whitespace and dashes from the value and tests the
     * remainder against the expression.
     * @param value Cell content
     * @return True if the stripped value consists of exactly 17 digits or upper-case letters
     */
    @Override
    public boolean matches(String value) {
        String withoutWhitespace = value.replaceAll("\\s+", "");
        String compact = withoutWhitespace.replaceAll("-", "");
        return super.matches(compact);
    }
}
/**
 * Matcher which accepts ZIP codes whose first three characters are contained
 * in a fixed set of prefixes
 * @author Florian Kohlmayer, Fabian Prasser, David Gassmann
 */
static class HIPAAMatcherZIP extends HIPAAMatcherAttributeValue {

    /**
     * Three-digit prefixes accepted by this matcher.
     * NOTE(review): presumably the sparsely populated ZIP prefixes from the
     * HIPAA Safe Harbor rule - verify against the authoritative list.
     */
    private static final String[] PREFIXES = {
        "036", "059", "063", "102", "203", "556", "692", "790", "821",
        "823", "830", "831", "878", "879", "884", "890", "893"
    };

    /** Prefixes as a set, for lookup */
    private final Set<String> zipCodes = new HashSet<>();

    /**
     * Creates a new instance
     * @param constants Constants used to decide whether a value is a ZIP code at all
     */
    HIPAAMatcherZIP(HIPAAConstants constants) {
        super(constants);
        for (String prefix : PREFIXES) {
            zipCodes.add(prefix);
        }
    }

    /**
     * Removes all whitespace and dashes from the value, then checks that the
     * constants consider the remainder a ZIP code and that its first three
     * characters are one of the listed prefixes.
     * @param value Cell content
     * @return True if the stripped value is a ZIP code starting with a listed prefix
     */
    @Override
    public boolean matches(String value) {
        String withoutWhitespace = value.replaceAll("\\s+", "");
        String compact = withoutWhitespace.replaceAll("-", "");
        if (!constants.isZipcode(compact)) {
            return false;
        }
        if (compact.length() < 3) {
            return false;
        }
        return zipCodes.contains(compact.substring(0, 3));
    }
}
/** Constants handed in at construction time; null for matchers derived from HIPAAMatcherString, which passes null */
protected final HIPAAConstants constants;
/**
* Creates a new instance
* @param constants Constants which are made available to subclasses via the constants field
*/
public HIPAAMatcherAttributeValue(HIPAAConstants constants) {
this.constants = constants;
}
/**
* Returns true if the value matches the pattern implemented by the concrete matcher.
* @param value Cell content to test
* @return True if the value matches, false otherwise
*/
public abstract boolean matches(String value);
}