package edu.harvard.i2b2.crc.util;

import java.util.StringTokenizer;

import edu.harvard.i2b2.crc.dao.DAOFactoryHelper;

/**
 * Rewrites a user-entered "contains" search value into a search expression
 * whose operators depend on the database server type (the ORACLE, SQLSERVER
 * and POSTGRESQL constants of {@link DAOFactoryHelper}).
 */
public class ContainsUtil {

    /**
     * Regex character class matching every punctuation character except
     * hyphen, parentheses, angle brackets, dot, asterisk, percent and slash.
     * Matches are stripped from search values.
     */
    public static final String REMOVE_PUNCTUATION = "[\\p{Punct}&&[^-()<>.\\*%/]]";

    /**
     * Formats a search value for the given database server type.
     *
     * <ol>
     *   <li>A value enclosed in {@code [ ]} is returned without the brackets,
     *       with every single quote doubled and no other processing.</li>
     *   <li>A value enclosed in double quotes has its punctuation stripped; the
     *       surrounding quotes are kept for every server type except Oracle.</li>
     *   <li>Otherwise punctuation is stripped, words starting with a hyphen are
     *       turned into {@code NOT} terms, and {@code AND}, {@code OR} and the
     *       asterisk are rewritten for the server type. If no operator was
     *       detected, the individual words are joined with the server-specific
     *       default operator.</li>
     * </ol>
     *
     * @param containsValue the raw search value; may be {@code null}
     * @param dbServerType  database server type, compared case-insensitively
     *                      against the {@link DAOFactoryHelper} constants
     * @return the formatted value, or {@code null} if {@code containsValue} is
     *         {@code null}
     * @throws NullPointerException if {@code dbServerType} is {@code null} and
     *         {@code containsValue} is neither {@code null} nor bracket-enclosed
     */
    public String formatValue(String containsValue, String dbServerType) {
        if (containsValue == null) {
            return null;
        }

        // 1: value enclosed in [] is passed through; only single quotes are doubled.
        if (containsValue.startsWith("[") && containsValue.endsWith("]")) {
            return containsValue.substring(1, containsValue.length() - 1).replaceAll("'", "''");
        }

        final boolean oracle = dbServerType.equalsIgnoreCase(DAOFactoryHelper.ORACLE);

        // 2: value enclosed in "" is treated as a phrase. The length guard keeps a
        // lone quote character (which both starts and ends with '"') from reaching
        // substring(1, 0), which would throw StringIndexOutOfBoundsException.
        if (containsValue.length() >= 2
                && containsValue.startsWith("\"")
                && containsValue.endsWith("\"")) {
            String phrase = containsValue.substring(1, containsValue.length() - 1)
                    .replaceAll(REMOVE_PUNCTUATION, "");
            return oracle ? phrase : "\"" + phrase + "\"";
        }

        // Operator detection looks at the raw value. The "> 0" comparisons mean an
        // operator at the very first position is not counted.
        // NOTE(review): confirm that ignoring a leading "-" or "*" is intended.
        boolean textWithoutOperator = !(containsValue.indexOf("-") > 0
                || containsValue.indexOf("AND") > 0
                || containsValue.indexOf("OR") > 0
                || containsValue.indexOf("*") > 0);

        // 3: remove punctuation
        String expression = containsValue.replaceAll(REMOVE_PUNCTUATION, "");

        // 4: words starting with "-" become NOT terms
        expression = negateHyphenatedWords(expression);

        // 5: upper-case AND becomes ACCUM on Oracle, and a plain space elsewhere
        expression = expression.replaceAll("\\s(AND)\\s", oracle ? " ACCUM " : " ");

        // 6: "*" becomes "%" (Oracle only)
        if (oracle) {
            expression = expression.replaceAll("(\\*)", "%");
        }

        // 7: DB_AND becomes AND
        // NOTE(review): step 3 strips "_" (it is punctuation and not in the kept
        // set), so a typed "DB_AND" cannot reach this point intact; confirm whether
        // this step is still needed.
        expression = expression.replaceAll("\\s(DB_AND)\\s", " AND ");

        // 8: OR becomes MINUS (Oracle only)
        if (oracle) {
            expression = expression.replaceAll("\\s(OR)\\s", " MINUS ");
        }

        // 9: plain text without operators is split into words and re-joined
        if (!textWithoutOperator) {
            return expression;
        }
        return joinWords(expression, dbServerType);
    }

    /**
     * Re-joins whitespace-separated words with single spaces, replacing every
     * hyphen in a word that starts with a hyphen by {@code " NOT "}.
     */
    private static String negateHyphenatedWords(String text) {
        StringBuilder result = new StringBuilder();
        StringTokenizer words = new StringTokenizer(text);
        while (words.hasMoreTokens()) {
            String word = words.nextToken();
            if (word.startsWith("-")) {
                result.append(word.replaceAll("(-)", " NOT "));
            } else {
                result.append(word);
            }
            // Separate from the next word regardless of whether this word was
            // negated; otherwise a negated word is glued to its successor.
            if (words.hasMoreTokens()) {
                result.append(' ');
            }
        }
        return result.toString();
    }

    /**
     * Joins the whitespace-separated words of {@code text} with the default
     * operator of the server type: {@code ACCUM} for Oracle, {@code OR} (with
     * each word double-quoted) for SQL Server, {@code |} for PostgreSQL.
     * For any other server type nothing is appended, so the result is empty.
     * NOTE(review): confirm the empty result for unknown server types is intended.
     */
    private static String joinWords(String text, String dbServerType) {
        final boolean oracle = dbServerType.equalsIgnoreCase(DAOFactoryHelper.ORACLE);
        final boolean sqlServer = dbServerType.equalsIgnoreCase(DAOFactoryHelper.SQLSERVER);
        final boolean postgres = dbServerType.equalsIgnoreCase(DAOFactoryHelper.POSTGRESQL);

        StringBuilder joined = new StringBuilder();
        StringTokenizer words = new StringTokenizer(text);
        while (words.hasMoreTokens()) {
            String word = words.nextToken();
            if (oracle || postgres) {
                joined.append(word);
            } else if (sqlServer) {
                joined.append('"').append(word).append('"');
            }
            if (words.hasMoreTokens()) {
                if (oracle) {
                    joined.append(" ACCUM ");
                } else if (sqlServer) {
                    joined.append(" OR ");
                } else if (postgres) {
                    joined.append(" | ");
                }
            }
        }
        return joined.toString();
    }

    /** Manual smoke test: formats a sample value for SQL Server and prints it. */
    public static void main(String[] args) {
        ContainsUtil conUtil = new ContainsUtil();
        String formattedVal = conUtil.formatValue("MRI Knee", "SQLSERVER");
        System.out.println("formattedVal[" + formattedVal + "]");
    }
}