// ============================================================================
//
// Copyright (C) 2006-2016 Talend Inc. - www.talend.com
//
// This source code is available under agreement available at
// %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt
//
// You should have received a copy of the agreement
// along with this program; if not, write to Talend SA
// 9 rue Pages 92150 Suresnes, France
//
// ============================================================================
package org.talend.survivorship.services;
import java.util.HashMap;
import java.util.HashSet;
import org.talend.survivorship.model.Attribute;
import org.talend.survivorship.model.DataSet;
/**
* Service to determine the longest, shortest value, etc. of a given column.
*/
public class StringService extends AbstractService {
protected HashMap<String, HashSet<String>> longestValueMap;
protected HashMap<String, HashSet<String>> shortestValueMap;
protected HashMap<String, HashSet<String>> secondLongestValueMap;
protected HashMap<String, HashSet<String>> secondShortestValueMap;
/**
* StringService constructor.
*
* @param dataset
*/
public StringService(DataSet dataset) {
super(dataset);
longestValueMap = new HashMap<>();
shortestValueMap = new HashMap<>();
secondLongestValueMap = new HashMap<>();
secondShortestValueMap = new HashMap<>();
}
/**
* Put attribute values into the longest/shortest value map of a given column.
*
* @param column
* @param ignoreBlanks
* @param ignoreBlanks
* @return
*/
public void putAttributeValues(String column, boolean ignoreBlanks) {
HashSet<String> longestValues = new HashSet<>();
longestValueMap.put(column, longestValues);
HashSet<String> shortestValues = new HashSet<>();
shortestValueMap.put(column, shortestValues);
HashSet<String> secondLongestValues = new HashSet<>();
secondLongestValueMap.put(column, secondLongestValues);
HashSet<String> secondShortestValues = new HashSet<>();
secondShortestValueMap.put(column, secondShortestValues);
int max = 0;
int secondMax = 0;
int min = -1;
int secondMin = -1;
for (Attribute attr : dataset.getAttributesByColumn(column)) {
if (attr.isAlive()) {
String value = (String) attr.getValue();
if (value == null || (ignoreBlanks && "".equals(value.trim()))) { //$NON-NLS-1$
continue;
}
int length = value.length();
if (length > max) {
// max value changed so that orginal max value change to second max value
secondLongestValues.clear();
secondLongestValues.addAll(longestValues);
secondMax = secondMax == -1 ? Integer.MIN_VALUE : max;
longestValues.clear();
longestValues.add(value);
max = length;
} else if (length == max) {
longestValues.add(value);
} else if (secondMax < length && length < max) {
// find new second max value
secondLongestValues.clear();
secondLongestValues.add(value);
secondMax = length;
} else if (length == secondMax) {
// find another second max value
secondLongestValues.add(value);
}
if (length < min || min == -1) {
// min value changed so that orginal min value change to second min value
secondShortestValues.clear();
secondShortestValues.addAll(shortestValues);
secondMin = secondMin == -1 ? Integer.MAX_VALUE : min;
shortestValues.clear();
shortestValues.add(value);
min = length;
} else if (length == min) {
shortestValues.add(value);
} else if (secondMin > length && length > min) {
// find new second min value
secondShortestValues.clear();
secondShortestValues.add(value);
secondMin = length;
} else if (length == secondMin) {
// find another second min value
secondShortestValues.add(value);
}
}
}
}
/**
* Determine if an object is the longest value of a given column.
*
* @param var
* @param column
* @param ignoreBlanks
* @return
*/
public boolean isLongestValue(Object var, String column, boolean ignoreBlanks) {
if (longestValueMap.get(column) == null) {
putAttributeValues(column, ignoreBlanks);
}
return longestValueMap.get(column).contains(var);
}
/**
* Determine if an object is the second longest value of a given column.
*
* @param var
* @param column
* @param ignoreBlanks
* @return
*/
public boolean isSecondLongestValue(Object var, String column, boolean ignoreBlanks) {
if (secondLongestValueMap.get(column) == null || secondLongestValueMap.get(column).size() == 0) {
if (longestValueMap.get(column) == null) {
putAttributeValues(column, ignoreBlanks);
} else {
// when secondLongestValueMap is null but longestValueMap is not mean that there is exist same length data only so
// that secondLongest equals Longest
return longestValueMap.get(column).contains(var);
}
}
return secondLongestValueMap.get(column).contains(var);
}
/**
* Determine if an object is the second shortest value of a given column.
*
* @param var
* @param column
* @return
*/
public boolean isShortestValue(Object var, String column, boolean ignoreBlanks) {
if (shortestValueMap.get(column) == null) {
putAttributeValues(column, ignoreBlanks);
}
return shortestValueMap.get(column).contains(var);
}
/**
* Determine if an object is the shortest value of a given column.
*
* @param var
* @param column
* @return
*/
public boolean isSecondShortestValue(Object var, String column, boolean ignoreBlanks) {
if (secondShortestValueMap.get(column) == null || secondShortestValueMap.get(column).size() == 0) {
if (shortestValueMap.get(column) == null) {
putAttributeValues(column, ignoreBlanks);
} else {
// when secondshortestValueMap is null but shortestValueMap is not mean that there is exist same length data only
// so
// that secondshortest equals shortest
return shortestValueMap.get(column).contains(var);
}
}
return secondShortestValueMap.get(column).contains(var);
}
/*
* (non-Javadoc)
*
* @see org.talend.survivorship.services.AbstractService#init()
*/
@Override
public void init() {
longestValueMap.clear();
shortestValueMap.clear();
secondShortestValueMap.clear();
secondLongestValueMap.clear();
}
}