/**
* <copyright> </copyright>
*
* $Id$
*/
package org.talend.dataquality.indicators.impl;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.apache.commons.lang.StringUtils;
import org.eclipse.core.runtime.FileLocator;
import org.eclipse.core.runtime.Platform;
import org.eclipse.emf.ecore.EClass;
import org.osgi.framework.Bundle;
import org.talend.core.model.metadata.builder.connection.Connection;
import org.talend.core.model.metadata.builder.connection.MetadataColumn;
import org.talend.cwm.helper.ConnectionHelper;
import org.talend.cwm.helper.SwitchHelpers;
import org.talend.dataquality.indicators.DatePatternFreqIndicator;
import org.talend.dataquality.indicators.IndicatorsPackage;
import org.talend.dataquality.indicators.mapdb.DBMap;
import org.talend.dataquality.matching.date.pattern.DatePatternRetriever;
import org.talend.dataquality.matching.date.pattern.ModelMatcher;
/**
* <!-- begin-user-doc --> An implementation of the model object '<em><b>Date Pattern Freq Indicator</b></em>'. <!--
* end-user-doc -->
* <p>
* </p>
*
* @generated
*/
public class DatePatternFreqIndicatorImpl extends FrequencyIndicatorImpl implements DatePatternFreqIndicator {
private static final String PATTERNS_FILENAME = "PatternsNameAndRegularExpressions.txt";
private DatePatternRetriever dateRetriever;
private boolean isDelimtedFile = false;
/**
* <!-- begin-user-doc --> <!-- end-user-doc -->
* @generated
*/
protected DatePatternFreqIndicatorImpl() {
super();
}
/**
* <!-- begin-user-doc --> <!-- end-user-doc -->
* @generated
*/
@Override
protected EClass eStaticClass() {
return IndicatorsPackage.Literals.DATE_PATTERN_FREQ_INDICATOR;
}
/*
* (non-Javadoc)
*
* @see org.talend.dataquality.indicators.impl.PatternFreqIndicatorImpl#prepare()
*/
@Override
public boolean prepare() {
initDateRetriever();
// MOD qiongli 2011-11-15 TDQ-3864,judge if it is file connection.
MetadataColumn mdColumn = SwitchHelpers.METADATA_COLUMN_SWITCH.doSwitch(this.getAnalyzedElement());
if (mdColumn != null) {
Connection Connection = ConnectionHelper.getTdDataProvider(mdColumn);
if (Connection != null && SwitchHelpers.DELIMITEDFILECONNECTION_SWITCH.doSwitch(Connection) != null) {
isDelimtedFile = true;
}
}
return super.prepare();
}
/**
* Extract it from 'prepare()'.initialize DatePatternRetriever.
*/
private void initDateRetriever() {
dateRetriever = new DatePatternRetriever();
URL url = null;
if (Platform.isRunning()) {
Bundle bundle = Platform.getBundle("org.talend.dataquality.matching"); //$NON-NLS-1$
url = bundle.getResource(PATTERNS_FILENAME);
String filepath = null;
try {
filepath = FileLocator.toFileURL(url).getFile();
} catch (IOException e) {
e.printStackTrace();
}
File file = new File(filepath);
dateRetriever.initModel2Regex(file);
} else {
InputStream inStream = Thread.currentThread().getContextClassLoader().getResourceAsStream(PATTERNS_FILENAME);
dateRetriever.initModel2Regex(inStream);
}
}
/*
* (non-Javadoc)
*
* @see org.talend.dataquality.indicators.impl.PatternFreqIndicatorImpl#handle(java.lang.Object)
*/
@Override
public boolean handle(Object data) {
mustStoreRow = false;
if (data != null) {
// MOD qiongli 2011-11-11 TDQ-3864,format the date for file connection.
if (data instanceof Date && isDelimtedFile) {
MetadataColumn mdColumn = SwitchHelpers.METADATA_COLUMN_SWITCH.doSwitch(this.getAnalyzedElement());
String pattern = mdColumn.getPattern();
if (pattern != null) {
pattern = StringUtils.replace(pattern, "\"", StringUtils.EMPTY);
SimpleDateFormat sdf = new SimpleDateFormat(pattern);
data = sdf.format((Date) data);
}
}
List<ModelMatcher> findMatchers = dateRetriever.findMatchers(String.valueOf(data));
for (ModelMatcher matcher : findMatchers) {
if (matcher != null) {
data = matcher.getModel();
matcher.increment();
mustStoreRow = mustStoreRow || this.checkMustStoreCurrentRow(Long.valueOf(matcher.getScore() - 1));
}
}
} else {
nullCount++;
}
count++;
return true;
}
@Override
public Double getFrequency(Object dataValue) {
if (this.count.compareTo(0L) == 0) {
return Double.NaN;
}
ModelMatcher matcher = null;
if (dataValue instanceof ModelMatcher) {
matcher = (ModelMatcher) dataValue;
return ((double) matcher.getScore()) / this.getCount().longValue();
} else {
return super.getFrequency(dataValue);
}
}
@Override
public boolean finalizeComputation() {
List<ModelMatcher> modelMatchers = dateRetriever.getModelMatchers();
HashMap<Object, Long> map = new HashMap<Object, Long>();
for (ModelMatcher matcher : modelMatchers) {
map.put(matcher.getModel(), (long) matcher.getScore());
}
// this clear is necessary, because in the map contains the parent'result.
getMapForFreq().clear();
getMapForFreq().putAll(map);
return super.finalizeComputation();
}
@Override
public List<ModelMatcher> getModelMatcherList() {
return dateRetriever.getModelMatchers();
}
/**
* return List for ModelMatcher which Score more than 1.
*
* @deprecated use {@link #getResult()} instead
*/
@Deprecated
@Override
public List<Object> getRealModelMatcherList() {
List<Object> realModelMatcherList = new ArrayList<Object>();
for (ModelMatcher matcher : dateRetriever.getModelMatchers()) {
if (matcher.getScore() > 0) {
realModelMatcherList.add(matcher);
}
}
return realModelMatcherList;
}
@Override
public String getModel(Object matcher) {
if (matcher instanceof ModelMatcher) {
return ((ModelMatcher) matcher).getModel();
} else {
return null;
}
}
@Override
public int getScore(Object matcher) {
if (matcher instanceof ModelMatcher) {
return ((ModelMatcher) matcher).getScore();
} else {
return -1;
}
}
/**
*
* DOC zshen Comment method "getRegex".
*
* @param model the model of matcher.
* @return if can find corresponding to matcher return it's the Regex of matcher else return null;
*/
@Override
public String getRegex(String model) {
// TDQ-9779. Avoid NPE.
if (dateRetriever == null) {
initDateRetriever();
}
return this.dateRetriever.getRegex(model);
}
/*
* (non-Javadoc)
*
* @see org.talend.dataquality.indicators.DatePatternFreqIndicator#getResult()
*/
@Override
public Map<String, Long> getResult() {
Map<String, Long> result = new TreeMap<String, Long>();
HashMap<Object, Long> values = this.getValueToFreq();
// add the value which greater than zero into the result
Iterator<Object> iterator = values.keySet().iterator();
while (iterator.hasNext()) {
Object key = iterator.next();
Long value = values.get(key);
if (value > 0) {
result.put(key.toString(), value);
}
}
return result;
}
/*
* (non-Javadoc)
*
* @see org.talend.dataquality.indicators.impl.FrequencyIndicatorImpl#handleDrillDownData(java.lang.Object,
* java.util.List)
*/
@Override
public void handleDrillDownData(Object masterObject, List<Object> inputRowList) {
List<ModelMatcher> matchers = dateRetriever.findMatchers(String.valueOf(masterObject));
for (ModelMatcher matcher : matchers) {
drillDownMap = (DBMap<Object, List<Object>>) getMapDB(matcher.getModel());
// check the size of limite
if (this.checkMustStoreCurrentRow(Long.valueOf(matcher.getScore() - 1))) {
drillDownMap.put(matcher.getScore(), inputRowList);
}
}
}
} // DatePatternFreqIndicatorImpl