// ============================================================================ // // Copyright (C) 2006-2016 Talend Inc. - www.talend.com // // This source code is available under agreement available at // %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt // // You should have received a copy of the agreement // along with this program; if not, write to Talend SA // 9 rue Pages 92150 Suresnes, France // // ============================================================================ package org.talend.dataquality.statistics.text; import java.util.List; import org.talend.dataquality.common.inference.Analyzer; import org.talend.dataquality.common.inference.ResizableList; /** * Text length analyzer compute the length of min,max and average length of the record sets.<br> * For more details please refer to documentation: <a * href="https://help.talend.com/pages/viewpage.action?pageId=261412880&thc_login=done">Text statistics</a> * * @author zhao * */ public class TextLengthAnalyzer implements Analyzer<TextLengthStatistics> { private static final long serialVersionUID = -9106960246571082963L; private ResizableList<TextLengthStatistics> textStatistics = new ResizableList<>(TextLengthStatistics.class); @Override public void init() { textStatistics.clear(); } @Override public boolean analyze(String... record) { if (record == null) { return true; } textStatistics.resize(record.length); for (int i = 0; i < record.length; i++) { TextLengthStatistics freqStats = textStatistics.get(i); freqStats.add(record[i]); } return true; } @Override public void end() { // Nothing to be done. } @Override public List<TextLengthStatistics> getResult() { return textStatistics; } @Override public Analyzer<TextLengthStatistics> merge(Analyzer<TextLengthStatistics> another) { int idx = 0; TextLengthAnalyzer mergedTextLengthAnalyzer = new TextLengthAnalyzer(); ((ResizableList<TextLengthStatistics>) mergedTextLengthAnalyzer.getResult()).resize(textStatistics.size()); for (TextLengthStatistics stats : textStatistics) { TextLengthStatistics mergedStats = mergedTextLengthAnalyzer.getResult().get(idx); TextLengthStatistics anotherStats = another.getResult().get(idx); mergedStats.setCount(stats.getCount() + anotherStats.getCount()); mergedStats.setCountIgnoreBlank(stats.getCountIgnoreBlank() + anotherStats.getCountIgnoreBlank()); // Merge min mergedStats.setMinTextLength(mergeMinMaxStats(stats.getMinTextLength(), anotherStats.getMinTextLength(), true)); mergedStats.setMinTextLengthIgnoreBlank(mergeMinMaxWithBlankStats(stats.getMinTextLengthIgnoreBlank(), anotherStats.getMinTextLengthIgnoreBlank(), true)); // Merge max mergedStats.setMaxTextLength(mergeMinMaxStats(stats.getMaxTextLength(), anotherStats.getMaxTextLength(), false)); mergedStats.setMaxTextLengthIgnoreBlank(mergeMinMaxWithBlankStats(stats.getMaxTextLengthIgnoreBlank(), anotherStats.getMaxTextLengthIgnoreBlank(), false)); // Merge sum mergedStats.setSumTextLength(mergeSumStats(stats.getSumTextLength(), anotherStats.getSumTextLength())); mergedStats.setSumTextLengthIgnoreBlank( mergeSumStats(stats.getSumTextLengthIgnoreBlank(), anotherStats.getSumTextLengthIgnoreBlank())); idx++; } return mergedTextLengthAnalyzer; } private Integer mergeMinMaxStats(Integer one, Integer another, boolean isMin) { if (isMin) { return one < another ? one : another; } else { return one > another ? one : another; } } private Integer mergeMinMaxWithBlankStats(Integer one, Integer another, boolean isMin) { if (one == null || one == 0) { return another; } else { if (another != null && another != 0) { if (isMin) { return one < another ? one : another; } else { return one > another ? one : another; } } else { return one; } } } private Integer mergeSumStats(Integer one, Integer another) { if (one == null) { return another; } else { if (another != null) { return one + another; } else { return one; } } } @Override public void close() throws Exception { } }