/**************************************************************************
OmegaT - Computer Assisted Translation (CAT) tool
with fuzzy matching, translation memory, keyword search,
glossaries, and translation leveraging into updated projects.
Copyright (C) 2009 Alex Buloichik
2010 Arno Peters
2013-2014 Alex Buloichik
2015 Aaron Madlon-Kay
Home page: http://www.omegat.org/
Support center: http://groups.yahoo.com/group/OmegaT/
This file is part of OmegaT.
OmegaT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OmegaT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
**************************************************************************/
package org.omegat.core.statistics;
import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.omegat.core.Core;
import org.omegat.core.data.IProject;
import org.omegat.core.data.IProject.FileInfo;
import org.omegat.core.data.ProjectProperties;
import org.omegat.core.data.ProtectedPart;
import org.omegat.core.data.SourceTextEntry;
import org.omegat.core.data.TMXEntry;
import org.omegat.core.threads.LongProcessThread;
import org.omegat.gui.stat.StatisticsPanel;
import org.omegat.util.OConsts;
import org.omegat.util.OStrings;
import org.omegat.util.StaticUtils;
import org.omegat.util.gui.TextUtil;
/**
* Thread that calculates the standard statistics.
*
* The calculation involves two kinds of tag stripping: one for computing the unique and remaining counts, and
* another for computing the number of words and characters.
*
* Counting words/chars only requires stripping all tags, protected parts and placeholders (see StatCount.java).
*
* For the standard calculation, computing the unique and remaining counts likewise only requires stripping all
* tags, protected parts and placeholders.
*
* @author Alex Buloichik (alex73mail@gmail.com)
* @author Arno Peters
* @author Aaron Madlon-Kay
*/
public class CalcStandardStatistics extends LongProcessThread {

    /** Column headers of the project summary table; the first (row label) column has an empty header. */
    private static final String[] HT_HEADERS = { "", OStrings.getString("CT_STATS_Segments"),
            OStrings.getString("CT_STATS_Words"), OStrings.getString("CT_STATS_Characters_NOSP"),
            OStrings.getString("CT_STATS_Characters"), OStrings.getString("CT_STATS_Files") };

    /** Row labels of the project summary table, in the order the counters are passed to calcHeaderTable. */
    private static final String[] HT_ROWS = { OStrings.getString("CT_STATS_Total"),
            OStrings.getString("CT_STATS_Remaining"), OStrings.getString("CT_STATS_Unique"),
            OStrings.getString("CT_STATS_Unique_Remaining") };

    /** Per-column alignment flags handed to TextUtil.showTextTable together with HT_HEADERS. */
    private static final boolean[] HT_ALIGN = new boolean[] { false, true, true, true, true, true };

    /** Column headers of the per-file table. */
    private static final String[] FT_HEADERS = { OStrings.getString("CT_STATS_FILE_Name"),
            OStrings.getString("CT_STATS_FILE_Total_Segments"),
            OStrings.getString("CT_STATS_FILE_Remaining_Segments"),
            OStrings.getString("CT_STATS_FILE_Unique_Segments"),
            OStrings.getString("CT_STATS_FILE_Unique_Remaining_Segments"),
            OStrings.getString("CT_STATS_FILE_Total_Words"),
            OStrings.getString("CT_STATS_FILE_Remaining_Words"),
            OStrings.getString("CT_STATS_FILE_Unique_Words"),
            OStrings.getString("CT_STATS_FILE_Unique_Remaining_Words"),
            OStrings.getString("CT_STATS_FILE_Total_Characters_NOSP"),
            OStrings.getString("CT_STATS_FILE_Remaining_Characters_NOSP"),
            OStrings.getString("CT_STATS_FILE_Unique_Characters_NOSP"),
            OStrings.getString("CT_STATS_FILE_Unique_Remaining_Characters_NOSP"),
            OStrings.getString("CT_STATS_FILE_Total_Characters"),
            OStrings.getString("CT_STATS_FILE_Remaining_Characters"),
            OStrings.getString("CT_STATS_FILE_Unique_Characters"),
            OStrings.getString("CT_STATS_FILE_Unique_Remaining_Characters"), };

    /** Per-column alignment flags handed to TextUtil.showTextTable together with FT_HEADERS. */
    private static final boolean[] FT_ALIGN = { false, true, true, true, true, true, true, true,
            true, true, true, true, true, true, true, true, true, };

    /** Panel that receives the computed statistics. */
    private final StatisticsPanel callback;

    /**
     * Creates the calculation thread.
     *
     * @param callback
     *            panel that receives the statistics text, the table data and the name of the written file
     */
    public CalcStandardStatistics(StatisticsPanel callback) {
        this.callback = callback;
    }

    /**
     * Computes the statistics of the current project, hands them to the callback panel and writes them to
     * the statistics file in the project's internal directory.
     */
    @Override
    public void run() {
        IProject p = Core.getProject();
        String result = buildProjectStats(p, null, callback);
        callback.setTextData(result);
        callback.finishData();

        String internalDir = p.getProjectProperties().getProjectInternal();

        // Best-effort removal of the old "word_counts" stats file: failing to delete it must not prevent
        // the new statistics from being written below.
        try {
            File oldstats = new File(internalDir + "word_counts");
            if (oldstats.exists()) {
                oldstats.delete();
            }
        } catch (SecurityException ignored) {
            // Not allowed to inspect or delete the old file; leave it in place.
        }

        // now dump file based word counts to disk
        String fn = internalDir + OConsts.STATS_FILENAME;
        Statistics.writeStat(fn, result);
        callback.setDataFile(fn);
    }

    /**
     * Convenience method: same as {@link #buildProjectStats(IProject, StatisticsInfo, StatisticsPanel)}
     * without a callback panel.
     *
     * @param project
     *            project to analyse
     * @param hotStat
     *            object to fill with summary numbers, or null
     * @return the statistics rendered as text
     */
    public static String buildProjectStats(final IProject project, final StatisticsInfo hotStat) {
        return buildProjectStats(project, hotStat, null);
    }

    /**
     * Builds a text with statistic info about the project: the total word and character count of the
     * project, the total number of unique segments, plus the details for each file.
     *
     * @param project
     *            project to analyse
     * @param hotStat
     *            if not null, receives the total, translated and unique segment numbers and the unique
     *            segment count of every file
     * @param callback
     *            if not null, receives the data of the project table and of the files table
     * @return the statistics rendered as text (project table followed by the per-file table)
     */
    public static String buildProjectStats(final IProject project, final StatisticsInfo hotStat,
            final StatisticsPanel callback) {
        StatCount total = new StatCount();
        StatCount remaining = new StatCount();
        StatCount unique = new StatCount();
        StatCount remainingUnique = new StatCount();

        // find unique segments: the first entry seen for each uniqueness key represents that key
        Map<String, SourceTextEntry> uniqueSegment = new HashMap<String, SourceTextEntry>();
        // uniqueness keys for which at least one entry is translated
        Set<String> translated = new HashSet<String>();
        for (SourceTextEntry ste : project.getAllEntries()) {
            String src = uniquenessKey(ste);
            if (!uniqueSegment.containsKey(src)) {
                uniqueSegment.put(src, ste);
            }
            TMXEntry tr = project.getTranslationInfo(ste);
            if (tr.isTranslated()) {
                translated.add(src);
            }
        }

        Set<String> filesUnique = new HashSet<String>();
        Set<String> filesRemainingUnique = new HashSet<String>();
        for (Map.Entry<String, SourceTextEntry> en : uniqueSegment.entrySet()) {
            SourceTextEntry representative = en.getValue();
            /* Number of words and chars calculated without all tags and protected parts. */
            StatCount count = new StatCount(representative);

            // add to unique
            unique.add(count);
            filesUnique.add(representative.getKey().file);

            // add to unique remaining
            if (!translated.contains(en.getKey())) {
                remainingUnique.add(count);
                filesRemainingUnique.add(representative.getKey().file);
            }
        }
        unique.addFiles(filesUnique.size());
        remainingUnique.addFiles(filesRemainingUnique.size());

        List<FileData> counts = new ArrayList<FileData>();
        // Uniqueness keys already attributed to a file: a segment counts as unique only in the first file
        // (in project file order) in which it appears.
        Set<String> seenUniqueSegments = new HashSet<String>();
        for (FileInfo file : project.getProjectFiles()) {
            FileData numbers = new FileData();
            numbers.filename = file.filePath;
            counts.add(numbers);

            // 1 when the file contributes at least one (remaining) segment, otherwise 0
            int fileTotal = 0;
            int fileRemaining = 0;
            for (SourceTextEntry ste : file.entries) {
                String src = uniquenessKey(ste);
                /* Number of words and chars calculated without all tags and protected parts. */
                StatCount count = new StatCount(ste);

                // add to total
                total.add(count);
                fileTotal = 1;

                // add to remaining
                TMXEntry tr = project.getTranslationInfo(ste);
                boolean untranslated = !tr.isTranslated();
                if (untranslated) {
                    remaining.add(count);
                    fileRemaining = 1;
                }

                // add to file's info
                numbers.total.add(count);
                if (seenUniqueSegments.add(src)) {
                    // first occurrence of this segment in the whole project
                    numbers.unique.add(count);
                    if (untranslated) {
                        numbers.remainingUnique.add(count);
                    }
                }
                if (untranslated) {
                    numbers.remaining.add(count);
                }
            }
            total.addFiles(fileTotal);
            remaining.addFiles(fileRemaining);
        }

        StringBuilder result = new StringBuilder();

        result.append(OStrings.getString("CT_STATS_Project_Statistics"));
        result.append("\n\n");

        String[][] headerTable = calcHeaderTable(new StatCount[] { total, remaining, unique, remainingUnique });
        if (callback != null) {
            callback.setProjectTableData(HT_HEADERS, headerTable);
        }
        result.append(TextUtil.showTextTable(HT_HEADERS, headerTable, HT_ALIGN));
        result.append("\n\n");

        // STATISTICS BY FILE
        result.append(OStrings.getString("CT_STATS_FILE_Statistics"));
        result.append("\n\n");

        String[][] filesTable = calcFilesTable(project.getProjectProperties(), counts);
        if (callback != null) {
            callback.setFilesTableData(FT_HEADERS, filesTable);
        }
        result.append(TextUtil.showTextTable(FT_HEADERS, filesTable, FT_ALIGN));

        if (hotStat != null) {
            hotStat.numberOfSegmentsTotal = total.segments;
            // number of distinct uniqueness keys that have at least one translated entry
            hotStat.numberofTranslatedSegments = translated.size();
            hotStat.numberOfUniqueSegments = unique.segments;
            hotStat.uniqueCountsByFile.clear();
            for (FileData fd : counts) {
                hotStat.uniqueCountsByFile.put(fd.filename, fd.unique.segments);
            }
        }

        return result.toString();
    }

    /**
     * Returns the text used to decide whether two entries are the same segment: the source text with every
     * protected part replaced by its uniqueness-calculation replacement.
     */
    private static String uniquenessKey(final SourceTextEntry ste) {
        String src = ste.getSrcText();
        for (ProtectedPart pp : ste.getProtectedParts()) {
            src = src.replace(pp.getTextInSourceSegment(), pp.getReplacementUniquenessCalculation());
        }
        return src;
    }

    /**
     * Converts the project-level counters into the rows of the project summary table.
     *
     * @param result
     *            counters, one per row, in the order of the row labels (total, remaining, unique, unique
     *            remaining)
     * @return one row per counter: label, segments, words, characters without spaces, characters with
     *         spaces, files
     */
    protected static String[][] calcHeaderTable(final StatCount[] result) {
        String[][] table = new String[result.length][HT_HEADERS.length];

        for (int i = 0; i < result.length; i++) {
            table[i][0] = HT_ROWS[i];
            table[i][1] = Integer.toString(result[i].segments);
            table[i][2] = Integer.toString(result[i].words);
            table[i][3] = Integer.toString(result[i].charsWithoutSpaces);
            table[i][4] = Integer.toString(result[i].charsWithSpaces);
            table[i][5] = Integer.toString(result[i].files);
        }
        return table;
    }

    /**
     * Converts the per-file counters into the rows of the files table.
     *
     * @param config
     *            project properties; the source root is used to make the file names relative
     * @param counts
     *            counters of every file, one row each
     * @return one row per file: relative file name followed by the total / remaining / unique / unique
     *         remaining values for segments, words, characters without spaces and characters with spaces
     */
    protected static String[][] calcFilesTable(final ProjectProperties config, final List<FileData> counts) {
        String[][] table = new String[counts.size()][FT_HEADERS.length];

        int r = 0;
        for (FileData numbers : counts) {
            table[r][0] = StaticUtils.makeFilenameRelative(numbers.filename, config.getSourceRoot());
            table[r][1] = Integer.toString(numbers.total.segments);
            table[r][2] = Integer.toString(numbers.remaining.segments);
            table[r][3] = Integer.toString(numbers.unique.segments);
            table[r][4] = Integer.toString(numbers.remainingUnique.segments);
            table[r][5] = Integer.toString(numbers.total.words);
            table[r][6] = Integer.toString(numbers.remaining.words);
            table[r][7] = Integer.toString(numbers.unique.words);
            table[r][8] = Integer.toString(numbers.remainingUnique.words);
            table[r][9] = Integer.toString(numbers.total.charsWithoutSpaces);
            table[r][10] = Integer.toString(numbers.remaining.charsWithoutSpaces);
            table[r][11] = Integer.toString(numbers.unique.charsWithoutSpaces);
            table[r][12] = Integer.toString(numbers.remainingUnique.charsWithoutSpaces);
            table[r][13] = Integer.toString(numbers.total.charsWithSpaces);
            table[r][14] = Integer.toString(numbers.remaining.charsWithSpaces);
            table[r][15] = Integer.toString(numbers.unique.charsWithSpaces);
            table[r][16] = Integer.toString(numbers.remainingUnique.charsWithSpaces);
            r++;
        }
        return table;
    }

    /** Counters collected for a single project file. */
    public static class FileData {
        /** File path as reported by the project's file info. */
        public String filename;
        public StatCount total, unique, remaining, remainingUnique;

        /** Creates the data holder with all four counters freshly initialised. */
        public FileData() {
            total = new StatCount();
            unique = new StatCount();
            remaining = new StatCount();
            remainingUnique = new StatCount();
        }
    }
}