/**************************************************************************
OmegaT - Computer Assisted Translation (CAT) tool
with fuzzy matching, translation memory, keyword search,
glossaries, and translation leveraging into updated projects.
Copyright (C) 2000-2006 Keith Godfrey, Maxym Mykhalchuk, and Henry Pijffers
2007 Zoltan Bartko
2009 Didier Briel, Alex Buloichik
2012 Thomas Cordonnier
Home page: http://www.omegat.org/
Support center: http://groups.yahoo.com/group/OmegaT/
This file is part of OmegaT.
OmegaT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OmegaT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
**************************************************************************/
package org.omegat.core.statistics;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.text.BreakIterator;
import java.text.DateFormat;
import java.util.Date;
import org.omegat.tokenizer.DefaultTokenizer;
import org.omegat.util.Log;
import org.omegat.util.PatternConsts;
import org.omegat.util.StaticUtils;
/**
* Utility methods for computing text statistics (character and word counts)
* and for saving project statistics to a text file.
*
* @author Keith Godfrey
* @author Henry Pijffers (henry.pijffers@saxnot.com)
* @author Maxym Mykhalchuk
* @author Zoltan Bartko (bartkozoltan@bartkozoltan.com)
* @author Didier Briel
* @author Alex Buloichik (alex73mail@gmail.com)
* @author Thomas Cordonnier
*/
public class Statistics {
    // NOTE(review): these values lie above 100, so they presumably act as
    // pseudo-percentage markers for "exact match" and "repetition" rows next
    // to real 0-100 percentages -- confirm against the code that uses them.
    protected static final int PERCENT_EXACT_MATCH = 101;
    protected static final int PERCENT_REPETITIONS = 102;

    /**
     * Computes the number of characters excluding spaces in a string.
     * <p>
     * Characters are counted as Unicode code points, so a surrogate pair
     * counts once. The special tag replacement character
     * ({@code StaticUtils.TAG_REPLACEMENT_CHAR}) is not counted. "Space" means
     * {@link Character#isSpaceChar(int)}: Unicode space, line and paragraph
     * separators. Control characters such as tab or line feed are not space
     * characters in that sense and therefore are counted.
     *
     * @param str
     *            the text to measure; must not be {@code null}
     * @return the number of code points that are neither a space character nor
     *         the tag replacement character
     */
    public static int numberOfCharactersWithoutSpaces(String str) {
        int chars = 0;
        // Step by code point, not by char, so supplementary characters are
        // counted once.
        for (int cp, i = 0; i < str.length(); i += Character.charCount(cp)) {
            cp = str.codePointAt(i);
            if (cp != StaticUtils.TAG_REPLACEMENT_CHAR && !Character.isSpaceChar(cp)) {
                chars++;
            }
        }
        return chars;
    }

    /**
     * Computes the number of characters including spaces in a string.
     * <p>
     * Characters are counted as Unicode code points, so a surrogate pair
     * counts once. The special tag replacement character
     * ({@code StaticUtils.TAG_REPLACEMENT_CHAR}) is not counted.
     *
     * @param str
     *            the text to measure; must not be {@code null}
     * @return the number of code points other than the tag replacement
     *         character
     */
    public static int numberOfCharactersWithSpaces(String str) {
        int chars = 0;
        for (int cp, i = 0; i < str.length(); i += Character.charCount(cp)) {
            cp = str.codePointAt(i);
            if (cp != StaticUtils.TAG_REPLACEMENT_CHAR) {
                chars++;
            }
        }
        return chars;
    }

    /**
     * Computes the number of words in a string.
     * <p>
     * The string is split with the word {@link BreakIterator} supplied by
     * {@code DefaultTokenizer.getWordBreaker()}. A token is counted as a word
     * when it contains at least one letter or digit and does not match
     * {@code PatternConsts.OMEGAT_TAG} in its entirety.
     *
     * @param str
     *            the text to measure; must not be {@code null}
     * @return the number of word tokens, {@code 0} for an empty string
     */
    public static int numberOfWords(String str) {
        if (str.isEmpty()) {
            return 0;
        }

        BreakIterator breaker = DefaultTokenizer.getWordBreaker();
        breaker.setText(str);

        int nTokens = 0;
        int start = breaker.first();
        for (int end = breaker.next(); end != BreakIterator.DONE; start = end, end = breaker.next()) {
            String tokenStr = str.substring(start, end);
            // Punctuation/whitespace-only tokens and OmegaT tags are not words.
            if (containsLetterOrDigit(tokenStr) && !PatternConsts.OMEGAT_TAG.matcher(tokenStr).matches()) {
                nTokens++;
            }
        }
        return nTokens;
    }

    /** Returns whether the token holds at least one letter or digit code point. */
    private static boolean containsLetterOrDigit(String token) {
        for (int cp, i = 0; i < token.length(); i += Character.charCount(cp)) {
            cp = token.codePointAt(i);
            if (Character.isLetterOrDigit(cp)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Writes text to a file, preceded by a line holding the current date and
     * time.
     * <p>
     * The file is created or overwritten and encoded as UTF-8. The timestamp
     * is formatted with {@link DateFormat#getInstance()} and terminated with
     * {@code "\n"}. This is a best-effort operation: any exception is passed to
     * {@code Log.log} and not propagated to the caller.
     *
     * @param filename
     *            path of the file to write
     * @param text
     *            content written after the timestamp line
     */
    public static void writeStat(String filename, String text) {
        // try-with-resources keeps a write failure as the primary exception;
        // a failure in close() is attached as suppressed instead of masking
        // it. close() also flushes the writer, so no explicit flush is needed.
        try (OutputStreamWriter out = new OutputStreamWriter(new FileOutputStream(filename),
                StandardCharsets.UTF_8)) {
            out.write(DateFormat.getInstance().format(new Date()) + "\n");
            out.write(text);
        } catch (Exception ex) {
            Log.log(ex);
        }
    }
}