/**************************************************************************
OmegaT - Computer Assisted Translation (CAT) tool
with fuzzy matching, translation memory, keyword search,
glossaries, and translation leveraging into updated projects.
Copyright (C) 2015 Aaron Madlon-Kay
Home page: http://www.omegat.org/
Support center: http://groups.yahoo.com/group/OmegaT/
This file is part of OmegaT.
OmegaT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OmegaT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
**************************************************************************/
package org.omegat.util;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import org.mozilla.universalchardet.UniversalDetector;
public final class EncodingDetector {
private EncodingDetector() {
}
/**
* Detect the encoding of the supplied file.
* Convenience method for {@link #detectEncoding(java.io.InputStream)}.
*/
public static String detectEncoding(File inFile) throws IOException {
try (FileInputStream stream = new FileInputStream(inFile)) {
return detectEncoding(stream);
}
}
/**
* Detect the encoding of the supplied file. The caller is responsible for closing the stream.
*
* @see <a href="https://code.google.com/p/juniversalchardet/">Original</a>
* @see <a href="https://github.com/amake/juniversalchardet">Fork</a>
*/
public static String detectEncoding(InputStream stream) throws IOException {
UniversalDetector detector = new UniversalDetector(null);
byte[] buffer = new byte[4096];
int read;
while ((read = stream.read(buffer)) > 0 && !detector.isDone()) {
detector.handleData(buffer, 0, read);
}
detector.dataEnd();
String encoding = detector.getDetectedCharset();
detector.reset();
return encoding;
}
/**
* Detect the encoding of the supplied file. If detection fails, return the supplied
* default encoding.
*/
public static String detectEncodingDefault(File inFile, String defaultEncoding) {
String detected = null;
try {
detected = detectEncoding(inFile);
} catch (IOException ex) {
// Ignore
}
return detected == null ? defaultEncoding : detected;
}
}