/* * :tabSize=4:indentSize=4:noTabs=false: * :folding=explicit:collapseFolds=1: * * Copyright (C) 2008 Kazutoshi Satoda * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or any later version. * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ package org.gjt.sp.jedit.io; import java.io.InputStream; import java.io.InputStreamReader; import java.io.IOException; import java.util.regex.Pattern; import java.util.regex.Matcher; import java.nio.CharBuffer; /** * An encoding detector which finds regex pattern. * * This reads the sample in the system default encoding for first some * lines and look for a regex pattern. This can fail if the * stream cannot be read in the system default encoding or the * pattern is not found at near the top of the stream. * * @since 4.3pre16 * @author Kazutoshi Satoda */ public class RegexEncodingDetector implements EncodingDetector { /** * A regex pattern matches to "Charset names" specified for * java.nio.charset.Charset. * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html#names">Charset names</a> */ public static final String VALID_ENCODING_PATTERN = "\\p{Alnum}[\\p{Alnum}\\-.:_]*"; private final Pattern pattern; private final String replacement; public RegexEncodingDetector(String pattern, String replacement) { this.pattern = Pattern.compile(pattern); this.replacement = replacement; } public String detectEncoding(InputStream sample) throws IOException { InputStreamReader reader = new InputStreamReader(sample); final int bufferSize = 1024; char[] buffer = new char[bufferSize]; int readSize = reader.read(buffer, 0, bufferSize); if (readSize > 0) { Matcher matcher = pattern.matcher( CharBuffer.wrap(buffer, 0, readSize)); // Tracking of this implicit state within Matcher // is required to know where is the start of // replacement after calling appendReplacement(). int appendPosition = 0; while (matcher.find()) { String extracted = extractReplacement( matcher, appendPosition, replacement); if (EncodingServer.hasEncoding(extracted)) { return extracted; } appendPosition = matcher.end(); } } return null; } /** * Returns a replaced string for a Matcher which has been matched * by find() method. */ private static String extractReplacement( Matcher found, int appendPosition, String replacement) { /* * It doesn't make sense to read before start, but * appendReplacement() requires to to it. */ int found_start = found.start(); int found_end = found.end(); int source_length = found_end - found_start; int length_before_match = found_start - appendPosition; StringBuffer replaced = new StringBuffer( length_before_match + (source_length * 2)); found.appendReplacement(replaced, replacement); return replaced.substring(length_before_match); } }