/* * Copyright (C) 2000 - 2008 TagServlet Ltd * * This file is part of Open BlueDragon (OpenBD) CFML Server Engine. * * OpenBD is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * Free Software Foundation,version 3. * * OpenBD is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with OpenBD. If not, see http://www.gnu.org/licenses/ * * Additional permission under GNU GPL version 3 section 7 * * If you modify this Program, or any covered work, by linking or combining * it with any of the JARS listed in the README.txt (or a modified version of * (that library), containing parts covered by the terms of that JAR, the * licensors of this Program grant you additional permission to convey the * resulting work. * README.txt @ http://www.openbluedragon.org/license/README.txt * * http://www.openbluedragon.org/ */ package com.naryx.tagfusion.cfm.file; /** * This class is for handling source files that may or may not * use an encoding other than the jvm default. * * Create an instance with the File/InputStream that you wish to use, * then call getReader() with the same File */ import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.UnsupportedEncodingException; import org.apache.commons.vfs.FileObject; import com.nary.util.Localization; import com.naryx.tagfusion.cfm.engine.catchDataFactory; import com.naryx.tagfusion.cfm.engine.cfCatchData; import com.naryx.tagfusion.cfm.engine.cfmBadFileException; public class cfFileEncoding { private byte offset = 0; private String encoding; private boolean foundBOM; public cfFileEncoding( File _f ) throws cfmBadFileException, IOException{ init( _f ); } public cfFileEncoding( File _f, boolean _s ) throws cfmBadFileException, IOException{ init( _f, _s ); } public cfFileEncoding(FileObject _f) throws cfmBadFileException, IOException { init(_f.getContent().getInputStream()); } public cfFileEncoding(FileObject _f, boolean _s) throws cfmBadFileException, IOException { init(_f.getContent().getInputStream(), _s); } public cfFileEncoding(InputStream _i) throws cfmBadFileException { init(_i); } public boolean containsBOM() { return foundBOM; } public String getEncoding() { return encoding; } private void init(File _file) throws cfmBadFileException, FileNotFoundException { init(_file, true); } private void init(File _file, boolean _s) throws cfmBadFileException, FileNotFoundException { FileInputStream fileIn = new FileInputStream(_file); init(fileIn, _s); } private void init(InputStream _fileIn) throws cfmBadFileException { init(_fileIn, true); } /** * Initialise the class with the specified file * * @param _fileIn * - the file to get the encoding from * @param _search * - if true, the search for the file encoding will include looking * at the first 4096 bytes for the presence of a * cfprocessingdirective tag with pageencoding specified. * @throws cfmBadFileException */ private void init(InputStream _fileIn, boolean _search) throws cfmBadFileException { encoding = System.getProperty("file.encoding"); int buffersize = 4096; byte[] buffer = new byte[4096]; foundBOM = true; try { int bytesread = _fileIn.read(buffer, 0, buffersize); // read BOM (byte order mark) if (buffer[0] == (byte) 0xef && buffer[1] == (byte) 0xbb && buffer[2] == (byte) 0xbf) { // utf-8 encoding = Localization.convertCharSetToCharEncoding("utf-8"); offset = 3; } else if (buffer[0] == (byte) 0xff && buffer[1] == (byte) 0xfe) { // ucs-2le, // ucs-4le, // and // ucs-16le encoding = Localization.convertCharSetToCharEncoding("utf-16LE"); offset = 2; } else if (buffer[0] == (byte) 0xfe && buffer[1] == (byte) 0xff) { // utf-16 // and // ucs-2 encoding = Localization.convertCharSetToCharEncoding("utf-16BE"); offset = 2; } /* * utf-32BE/LE not currently supported else if ( buffer[0] == 0 && * buffer[1] == 0 && buffer[2] == (byte)0xfe && buffer[3] == (byte)0xff){ * // utf-32BE encoding = "utf-32BE"; ignoreBytes = 4; }else if ( * buffer[0] == (byte)0xff && buffer[1] == (byte)0xfe && buffer[2] == 0 && * buffer[3] == 0){ // utf-32LE encoding = "utf-32LE"; ignoreBytes = 4; } */ else { foundBOM = false; } // don't need to do the searching if (!_search) return; // now try looking for cfprocessingdirective String body = bytesread > 0 ? new String(buffer, 0, bytesread, encoding) : ""; String foundEnc = findProcessingDirective(body); // if the page encoding was discovered in a cfprocessingdirective and not // a BOM if (!foundBOM && foundEnc.length() != 0) { encoding = foundEnc; // else if it was discovered in both cfprocessingdirective and BOM, and // they didn't match } else if (foundBOM && foundEnc.length() != 0 && !encoding.equalsIgnoreCase(foundEnc)) { cfCatchData catchData = catchDataFactory.badEncodingException("The page encoding specified [" + encoding + "] via the CFPROCESSINGDIRECTIVE tag does not match the Byte Order Mark (BOM) of the file."); throw new cfmBadFileException(catchData); } } catch (IOException i) { cfCatchData catchData = catchDataFactory.badEncodingException("Failed to determine encoding due to error reading file [" + i.getMessage() + "]"); throw new cfmBadFileException(catchData); } finally { try { _fileIn.close(); } catch (Exception ignored) { } } } public BufferedReader getReader(String _filename) throws cfmBadFileException, FileNotFoundException, IOException { return getReader(new File(_filename)); } public BufferedReader getReader(File _file) throws FileNotFoundException, cfmBadFileException { FileInputStream fileIn = new FileInputStream(_file); // have to re-init since some has been read return new BufferedReader( getReader( fileIn ) ); } public InputStreamReader getReader( FileObject _file ) throws IOException, cfmBadFileException { return getReader( _file.getContent().getInputStream() ); // input stream is already buffered } public InputStreamReader getReader(InputStream _in) throws cfmBadFileException { if (offset > 0) { try { _in.read(new byte[offset]); // read in the BOM since we don't want to // render that part } catch (IOException ignored) { } // if somethings gone wrong let someone else deal with it } try { return new InputStreamReader( _in, encoding ); } catch (UnsupportedEncodingException u) { cfCatchData catchData = catchDataFactory.badEncodingException("The page encoding specified [" + encoding + "] is not supported."); throw new cfmBadFileException(catchData); } } // this searches for the cfprocessingdirective tag private static String findProcessingDirective(String _body) throws cfmBadFileException { cfFile file = new cfFile(_body); String pageEncoding = file.getEncoding(); return (pageEncoding != null ? pageEncoding : ""); } }