/************************************************************************** OmegaT - Computer Assisted Translation (CAT) tool with fuzzy matching, translation memory, keyword search, glossaries, and translation leveraging into updated projects. Copyright (C) 2000-2006 Keith Godfrey and Maxym Mykhalchuk Home page: http://www.omegat.org/ Support center: http://groups.yahoo.com/group/OmegaT/ This file is part of OmegaT. OmegaT is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. OmegaT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. **************************************************************************/ package org.omegat.filters2.html2; import java.io.BufferedWriter; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStreamWriter; import java.io.StringWriter; import java.io.UnsupportedEncodingException; import java.io.Writer; import java.nio.charset.Charset; import java.util.regex.Matcher; import org.omegat.util.PatternConsts; /** * This class acts as an interceptor of output: First it collects all the output * inside itself in a string. Then it adds a META with a given charset (or * replaces the existing charset with a new one) and (if the file is XHTML and * has XML header) adds encoding declaration (or replaces the encoding in the * XML header). Next it writes out to the file. * <p> * Note that if <code>encoding</code> parameter of the * {@link #HTMLWriter(String, String) constructor} is null, no encoding * declaration is added, and the file is written in OS-default encoding. This is * done to fix a bug <a href="https://sourceforge.net/p/omegat/bugs/101/">[1.6 * RC2] Bug with Target Encoding set to <auto> for (x)HTML</a>. * * @author Maxym Mykhalchuk */ public class HTMLWriter extends Writer { /** Internal Buffer to collect the output */ private StringWriter writer; /** real writer to a file */ private BufferedWriter realWriter; /** Replacement string for HTML content-type META */ private String HTML_META; /** Replacement string for XML (XHTML) header */ private String XML_HEADER; /** * Encoding to write this file in. null value means no encoding declaration. */ private String encoding; /** HTML filter options. */ private HTMLOptions options; /** * Creates new HTMLWriter. * * @param fileName * - file name to write to * @param encoding * - the encoding to write HTML file in (null means OS-default * encoding) */ public HTMLWriter(String fileName, String encoding, HTMLOptions options) throws FileNotFoundException, UnsupportedEncodingException { this.encoding = encoding; this.options = options; writer = new StringWriter(); FileOutputStream fos = new FileOutputStream(fileName); OutputStreamWriter osw; if (encoding != null) { osw = new OutputStreamWriter(fos, encoding); } else { osw = new OutputStreamWriter(fos, Charset.defaultCharset()); } realWriter = new BufferedWriter(osw); } /** The minimal size of already written HTML that will be appended headers */ private static final int minHeaderedBufferSize = 4096; /** The maximal size of a buffer before flush */ private static final int maxBufferSize = 65536; /** * Signals that the writer is being closed, hence it needs to write any * (little) buffer out. */ private boolean signalClosing = false; /** * Signals that the writer was already flushed, i.e. already wrote out the * headers stuff. */ private boolean signalAlreadyFlushed = false; /** * Flushes the writer (which does the real write-out of data) and closes the * real writer. */ public void close() throws IOException { signalClosing = true; flush(); realWriter.close(); } /** * Does the real write-out of the data, first adding/replacing encoding * statement. */ public void flush() throws IOException { StringBuffer buffer = writer.getBuffer(); if (signalAlreadyFlushed || encoding == null) { // already flushed, i.e. already wrote out the headers stuff // or we don't add any metas (encoding is null) realWriter.write(buffer.toString()); buffer.setLength(0); } else if (signalClosing || buffer.length() >= minHeaderedBufferSize) { // else if we're closing or the buffer is big enough // to (hopefully) contain all the existing headers signalAlreadyFlushed = true; String contents = buffer.toString(); if (options.getRewriteEncoding() != HTMLOptions.REWRITE_MODE.NEVER) { Matcher matcher_header = PatternConsts.XML_HEADER.matcher(contents); boolean xhtml = false; if (matcher_header.find()) { XML_HEADER = "<?xml version=\"1.0\" encoding=\"" + encoding + "\"?>"; contents = matcher_header.replaceFirst(XML_HEADER); xhtml = true; } HTML_META = "<meta http-equiv=\"content-type\" content=\"text/html; charset=" + encoding + "\""; if (xhtml) HTML_META += " />"; else HTML_META += ">"; Matcher matcher_enc = PatternConsts.HTML_ENCODING.matcher(contents); if (matcher_enc.find()) contents = matcher_enc.replaceFirst(HTML_META); else if (options.getRewriteEncoding() != HTMLOptions.REWRITE_MODE.IFMETA) { Matcher matcher_head = PatternConsts.HTML_HEAD.matcher(contents); if (matcher_head.find()) contents = matcher_head.replaceFirst("<head>\n " + HTML_META); else if (options.getRewriteEncoding() != HTMLOptions.REWRITE_MODE.IFHEADER) { Matcher matcher_html = PatternConsts.HTML_HTML.matcher(contents); if (matcher_html.find()) contents = matcher_html.replaceFirst("<html>\n<head>\n " + HTML_META + "\n</head>\n"); else contents = "<html>\n<head>\n " + HTML_META + "\n</head>\n" + contents; } } } realWriter.write(contents); buffer.setLength(0); } } /** * Write a portion of an array of characters. Simply calls * <code>write(char[], int, int)</code> of the internal * <code>StringWriter</code>. * * @param cbuf * - Array of characters * @param off * - Offset from which to start writing characters * @param len * - Number of characters to write * @throws IOException * - If an I/O error occurs */ public void write(char[] cbuf, int off, int len) throws IOException { writer.write(cbuf, off, len); if (writer.getBuffer().length() >= maxBufferSize) flush(); } }