/*
* Copyright (C) 2010-2011 Geometer Plus <contact@geometerplus.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
package org.geometerplus.fbreader.network.opds;
import java.util.HashMap;
import java.io.ByteArrayInputStream;
import java.io.UnsupportedEncodingException;
import org.geometerplus.zlibrary.core.html.*;
import org.geometerplus.zlibrary.core.constants.MimeTypes;
import org.geometerplus.zlibrary.core.xml.ZLXMLProcessor;
import org.geometerplus.zlibrary.core.xml.ZLStringMap;
import org.geometerplus.fbreader.formats.xhtml.XHTMLReader;
import org.geometerplus.fbreader.network.atom.ATOMConstants;
/**
 * Accumulates the text content of a feed element and, when the declared
 * content type is HTML or XHTML, converts the accumulated markup to plain text.
 *
 * <p>Usage as implied by the methods: call {@link #setupTextContent(String)},
 * then {@link #processTextContent} for each nested tag event, and finally
 * {@link #finishTextContent(String)} to obtain the result.
 *
 * <p>Modified by hym so that HTML containing Chinese text is handled
 * (character data is delivered as {@code char[]} rather than bytes).
 */
public class HtmlToString {
    /** Tag of the most recent opening event, or {@code null} once a closing tag has been seen. */
    private String myLastOpenedTag;
    /** Content type of the text currently being collected; {@code null} between elements. */
    private String myTextType;
    private final StringBuilder myTextContent = new StringBuilder();
    private final HtmlToStringReader myHtmlToStringReader = new HtmlToStringReader();

    /**
     * Starts collecting a new piece of text content and discards anything collected so far.
     *
     * @param type content type of the text; {@code null} selects {@code ATOMConstants.TYPE_DEFAULT}
     */
    public void setupTextContent(String type) {
        myTextType = (type != null) ? type : ATOMConstants.TYPE_DEFAULT;
        myTextContent.setLength(0);
    }

    /**
     * Finishes the current text content and resets the collector.
     *
     * @param bufferContent trailing character data to append first, may be {@code null}
     * @return the trimmed content, converted to plain text when the content type is
     *         HTML or XHTML; {@code null} when the trimmed content is empty
     */
    public String finishTextContent(String bufferContent) {
        if (bufferContent != null) {
            myTextContent.append(bufferContent);
        }
        String result = myTextContent.toString().trim();
        if (result.length() == 0) {
            result = null;
        } else if (isHtmlType(myTextType)) {
            myHtmlToStringReader.readFromString(result);
            result = myHtmlToStringReader.getString();
        }
        myTextType = null;
        myTextContent.setLength(0);
        return result;
    }

    /**
     * Handles a tag event that occurs inside the text content.
     *
     * <p>The pending character data is always appended. For XHTML content the tag
     * itself is additionally re-serialized into the buffer so that the markup can be
     * converted later by {@link #finishTextContent(String)}.
     *
     * @param closeTag      {@code true} for a closing tag, {@code false} for an opening tag
     * @param tag           tag name
     * @param attributes    attributes of an opening tag; ignored for closing tags
     * @param bufferContent character data preceding this tag, may be {@code null}
     */
    public void processTextContent(boolean closeTag, String tag, ZLStringMap attributes, String bufferContent) {
        if (bufferContent != null) {
            myTextContent.append(bufferContent);
        }
        if (!isXhtmlType(myTextType)) {
            return;
        }

        if (closeTag) {
            final int last = myTextContent.length() - 1;
            // An element closed immediately after it was opened (no text in between)
            // is collapsed into the self-closing form: "<tag >" becomes "<tag />".
            if (bufferContent == null
                    && sameTag(tag, myLastOpenedTag)
                    && last >= 0
                    && myTextContent.charAt(last) == '>') {
                myTextContent.insert(last, '/'); // TODO: Is it necessary in HTML???????
            } else {
                myTextContent.append("</").append(tag).append(">");
            }
            myLastOpenedTag = null;
        } else {
            myLastOpenedTag = tag;
            myTextContent.append("<").append(tag);
            if (attributes != null) {
                for (int i = 0; i < attributes.getSize(); ++i) {
                    final String key = attributes.getKey(i);
                    final String value = attributes.getValue(key);
                    myTextContent.append(" ").append(key).append("=\"");
                    if (value != null) {
                        myTextContent.append(value);
                    }
                    myTextContent.append("\"");
                }
            }
            myTextContent.append(" >");
        }
    }

    /** Returns whether {@code type} denotes XHTML content (compared by value, not identity). */
    private static boolean isXhtmlType(String type) {
        return type != null
            && (type.equals(ATOMConstants.TYPE_XHTML) || type.equals(MimeTypes.MIME_TEXT_XHTML));
    }

    /** Returns whether {@code type} denotes HTML or XHTML content (compared by value, not identity). */
    private static boolean isHtmlType(String type) {
        return type != null
            && (type.equals(ATOMConstants.TYPE_HTML)
                || type.equals(MimeTypes.MIME_TEXT_HTML)
                || isXhtmlType(type));
    }

    /** Null-safe tag name comparison. */
    private static boolean sameTag(String first, String second) {
        return first == second || (first != null && first.equals(second));
    }

    /**
     * HTML reader callback that flattens a document into plain text: runs of
     * whitespace collapse to a single space, {@code <br>} and {@code </p>} produce a
     * line break, {@code <hr>} produces an empty line, and entities are expanded.
     */
    private static class HtmlToStringReader implements ZLHtmlReader {
        private final StringBuilder myBuffer = new StringBuilder();
        // hym: changed from byte[] to char[] so that non-ASCII (e.g. Chinese) text is kept intact
        private char[] myCharData;
        private int myCharDataLength;
        /** Entity name to expansion; created lazily, also caches numeric and unknown entities. */
        private HashMap<String,char[]> myEntityMap;

        /**
         * Wraps the fragment into a complete XHTML document and feeds it to
         * {@code ZLHtmlProcessor}, which calls back into this reader.
         *
         * @param htmlString HTML fragment to convert
         */
        public void readFromString(String htmlString) {
            final StringBuilder html = new StringBuilder();
            html.append("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">")
                .append("<html><head>")
                .append("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />")
                .append("<title></title>")
                .append("</head><body>")
                .append(htmlString)
                .append("</body></html>");
            final byte[] bytes;
            try {
                bytes = html.toString().getBytes("UTF-8");
            } catch (UnsupportedEncodingException ex) {
                throw new RuntimeException("It's impossible!!! UTF-8 charset is not supported!!!", ex);
            }
            ZLHtmlProcessor.read(this, new ByteArrayInputStream(bytes));
        }

        /** Returns the plain text collected so far, trimmed. */
        public String getString() {
            return myBuffer.toString().trim();
        }

        public void startDocumentHandler() {
            myBuffer.setLength(0);
            myCharDataLength = 0;
        }

        public void endDocumentHandler() {
            flushCharData();
        }

        public void startElementHandler(String tag, int offset, ZLHtmlAttributeMap attributes) {
            flushCharData();
            if ("br".equalsIgnoreCase(tag)) {
                if (myBuffer.length() > 0) {
                    myBuffer.append('\n');
                }
            } else if ("hr".equalsIgnoreCase(tag)) {
                if (myBuffer.length() > 0) {
                    // Finish the current line if necessary, then add an empty one.
                    if (myBuffer.charAt(myBuffer.length() - 1) != '\n') {
                        myBuffer.append('\n');
                    }
                    myBuffer.append('\n');
                }
            }
        }

        public void endElementHandler(String tag) {
            flushCharData();
            if ("p".equalsIgnoreCase(tag)) {
                if (myBuffer.length() > 0) {
                    myBuffer.append('\n');
                }
            }
        }

        /**
         * Moves the pending character data into the output buffer. Leading and
         * trailing whitespace of the chunk is dropped, inner whitespace runs become a
         * single space, and the chunk is separated from preceding output by a space
         * unless that output already ends with whitespace.
         */
        private void flushCharData() {
            final int length = myCharDataLength;
            if (length == 0) {
                return;
            }
            myCharDataLength = 0;

            final int written = myBuffer.length();
            if (written > 0 && !Character.isWhitespace(myBuffer.charAt(written - 1))) {
                myBuffer.append(' ');
            }

            int index = 0;
            while (index < length && Character.isWhitespace(myCharData[index])) {
                ++index;
            }
            boolean pendingSpace = false;
            for (; index < length; ++index) {
                final char ch = myCharData[index];
                if (Character.isWhitespace(ch)) {
                    pendingSpace = true;
                } else {
                    if (pendingSpace) {
                        myBuffer.append(' ');
                        pendingSpace = false;
                    }
                    myBuffer.append(ch);
                }
            }
        }

        /**
         * Appends the expansion of an entity. Named entities come from the map
         * returned by {@code ZLXMLProcessor.getEntityMap(XHTMLReader.xhtmlDTDs())};
         * numeric references ({@code #NNN}, {@code #xHHH}) are decoded here. Entities
         * that cannot be resolved expand to nothing.
         *
         * @param entity entity name without the surrounding {@code &} and {@code ;}
         */
        public void entityDataHandler(String entity) {
            flushCharData();
            if (entity.length() == 0) {
                return;
            }
            if (myEntityMap == null) {
                myEntityMap = new HashMap<String,char[]>(ZLXMLProcessor.getEntityMap(XHTMLReader.xhtmlDTDs()));
            }
            char[] data = myEntityMap.get(entity);
            if (data == null) {
                data = decodeNumericEntity(entity);
                myEntityMap.put(entity, data);
            }
            //System.err.println("FBREADER -- ENTITY: &" + entity + "; --> " + new String(data));
            myBuffer.append(data);
        }

        /**
         * Decodes a numeric character reference.
         *
         * @param entity entity name, e.g. {@code #65} or {@code #x41}
         * @return the UTF-16 representation of the referenced code point, or an empty
         *         array when {@code entity} is not a valid numeric reference
         */
        private static char[] decodeNumericEntity(String entity) {
            if (entity.length() >= 2 && entity.charAt(0) == '#') {
                try {
                    final char marker = entity.charAt(1);
                    final int codePoint;
                    if (marker == 'x' || marker == 'X') {
                        codePoint = Integer.parseInt(entity.substring(2), 16);
                    } else {
                        codePoint = Integer.parseInt(entity.substring(1));
                    }
                    if (Character.isValidCodePoint(codePoint)) {
                        // toChars yields a surrogate pair for code points above U+FFFF.
                        return Character.toChars(codePoint);
                    }
                } catch (NumberFormatException ignored) {
                    // Malformed reference: treated as an unknown entity below.
                }
            }
            return new char[0];
        }

        /**
         * Buffers a chunk of character data until the next tag, entity or end of document.
         * (hym: parameter changed from byte[] to char[].)
         */
        public void charDataHandler(char[] data, int start, int length) {
            if (length <= 0) {
                return;
            }
            final int required = myCharDataLength + length;
            if (myCharData == null) {
                myCharData = new char[required];
            } else if (myCharData.length < required) {
                // Grow geometrically so that many small chunks do not each force a reallocation.
                final char[] grown = new char[Math.max(required, myCharData.length * 2)];
                System.arraycopy(myCharData, 0, grown, 0, myCharDataLength);
                myCharData = grown;
            }
            System.arraycopy(data, start, myCharData, myCharDataLength, length);
            myCharDataLength = required;
        }
    }
}