/* * Copyright (C) 2015-2017 Emanuel Moecklin * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.onegravity.rteditor.converter; import android.text.Annotation; import android.text.Editable; import android.text.Html; import android.text.Spanned; import com.onegravity.rteditor.api.format.RTHtml; import com.onegravity.rteditor.api.format.RTPlainText; import com.onegravity.rteditor.api.media.RTAudio; import com.onegravity.rteditor.api.media.RTImage; import com.onegravity.rteditor.api.media.RTVideo; import org.xml.sax.XMLReader; import java.util.Collections; import java.util.HashSet; import java.util.Locale; import java.util.Set; /** * Converts html to plain text */ public class ConverterHtmlToText { /** * When generating previews, Spannable objects that can't be converted into a String are * represented as 0xfffc. When displayed, these show up as undisplayed squares. These constants * define the object character and the replacement character. */ private static final char PREVIEW_OBJECT_CHARACTER = (char) 0xfffc; private static final char PREVIEW_OBJECT_REPLACEMENT = (char) 0x20; // space /** * toHtml() converts non-breaking spaces into the UTF-8 non-breaking space, which doesn't get * rendered properly in some clients. Replace it with a simple space. */ private static final char NBSP_CHARACTER = (char) 0x00a0; // utf-8 non-breaking space private static final char NBSP_REPLACEMENT = (char) 0x20; // space public static RTPlainText convert(RTHtml<? extends RTImage, ? extends RTAudio, ? extends RTVideo> input) { String result = Html.fromHtml(input.getText(), null, new HtmlToTextTagHandler()) .toString() .replace(PREVIEW_OBJECT_CHARACTER, PREVIEW_OBJECT_REPLACEMENT) .replace(NBSP_CHARACTER, NBSP_REPLACEMENT); return new RTPlainText(result); } public static String convert(String text) { return Html.fromHtml(text, null, new HtmlToTextTagHandler()) .toString() .replace(PREVIEW_OBJECT_CHARACTER, PREVIEW_OBJECT_REPLACEMENT) .replace(NBSP_CHARACTER, NBSP_REPLACEMENT); } /** * Custom tag handler to use when converting HTML messages to text. It currently handles text * representations of HTML tags that Android's built-in parser doesn't understand and hides code * contained in STYLE and SCRIPT blocks. */ private static class HtmlToTextTagHandler implements Html.TagHandler { // List of tags whose content should be ignored. private static final Set<String> TAGS_WITH_IGNORED_CONTENT; static { Set<String> set = new HashSet<String>(); set.add("style"); set.add("script"); set.add("title"); set.add("!"); // comments TAGS_WITH_IGNORED_CONTENT = Collections.unmodifiableSet(set); } @Override public void handleTag(boolean opening, String tag, Editable output, XMLReader xmlReader) { tag = tag.toLowerCase(Locale.US); if (tag.equals("hr") && opening) { // In the case of an <hr>, replace it with a bunch of underscores. This is roughly // the behaviour of Outlook in Rich Text mode. output.append("_____________________________________________\n"); } else if (TAGS_WITH_IGNORED_CONTENT.contains(tag)) { handleIgnoredTag(opening, output); } } private static final String IGNORED_ANNOTATION_KEY = "RT_ANNOTATION"; private static final String IGNORED_ANNOTATION_VALUE = "hiddenSpan"; /** * When we come upon an ignored tag, we mark it with an Annotation object with a specific key * and value as above. We don't really need to be checking these values since Html.fromHtml() * doesn't use Annotation spans, but we should do it now to be safe in case they do start using * it in the future. * * @param opening If this is an opening tag or not. * @param output Spannable string that we're working with. */ private void handleIgnoredTag(boolean opening, Editable output) { int len = output.length(); if (opening) { output.setSpan(new Annotation(IGNORED_ANNOTATION_KEY, IGNORED_ANNOTATION_VALUE), len, len, Spanned.SPAN_MARK_MARK); } else { Object start = getOpeningAnnotation(output); if (start != null) { int where = output.getSpanStart(start); // Remove the temporary Annotation span. output.removeSpan(start); // Delete everything between the start of the Annotation and the end of the string // (what we've generated so far). output.delete(where, len); } } } /** * Fetch the matching opening Annotation object and verify that it's the one added by us. * * @param output Spannable string we're working with. * @return Starting Annotation object. */ private Object getOpeningAnnotation(Editable output) { Object[] objs = output.getSpans(0, output.length(), Annotation.class); for (int i = objs.length - 1; i >= 0; i--) { Annotation span = (Annotation) objs[i]; if (output.getSpanFlags(objs[i]) == Spanned.SPAN_MARK_MARK && span.getKey().equals(IGNORED_ANNOTATION_KEY) && span.getValue().equals(IGNORED_ANNOTATION_VALUE)) { return objs[i]; } } return null; } } }