/****************************************************************************************
* Copyright (c) 2009 Brennan D'Aguilar <brennan.daguilar@gmail.com> *
* *
* This program is free software; you can redistribute it and/or modify it under *
* the terms of the GNU General Public License as published by the Free Software *
* Foundation; either version 3 of the License, or (at your option) any later *
* version. *
* *
* This program is distributed in the hope that it will be useful, but WITHOUT ANY *
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A *
* PARTICULAR PURPOSE. See the GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License along with *
* this program. If not, see <http://www.gnu.org/licenses/>. *
****************************************************************************************/
package com.ichi2.utils;
/**
 * Converts the ruby (furigana) notation used by Anki's Japanese support into HTML for AnkiDroid.
 * <p>
 * Anki writes ruby text as {@code baseText[rubyText]}. The base text is the run of characters directly in front of
 * the opening bracket; it starts after the closest preceding space or {@code >} (the end of an HTML tag), or at the
 * beginning of the text when neither occurs before it. A single space directly in front of the base text is treated
 * as a separator and is removed from the output.
 * <p>
 * The notation is converted to basic ruby markup: {@code <ruby><rb>baseText</rb><rt>rubyText</rt></ruby>}. While
 * webkit on android devices does not support ruby markup yet, the text can be adjusted adequately using css.
 * <p>
 * Bracketed content that begins with {@code sound:} (Anki's audio reference syntax) is not a reading and is left
 * untouched.
 */
public class RubyParser {

    /**
     * Regular expression matching one {@code baseText[rubyText]} pair.
     * <ul>
     * <li>{@code " ?"} - optional separating space, consumed so that it does not appear in the output</li>
     * <li>group 1, {@code [^ >]+?} - the base text: anything except a space or the end of an HTML tag</li>
     * <li>{@code (?!sound:)} - negative lookahead rejecting {@code [sound:...]} references. A character class such
     * as {@code [^(sound:)]} must not be used here: it would reject every reading that merely starts with one of
     * the characters {@code ( s o u n d : )}</li>
     * <li>group 2, {@code .+?} - the non-empty ruby text, up to the first closing bracket that completes a match</li>
     * </ul>
     */
    private static final String RUBY_PATTERN = " ?([^ >]+?)\\[(?!sound:)(.+?)\\]";

    /** Replacement that wraps the base text (group 1) and its reading (group 2) in HTML ruby markup. */
    private static final String RUBY_MARKUP_REPLACEMENT = "<ruby><rb>$1</rb><rt>$2</rt></ruby>";

    /** Replacement that keeps only the reading (group 2) and drops the base text. */
    private static final String READING_ONLY_REPLACEMENT = "$2";


    /**
     * Converts ruby text from the format used by Anki's Japanese support plugin to html ruby markup.
     *
     * @param sourceText the japanese text containing ruby text; must not be {@code null}
     * @return html ruby markup equivalent of the input text; text without ruby notation is returned unchanged
     */
    public static String ankiRubyToMarkup(String sourceText) {
        return sourceText.replaceAll(RUBY_PATTERN, RUBY_MARKUP_REPLACEMENT);
    }


    /**
     * Strips kanji from ruby notation, leaving only the reading. Used for showing the reading in the question.
     *
     * @param sourceText the japanese text containing ruby text; must not be {@code null}
     * @return text with every {@code baseText[rubyText]} pair substituted by its reading
     */
    public static String ankiStripKanji(String sourceText) {
        return sourceText.replaceAll(RUBY_PATTERN, READING_ONLY_REPLACEMENT);
    }
}