package net.bible.service.device.speak;
import android.content.SharedPreferences;
import android.util.Log;
import net.bible.service.common.CommonUtils;
import org.apache.commons.lang3.StringUtils;
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Keeps track of the list of text chunks being fed to TTS and of the position
 * reached within the current chunk, so that speech can be paused, rewound,
 * fast-forwarded and resumed (optionally after the state has been persisted).
 *
 * @author Martin Denham [mjdenham at gmail dot com]
 * @see gnu.lgpl.License for license details.<br>
 *      The copyright to this program is held by its author.
 */
public class SpeakTextProvider {

    /** All chunks queued for speaking, each short enough to be passed to TTS in one go. */
    private List<String> mTextToSpeak = new ArrayList<String>();

    /** Index into {@link #mTextToSpeak} of the chunk that will be returned next. */
    private int nextTextToSpeak = 0;

    // this fraction supports pause/rew/ff; if 0 then speech occurs normally, if 0.5 then next speech chunk is half completed...
    private float fractionOfNextSentenceSpoken = 0;

    // Before ICS Android would split up long text for you but since ICS this error occurs:
    //    if (mText.length() >= MAX_SPEECH_ITEM_CHAR_LENGTH) {
    //        Log.w(TAG, "Text too long: " + mText.length() + " chars");
    private static final int MAX_SPEECH_ITEM_CHAR_LENGTH = 4000;

    // require DOTALL to allow . to match new lines which occur in books like JOChrist
    // matches 100-2000 characters followed by a lower case word, a sentence terminator and trailing whitespace
    private static final Pattern BREAK_PATTERN = Pattern.compile(".{100,2000}[a-z]+[.?!][\\s]{1,}+", Pattern.DOTALL);

    /** Result of searching for a sentence start within a chunk. */
    private static class StartPos {
        /** true if a sentence boundary was located */
        private boolean found = false;
        /** offset in the chunk at which speech should (re)start */
        private int startPosition = 0;
        /** the chunk text from startPosition to the end */
        private String text = "";
        /** startPosition expressed as a fraction of the whole chunk length */
        private float actualFractionOfWhole = 1;
    }

    // enable state to be persisted if paused for a long time
    private static final String PERSIST_SPEAK_TEXT = "SpeakText";
    private static final String PERSIST_SPEAK_TEXT_SEPARATOR = "XXSEPXX";
    private static final String PERSIST_NEXT_TEXT = "NextText";
    private static final String PERSIST_FRACTION_SPOKEN = "FractionSpoken";

    private static final String TAG = "Speak";

    /**
     * Appends the given texts to the speak queue, splitting each one into chunks
     * shorter than {@link #MAX_SPEECH_ITEM_CHAR_LENGTH}.
     *
     * @param textsToSpeak texts to queue, in speaking order
     */
    public void addTextsToSpeak(List<String> textsToSpeak) {
        for (String text : textsToSpeak) {
            this.mTextToSpeak.addAll(breakUpText(text));
        }
        Log.d(TAG, "Total Num blocks in speak queue:"+mTextToSpeak.size());
    }

    /**
     * @return true if at least one queued chunk has not yet been handed out
     */
    public boolean isMoreTextToSpeak() {
        //TODO: there seems to be an occasional problem when using ff/rew/pause in the last chunk
        return nextTextToSpeak<mTextToSpeak.size();
    }

    /**
     * Returns the next text to pass to TTS and advances to the following chunk.
     * After a pause/rewind/forward only the part of the chunk starting at the
     * sentence boundary preceding the stored position is returned.
     *
     * @return text to speak, or an empty string if the end of the queue has been passed
     */
    public String getNextTextToSpeak() {
        String text = getNextTextChunk();

        // if a pause occurred then skip the first part
        if (fractionOfNextSentenceSpoken>0) {
            Log.d(TAG, "Getting part of text to read. Fraction:"+fractionOfNextSentenceSpoken);

            StartPos textFraction = getPrevTextStartPos(text, fractionOfNextSentenceSpoken);
            if (textFraction.found) {
                fractionOfNextSentenceSpoken = textFraction.actualFractionOfWhole;
                text = textFraction.text;
            } else {
                // No sentence boundary precedes the stored position, i.e. the position is still at the
                // very start of the chunk. The chunk index has already been advanced, so returning
                // nothing here would silently skip the whole chunk; read it from its beginning instead.
                Log.d(TAG, "No sentence start before fraction:"+fractionOfNextSentenceSpoken+" so restarting chunk");
                fractionOfNextSentenceSpoken = 0;
            }
        }

        return text;
    }

    /** Returns the next chunk and moves the queue position past it. */
    private String getNextTextChunk() {
        String text = peekNextTextChunk();
        nextTextToSpeak++;
        return text;
    }

    /** Returns the next chunk without moving the queue position; empty string if past the end. */
    private String peekNextTextChunk() {
        if (!isMoreTextToSpeak()) {
            Log.e(TAG, "Error: passed end of Speaktext. nextText:"+nextTextToSpeak+" textToSpeak size:"+mTextToSpeak.size());
            return "";
        }
        return mTextToSpeak.get(nextTextToSpeak);
    }

    /** fractionCompleted may be a fraction of a fraction of the current block if this is not the first pause in this block
     *
     * @param fractionCompleted of last block of text returned by getNextTextToSpeak;
     *        values outside 0..1 are clamped into that range
     */
    public void pause(float fractionCompleted) {
        Log.d(TAG, "Pause CurrentSentence:"+nextTextToSpeak);

        // accumulate these fractions until we reach the end of a chunk of text
        // if pause several times the fraction of text completed becomes a fraction of the fraction left i.e. 1-previousFractionCompleted
        float completed = clampFraction(fractionCompleted);
        double accumulated = fractionOfNextSentenceSpoken + (1.0-fractionOfNextSentenceSpoken)*completed;

        // ensure the accumulated fraction (not just the latest increment) is never greater than 1/all text
        fractionOfNextSentenceSpoken = clampFraction((float)accumulated);
        Log.d(TAG, "Fraction of current sentence spoken:"+fractionOfNextSentenceSpoken);

        // the interrupted chunk has to be handed out again on resume
        backOneChunk();
    }

    /**
     * Moves the stored position back to an earlier sentence start, stepping into
     * the previous chunk if no earlier sentence start exists in the current one.
     */
    public void rewind() {
        // go back to start of current sentence
        StartPos textFraction = getPrevTextStartPos(peekNextTextChunk(), fractionOfNextSentenceSpoken);

        // if could not find a previous sentence end
        if (!textFraction.found) {
            if (backOneChunk()) {
                // search backwards from the very end of the previous chunk
                textFraction = getPrevTextStartPos(peekNextTextChunk(), 1.0f);
            }
        } else {
            // go back a little bit further in the current chunk
            String chunk = peekNextTextChunk();
            StartPos extraFraction = getPrevTextStartPos(chunk, getStartPosFraction(textFraction.startPosition, chunk));
            if (extraFraction.found) {
                textFraction = extraFraction;
            }
        }

        if (textFraction.found) {
            fractionOfNextSentenceSpoken = textFraction.actualFractionOfWhole;
        } else {
            Log.e(TAG, "Could not rewind");
        }

        Log.d(TAG, "Rewind chunk length start position:"+fractionOfNextSentenceSpoken);
    }

    /**
     * Moves the stored position forward to the next sentence start, stepping into
     * the next chunk if no later sentence start exists in the current one.
     */
    public void forward() {
        Log.d(TAG, "Forward nextText:"+nextTextToSpeak);

        // go forward to the start of the next sentence
        StartPos textFraction = getForwardTextStartPos(peekNextTextChunk(), fractionOfNextSentenceSpoken);

        // if could not find the next sentence start
        if (!textFraction.found && forwardOneChunk()) {
            textFraction = getForwardTextStartPos(peekNextTextChunk(), 0.0f);
        }

        if (textFraction.found) {
            fractionOfNextSentenceSpoken = textFraction.actualFractionOfWhole;
        } else {
            Log.e(TAG, "Could not forward");
        }

        Log.d(TAG, "Forward chunk length start position:"+fractionOfNextSentenceSpoken);
    }

    /**
     * Called when TTS has finished an utterance.
     *
     * @param utteranceId id of the finished utterance (not used here)
     */
    public void finishedUtterance(String utteranceId) {
        // reset pause info as a chunk is now finished and it may have been started using continue
        fractionOfNextSentenceSpoken = 0;
    }

    /** current chunk needs to be re-read (at least a fraction of it after pause)
     *
     * @return true if the queue position was moved back, false if already at the first chunk
     */
    private boolean backOneChunk() {
        if (nextTextToSpeak > 0) {
            nextTextToSpeak--;
            return true;
        } else {
            return false;
        }
    }

    /** skip ahead to the following chunk
     *
     * @return true if the queue position was moved forward, false if already on the last chunk
     */
    private boolean forwardOneChunk() {
        if (nextTextToSpeak < mTextToSpeak.size()-1) {
            nextTextToSpeak++;
            return true;
        } else {
            return false;
        }
    }

    /** Clears the queue and the current position. */
    public void reset() {
        if (mTextToSpeak!=null) {
            mTextToSpeak.clear();
        }
        nextTextToSpeak = 0;
        fractionOfNextSentenceSpoken = 0;
    }

    /** save state to allow long pauses
     */
    public void persistState() {
        if (mTextToSpeak.size()>0) {
            CommonUtils.getSharedPreferences()
                    .edit()
                    .putString(PERSIST_SPEAK_TEXT, StringUtils.join(mTextToSpeak, PERSIST_SPEAK_TEXT_SEPARATOR))
                    .putInt(PERSIST_NEXT_TEXT, nextTextToSpeak)
                    .putFloat(PERSIST_FRACTION_SPOKEN, fractionOfNextSentenceSpoken)
                    .commit();
        }
    }

    /** restore state to allow long pauses
     *
     * @return state restored
     */
    public boolean restoreState() {
        boolean isRestored = false;
        SharedPreferences sharedPreferences = CommonUtils.getSharedPreferences();
        if (sharedPreferences.contains(PERSIST_SPEAK_TEXT)) {
            String persistedText = sharedPreferences.getString(PERSIST_SPEAK_TEXT, "");

            // quote the separator so it is matched literally and keep trailing empty chunks (limit -1)
            // so that the restored list is exactly the list that was joined in persistState
            String[] chunks = persistedText.split(Pattern.quote(PERSIST_SPEAK_TEXT_SEPARATOR), -1);
            mTextToSpeak = new ArrayList<String>(Arrays.asList(chunks));

            // guard against persisted values that do not fit the restored list
            int persistedNext = sharedPreferences.getInt(PERSIST_NEXT_TEXT, 0);
            nextTextToSpeak = Math.max(0, Math.min(mTextToSpeak.size(), persistedNext));
            fractionOfNextSentenceSpoken = clampFraction(sharedPreferences.getFloat(PERSIST_FRACTION_SPOKEN, 0));

            clearPersistedState();
            isRestored = true;
        }

        return isRestored;
    }

    /** Removes any state saved by {@link #persistState()}. */
    public void clearPersistedState() {
        CommonUtils.getSharedPreferences().edit().remove(PERSIST_SPEAK_TEXT)
                .remove(PERSIST_NEXT_TEXT)
                .remove(PERSIST_FRACTION_SPOKEN)
                .commit();
    }

    /**
     * Finds the sentence boundary preceding the position given by fraction.
     *
     * @param text chunk to search
     * @param fraction position in the chunk as a fraction of its length
     * @return start position of the preceding sentence boundary; found is false if
     *         there is no boundary before the position (i.e. the position is offset 0)
     */
    private StartPos getPrevTextStartPos(String text, float fraction) {
        int nextTextOffset = (int)(Math.min(1,fraction)*text.length());

        BreakIterator breakIterator = BreakIterator.getSentenceInstance();
        breakIterator.setText(text);
        // if the lookup fails then fall back to the start of the chunk
        int startPos = 0;
        try {
            // this can rarely throw an Exception
            startPos = breakIterator.preceding(nextTextOffset);
        } catch (Exception e) {
            Log.e(TAG, "Error finding previous sentence start", e);
        }

        if (startPos<0) {
            return new StartPos();
        }
        return createStartPos(text, startPos);
    }

    /**
     * Finds the sentence boundary following the position given by fraction.
     *
     * @param text chunk to search
     * @param fraction position in the chunk as a fraction of its length
     * @return start position just after the following sentence boundary; found is
     *         false if there is no boundary after the position
     */
    private StartPos getForwardTextStartPos(String text, float fraction) {
        int nextTextOffset = (int)(Math.min(1,fraction)*text.length());

        BreakIterator breakIterator = BreakIterator.getSentenceInstance();
        breakIterator.setText(text);
        // if the lookup fails then fall back to the start of the chunk
        int startPos = 0;
        try {
            // this can rarely throw an Exception
            startPos = breakIterator.following(nextTextOffset);
        } catch (Exception e) {
            Log.e(TAG, "Error finding next sentence start", e);
        }

        if (startPos<0) {
            return new StartPos();
        }
        // nudge the startPos past the beginning of sentence so this sentence start is found when searching for previous block in getNextSentence
        int nudgedStartPos = startPos<text.length()-1-1? startPos+1 : startPos;
        return createStartPos(text, nudgedStartPos);
    }

    /** Builds a found StartPos for the given offset in text. */
    private StartPos createStartPos(String text, int startPosition) {
        StartPos retVal = new StartPos();
        retVal.found = true;
        retVal.startPosition = startPosition;
        // because we don't return an exact fraction, but go to the beginning of a sentence, we need to update the fractionAlreadySpoken
        retVal.actualFractionOfWhole = getStartPosFraction(startPosition, text);
        retVal.text = text.substring(startPosition);
        return retVal;
    }

    /** ICS rejects text longer than 4000 chars so break it up
     *
     * @param text text of any length
     * @return the text split into consecutive chunks, each shorter than MAX_SPEECH_ITEM_CHAR_LENGTH
     */
    private List<String> breakUpText(String text) {
        //
        // first try to split text nicely at the end of sentences
        //
        List<String> chunks1 = new ArrayList<>();

        // is the text short enough to use as is
        if (text.length()<MAX_SPEECH_ITEM_CHAR_LENGTH) {
            chunks1.add(text);
        } else {
            // break up the text at sentence ends
            Matcher matcher = BREAK_PATTERN.matcher(text);

            int matchedUpTo = 0;
            while (matcher.find()) {
                int nextEnd = matcher.end();
                chunks1.add(text.substring(matchedUpTo, nextEnd));
                matchedUpTo = nextEnd;
            }

            // add on the final part of the text, if there is any
            if (matchedUpTo < text.length()) {
                chunks1.add(text.substring(matchedUpTo));
            }
        }

        //
        // If any text is still too long because the regexp was not matched then forcefully split it up
        // All chunks are probably now less than 4000 chars as required by tts but go through again for languages that don't have '. ' at the end of sentences
        //
        List<String> chunks2 = new ArrayList<>();
        for (String chunk : chunks1) {
            if (chunk.length()<MAX_SPEECH_ITEM_CHAR_LENGTH) {
                chunks2.add(chunk);
            } else {
                // force chunks to be correct length -10 is just to allow a bit of extra room
                chunks2.addAll(splitEqually(chunk, MAX_SPEECH_ITEM_CHAR_LENGTH-10));
            }
        }

        return chunks2;
    }

    /**
     * Splits text into consecutive pieces of at most size characters.
     *
     * @param text text to split
     * @param size maximum length of each piece
     * @return the pieces in order; only the last one may be shorter than size
     */
    private List<String> splitEqually(String text, int size) {
        // Give the list the right capacity to start with.
        List<String> ret = new ArrayList<>((text.length() + size - 1) / size);

        for (int start = 0; start < text.length(); start += size) {
            ret.add(text.substring(start, Math.min(text.length(), start + size)));
        }
        return ret;
    }

    /**
     * Converts an offset in text into a fraction of the text length.
     *
     * @return fraction between 0 and 1; 0 if text is empty
     */
    private float getStartPosFraction(int startPos, String text) {
        if (text.length()==0) {
            // avoid 0/0 which would give NaN
            return 0;
        }
        return clampFraction(((float)startPos)/text.length());
    }

    /** Forces a fraction into the range 0..1; NaN is treated as 0. */
    private static float clampFraction(float fraction) {
        if (Float.isNaN(fraction)) {
            return 0;
        }
        return Math.max(0, Math.min(1, fraction));
    }

    /**
     * @return total number of characters in all queued chunks
     */
    public long getTotalChars() {
        long totChars = 0;
        for (String chunk: mTextToSpeak) {
            totChars += chunk.length();
        }
        return totChars;
    }

    /** this relies on fraction which is set at pause
     *
     * @return approximate number of characters already spoken
     */
    public long getSpokenChars() {
        long spokenChars = 0;
        if (mTextToSpeak.size()>0) {
            // NOTE(review): the loop stops before chunk nextTextToSpeak-1, so while paused the chunk
            // immediately before the current one is not counted - confirm against callers whether this is intended
            for (int i=0; i<nextTextToSpeak-1; i++) {
                String chunk = mTextToSpeak.get(i);
                spokenChars += chunk.length();
            }

            if (nextTextToSpeak<mTextToSpeak.size()) {
                // convert the partial chunk to a whole number of chars before adding so the running total stays in long arithmetic
                spokenChars += (long)(fractionOfNextSentenceSpoken * (float)mTextToSpeak.get(nextTextToSpeak).length());
            }
        }
        return spokenChars;
    }
}