/**
 * Copyright (c) 2010-2016 by the respective copyright holders.
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 */
package org.openhab.io.multimedia.internal.tts;

import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Prepare text to be suitable for calling the Google translate service.
 *
 * The text is first cut into sentences at a configurable set of delimiter characters. Each sentence is then packed
 * word by word into chunks which do not exceed the maximum sentence length given at construction time.
 *
 * @author Dominic Lerbs
 * @since 1.7.0
 *
 */
public class GoogleTTSTextProcessor {

    private static final Logger logger = LoggerFactory.getLogger(GoogleTTSTextProcessor.class);

    /** Characters which end a sentence unless custom delimiters are configured. */
    private static final String DEFAULT_SENTENCE_DELIMITERS = "!.?:;";

    private final int maxSentenceLength;

    /** Pre-compiled pattern matching any single sentence delimiter; {@code null} disables sentence splitting. */
    private Pattern sentenceSplitPattern = compileDelimiterPattern(DEFAULT_SENTENCE_DELIMITERS);

    /**
     * Creates a new text processor.
     *
     * @param maxSentenceLength
     *            The maximum number of characters a single chunk returned by {@link #splitIntoChunks(String)} may
     *            have
     */
    public GoogleTTSTextProcessor(int maxSentenceLength) {
        this.maxSentenceLength = maxSentenceLength;
    }

    /**
     * Replaces the set of characters at which the text is cut into sentences.
     *
     * Every character of the given string is treated as a literal delimiter; characters which have a special
     * meaning in regular expressions (e.g. {@code ^}, {@code -}, {@code ]} or a backslash) are safe to use.
     *
     * @param delimiters
     *            The delimiter characters. If {@code null} or empty, the text is not split into sentences at all and
     *            is only split by length.
     */
    public void setCustomSentenceDelimiters(String delimiters) {
        sentenceSplitPattern = compileDelimiterPattern(delimiters);
    }

    /**
     * Splits the given text into small chunks which are processible by the Google translate service. Guarantees that a
     * single item in the result list is not longer than {@link #maxSentenceLength}.
     *
     * @param text
     *            The text to split into chunks. May be {@code null}.
     * @return List containing the text chunks; empty if the text is {@code null} or contains no usable words
     */
    public List<String> splitIntoChunks(String text) {
        List<String> splitChunks = new ArrayList<String>();
        if (text == null) {
            return splitChunks;
        }

        String[] sentences;
        if (sentenceSplitPattern == null) {
            // No delimiters configured: the whole text is handled as one sentence.
            sentences = new String[] { text };
        } else {
            sentences = sentenceSplitPattern.split(text);
        }

        for (String sentence : sentences) {
            splitChunks.addAll(splitSentence(sentence.trim()));
        }
        return splitChunks;
    }

    /**
     * Builds a pattern which matches exactly one of the given delimiter characters.
     *
     * Each code point is quoted individually and the results are joined as alternatives, so no character of the
     * input is ever interpreted as regular expression syntax.
     *
     * @param delimiters
     *            The delimiter characters, may be {@code null} or empty
     * @return The compiled pattern, or {@code null} if there are no delimiters
     */
    private static Pattern compileDelimiterPattern(String delimiters) {
        if (delimiters == null || delimiters.isEmpty()) {
            return null;
        }
        StringBuilder regex = new StringBuilder();
        int index = 0;
        while (index < delimiters.length()) {
            // Iterate by code point so that characters outside the BMP stay intact.
            int codePoint = delimiters.codePointAt(index);
            if (regex.length() > 0) {
                regex.append('|');
            }
            regex.append(Pattern.quote(new String(Character.toChars(codePoint))));
            index += Character.charCount(codePoint);
        }
        return Pattern.compile(regex.toString());
    }

    /**
     * Splits a sentence into multiple chunks if the sentence exceeds the {@link #maxSentenceLength}.
     *
     * Words are separated at space characters. Empty words and words which are longer than the maximum sentence
     * length are dropped.
     *
     * @param sentence
     *            The sentence to split
     * @return A list containing the split chunks of the sentence
     */
    private List<String> splitSentence(String sentence) {
        List<String> parts = new ArrayList<String>();
        StringBuilder sentencePart = new StringBuilder();

        for (String rawWord : StringUtils.split(sentence, ' ')) {
            String word = rawWord.trim();
            if (!wordLengthWithinLimits(word)) {
                continue;
            }
            // sentencePart always ends with a single space, so its length plus the length of the next word equals
            // the length of the resulting chunk once the trailing space has been trimmed.
            if (sentencePart.length() + word.length() > maxSentenceLength) {
                parts.add(sentencePart.toString().trim());
                sentencePart.setLength(0);
            }
            sentencePart.append(word).append(' ');
        }

        if (sentencePart.length() > 0) {
            parts.add(sentencePart.toString().trim());
        }
        return parts;
    }

    /**
     * Checks whether a word can be put into a chunk.
     *
     * @param word
     *            The word to check
     * @return {@code false} if the word is empty or longer than {@link #maxSentenceLength} (the latter is logged as
     *         a warning), {@code true} otherwise
     */
    private boolean wordLengthWithinLimits(String word) {
        if (word.isEmpty()) {
            return false;
        }
        if (word.length() > maxSentenceLength) {
            logger.warn("Unable to say '{}' as this word is longer than the maximum sentence allowed ({})", word,
                    maxSentenceLength);
            return false;
        }
        return true;
    }

    /**
     * Encodes the given sentence into URL compatible format.
     *
     * Spaces are encoded as {@code %20} instead of the {@code +} produced by {@link URLEncoder}.
     *
     * @param sentence
     *            The sentence to convert. May be {@code null}.
     * @return The sentence in URL compatible format; an empty string if the sentence is {@code null} or could not be
     *         encoded
     */
    public static String urlEncodeSentence(String sentence) {
        if (sentence == null) {
            return "";
        }
        String encodedSentence = "";
        try {
            logger.trace("Encoding sentence to URL format: {}", sentence);
            encodedSentence = URLEncoder.encode(sentence, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            logger.warn("Failed to encode sentence '" + sentence + "'", e);
        }
        // URLEncoder emits '+' for a space; a literal '+' is already encoded as %2B, so this replacement is safe.
        encodedSentence = encodedSentence.replace("+", "%20");
        logger.debug("Encoded sentence: {}", encodedSentence);
        return encodedSentence;
    }
}