/****************************************************************************************
* Copyright (c) 2013 Bibek Shrestha <bibekshrestha@gmail.com> *
* Copyright (c) 2013 Zaur Molotnikov <qutorial@gmail.com> *
* Copyright (c) 2013 Nicolas Raoul <nicolas.raoul@gmail.com> *
* Copyright (c) 2013 Flavio Lerda <flerda@gmail.com> *
* *
* This program is free software; you can redistribute it and/or modify it under *
* the terms of the GNU General Public License as published by the Free Software *
* Foundation; either version 3 of the License, or (at your option) any later *
* version. *
* *
* This program is distributed in the hope that it will be useful, but WITHOUT ANY *
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A *
* PARTICULAR PURPOSE. See the GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License along with *
* this program. If not, see <http://www.gnu.org/licenses/>. *
****************************************************************************************/
package com.ichi2.anki.multimediacard.beolingus.parsing;
/**
 * Extracts pronunciation links from HTML pages served by the Beolingus dictionary.
 * <p>
 * Both methods are plain string scanners: they look for fixed markers in the page text rather than
 * parsing the HTML. Every failure is reported by returning the string {@code "no"}.
 */
public class BeolingusParser {
    /** Start of the image tag that Beolingus places inside each pronunciation link. */
    private static final String PRONUNC_STOPPER = "<img src=\"/pics/s1.png\"";
    /** End of the mp3 address followed by the link caption, as it appears on the pronunciation page. */
    private static final String MP3_STOPPER = ".mp3\">Listen";
    /** Prefix prepended to the site-relative addresses extracted from the pages. */
    private static final String SITE_ROOT = "http://dict.tu-chemnitz.de";
    /** Sentinel returned by both public methods when nothing could be extracted. */
    private static final String NOT_FOUND = "no";
    private static final String TITLE_ATTR = "title=\"";
    private static final String HREF_ATTR = "href=\"";


    /**
     * Finds the address of the pronunciation page for a word on a translation page.
     * <p>
     * The page is scanned for pronunciation images. For each image the first {@code title="..."}
     * attribute at or after the image is compared with {@code wordToSearchFor}; on an exact match the
     * nearest {@code href="..."} preceding the image is taken as the address of the pronunciation page.
     * An image with a matching title but no preceding {@code href} is skipped.
     *
     * @param translationHtml html page from beolingus, with translation of the word we search
     * @param wordToSearchFor the word whose pronunciation link is wanted; compared case-sensitively
     * @return the http address of the pronunciation page, or {@code "no"} if either argument is
     *         {@code null} or no matching link is found
     */
    public static String getPronounciationAddressFromTranslation(String translationHtml, String wordToSearchFor) {
        if (translationHtml == null || wordToSearchFor == null) {
            return NOT_FOUND;
        }

        int indIndicator = translationHtml.indexOf(PRONUNC_STOPPER);
        while (indIndicator != -1) {
            int indTitle = translationHtml.indexOf(TITLE_ATTR, indIndicator);
            if (indTitle == -1) {
                // No title attribute from here to the end of the page, so later images have none either.
                return NOT_FOUND;
            }
            int titleStart = indTitle + TITLE_ATTR.length();
            int titleEnd = translationHtml.indexOf('"', titleStart);
            if (titleEnd == -1) {
                return NOT_FOUND;
            }

            // Must be equal to the word being translated.
            if (translationHtml.substring(titleStart, titleEnd).equals(wordToSearchFor)) {
                String pronounciationAddress = findHrefBefore(translationHtml, indIndicator);
                if (pronounciationAddress != null) {
                    return SITE_ROOT + pronounciationAddress;
                }
            }
            indIndicator = translationHtml.indexOf(PRONUNC_STOPPER, indIndicator + 1);
        }
        return NOT_FOUND;
    }


    /**
     * Returns the value of the last {@code href="..."} attribute that starts before {@code position},
     * or {@code null} if there is none or its closing quote is missing.
     */
    private static String findHrefBefore(String html, int position) {
        // A negative fromIndex (position == 0) makes lastIndexOf return -1, which is what we want.
        int hrefStart = html.lastIndexOf(HREF_ATTR, position - 1);
        if (hrefStart == -1) {
            return null;
        }
        int valueStart = hrefStart + HREF_ATTR.length();
        int valueEnd = html.indexOf('"', valueStart);
        if (valueEnd == -1) {
            return null;
        }
        return html.substring(valueStart, valueEnd);
    }


    /**
     * Finds the address of the mp3 file on a pronunciation page.
     * <p>
     * The first occurrence of {@code .mp3">Listen} marks the end of the address; the address starts
     * right after the closest double quote before it. If no such quote exists, the address is taken
     * from the beginning of the page.
     *
     * @param pronunciationPageHtml html of the pronunciation page; content starting with
     *        {@code "FAILED"} is treated as a failed download
     * @return the http address of the mp3 file, or {@code "no"} if the argument is {@code null}, starts
     *         with {@code "FAILED"}, or contains no mp3 link
     */
    public static String getMp3AddressFromPronounciation(String pronunciationPageHtml) {
        if (pronunciationPageHtml == null || pronunciationPageHtml.startsWith("FAILED")) {
            return NOT_FOUND;
        }

        int indMp3 = pronunciationPageHtml.indexOf(MP3_STOPPER);
        if (indMp3 == -1) {
            return NOT_FOUND;
        }
        int indAddrEnd = indMp3 + ".mp3".length();
        // lastIndexOf yields -1 when there is no opening quote, so the address then starts at 0.
        int addrStart = pronunciationPageHtml.lastIndexOf('"', indMp3 - 1) + 1;
        return SITE_ROOT + pronunciationPageHtml.substring(addrStart, indAddrEnd);
    }
}