/***************************************************************************************
* *
* Copyright (c) 2016 Timothy Rae <perceptualchaos2@gmail.com> *
* *
* This program is free software; you can redistribute it and/or modify it under *
* the terms of the GNU Lesser General Public License as published by the Free Software *
* Foundation; either version 3 of the License, or (at your option) any later *
* version. *
* *
* This program is distributed in the hope that it will be useful, but WITHOUT ANY *
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A *
* PARTICULAR PURPOSE. See the GNU General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License along with *
* this program. If not, see <http://www.gnu.org/licenses/>. *
****************************************************************************************/
package com.ichi2.anki.api;
import android.text.Html;
import android.text.TextUtils;
import java.math.BigInteger;
import java.security.MessageDigest;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Utilities class for the API.
 * <p>
 * Static helpers for joining/splitting field and tag strings and for computing the checksum of a
 * field after its HTML markup has been stripped.
 */
class Utils {
    // Regex patterns used in removing tags from text before checksum
    private static final Pattern stylePattern = Pattern.compile("(?s)<style.*?>.*?</style>");
    private static final Pattern scriptPattern = Pattern.compile("(?s)<script.*?>.*?</script>");
    private static final Pattern tagPattern = Pattern.compile("<.*?>");
    private static final Pattern imgPattern = Pattern.compile("<img src=[\\\"']?([^\\\"'>]+)[\\\"']? ?/?>");
    // Matches named and numeric HTML entities, i.e. an ampersand, an optional '#', word characters
    // and a terminating semicolon
    private static final Pattern htmlEntitiesPattern = Pattern.compile("&#?\\w+;");

    /**
     * Joins the given field values into a single string, separated by the 0x1f character.
     *
     * @param list the field values, may be null
     * @return the joined string, or null if {@code list} is null
     */
    static String joinFields(String[] list) {
        return list != null ? TextUtils.join("\u001f", list) : null;
    }

    /**
     * Splits a string produced by {@link #joinFields(String[])} back into its field values.
     * Trailing empty fields are preserved (the split limit is negative).
     *
     * @param fields the 0x1f-separated fields, may be null
     * @return the individual field values, or null if {@code fields} is null
     */
    static String[] splitFields(String fields) {
        return fields != null ? fields.split("\\x1f", -1) : null;
    }

    /**
     * Joins a set of tags into a single space-separated string. Because a space is the separator,
     * any space inside an individual tag is replaced with an underscore first.
     *
     * @param tags the tags to join, may be null
     * @return the space-separated tags, or an empty string if {@code tags} is null or empty
     */
    static String joinTags(Set<String> tags) {
        if (tags == null || tags.isEmpty()) {
            return "";
        }
        // Strings are immutable, so the normalized tags have to be collected rather than
        // "modified" in place while iterating over the set.
        String[] normalized = new String[tags.size()];
        int i = 0;
        for (String t : tags) {
            normalized[i++] = t.replace(' ', '_');
        }
        return TextUtils.join(" ", normalized);
    }

    /**
     * Splits a whitespace-separated tag string into individual tags.
     * Note that an empty (or all-whitespace) input yields a single-element array containing the
     * empty string, as per {@link String#split(String)}.
     *
     * @param tags the whitespace-separated tags, may be null
     * @return the individual tags, or null if {@code tags} is null
     */
    static String[] splitTags(String tags) {
        if (tags == null) {
            return null;
        }
        return tags.trim().split("\\s+");
    }

    /**
     * Computes the checksum of a field: the HTML is stripped (keeping media filenames), the
     * result is hashed with SHA1 over its UTF-8 bytes, and the first 8 hex digits of the digest
     * are returned as a number.
     *
     * @param data the raw field content
     * @return the first 8 hex digits of the SHA1 digest, parsed as a base-16 number
     * @throws IllegalStateException if the SHA1 algorithm or UTF-8 encoding is unavailable
     */
    static Long fieldChecksum(String data) {
        data = stripHTMLMedia(data);
        try {
            MessageDigest md = MessageDigest.getInstance("SHA1");
            byte[] digest = md.digest(data.getBytes("UTF-8"));
            BigInteger biginteger = new BigInteger(1, digest);
            String result = biginteger.toString(16);
            // BigInteger drops leading zeroes, so left-pad the checksum to 40 hex digits,
            // as is done in the main AnkiDroid code
            if (result.length() < 40) {
                String zeroes = "0000000000000000000000000000000000000000";
                result = zeroes.substring(0, zeroes.length() - result.length()) + result;
            }
            return Long.valueOf(result.substring(0, 8), 16);
        } catch (Exception e) {
            // This is guaranteed to never happen
            throw new IllegalStateException("Error making field checksum with SHA1 algorithm and UTF-8 encoding", e);
        }
    }

    /**
     * Strip HTML but keep media filenames: every img tag is replaced by its src value
     * (surrounded by spaces) before the remaining markup is removed.
     *
     * @param s the HTML text
     * @return the text without markup, with image filenames preserved
     */
    private static String stripHTMLMedia(String s) {
        Matcher imgMatcher = imgPattern.matcher(s);
        return stripHTML(imgMatcher.replaceAll(" $1 "));
    }

    /**
     * Removes style blocks, script blocks and all remaining tags from the text, then unescapes
     * HTML entities.
     *
     * @param s the HTML text
     * @return the plain text
     */
    private static String stripHTML(String s) {
        // Order matters: style/script blocks must go (with their content) before the generic
        // tag pattern removes their delimiters.
        Matcher htmlMatcher = stylePattern.matcher(s);
        s = htmlMatcher.replaceAll("");
        htmlMatcher = scriptPattern.matcher(s);
        s = htmlMatcher.replaceAll("");
        htmlMatcher = tagPattern.matcher(s);
        s = htmlMatcher.replaceAll("");
        return entsToTxt(s);
    }

    /**
     * Takes a string and replaces all the HTML symbols in it with their unescaped representation.
     * This should only affect substrings of the form &something; and not tags.
     * Internet rumour says that Html.fromHtml() doesn't cover all cases, but it doesn't get less
     * vague than that.
     * @param html The HTML escaped text
     * @return The text with its HTML entities unescaped.
     */
    private static String entsToTxt(String html) {
        // entitydefs defines nbsp as \xa0 instead of a standard space, so we
        // replace it first
        html = html.replace("&nbsp;", " ");
        Matcher htmlEntities = htmlEntitiesPattern.matcher(html);
        StringBuffer sb = new StringBuffer();
        while (htmlEntities.find()) {
            String unescaped = Html.fromHtml(htmlEntities.group()).toString();
            // The decoded text may contain '$' or '\' (e.g. from a numeric entity), which
            // appendReplacement would otherwise interpret as group references / escapes.
            htmlEntities.appendReplacement(sb, Matcher.quoteReplacement(unescaped));
        }
        htmlEntities.appendTail(sb);
        return sb.toString();
    }
}