package com.nostra13.socialsharing.twitter.extpack.winterwell.jtwitter;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.lang.reflect.Field;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Comparator;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.nostra13.socialsharing.twitter.extpack.winterwell.json.JSONException;
import com.nostra13.socialsharing.twitter.extpack.winterwell.json.JSONObject;
/**
* Utility methods used in Twitter. This class is public in case anyone else
* wants to use these methods. WARNING: they don't really form part of the
* JTwitter API, and may be changed or reorganised in future versions.
* <p>
* NB: Some of these are copies (sometimes simplified) of methods in
* winterwell.utils.Utils
*
* @author daniel
* @testedby {@link InternalUtilsTest}
*/
public class InternalUtils {
public static String stripUrls(String text) {
return URL_REGEX.matcher(text).replaceAll("");
}
public static final Pattern TAG_REGEX = Pattern.compile("<!?/?[\\[\\-a-zA-Z][^>]*>", Pattern.DOTALL);
static final DateFormat df = new SimpleDateFormat("yyyy-MM-dd");
/**
* The date format used by Marko from Marakana. This is needed for *some*
* installs of Status.Net, though not for Identi.ca.
*/
static final DateFormat dfMarko = new SimpleDateFormat(
"EEE MMM dd HH:mm:ss ZZZZZ yyyy");
/**
* Matches latitude, longitude, including with the UberTwitter UT: prefix
* Group 2 = latitude, Group 3 = longitude.
* <p>
* Weird: I saw this as an address - "ÜT: 25.324488,55.376224t" Is it just a
* one-off typo? Should we match N/S/E/W markers?
*/
public static final Pattern latLongLocn = Pattern
.compile("(\\S+:)?\\s*(-?[\\d\\.]+)\\s*,\\s*(-?[\\d\\.]+)");
static final Comparator<Status> NEWEST_FIRST = new Comparator<Status>() {
@Override
public int compare(Status o1, Status o2) {
return -o1.id.compareTo(o2.id);
}
};
public static final Pattern REGEX_JUST_DIGITS = Pattern.compile("\\d+");
/**
* Matches urls. Note: Excludes any trailing .
*
* @testedy {@link WebUtilsTest#testUrlRegex()}
*/
static final Pattern URL_REGEX = Pattern
.compile("[hf]tt?ps?://[a-zA-Z0-9_%\\-\\.,\\?&\\/=\\+'~#!\\*:]+[a-zA-Z0-9_%\\-&\\/=\\+]");
static ConcurrentHashMap<String, Long> usage;
/**
* Create a map from a list of key, value pairs. An easy way to make small
* maps, basically the equivalent of {@link Arrays#asList(Object...)}. If
* the value is null, the key will not be included.
*/
@SuppressWarnings("unchecked")
public static Map asMap(Object... keyValuePairs) {
assert keyValuePairs.length % 2 == 0;
Map m = new HashMap(keyValuePairs.length / 2);
for (int i = 0; i < keyValuePairs.length; i += 2) {
Object v = keyValuePairs[i + 1];
if (v == null) {
continue;
}
m.put(keyValuePairs[i], v);
}
return m;
}
public static void close(Closeable strm) {
try {
strm.close();
} catch (IOException e) {
// ignore
}
}
/**
* Count API usage for api usage stats.
*
* @param url
*/
static void count(String url) {
if (usage == null)
return;
// ignore parameters
int i = url.indexOf("?");
if (i != -1) {
url = url.substring(0, i);
}
// for clarity
i = url.indexOf("/1/");
if (i != -1) {
url = url.substring(i + 3);
}
// some calls - eg statuses/show - include the tweet id
url = url.replaceAll("\\d+", "");
// non-blocking (we could just ignore the race condition I suppose)
for (int j = 0; j < 100; j++) { // give up if you lose >100 races
Long v = usage.get(url);
boolean done;
if (v == null) {
Long old = usage.putIfAbsent(url, 1L);
done = old == null;
} else {
long nv = v + 1;
done = usage.replace(url, v, nv);
}
if (done) {
break;
}
}
}
static String encode(Object x) {
String encd;
try {
encd = URLEncoder.encode(String.valueOf(x), "UTF-8");
} catch (UnsupportedEncodingException e) {
// This shouldn't happen as UTF-8 is standard
encd = URLEncoder.encode(String.valueOf(x));
}
return encd.replace("+", "%20");
}
/**
* @return a map of API endpoint to count-of-calls. null if switched off
* (which is the default).
*
* @see #setTrackAPIUsage(boolean)
*/
static public ConcurrentHashMap<String, Long> getAPIUsageStats() {
return usage;
}
/**
* Convenience method for making Dates. Because Date is a tricksy bugger of
* a class.
*
* @param year
* @param month
* @param day
* @return date object
*/
public static Date getDate(int year, String month, int day) {
try {
Field field = GregorianCalendar.class.getField(month.toUpperCase());
int m = field.getInt(null);
Calendar date = new GregorianCalendar(year, m, day);
return date.getTime();
} catch (Exception x) {
throw new IllegalArgumentException(x.getMessage());
}
}
static Boolean getOptBoolean(JSONObject obj, String key)
throws JSONException {
Object o = obj.opt(key);
if (o == null || o.equals(JSONObject.NULL))
return null;
if (o.equals(Boolean.FALSE)
|| (o instanceof String && ((String) o)
.equalsIgnoreCase("false")))
return false;
else if (o.equals(Boolean.TRUE)
|| (o instanceof String && ((String) o)
.equalsIgnoreCase("true")))
return true;
throw new JSONException(o + " (" + key + ") is not boolean");
}
/**
* Join a slice of the list
*
* @param screenNamesOrIds
* @param first
* Inclusive
* @param last
* Exclusive. Can be > list.size (will be truncated).
* @return
*/
static String join(List screenNamesOrIds, int first, int last) {
StringBuilder names = new StringBuilder();
for (int si = first, n = Math.min(last, screenNamesOrIds.size()); si < n; si++) {
names.append(screenNamesOrIds.get(si));
names.append(",");
}
// pop the final ","
if (names.length() != 0) {
names.delete(names.length() - 1, names.length());
}
return names.toString();
}
/**
* Join the list
*
* @param screenNames
* @return
*/
public static String join(String[] screenNames) {
StringBuilder names = new StringBuilder();
for (int si = 0, n = screenNames.length; si < n; si++) {
names.append(screenNames[si]);
names.append(",");
}
// pop the final ","
if (names.length() != 0) {
names.delete(names.length() - 1, names.length());
}
return names.toString();
}
/**
* Helper method to deal with JSON-in-Java weirdness
*
* @return Can be null
* */
protected static String jsonGet(String key, JSONObject jsonObj) {
assert key != null : jsonObj;
assert jsonObj != null;
Object val = jsonObj.opt(key);
if (val == null)
return null;
if (JSONObject.NULL.equals(val))
return null;
String s = val.toString();
return s;
}
static Date parseDate(String c) {
if (InternalUtils.REGEX_JUST_DIGITS.matcher(c).matches())
return new Date(Long.valueOf(c));
try {
Date _createdAt = new Date(c);
return _createdAt;
} catch (Exception e) { // Bug reported by Marakana with *some*
// Status.Net sites
try {
Date _createdAt = InternalUtils.dfMarko.parse(c);
return _createdAt;
} catch (ParseException e1) {
throw new TwitterException.Parsing(c, e1);
}
}
}
/**
* @param on
* true to activate {@link #getAPIUsageStats()}. false to switch
* stats off. false by default
*/
static public void setTrackAPIUsage(boolean on) {
if (!on) {
usage = null;
return;
}
if (usage != null)
return;
usage = new ConcurrentHashMap<String, Long>();
}
/**
* Use a bufferred reader (preferably UTF-8) to extract the contents of the
* given stream. A convenience method for
* {@link InternalUtils#toString(Reader)}.
*/
protected static String toString(InputStream inputStream) {
InputStreamReader reader;
try {
reader = new InputStreamReader(inputStream, "UTF-8");
} catch (UnsupportedEncodingException e) {
reader = new InputStreamReader(inputStream);
}
return InternalUtils.toString(reader);
}
/**
* Use a buffered reader to extract the contents of the given reader.
*
* @param reader
* @return The contents of this reader.
*/
static String toString(Reader reader) throws RuntimeException {
try {
// Buffer if not already buffered
reader = reader instanceof BufferedReader ? (BufferedReader) reader
: new BufferedReader(reader);
StringBuilder output = new StringBuilder();
while (true) {
int c = reader.read();
if (c == -1) {
break;
}
output.append((char) c);
}
return output.toString();
} catch (IOException ex) {
throw new RuntimeException(ex);
} finally {
URLConnectionHttpClient.close(reader);
}
}
/**
* Twitter html encodes some entities: ", ', <, >, &
*
* @param text
* Can be null (which returns null)
* @return normal-ish text
*/
static String unencode(String text) {
if (text == null)
return null;
// TODO use Jakarta to handle all html entities?
text = text.replace(""", "\"");
text = text.replace("'", "'");
text = text.replace(" ", " ");
text = text.replace("&", "&");
text = text.replace(">", ">");
text = text.replace("<", "<");
// zero-byte chars are a rare but annoying occurrence
if (text.indexOf(0) != -1) {
text = text.replace((char) 0, ' ').trim();
}
// if (Pattern.compile("&\\w+;").matcher(text).find()) {
// System.out.print(text);
// }
return text;
}
/**
* Convert to a URI, or return null if this is badly formatted
*/
static URI URI(String uri) {
try {
return new URI(uri);
} catch (URISyntaxException e) {
return null; // Bad syntax
}
}
static User user(String json) {
try {
JSONObject obj = new JSONObject(json);
User u = new User(obj, null);
return u;
} catch (JSONException e) {
throw new TwitterException(e);
}
}
/**
* Remove xml and html tags, e.g. to safeguard against javascript
* injection attacks, or to get plain text for NLP.
* @param xml can be null, in which case null will be returned
* @return the text contents - ie input with all tags removed
*/
public static String stripTags(String xml) {
if (xml==null) return null;
// short cut if there are no tags
if (xml.indexOf('<')==-1) return xml;
// first all the scripts (cos we must remove the tag contents too)
Matcher m4 = pScriptOrStyle.matcher(xml);
xml = m4.replaceAll("");
// comments
Matcher m2 = pComment.matcher(xml);
String txt = m2.replaceAll("");
// now the tags
Matcher m = TAG_REGEX.matcher(txt);
String txt2 = m.replaceAll("");
Matcher m3 = pDocType.matcher(txt2);
String txt3 = m3.replaceAll("");
return txt3;
}
/**
* Matches an xml comment - including some bad versions
*/
public static final Pattern pComment = Pattern.compile("<!-*.*?-+>", Pattern.DOTALL);
/**
* Used in strip tags to get rid of scripts and css style blocks altogether.
*/
public static final Pattern pScriptOrStyle = Pattern.compile("<(script|style)[^<>]*>.+?</(script|style)>", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
/**
* Matches a doctype element.
*/
public static final Pattern pDocType = Pattern.compile("<!DOCTYPE.*?>", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
}