package humanize.emoji;
import humanize.emoji.EmojiChar.Vendor;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.google.common.base.Objects;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Multimap;
import com.google.common.io.CharStreams;
import com.google.common.io.LineProcessor;
/**
 * Unified Emoji for Java.
 *
 * @see <a href="http://www.unicode.org/reports/tr51/tr51-1d.html">Unicode
 *      Emoji (working draft)</a>
 */
public final class Emoji
{
private static final Charset UTF8 = Charset.forName("UTF8");
private static final String DB_EMOJI_DATA = "/db/emoji-data.txt";
private static final String DB_EMOJI_SOURCES = "/db/emoji-sources.txt";
private static final List<EmojiChar> EMOJI_CHARS = new ArrayList<EmojiChar>();
private static final Map<String, EmojiChar> HEX_INDEX = new HashMap<String, EmojiChar>();
private static final Map<String, EmojiChar> RAW_INDEX = new HashMap<String, EmojiChar>();
private static final Map<VendorKey, EmojiChar> VENDORS_INDEX = new HashMap<VendorKey, EmojiChar>();
private static final Multimap<String, EmojiChar> ANNOTATIONS_INDEX = ArrayListMultimap.create();
/**
* Transforms a list of Unicode code points, as hex strings, into a proper
* encoded string.
*
* @param points
* The list of Unicode code point as a hex strings
* @return the concatenation of the proper encoded string for the given
* points
* @see Emoji#codePointToString(String)
*/
public static String codePointsToString(String... points)
{
StringBuilder ret = new StringBuilder();
for (String hexPoint : points)
{
ret.append(codePointToString(hexPoint));
}
return ret.toString();
}
/**
* Transforms an Unicode code point, given as a hex string, into a proper
* encoded string. Supplementary code points are encoded in UTF-16 as
* required by Java.
*
* @param point
* The Unicode code point as a hex string
* @return the proper encoded string reification of a given point
*/
public static String codePointToString(String point)
{
String ret;
if (Strings.isNullOrEmpty(point))
{
return point;
}
int unicodeScalar = Integer.parseInt(point, 16);
if (Character.isSupplementaryCodePoint(unicodeScalar))
{
ret = String.valueOf(Character.toChars(unicodeScalar));
} else
{
ret = String.valueOf((char) unicodeScalar);
}
return ret;
}
/**
* Finds emoji characters for the given annotations.
*
* @param annotations
* The list of annotations separated by spaces
* @return or an empty list if there is no match
*/
public static List<EmojiChar> findByAnnotations(String annotations)
{
return getInstance()._findByAnnotations(annotations);
}
/**
* Finds an emoji character by Unicode code point.
*
* @param code
* the Unicode code point
* @return the corresponding emoji character or null if not found
*/
public static EmojiChar findByCodePoint(String code)
{
return getInstance()._findByCodePoint(code);
}
/**
* Finds an emoji character by hexadecimal code.
*
* @param hex
* the hexadecimal code
* @return the corresponding emoji character or null if not found
*/
public static EmojiChar findByHexCode(String hex)
{
return getInstance()._findByHexCode(hex.toUpperCase());
}
/**
* Finds an emoji character by vendor code point.
*
* @param vendor
* the vendor
* @param point
* the raw character for the code point in the vendor space
* @return the corresponding emoji character or null if not found
*/
public static EmojiChar findByVendorCodePoint(Vendor vendor, String point)
{
Emoji emoji = Emoji.getInstance();
return emoji._findByVendorCodePoint(vendor, point);
}
/**
* Finds a single emoji character for the given annotations.
*
* @param annotations
* The list of annotations separated by spaces
* @return a matching emoji character or null if none found
*/
public static EmojiChar singleByAnnotations(String annotations)
{
return getInstance()._singleByAnnotations(annotations);
}
private static Emoji getInstance()
{
return LazyHolder.INSTANCE;
}
private Emoji()
{
try
{
loadData();
} catch (IOException e)
{
throw new RuntimeException(e);
}
}
private List<EmojiChar> _findByAnnotations(String annotations)
{
Collection<EmojiChar> found = new HashSet<EmojiChar>();
Collection<String> parts = Arrays.asList(Strings.nullToEmpty(annotations).split("\\s+"));
for (String annotation : parts)
{
collectAnnotations(found, parts, annotation);
}
return found.isEmpty() ?
Collections.<EmojiChar> emptyList() : asSortedList(found);
}
private EmojiChar _findByCodePoint(String code)
{
return RAW_INDEX.get(code);
}
private EmojiChar _findByHexCode(String hex)
{
return HEX_INDEX.get(hex);
}
private EmojiChar _findByVendorCodePoint(Vendor vendor, String code)
{
return VENDORS_INDEX.get(new VendorKey(vendor, code));
}
private EmojiChar _singleByAnnotations(String annotations)
{
List<EmojiChar> found = _findByAnnotations(annotations);
return found.isEmpty() ? null : found.iterator().next();
}
private <T extends Comparable<? super T>> List<T> asSortedList(Collection<T> c)
{
List<T> list = new ArrayList<T>(c);
Collections.sort(list);
return list;
}
private void collectAnnotations(Collection<EmojiChar> found,
Collection<String> parts,
String annotation)
{
if (!ANNOTATIONS_INDEX.containsKey(annotation))
return;
Collection<EmojiChar> echars = ANNOTATIONS_INDEX.get(annotation);
for (EmojiChar echar : echars)
{
if (echar.hasAnnotations(parts))
{
found.add(echar);
}
}
}
private StreamLineProcessor emojiDataProcessor()
{
return new StreamLineProcessor()
{
@Override
protected void consumeLine(String line)
{
String[] row = line.split(";");
String code = extractCode(trim(row[0]));
String defaultStyle = trim(row[1]);
int ordering = Integer.parseInt(trim(row[2]));
List<String> annotations = extractList(trim(row[3]));
String[] rest = parseRemaining(trim(row[4]));
String sources = rest[0];
String version = rest[1];
String raw = rest[2];
String name = rest[3];
EmojiChar ec = new EmojiChar(code, defaultStyle,
ordering, annotations, sources,
version, raw, name);
EMOJI_CHARS.add(ec);
index(ec);
}
};
}
private StreamLineProcessor emojiSourcesProcessor()
{
return new StreamLineProcessor()
{
@Override
protected void consumeLine(String line)
{
String[] row = line.split(";");
String unified = trim(row[0]);
String unicode = codePointsToString(unified.split(" "));
EmojiChar echar = _findByCodePoint(unicode);
if (echar != null)
{
map(echar, Vendor.DOCOMO, row, 1);
map(echar, Vendor.KDDI, row, 2);
map(echar, Vendor.SOFT_BANK, row, 3);
}
}
private void map(EmojiChar echar, Vendor vendor, String[] row, int index)
{
if (row.length <= index)
{
return;
}
String code = trim(row[index]);
if (!Strings.isNullOrEmpty(code))
{
String raw = codePointToString(code);
echar.map(vendor, code, raw);
VENDORS_INDEX.put(new VendorKey(vendor, raw), echar);
}
}
};
}
private String extractCode(String str)
{
return str.replaceAll("U\\+", "");
}
private List<String> extractList(String list)
{
String[] tmp = list.split(",");
List<String> clean = new ArrayList<String>();
for (String s : tmp)
{
clean.add(s.trim());
}
return clean;
}
private void index(EmojiChar echar)
{
// Index by code points (raw characters)
RAW_INDEX.put(echar.getRaw(), echar);
// Index by hex code
HEX_INDEX.put(echar.getCode(), echar);
// Index by annotations
for (String annotation : echar.getAnnotations())
{
ANNOTATIONS_INDEX.put(annotation, echar);
}
}
private void load(String path, LineProcessor<Void> processor) throws IOException
{
InputStream in = Emoji.class.getResourceAsStream(path);
Preconditions.checkNotNull(in, "%s not found in the classpath!", path);
InputStreamReader isr = new InputStreamReader(in, UTF8);
BufferedReader br = new BufferedReader(isr);
CharStreams.readLines(br, processor);
}
private void loadData() throws IOException
{
load(DB_EMOJI_DATA, emojiDataProcessor());
load(DB_EMOJI_SOURCES, emojiSourcesProcessor());
}
private String[] parseRemaining(String in)
{
String[] res = new String[4];
String[] fp = in.split("#", 2);
// sources
res[0] = trim(fp[0]);
// v.g. 'V1.1 (☻) black smiling face'
Pattern expr = Pattern.compile("(V\\d+\\.\\d+)\\s\\((.+)\\)\\s(.+)");
Matcher matcher = expr.matcher(trim(fp[1]));
if (matcher.matches())
{
// version
res[1] = matcher.group(1);
// char
res[2] = matcher.group(2);
// name
res[3] = matcher.group(3);
} else
{
throw new RuntimeException("Error loading: " + in);
}
return res;
}
private String trim(String str)
{
if (Strings.isNullOrEmpty(str))
return str;
return str.replaceAll("\\s+", " ").trim();
}
private static class LazyHolder
{
private static final Emoji INSTANCE = new Emoji();
}
private abstract class StreamLineProcessor implements LineProcessor<Void>
{
@Override
public Void getResult()
{
return null;
}
public boolean processLine(String line) throws IOException
{
if (Strings.isNullOrEmpty(line) || line.indexOf('#') == 0)
{
return true;
}
consumeLine(line);
return true;
}
abstract protected void consumeLine(String line);;
}
private static class VendorKey
{
private final Vendor vendor;
private final String code;
public VendorKey(Vendor vendor, String code)
{
this.vendor = vendor;
this.code = code;
}
@Override
public boolean equals(Object obj)
{
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
VendorKey other = (VendorKey) obj;
return Objects.equal(vendor, other.vendor)
&& Objects.equal(code, other.code);
}
@Override
public int hashCode()
{
return Objects.hashCode(vendor, code);
}
}
}