/* * Copyright (C) 2008 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package android.syncml.pim.vcard; import android.syncml.pim.VBuilder; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; import java.util.regex.Pattern; /** * This class is used to parse vcard. Please refer to vCard Specification 2.1 */ public class VCardParser_V21 { /** Store the known-type */ private static final HashSet<String> sKnownTypeSet = new HashSet<String>( Arrays.asList("DOM", "INTL", "POSTAL", "PARCEL", "HOME", "WORK", "PREF", "VOICE", "FAX", "MSG", "CELL", "PAGER", "BBS", "MODEM", "CAR", "ISDN", "VIDEO", "AOL", "APPLELINK", "ATTMAIL", "CIS", "EWORLD", "INTERNET", "IBMMAIL", "MCIMAIL", "POWERSHARE", "PRODIGY", "TLX", "X400", "GIF", "CGM", "WMF", "BMP", "MET", "PMB", "DIB", "PICT", "TIFF", "PDF", "PS", "JPEG", "QTIME", "MPEG", "MPEG2", "AVI", "WAVE", "AIFF", "PCM", "X509", "PGP")); /** Store the known-value */ private static final HashSet<String> sKnownValueSet = new HashSet<String>( Arrays.asList("INLINE", "URL", "CONTENT-ID", "CID")); /** Store the property name available in vCard 2.1 */ // NICKNAME is not supported in vCard 2.1, but some vCard may contain. private static final HashSet<String> sAvailablePropertyNameV21 = new HashSet<String>(Arrays.asList( "LOGO", "PHOTO", "LABEL", "FN", "TITLE", "SOUND", "VERSION", "TEL", "EMAIL", "TZ", "GEO", "NOTE", "URL", "BDAY", "ROLE", "REV", "UID", "KEY", "MAILER", "NICKNAME")); // Though vCard 2.1 specification does not allow "B" encoding, some data may have it. // We allow it for safety... private static final HashSet<String> sAvailableEncodingV21 = new HashSet<String>(Arrays.asList( "7BIT", "8BIT", "QUOTED-PRINTABLE", "BASE64", "B")); // Used only for parsing END:VCARD. private String mPreviousLine; /** The builder to build parsed data */ protected VBuilder mBuilder = null; /** The encoding type */ protected String mEncoding = null; protected final String sDefaultEncoding = "8BIT"; // Should not directly read a line from this. Use getLine() instead. protected BufferedReader mReader; /** * Create a new VCard parser. */ public VCardParser_V21() { super(); } /** * Parse the file at the given position * vcard_file = [wsls] vcard [wsls] */ protected void parseVCardFile() throws IOException, VCardException { while (parseOneVCard()) { } } protected String getVersion() { return "2.1"; } /** * @return true when the propertyName is a valid property name. */ protected boolean isValidPropertyName(String propertyName) { return sAvailablePropertyNameV21.contains(propertyName.toUpperCase()); } /** * @return true when the encoding is a valid encoding. */ protected boolean isValidEncoding(String encoding) { return sAvailableEncodingV21.contains(encoding.toUpperCase()); } /** * @return String. It may be null, or its length may be 0 * @throws IOException */ protected String getLine() throws IOException { return mReader.readLine(); } /** * @return String with it's length > 0 * @throws IOException * @throws VCardException when the stream reached end of line */ protected String getNonEmptyLine() throws IOException, VCardException { String line; while (true) { line = getLine(); if (line == null) { throw new VCardException("Reached end of buffer."); } else if (line.trim().length() > 0) { return line; } } } /** * vcard = "BEGIN" [ws] ":" [ws] "VCARD" [ws] 1*CRLF * items *CRLF * "END" [ws] ":" [ws] "VCARD" */ private boolean parseOneVCard() throws IOException, VCardException { if (!readBeginVCard()) { return false; } parseItems(); readEndVCard(); return true; } /** * @return True when successful. False when reaching the end of line * @throws IOException * @throws VCardException */ protected boolean readBeginVCard() throws IOException, VCardException { String line; while (true) { line = getLine(); if (line == null) { return false; } else if (line.trim().length() > 0) { break; } } String[] strArray = line.split(":", 2); // Though vCard specification does not allow lower cases, // some data may have them, so we allow it. if (!(strArray.length == 2 && strArray[0].trim().equalsIgnoreCase("BEGIN") && strArray[1].trim().equalsIgnoreCase("VCARD"))) { throw new VCardException("BEGIN:VCARD != \"" + line + "\""); } if (mBuilder != null) { mBuilder.startRecord("VCARD"); } return true; } protected void readEndVCard() throws VCardException { // Though vCard specification does not allow lower cases, // some data may have them, so we allow it. String[] strArray = mPreviousLine.split(":", 2); if (!(strArray.length == 2 && strArray[0].trim().equalsIgnoreCase("END") && strArray[1].trim().equalsIgnoreCase("VCARD"))) { throw new VCardException("END:VCARD != \"" + mPreviousLine + "\""); } if (mBuilder != null) { mBuilder.endRecord(); } } /** * items = *CRLF item * / item */ protected void parseItems() throws IOException, VCardException { /* items *CRLF item / item */ boolean ended = false; if (mBuilder != null) { mBuilder.startProperty(); } try { ended = parseItem(); } finally { if (mBuilder != null) { mBuilder.endProperty(); } } while (!ended) { // follow VCARD ,it wont reach endProperty if (mBuilder != null) { mBuilder.startProperty(); } try { ended = parseItem(); } finally { if (mBuilder != null) { mBuilder.endProperty(); } } } } /** * item = [groups "."] name [params] ":" value CRLF * / [groups "."] "ADR" [params] ":" addressparts CRLF * / [groups "."] "ORG" [params] ":" orgparts CRLF * / [groups "."] "N" [params] ":" nameparts CRLF * / [groups "."] "AGENT" [params] ":" vcard CRLF */ protected boolean parseItem() throws IOException, VCardException { mEncoding = sDefaultEncoding; // params = ";" [ws] paramlist String line = getNonEmptyLine(); String[] strArray = line.split(":", 2); if (strArray.length < 2) { throw new VCardException("Invalid line(\":\" does not exist): " + line); } String propertyValue = strArray[1]; String[] groupNameParamsArray = strArray[0].split(";"); String groupAndName = groupNameParamsArray[0].trim(); String[] groupNameArray = groupAndName.split("\\."); int length = groupNameArray.length; String propertyName = groupNameArray[length - 1]; if (mBuilder != null) { mBuilder.propertyName(propertyName); for (int i = 0; i < length - 1; i++) { mBuilder.propertyGroup(groupNameArray[i]); } } if (propertyName.equalsIgnoreCase("END")) { mPreviousLine = line; return true; } length = groupNameParamsArray.length; for (int i = 1; i < length; i++) { handleParams(groupNameParamsArray[i]); } if (isValidPropertyName(propertyName) || propertyName.startsWith("X-")) { if (propertyName.equals("VERSION") && !propertyValue.equals(getVersion())) { throw new VCardVersionException("Incompatible version: " + propertyValue + " != " + getVersion()); } handlePropertyValue(propertyName, propertyValue); return false; } else if (propertyName.equals("ADR") || propertyName.equals("ORG") || propertyName.equals("N")) { handleMultiplePropertyValue(propertyName, propertyValue); return false; } else if (propertyName.equals("AGENT")) { handleAgent(propertyValue); return false; } throw new VCardException("Unknown property name: \"" + propertyName + "\""); } /** * params = ";" [ws] paramlist * paramlist = paramlist [ws] ";" [ws] param * / param * param = "TYPE" [ws] "=" [ws] ptypeval * / "VALUE" [ws] "=" [ws] pvalueval * / "ENCODING" [ws] "=" [ws] pencodingval * / "CHARSET" [ws] "=" [ws] charsetval * / "LANGUAGE" [ws] "=" [ws] langval * / "X-" word [ws] "=" [ws] word * / knowntype */ protected void handleParams(String params) throws VCardException { String[] strArray = params.split("=", 2); if (strArray.length == 2) { String paramName = strArray[0].trim(); String paramValue = strArray[1].trim(); if (paramName.equals("TYPE")) { handleType(paramValue); } else if (paramName.equals("VALUE")) { handleValue(paramValue); } else if (paramName.equals("ENCODING")) { handleEncoding(paramValue); } else if (paramName.equals("CHARSET")) { handleCharset(paramValue); } else if (paramName.equals("LANGUAGE")) { handleLanguage(paramValue); } else if (paramName.startsWith("X-")) { handleAnyParam(paramName, paramValue); } else { throw new VCardException("Unknown type \"" + paramName + "\""); } } else { handleType(strArray[0]); } } /** * typeval = knowntype / "X-" word */ protected void handleType(String ptypeval) throws VCardException { if (sKnownTypeSet.contains(ptypeval.toUpperCase()) || ptypeval.startsWith("X-")) { if (mBuilder != null) { mBuilder.propertyParamType("TYPE"); mBuilder.propertyParamValue(ptypeval.toUpperCase()); } } else { throw new VCardException("Unknown type: \"" + ptypeval + "\""); } } /** * pvalueval = "INLINE" / "URL" / "CONTENT-ID" / "CID" / "X-" word */ protected void handleValue(String pvalueval) throws VCardException { if (sKnownValueSet.contains(pvalueval.toUpperCase()) || pvalueval.startsWith("X-")) { if (mBuilder != null) { mBuilder.propertyParamType("VALUE"); mBuilder.propertyParamValue(pvalueval); } } else { throw new VCardException("Unknown value \"" + pvalueval + "\""); } } /** * pencodingval = "7BIT" / "8BIT" / "QUOTED-PRINTABLE" / "BASE64" / "X-" word */ protected void handleEncoding(String pencodingval) throws VCardException { if (isValidEncoding(pencodingval) || pencodingval.startsWith("X-")) { if (mBuilder != null) { mBuilder.propertyParamType("ENCODING"); mBuilder.propertyParamValue(pencodingval); } mEncoding = pencodingval; } else { throw new VCardException("Unknown encoding \"" + pencodingval + "\""); } } /** * vCard specification only allows us-ascii and iso-8859-xxx (See RFC 1521), * but some vCard contains other charset, so we allow them. */ protected void handleCharset(String charsetval) { if (mBuilder != null) { mBuilder.propertyParamType("CHARSET"); mBuilder.propertyParamValue(charsetval); } } /** * See also Section 7.1 of RFC 1521 */ protected void handleLanguage(String langval) throws VCardException { String[] strArray = langval.split("-"); if (strArray.length != 2) { throw new VCardException("Invalid Language: \"" + langval + "\""); } String tmp = strArray[0]; int length = tmp.length(); for (int i = 0; i < length; i++) { if (!isLetter(tmp.charAt(i))) { throw new VCardException("Invalid Language: \"" + langval + "\""); } } tmp = strArray[1]; length = tmp.length(); for (int i = 0; i < length; i++) { if (!isLetter(tmp.charAt(i))) { throw new VCardException("Invalid Language: \"" + langval + "\""); } } if (mBuilder != null) { mBuilder.propertyParamType("LANGUAGE"); mBuilder.propertyParamValue(langval); } } /** * Mainly for "X-" type. This accepts any kind of type without check. */ protected void handleAnyParam(String paramName, String paramValue) { if (mBuilder != null) { mBuilder.propertyParamType(paramName); mBuilder.propertyParamValue(paramValue); } } protected void handlePropertyValue( String propertyName, String propertyValue) throws IOException, VCardException { if (mEncoding == null || mEncoding.equalsIgnoreCase("7BIT") || mEncoding.equalsIgnoreCase("8BIT") || mEncoding.toUpperCase().startsWith("X-")) { if (mBuilder != null) { ArrayList<String> v = new ArrayList<String>(); v.add(maybeUnescapeText(propertyValue)); mBuilder.propertyValues(v); } } else if (mEncoding.equalsIgnoreCase("QUOTED-PRINTABLE")) { String result = getQuotedPrintable(propertyValue); if (mBuilder != null) { ArrayList<String> v = new ArrayList<String>(); v.add(result); mBuilder.propertyValues(v); } } else if (mEncoding.equalsIgnoreCase("BASE64") || mEncoding.equalsIgnoreCase("B")) { String result = getBase64(propertyValue); if (mBuilder != null) { ArrayList<String> v = new ArrayList<String>(); v.add(result); mBuilder.propertyValues(v); } } else { throw new VCardException("Unknown encoding: \"" + mEncoding + "\""); } } protected String getQuotedPrintable(String firstString) throws IOException, VCardException { // Specifically, there may be some padding between = and CRLF. // See the following: // // qp-line := *(qp-segment transport-padding CRLF) // qp-part transport-padding // qp-segment := qp-section *(SPACE / TAB) "=" // ; Maximum length of 76 characters // // e.g. (from RFC 2045) // Now's the time = // for all folk to come= // to the aid of their country. if (firstString.trim().endsWith("=")) { // remove "transport-padding" int pos = firstString.length() - 1; while(firstString.charAt(pos) != '=') { } StringBuilder builder = new StringBuilder(); builder.append(firstString.substring(0, pos + 1)); builder.append("\r\n"); String line; while (true) { line = getLine(); if (line == null) { throw new VCardException( "File ended during parsing quoted-printable String"); } if (line.trim().endsWith("=")) { // remove "transport-padding" pos = line.length() - 1; while(line.charAt(pos) != '=') { } builder.append(line.substring(0, pos + 1)); builder.append("\r\n"); } else { builder.append(line); break; } } return builder.toString(); } else { return firstString; } } protected String getBase64(String firstString) throws IOException, VCardException { StringBuilder builder = new StringBuilder(); builder.append(firstString); while (true) { String line = getLine(); if (line == null) { throw new VCardException( "File ended during parsing BASE64 binary"); } if (line.length() == 0) { break; } builder.append(line); } return builder.toString(); } /** * Mainly for "ADR", "ORG", and "N" * We do not care the number of strnosemi here. * * addressparts = 0*6(strnosemi ";") strnosemi * ; PO Box, Extended Addr, Street, Locality, Region, * Postal Code, Country Name * orgparts = *(strnosemi ";") strnosemi * ; First is Organization Name, * remainder are Organization Units. * nameparts = 0*4(strnosemi ";") strnosemi * ; Family, Given, Middle, Prefix, Suffix. * ; Example:Public;John;Q.;Reverend Dr.;III, Esq. * strnosemi = *(*nonsemi ("\;" / "\" CRLF)) *nonsemi * ; To include a semicolon in this string, it must be escaped * ; with a "\" character. * * We are not sure whether we should add "\" CRLF to each value. * For now, we exclude them. */ protected void handleMultiplePropertyValue( String propertyName, String propertyValue) throws IOException, VCardException { // vCard 2.1 does not allow QUOTED-PRINTABLE here, but some data have it. if (mEncoding.equalsIgnoreCase("QUOTED-PRINTABLE")) { propertyValue = getQuotedPrintable(propertyValue); } if (propertyValue.endsWith("\\")) { StringBuilder builder = new StringBuilder(); // builder.append(propertyValue); builder.append(propertyValue.substring(0, propertyValue.length() - 1)); try { String line; while (true) { line = getNonEmptyLine(); // builder.append("\r\n"); // builder.append(line); if (!line.endsWith("\\")) { builder.append(line); break; } else { builder.append(line.substring(0, line.length() - 1)); } } } catch (IOException e) { throw new VCardException( "IOException is throw during reading propertyValue" + e); } // Now, propertyValue may contain "\r\n" propertyValue = builder.toString(); } if (mBuilder != null) { // In String#replaceAll() and Pattern class, "\\\\" means single slash. final String IMPOSSIBLE_STRING = "\0"; // First replace two backslashes with impossible strings. propertyValue = propertyValue.replaceAll("\\\\\\\\", IMPOSSIBLE_STRING); // Now, split propertyValue with ; whose previous char is not back slash. Pattern pattern = Pattern.compile("(?<!\\\\);"); // TODO: limit should be set in accordance with propertyName? String[] strArray = pattern.split(propertyValue, -1); ArrayList<String> arrayList = new ArrayList<String>(); for (String str : strArray) { // Replace impossible strings with original two backslashes arrayList.add( unescapeText(str.replaceAll(IMPOSSIBLE_STRING, "\\\\\\\\"))); } mBuilder.propertyValues(arrayList); } } /** * vCard 2.1 specifies AGENT allows one vcard entry. It is not encoded at all. */ protected void handleAgent(String propertyValue) throws IOException, VCardException { String[] strArray = propertyValue.split(":", 2); if (!(strArray.length == 2 || strArray[0].trim().equalsIgnoreCase("BEGIN") && strArray[1].trim().equalsIgnoreCase("VCARD"))) { throw new VCardException("BEGIN:VCARD != \"" + propertyValue + "\""); } parseItems(); readEndVCard(); } /** * For vCard 3.0. */ protected String maybeUnescapeText(String text) { return text; } /** * Convert escaped text into unescaped text. */ protected String unescapeText(String text) { // Original vCard 2.1 specification does not allow transformation // "\:" -> ":", "\," -> ",", and "\\" -> "\", but previous implementation of // this class allowed them, so keep it as is. // In String#replaceAll(), "\\\\" means single slash. return text.replaceAll("\\\\;", ";") .replaceAll("\\\\:", ":") .replaceAll("\\\\,", ",") .replaceAll("\\\\\\\\", "\\\\"); } /** * Parse the given stream and constructs VCardDataBuilder object. * Note that vCard 2.1 specification allows "CHARSET" parameter, and some career sets * local encoding to it. For example, Japanese phone career uses Shift_JIS, which * is not formally allowed in vCard specification. * As a result, there is a case where the encoding given here does not do well with * the "CHARSET". * * In order to avoid such cases, It may be fine to use "ISO-8859-1" as an encoding, * and to encode each localized String afterward. * * RFC 2426 "recommends" (not forces) to use UTF-8, so it may be OK to use * UTF-8 as an encoding when parsing vCard 3.0. But note that some Japanese * phone uses Shift_JIS as a charset (e.g. W61SH), and another uses * "CHARSET=SHIFT_JIS", which is explicitly prohibited in vCard 3.0 specification * (e.g. W53K). * * @param is * The source to parse. * @param charset * The charset. * @param builder * The v builder which used to construct data. * @return Return true for success, otherwise false. * @throws IOException */ public boolean parse(InputStream is, String charset, VBuilder builder) throws IOException, VCardException { // TODO: If we really need to allow only CRLF as line break, // we will have to develop our own BufferedReader(). mReader = new BufferedReader(new InputStreamReader(is, charset)); mBuilder = builder; if (mBuilder != null) { mBuilder.start(); } parseVCardFile(); if (mBuilder != null) { mBuilder.end(); } return true; } private boolean isLetter(char ch) { if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { return true; } return false; } }