/* * Copyright (C) 2007 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package android.syncml.pim; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.UnsupportedEncodingException; /** * This interface is used to parse the V format files, such as VCard & VCal * */ abstract public class VParser { /** * The buffer used to store input stream */ protected String mBuffer = null; /** The builder to build parsed data */ protected VBuilder mBuilder = null; /** The encoding type */ protected String mEncoding = null; protected final int PARSE_ERROR = -1; protected final String mDefaultEncoding = "8BIT"; /** * If offset reach '\r\n' return 2. Else return PARSE_ERROR. */ protected int parseCrlf(int offset) { if (offset >= mBuffer.length()) return PARSE_ERROR; char ch = mBuffer.charAt(offset); if (ch == '\r') { offset++; ch = mBuffer.charAt(offset); if (ch == '\n') { return 2; } } return PARSE_ERROR; } /** * Parse the given stream * * @param is * The source to parse. * @param encoding * The encoding type. * @param builder * The v builder which used to construct data. * @return Return true for success, otherwise false. * @throws IOException */ public boolean parse(InputStream is, String encoding, VBuilder builder) throws IOException { setInputStream(is, encoding); mBuilder = builder; int ret = 0, offset = 0, sum = 0; if (mBuilder != null) { mBuilder.start(); } for (;;) { ret = parseVFile(offset); // for next property length if (PARSE_ERROR == ret) { break; } else { offset += ret; sum += ret; } } if (mBuilder != null) { mBuilder.end(); } return (mBuffer.length() == sum); } /** * Copy the content of input stream and filter the "folding" */ protected void setInputStream(InputStream is, String encoding) throws UnsupportedEncodingException { InputStreamReader reader = new InputStreamReader(is, encoding); StringBuilder b = new StringBuilder(); int ch; try { while ((ch = reader.read()) != -1) { if (ch == '\r') { ch = reader.read(); if (ch == '\n') { ch = reader.read(); if (ch == ' ' || ch == '\t') { b.append((char) ch); continue; } b.append("\r\n"); if (ch == -1) { break; } } else { b.append("\r"); } } b.append((char) ch); } mBuffer = b.toString(); } catch (Exception e) { return; } return; } /** * abstract function, waiting implement.<br> * analyse from offset, return the length of consumed property. */ abstract protected int parseVFile(int offset); /** * From offset, jump ' ', '\t', '\r\n' sequence, return the length of jump.<br> * 1 * (SPACE / HTAB / CRLF) */ protected int parseWsls(int offset) { int ret = 0, sum = 0; try { char ch = mBuffer.charAt(offset); if (ch == ' ' || ch == '\t') { sum++; offset++; } else if ((ret = parseCrlf(offset)) != PARSE_ERROR) { offset += ret; sum += ret; } else { return PARSE_ERROR; } for (;;) { ch = mBuffer.charAt(offset); if (ch == ' ' || ch == '\t') { sum++; offset++; } else if ((ret = parseCrlf(offset)) != PARSE_ERROR) { offset += ret; sum += ret; } else { break; } } } catch (IndexOutOfBoundsException e) { ; } if (sum > 0) return sum; return PARSE_ERROR; } /** * To determine if the given string equals to the start of the current * string. * * @param offset * The offset in buffer of current string * @param tar * The given string. * @param ignoreCase * To determine case sensitive or not. * @return The consumed characters, otherwise return PARSE_ERROR. */ protected int parseString(int offset, final String tar, boolean ignoreCase) { int sum = 0; if (tar == null) { return PARSE_ERROR; } if (ignoreCase) { int len = tar.length(); try { if (mBuffer.substring(offset, offset + len).equalsIgnoreCase( tar)) { sum = len; } else { return PARSE_ERROR; } } catch (IndexOutOfBoundsException e) { return PARSE_ERROR; } } else { /* case sensitive */ if (mBuffer.startsWith(tar, offset)) { sum = tar.length(); } else { return PARSE_ERROR; } } return sum; } /** * Skip the white space in string. */ protected int removeWs(int offset) { if (offset >= mBuffer.length()) return PARSE_ERROR; int sum = 0; char ch; while ((ch = mBuffer.charAt(offset)) == ' ' || ch == '\t') { offset++; sum++; } return sum; } /** * "X-" word, and its value. Return consumed length. */ protected int parseXWord(int offset) { int ret = 0, sum = 0; ret = parseString(offset, "X-", true); if (PARSE_ERROR == ret) return PARSE_ERROR; offset += ret; sum += ret; ret = parseWord(offset); if (PARSE_ERROR == ret) { return PARSE_ERROR; } sum += ret; return sum; } /** * From offset, parse as :mEncoding ?= 7bit / 8bit / quoted-printable / * base64 */ protected int parseValue(int offset) { int ret = 0; if (mEncoding == null || mEncoding.equalsIgnoreCase("7BIT") || mEncoding.equalsIgnoreCase("8BIT") || mEncoding.toUpperCase().startsWith("X-")) { ret = parse8bit(offset); if (ret != PARSE_ERROR) { return ret; } return PARSE_ERROR; } if (mEncoding.equalsIgnoreCase("QUOTED-PRINTABLE")) { ret = parseQuotedPrintable(offset); if (ret != PARSE_ERROR) { return ret; } return PARSE_ERROR; } if (mEncoding.equalsIgnoreCase("BASE64")) { ret = parseBase64(offset); if (ret != PARSE_ERROR) { return ret; } return PARSE_ERROR; } return PARSE_ERROR; } /** * Refer to RFC 1521, 8bit text */ protected int parse8bit(int offset) { int index = 0; index = mBuffer.substring(offset).indexOf("\r\n"); if (index == -1) return PARSE_ERROR; else return index; } /** * Refer to RFC 1521, quoted printable text ([*(ptext / SPACE / TAB) ptext] * ["="] CRLF) */ protected int parseQuotedPrintable(int offset) { int ret = 0, sum = 0; ret = removeWs(offset); offset += ret; sum += ret; for (;;) { ret = parsePtext(offset); if (PARSE_ERROR == ret) break; offset += ret; sum += ret; ret = removeWs(offset); offset += ret; sum += ret; } ret = parseString(offset, "=", false); if (ret != PARSE_ERROR) { // offset += ret; sum += ret; } return sum; } /** * return 1 or 3 <any ASCII character except "=", SPACE, or TAB> */ protected int parsePtext(int offset) { int ret = 0; try { char ch = mBuffer.charAt(offset); if (isPrintable(ch) && ch != '=' && ch != ' ' && ch != '\t') { return 1; } } catch (IndexOutOfBoundsException e) { return PARSE_ERROR; } ret = parseOctet(offset); if (ret != PARSE_ERROR) { return ret; } return PARSE_ERROR; } /** * start with "=" two of (DIGIT / "A" / "B" / "C" / "D" / "E" / "F") <br> * So maybe return 3. */ protected int parseOctet(int offset) { int ret = 0, sum = 0; ret = parseString(offset, "=", false); if (PARSE_ERROR == ret) return PARSE_ERROR; offset += ret; sum += ret; try { int ch = mBuffer.charAt(offset); if (ch == ' ' || ch == '\t') return ++sum; if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'F')) { offset++; sum++; ch = mBuffer.charAt(offset); if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'F')) { sum++; return sum; } } } catch (IndexOutOfBoundsException e) { ; } return PARSE_ERROR; } /** * Refer to RFC 1521, base64 text The end of the text is marked with two * CRLF sequences */ protected int parseBase64(int offset) { int sum = 0; try { for (;;) { char ch; ch = mBuffer.charAt(offset); if (ch == '\r') { int ret = parseString(offset, "\r\n\r\n", false); sum += ret; break; } else { /* ignore none base64 character */ sum++; offset++; } } } catch (IndexOutOfBoundsException e) { return PARSE_ERROR; } sum -= 2;/* leave one CRLF to parse the end of this property */ return sum; } /** * Any printable ASCII sequence except [ ]=:.,; */ protected int parseWord(int offset) { int sum = 0; try { for (;;) { char ch = mBuffer.charAt(offset); if (!isPrintable(ch)) break; if (ch == ' ' || ch == '=' || ch == ':' || ch == '.' || ch == ',' || ch == ';') break; if (ch == '\\') { ch = mBuffer.charAt(offset + 1); if (ch == ';') { offset++; sum++; } } offset++; sum++; } } catch (IndexOutOfBoundsException e) { ; } if (sum == 0) return PARSE_ERROR; return sum; } /** * If it is a letter or digit. */ protected boolean isLetterOrDigit(char ch) { if (ch >= '0' && ch <= '9') return true; if (ch >= 'a' && ch <= 'z') return true; if (ch >= 'A' && ch <= 'Z') return true; return false; } /** * If it is printable in ASCII */ protected boolean isPrintable(char ch) { if (ch >= ' ' && ch <= '~') return true; return false; } /** * If it is a letter. */ protected boolean isLetter(char ch) { if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { return true; } return false; } /** * Get a word from current position. */ protected String getWord(int offset) { StringBuilder word = new StringBuilder(); try { for (;;) { char ch = mBuffer.charAt(offset); if (isLetterOrDigit(ch) || ch == '-') { word.append(ch); offset++; } else { break; } } } catch (IndexOutOfBoundsException e) { ; } return word.toString(); } /** * If is: "INLINE" / "URL" / "CONTENT-ID" / "CID" / "X-" word */ protected int parsePValueVal(int offset) { int ret = 0, sum = 0; ret = parseString(offset, "INLINE", true); if (ret != PARSE_ERROR) { sum += ret; return sum; } ret = parseString(offset, "URL", true); if (ret != PARSE_ERROR) { sum += ret; return sum; } ret = parseString(offset, "CONTENT-ID", true); if (ret != PARSE_ERROR) { sum += ret; return sum; } ret = parseString(offset, "CID", true); if (ret != PARSE_ERROR) { sum += ret; return sum; } ret = parseString(offset, "INLINE", true); if (ret != PARSE_ERROR) { sum += ret; return sum; } ret = parseXWord(offset); if (ret != PARSE_ERROR) { sum += ret; return sum; } return PARSE_ERROR; } /** * If is: "7BIT" / "8BIT" / "QUOTED-PRINTABLE" / "BASE64" / "X-" word and * set mEncoding. */ protected int parsePEncodingVal(int offset) { int ret = 0, sum = 0; ret = parseString(offset, "7BIT", true); if (ret != PARSE_ERROR) { mEncoding = "7BIT"; sum += ret; return sum; } ret = parseString(offset, "8BIT", true); if (ret != PARSE_ERROR) { mEncoding = "8BIT"; sum += ret; return sum; } ret = parseString(offset, "QUOTED-PRINTABLE", true); if (ret != PARSE_ERROR) { mEncoding = "QUOTED-PRINTABLE"; sum += ret; return sum; } ret = parseString(offset, "BASE64", true); if (ret != PARSE_ERROR) { mEncoding = "BASE64"; sum += ret; return sum; } ret = parseXWord(offset); if (ret != PARSE_ERROR) { mEncoding = mBuffer.substring(offset).substring(0, ret); sum += ret; return sum; } return PARSE_ERROR; } /** * Refer to RFC1521, section 7.1<br> * If is: "us-ascii" / "iso-8859-xxx" / "X-" word */ protected int parseCharsetVal(int offset) { int ret = 0, sum = 0; ret = parseString(offset, "us-ascii", true); if (ret != PARSE_ERROR) { sum += ret; return sum; } ret = parseString(offset, "iso-8859-1", true); if (ret != PARSE_ERROR) { sum += ret; return sum; } ret = parseString(offset, "iso-8859-2", true); if (ret != PARSE_ERROR) { sum += ret; return sum; } ret = parseString(offset, "iso-8859-3", true); if (ret != PARSE_ERROR) { sum += ret; return sum; } ret = parseString(offset, "iso-8859-4", true); if (ret != PARSE_ERROR) { sum += ret; return sum; } ret = parseString(offset, "iso-8859-5", true); if (ret != PARSE_ERROR) { sum += ret; return sum; } ret = parseString(offset, "iso-8859-6", true); if (ret != PARSE_ERROR) { sum += ret; return sum; } ret = parseString(offset, "iso-8859-7", true); if (ret != PARSE_ERROR) { sum += ret; return sum; } ret = parseString(offset, "iso-8859-8", true); if (ret != PARSE_ERROR) { sum += ret; return sum; } ret = parseString(offset, "iso-8859-9", true); if (ret != PARSE_ERROR) { sum += ret; return sum; } ret = parseXWord(offset); if (ret != PARSE_ERROR) { sum += ret; return sum; } return PARSE_ERROR; } /** * Refer to RFC 1766<br> * like: XXX(sequence letters)-XXX(sequence letters) */ protected int parseLangVal(int offset) { int ret = 0, sum = 0; ret = parseTag(offset); if (PARSE_ERROR == ret) { return PARSE_ERROR; } offset += ret; sum += ret; for (;;) { ret = parseString(offset, "-", false); if (PARSE_ERROR == ret) { break; } offset += ret; sum += ret; ret = parseTag(offset); if (PARSE_ERROR == ret) { break; } offset += ret; sum += ret; } return sum; } /** * From first 8 position, is sequence LETTER. */ protected int parseTag(int offset) { int sum = 0, i = 0; try { for (i = 0; i < 8; i++) { char ch = mBuffer.charAt(offset); if (!isLetter(ch)) { break; } sum++; offset++; } } catch (IndexOutOfBoundsException e) { ; } if (i == 0) { return PARSE_ERROR; } return sum; } }