/* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this * particular file as subject to the "Classpath" exception as provided * by Oracle in the LICENSE file that accompanied this code. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. */ package xmlkit; // -*- mode: java; indent-tabs-mode: nil -*- import java.util.*; /** * A List of Strings each representing a word or token. * This object itself is a CharSequence whose characters consist * of all the tokens, separated by blanks. * * @author jrose */ public class TokenList extends ArrayList<String> implements CharSequence { protected String separator; protected boolean frozen; public TokenList() { this.separator = " "; } public TokenList(Collection<? extends Object> tokens) { super(tokens.size()); this.separator = " "; addTokens(tokens); } public TokenList(Collection<? extends Object> tokens, String separator) { super(tokens.size()); this.separator = separator; addTokens(tokens); } public TokenList(Object[] tokens) { super(tokens.length); this.separator = " "; addTokens(tokens, 0, tokens.length); } public TokenList(Object[] tokens, int beg, int end) { super(end - beg); // capacity this.separator = " "; addTokens(tokens, beg, end); } public TokenList(Object[] tokens, int beg, int end, String separator) { super(end - beg); // capacity this.separator = separator; addTokens(tokens, beg, end); } public TokenList(String tokenStr) { this(tokenStr, " ", false); } public TokenList(String tokenStr, String separator) { this(tokenStr, separator, true); } public TokenList(String tokenStr, String separator, boolean allowNulls) { super(tokenStr.length() / 5); this.separator = separator; addTokens(tokenStr, allowNulls); } static public final TokenList EMPTY; static { TokenList tl = new TokenList(new Object[0]); tl.freeze(); EMPTY = tl; } public void freeze() { if (!frozen) { for (ListIterator<String> i = listIterator(); i.hasNext();) { i.set(i.next().toString()); } trimToSize(); frozen = true; } } public boolean isFrozen() { return frozen; } void checkNotFrozen() { if (isFrozen()) { throw new UnsupportedOperationException("cannot modify frozen TokenList"); } } public String getSeparator() { return separator; } public void setSeparator(String separator) { checkNotFrozen(); this.separator = separator; } /// All normal List mutators must check the frozen bit: public String set(int index, String o) { checkNotFrozen(); return super.set(index, o); } public boolean add(String o) { checkNotFrozen(); return super.add(o); } public void add(int index, String o) { checkNotFrozen(); super.add(index, o); } public boolean addAll(Collection<? extends String> c) { checkNotFrozen(); return super.addAll(c); } public boolean addAll(int index, Collection<? extends String> c) { checkNotFrozen(); return super.addAll(index, c); } public boolean remove(Object o) { checkNotFrozen(); return super.remove(o); } public String remove(int index) { checkNotFrozen(); return super.remove(index); } public void clear() { checkNotFrozen(); super.clear(); } /** Add a collection of tokens to the list, applying toString to each. */ public boolean addTokens(Collection<? extends Object> tokens) { // Note that if this sequence is empty, no tokens are added. // This is different from adding a null string, which is // a single token. boolean added = false; for (Object token : tokens) { add(token.toString()); added = true; } return added; } public boolean addTokens(Object[] tokens, int beg, int end) { boolean added = false; for (int i = beg; i < end; i++) { add(tokens[i].toString()); added = true; } return added; } public boolean addTokens(String tokenStr) { return addTokens(tokenStr, false); } public boolean addTokens(String tokenStr, boolean allowNulls) { boolean added = false; int pos = 0, limit = tokenStr.length(), sep = limit; while (pos < limit) { sep = tokenStr.indexOf(separator, pos); if (sep < 0) { sep = limit; } if (sep == pos) { if (allowNulls) { add(""); added = true; } pos += separator.length(); } else { add(tokenStr.substring(pos, sep)); added = true; pos = sep + separator.length(); } } if (allowNulls && sep < limit) { // Input was something like "tok1 tok2 ". add(""); added = true; } return added; } public boolean addToken(Object token) { return add(token.toString()); } /** Format the token string, using quotes and escapes. * Quotes must contain an odd number of 3 or more elements, * a sequence of begin/end quote pairs, plus a superquote. * For each token, the first begin/end pair is used for * which the end quote does not occur in the token. * If the token contains all end quotes, the last pair * is used, with all occurrences of the end quote replaced * by the superquote. If an end quote is the empty string, * the separator is used instead. */ public String format(String separator, String[] quotes) { return ""; //@@ } protected int[] lengths; protected static final int MODC = 0, HINT = 1, BEG0 = 2, END0 = 3; // Layout of lengths: // { modCount, hint, -1==beg[0], end[0]==beg[1], ..., length } // Note that each beg[i]..end[i] span includes a leading separator, // which is not part of the corresponding token. protected final CharSequence getCS(int i) { return (CharSequence) get(i); } // Produce (and cache) an table of indexes for each token. protected int[] getLengths() { int[] lengths = this.lengths; ; int sepLength = separator.length(); if (lengths == null || lengths[MODC] != modCount) { int size = this.size(); lengths = new int[END0 + size + (size == 0 ? 1 : 0)]; lengths[MODC] = modCount; int end = -sepLength; // cancels leading separator lengths[BEG0] = end; for (int i = 0; i < size; i++) { end += sepLength; // count leading separator end += getCS(i).length(); lengths[END0 + i] = end; } this.lengths = lengths; } return lengths; } public int length() { int[] lengths = getLengths(); return lengths[lengths.length - 1]; } // Which token does the given index belong to? protected int which(int i) { if (i < 0) { return -1; } int[] lengths = getLengths(); for (int hint = lengths[HINT];; hint = 0) { for (int wh = hint; wh < lengths.length - END0; wh++) { int beg = lengths[BEG0 + wh]; int end = lengths[END0 + wh]; if (i >= beg && i < end) { lengths[HINT] = wh; return wh; } } if (hint == 0) { return size(); // end of the line } } } public char charAt(int i) { if (i < 0) { return "".charAt(i); } int wh = which(i); int beg = lengths[BEG0 + wh]; int j = i - beg; int sepLength = separator.length(); if (j < sepLength) { return separator.charAt(j); } return getCS(wh).charAt(j - sepLength); } public CharSequence subSequence(int beg, int end) { //System.out.println("i: "+beg+".."+end); if (beg == end) { return ""; } if (beg < 0) { charAt(beg); // raise exception } if (beg > end) { charAt(-1); // raise exception } int begWh = which(beg); int endWh = which(end); if (endWh == size() || end == lengths[BEG0 + endWh]) { --endWh; } //System.out.println("wh: "+begWh+".."+endWh); int begBase = lengths[BEG0 + begWh]; int endBase = lengths[BEG0 + endWh]; int sepLength = separator.length(); int begFrag = 0; if ((beg - begBase) < sepLength) { begFrag = sepLength - (beg - begBase); beg += begFrag; } int endFrag = 0; if ((end - endBase) < sepLength) { endFrag = (end - endBase); end = endBase; endBase = lengths[BEG0 + --endWh]; } if (false) { System.out.print("beg[wbf]end[wbf]"); int pr[] = {begWh, begBase, begFrag, beg, endWh, endBase, endFrag, end}; for (int k = 0; k < pr.length; k++) { System.out.print((k == 4 ? " " : " ") + (pr[k])); } System.out.println(); } if (begFrag > 0 && (end + endFrag) - begBase <= sepLength) { // Special case: Slice the separator. beg -= begFrag; end += endFrag; return separator.substring(beg - begBase, end - begBase); } if (begWh == endWh && (begFrag + endFrag) == 0) { // Special case: Slice a single token. return getCS(begWh).subSequence(beg - begBase - sepLength, end - endBase - sepLength); } Object[] subTokens = new Object[1 + (endWh - begWh) + 1]; int fillp = 0; if (begFrag == sepLength) { // Insert a leading null token to force an initial separator. subTokens[fillp++] = ""; begFrag = 0; } for (int wh = begWh; wh <= endWh; wh++) { CharSequence cs = getCS(wh); if (wh == begWh || wh == endWh) { // Slice it. int csBeg = (wh == begWh) ? (beg - begBase) - sepLength : 0; int csEnd = (wh == endWh) ? (end - endBase) - sepLength : cs.length(); cs = cs.subSequence(csBeg, csEnd); if (begFrag > 0 && wh == begWh) { cs = separator.substring(sepLength - begFrag) + cs; } if (endFrag > 0 && wh == endWh) { cs = cs.toString() + separator.substring(0, endFrag); } } subTokens[fillp++] = cs; } return new TokenList(subTokens, 0, fillp, separator); } /** Returns the concatenation of all tokens, * with intervening separator characters. */ public String toString() { StringBuilder buf = new StringBuilder(length()); int size = this.size(); for (int i = 0; i < size; i++) { if (i > 0) { buf.append(separator); } buf.append(get(i)); } return buf.toString(); } /*---- TESTING CODE ---- public static void main(String[] av) { if (av.length == 0) av = new String[]{"one", "2", "", "four"}; TokenList ts = new TokenList(); final String SEP = ", "; ts.setSeparator(SEP); for (int i = -1; i < av.length; i++) { if (i >= 0) ts.addToken(av[i]); { TokenList tsCopy = new TokenList(ts.toString(), SEP); if (!tsCopy.equals(ts)) { tsCopy.setSeparator(")("); System.out.println("!= ("+tsCopy+")"); } } { TokenList tsBar = new TokenList(ts, "|"); tsBar.add(0, "["); tsBar.add("]"); System.out.println(tsBar); } if (false) { int[] ls = ts.getLengths(); System.out.println("ts: "+ts); System.out.print("ls: {"); for (int j = 0; j < ls.length; j++) System.out.print(" "+ls[j]); System.out.println(" }"); } assert0(ts.size() == i+1); assert0(i < 0 || ts.get(i) == av[i]); String tss = ts.toString(); int tslen = tss.length(); assert0(ts.length() == tss.length()); for (int n = 0; n < tslen; n++) { assert0(ts.charAt(n) == tss.charAt(n)); } for (int j = 0; j < tslen; j++) { for (int k = tslen; k >= j; k--) { CharSequence sub = ts.subSequence(j, k); //System.out.println("|"+sub+"|"); assert0(sub.toString().equals(tss.substring(j, k))); } } } } static void assert0(boolean z) { if (!z) throw new RuntimeException("assert failed"); } // ---- TESTING CODE ----*/ }