/*
* eXist Open Source Native XML Database
* Copyright (C) 2001-06 Wolfgang M. Meier
* meier@ifs.tu-darmstadt.de
* http://exist.sourceforge.net
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* $Id$
*/
package org.exist.dom;
import org.exist.EXistException;
import org.exist.storage.BrokerPool;
import org.exist.storage.ElementValue;
import org.exist.storage.io.VariableByteInput;
import org.exist.storage.io.VariableByteInputStream;
import org.exist.storage.io.VariableByteOutputStream;
import org.exist.util.Configuration;
import org.exist.util.hashtable.Int2ObjectHashMap;
import org.exist.util.hashtable.Object2IntHashMap;
import org.w3c.dom.Attr;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import java.io.*;
import java.util.Iterator;
/**
* Maintains a global symbol table shared by a database instance. The symbol
* table maps namespace URIs and node names to unique, numeric ids. Internally,
* the db does not store node QNames in clear text. Instead, it uses the numeric ids
* maintained here.
*
* The global SymbolTable singleton can be retrieved from {@link org.exist.storage.BrokerPool#getSymbols()}.
* It is saved into the database file "symbols.dbx".
*
* @author wolf
*
*/
public class SymbolTable {
public static final String FILE_NAME = "symbols.dbx";
public final static short FILE_FORMAT_VERSION_ID = 7;
public static int LENGTH_LOCAL_NAME = 2; //sizeof short
public static int LENGTH_NS_URI = 2; //sizeof short
/** Maps local node names to an integer id */
protected Object2IntHashMap nameSymbols = new Object2IntHashMap(200);
/** Maps int ids to local node names */
protected Int2ObjectHashMap names = new Int2ObjectHashMap(200);
/** Maps namespace URIs to an integer id */
protected Object2IntHashMap nsSymbols = new Object2IntHashMap(200);
/** Maps int ids to namespace URIs */
protected Int2ObjectHashMap namespaces = new Int2ObjectHashMap(200);
/**
* Contains default prefix-to-namespace mappings. For convenience, eXist tracks
* the first prefix-to-namespace mapping it finds in a document. If an undefined prefix
* is found in a query, the query engine will first look up the prefix in this table before
* throwing an error.
*/
protected Object2IntHashMap defaultMappings = new Object2IntHashMap(200);
/**
* Temporary name pool to share QName instances during indexing.
*/
protected QNamePool namePool = new QNamePool();
protected Object2IntHashMap mimeTypeByName = new Object2IntHashMap(32);
protected Int2ObjectHashMap mimeTypeById = new Int2ObjectHashMap(32);
/** contains the next local name id to be used */
protected short max = 0;
/** contains the next namespace URI id to be used */
protected short nsMax = 0;
/** set to true if the symbol table needs to be saved */
protected boolean changed = false;
/** the underlying symbols.dbx file */
protected File file;
public SymbolTable(BrokerPool pool, Configuration config)
throws EXistException {
String dataDir = (String) config.getProperty(BrokerPool.PROPERTY_DATA_DIR);
file = new File(dataDir + File.separatorChar + getFileName());
if (!file.canRead()) {
saveSymbols();
} else
loadSymbols();
}
public static String getFileName() {
return FILE_NAME;
}
/**
* Retrieve a shared QName instance from the temporary pool.
*
* TODO: make the namePool thread-local to avoid synchronization.
*
* @param namespaceURI
* @param localName
* @param prefix
*/
public synchronized QName getQName(short type, String namespaceURI, String localName, String prefix) {
byte itype = type == Node.ATTRIBUTE_NODE ? ElementValue.ATTRIBUTE : ElementValue.ELEMENT;
QName qn = namePool.get(itype, namespaceURI, localName, prefix);
if (qn == null) {
qn = namePool.add(itype, namespaceURI, localName, prefix);
}
return qn;
}
/**
* Return a unique id for the local node name of the specified element.
*
* @param element
*/
public synchronized short getSymbol(Element element) {
short id = (short) nameSymbols.get(element.getLocalName());
if (id != -1)
return id;
id = ++max;
nameSymbols.put(element.getLocalName(), id);
names.put(id, element.getLocalName());
changed = true;
// remember the prefix=namespace mapping for querying
String prefix = element.getPrefix();
if (prefix != null
&& prefix.length() > 0
&& (!defaultMappings.containsKey(prefix))) {
final short nsId = getNSSymbol(element.getNamespaceURI());
defaultMappings.put(prefix, nsId);
}
return id;
}
/**
* Return a unique id for the local node name of the specified attribute.
*
* @param attr
*/
public synchronized short getSymbol(Attr attr) {
final String key = '@' + attr.getLocalName();
short id = (short) nameSymbols.get(key);
if (id != -1)
return id;
id = ++max;
nameSymbols.put(key, id);
names.put(id, attr.getLocalName());
changed = true;
// remember the prefix=namespace mapping for querying
String prefix = attr.getPrefix();
if (prefix != null
&& prefix.length() > 0
&& (!defaultMappings.containsKey(prefix))) {
final short nsId = getNSSymbol(attr.getNamespaceURI());
defaultMappings.put(prefix, nsId);
}
return id;
}
/**
* Returns a unique id for the specified local name. If the name is
* the local name of an attribute, it should start with a '@' character.
*
* @param name
*/
public synchronized short getSymbol(String name) {
if (name.length() == 0) // DW: can cause NPE
throw new IllegalArgumentException("name is empty");
short id = (short) nameSymbols.get(name);
if (id != -1)
return id;
id = ++max;
nameSymbols.put(name, id);
names.put(id, name);
changed = true;
return id;
}
/**
* Returns a unique id for the specified namespace URI.
*
* @param ns
*/
public synchronized short getNSSymbol(String ns) {
if (ns == null || ns.length() == 0) {
return 0;
}
short id = (short) nsSymbols.get(ns);
if (id != -1)
return id;
id = ++nsMax;
nsSymbols.put(ns, id);
namespaces.put(id, ns);
changed = true;
return id;
}
/**
* Returns the namespace URI registered for the id or null
* if the namespace URI is not known. Returns the empty string
* if the namespace is empty.
*
* @param id
*/
public synchronized String getNamespace(short id) {
return id == 0 ? "" : (String) namespaces.get(id);
}
/**
* Returns true if the symbol table needs to be saved
* to persistent storage.
*
*/
public synchronized boolean hasChanged() {
return changed;
}
/**
* Returns the local name registered for the id or
* null if the name is not known.
*
* @param id
*/
public synchronized String getName(short id) {
return (String) names.get(id);
}
/**
* Returns a namespace URI for the given prefix if there's
* a default mapping.
*
* @param prefix
*/
public synchronized String getDefaultNamespace(String prefix) {
if (defaultMappings.containsKey(prefix))
return getNamespace((short)defaultMappings.get(prefix));
return null;
}
/**
* Returns a list of default prefixes registered.
*
*/
public synchronized String[] defaultPrefixList() {
String[] prefixes = new String[defaultMappings.size()];
int i = 0;
for (Iterator j = defaultMappings.iterator(); j.hasNext(); i++)
prefixes[i] = (String) j.next();
return prefixes;
}
public synchronized int getMimeTypeId(String mimeType) {
int id = mimeTypeByName.get(mimeType);
if (id == -1) {
int maxId = 0;
for (Iterator i = mimeTypeById.iterator(); i.hasNext(); ) {
Integer val = (Integer) i.next();
maxId = Math.max(maxId, val.intValue());
}
id = ++maxId;
mimeTypeByName.put(mimeType, id);
mimeTypeById.put(id, mimeType);
changed = true;
}
return id;
}
public synchronized String getMimeType(int id) {
return (String) mimeTypeById.get(id);
}
/**
* Write the symbol table to persistent storage.
*
* @param ostream
* @throws IOException
*/
public synchronized void write(final VariableByteOutputStream ostream)
throws IOException {
ostream.writeFixedInt(FILE_FORMAT_VERSION_ID);
ostream.writeShort(max);
ostream.writeShort(nsMax);
ostream.writeInt(nameSymbols.size());
for (Iterator i = nameSymbols.iterator(); i.hasNext();) {
final String entry = (String) i.next();
ostream.writeUTF(entry);
short id = (short) nameSymbols.get(entry);
if(id < 0)
Thread.dumpStack();
ostream.writeShort(id);
}
ostream.writeInt(nsSymbols.size());
for (Iterator i = nsSymbols.iterator(); i.hasNext();) {
final String entry = (String) i.next();
ostream.writeUTF(entry);
short id = (short) nsSymbols.get(entry);
if(id < 0)
Thread.dumpStack();
ostream.writeShort(id);
}
ostream.writeInt(defaultMappings.size());
String prefix;
short nsId;
for (Iterator i = defaultMappings.iterator(); i.hasNext();) {
prefix = (String) i.next();
nsId = (short)defaultMappings.get(prefix);
ostream.writeUTF(prefix);
ostream.writeShort(nsId);
}
ostream.writeInt(mimeTypeByName.size());
String mime;
int mimeId;
for (Iterator i = mimeTypeByName.iterator(); i.hasNext(); ) {
mime = (String) i.next();
mimeId = mimeTypeByName.get(mime);
ostream.writeUTF(mime);
ostream.writeInt(mimeId);
}
changed = false;
}
/**
* Read the symbol table.
*
* @param istream
* @throws IOException
*/
public synchronized void read(VariableByteInput istream) throws IOException {
int magic = istream.readFixedInt();
if (magic != FILE_FORMAT_VERSION_ID)
throw new IOException("Database file symbols.dbx has a storage format incompatible with this " +
"version of eXist. Please do a backup/restore of your data first.");
max = istream.readShort();
nsMax = istream.readShort();
int count = istream.readInt();
String name;
short id;
for (int i = 0; i < count; i++) {
name = istream.readUTF();
id = istream.readShort();
nameSymbols.put(name, id);
if (name.charAt(0) == '@')
names.put(id, name.substring(1));
else
names.put(id, name);
}
count = istream.readInt();
for (int i = 0; i < count; i++) {
name = istream.readUTF();
id = istream.readShort();
nsSymbols.put(name, id);
namespaces.put(id, name);
}
count = istream.readInt();
String prefix;
short nsId;
for (int i = 0; i < count; i++) {
prefix = istream.readUTF();
nsId = istream.readShort();
defaultMappings.put(prefix, nsId);
}
count = istream.readInt();
String mime;
int mimeId;
for (int i = 0; i < count; i++) {
mime = istream.readUTF();
mimeId = istream.readInt();
mimeTypeByName.put(mime, mimeId);
mimeTypeById.put(mimeId, mime);
}
changed = false;
}
public File getFile() {
return file;
}
/**
* Save the global symbol table. The global symbol table stores QNames and
* namespace/prefix mappings.
*
* @throws EXistException
*/
public void saveSymbols() throws EXistException {
synchronized (this) {
try {
VariableByteOutputStream os = new VariableByteOutputStream(256);
this.write(os);
FileOutputStream fos = new FileOutputStream(this.getFile()
.getAbsolutePath(), false);
fos.write(os.toByteArray());
fos.close();
} catch (FileNotFoundException e) {
throw new EXistException("file not found: "
+ this.getFile().getAbsolutePath());
} catch (IOException e) {
throw new EXistException("io error occurred while creating "
+ this.getFile().getAbsolutePath());
}
}
}
/**
* Read the global symbol table. The global symbol table stores QNames and
* namespace/prefix mappings.
*
* @throws EXistException
*/
public void loadSymbols() throws EXistException {
try {
FileInputStream fis = new FileInputStream(this.getFile());
VariableByteInput is = new VariableByteInputStream(fis);
this.read(is);
fis.close();
} catch (FileNotFoundException e) {
throw new EXistException("could not read "
+ this.getFile().getAbsolutePath());
} catch (IOException e) {
throw new EXistException("io error occurred while reading "
+ this.getFile().getAbsolutePath() + ": " + e.getMessage());
}
}
public void backupSymbolsTo(OutputStream os) throws IOException {
FileInputStream fis = new FileInputStream(this.getFile());
byte[] buf = new byte[1024];
int len;
while ((len = fis.read(buf)) > 0) {
os.write(buf, 0, len);
}
fis.close();
}
public void flush() throws EXistException {
if (hasChanged())
saveSymbols();
}
}