/*
* eXist Open Source Native XML Database
* Copyright (C) 2010-2014 The eXist Project
* http://exist-db.org
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* $Id$
*/
package org.exist.dom.persistent;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.exist.EXistException;
import org.exist.backup.RawDataBackup;
import org.exist.dom.QName;
import org.exist.storage.BrokerPool;
import org.exist.storage.BrokerPoolService;
import org.exist.storage.BrokerPoolServiceException;
import org.exist.storage.ElementValue;
import org.exist.storage.io.VariableByteInput;
import org.exist.storage.io.VariableByteInputStream;
import org.exist.storage.io.VariableByteOutputStream;
import org.exist.util.Configuration;
import org.exist.util.FileUtils;
import org.exist.util.hashtable.Object2IntHashMap;
import org.w3c.dom.Attr;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.Iterator;
/**
* Maintains a global symbol table shared by a database instance. The symbol
* table maps namespace URIs and node names to unique, numeric ids. Internally,
* the db does not store node QNames in clear text. Instead, it uses the numeric ids
* maintained here.
* <p/>
* The global SymbolTable singleton can be retrieved from {@link org.exist.storage.BrokerPool#getSymbols()}.
* It is saved into the database file "symbols.dbx".
*
* @author wolf
* @author Adam Retter <adam@exist-db.org>
*/
public class SymbolTable implements BrokerPoolService {
/** Logger used for conversion notices and for write errors of the symbol table. */
private static final Logger LOG = LogManager.getLogger(SymbolTable.class);
/** Name of the symbols file; {@link #configure(Configuration)} resolves it against the data directory. */
private static final String FILE_NAME = "symbols.dbx";
/**
 * Id of the current file format. It is written as the first fixed-width int
 * of the file when the whole table is saved and is expected by the loader.
 */
public static final short FILE_FORMAT_VERSION_ID = 8;
/**
 * Id of the previous file format. When the loader finds this id it reads the
 * file with the legacy reader and immediately re-saves it in the current format.
 */
public static final short LEGACY_FILE_FORMAT_VERSION_ID = 7;
/**
 * The kinds of symbols held by the symbol table. Each kind carries the
 * one-byte id that marks its entries in the symbols file.
 */
public enum SymbolType {
    NAME((byte) 0),
    NAMESPACE((byte) 1),
    MIMETYPE((byte) 2);

    /** One-byte id identifying this symbol type. */
    private final byte typeId;

    SymbolType(final byte typeId) {
        this.typeId = typeId;
    }

    /**
     * @return the one-byte id of this symbol type
     */
    public final byte getTypeId() {
        return typeId;
    }

    /**
     * Looks up the symbol type that carries the given id.
     *
     * @param typeId the one-byte type id
     * @return the matching symbol type
     * @throws IllegalArgumentException if no symbol type uses {@code typeId}
     */
    public static SymbolType valueOf(final byte typeId) {
        final SymbolType[] candidates = values();
        for (int i = 0; i < candidates.length; i++) {
            final SymbolType candidate = candidates[i];
            if (candidate.typeId == typeId) {
                return candidate;
            }
        }
        throw new IllegalArgumentException("No such enumerated value for typeId:" + typeId);
    }
}
// Not referenced inside this class; presumably the number of bytes external
// code uses to store a local-name id -- verify against callers.
public static final int LENGTH_LOCAL_NAME = 2; //sizeof short
// Not referenced inside this class; presumably the number of bytes external
// code uses to store a namespace id -- verify against callers.
public static final int LENGTH_NS_URI = 2; //sizeof short
// Prefix put in front of attribute local names before they are looked up,
// so an attribute and an element with the same local name get separate keys.
public static final char ATTR_NAME_PREFIX = '@';
// Local names of elements and attributes (attribute keys carry ATTR_NAME_PREFIX).
protected final SymbolCollection localNameSymbols = new LocalNameSymbolCollection(SymbolType.NAME, 200);
// Namespace URIs.
protected final SymbolCollection namespaceSymbols = new SymbolCollection(SymbolType.NAMESPACE, 200);
// Mime type names.
protected final SymbolCollection mimeTypeSymbols = new SymbolCollection(SymbolType.MIMETYPE, 32);
/**
 * Temporary name pool to share QName instances during indexing.
 */
private final QNamePool namePool = new QNamePool();
/**
 * Set to true when a new symbol has been added; reset to false after the
 * whole table has been written or a legacy file has been read.
 */
private boolean changed = false;
/**
 * The underlying symbols.dbx file; assigned in {@link #configure(Configuration)}.
 */
private Path file;
// Scratch buffer reused for serialising each newly added entry before it is appended to the file.
private final VariableByteOutputStream outBuffer = new VariableByteOutputStream(512);
// Append-mode stream onto the symbols file; opened lazily by getOutputStream().
private OutputStream os = null;
/**
 * Determines the location of the symbols file from the configured data directory.
 *
 * @param configuration the configuration holding the {@link BrokerPool#PROPERTY_DATA_DIR} property
 */
@Override
public void configure(final Configuration configuration) {
    final Object dataDirProperty = configuration.getProperty(BrokerPool.PROPERTY_DATA_DIR);
    final Path dataDir = (Path) dataDirProperty;
    file = dataDir.resolve(FILE_NAME);
}
/**
 * Loads the symbols file when it is readable; otherwise writes a fresh one.
 *
 * @param pool the broker pool this service belongs to (not used here)
 * @throws BrokerPoolServiceException if loading or saving the symbols fails
 */
@Override
public void prepare(final BrokerPool pool) throws BrokerPoolServiceException {
    try {
        final boolean readable = Files.isReadable(file);
        if (readable) {
            loadSymbols();
        } else {
            saveSymbols();
        }
    } catch (final EXistException cause) {
        throw new BrokerPoolServiceException(cause);
    }
}
/**
 * @return the name of the symbols file, without any directory part
 */
public static final String getFileName() {
    return SymbolTable.FILE_NAME;
}
/**
 * Retrieve a shared QName instance from the temporary pool, adding a new
 * instance to the pool when none is present yet.
 *
 * TODO: make the namePool thread-local to avoid synchronization.
 *
 * @param type         DOM node type; {@link Node#ATTRIBUTE_NODE} selects an attribute name,
 *                     every other value selects an element name
 * @param namespaceURI the namespace URI of the name
 * @param localName    the local part of the name
 * @param prefix       the namespace prefix of the name
 * @return the pooled QName
 */
public synchronized QName getQName(final short type, final String namespaceURI, final String localName, final String prefix) {
    final byte nameType;
    if (type == Node.ATTRIBUTE_NODE) {
        nameType = ElementValue.ATTRIBUTE;
    } else {
        nameType = ElementValue.ELEMENT;
    }

    final QName pooled = namePool.get(nameType, namespaceURI, localName, prefix);
    if (pooled != null) {
        return pooled;
    }
    return namePool.add(nameType, namespaceURI, localName, prefix);
}
/**
 * Return a unique id for the local node name of the specified element.
 * A new id is allocated if the name has not been seen before.
 *
 * @param element the element whose local name is looked up
 * @return the id of the local name, narrowed to a short
 */
//TODO the (short) cast is nasty - should consider using either short or int end to end
public synchronized short getSymbol(final Element element) {
    final String localName = element.getLocalName();
    final int id = localNameSymbols.getId(localName);
    return (short) id;
}
/**
 * Return a unique id for the local node name of the specified attribute.
 * The lookup key is the attribute's local name prefixed with
 * {@link #ATTR_NAME_PREFIX}.
 *
 * @param attr the attribute whose local name is looked up
 * @return the id of the attribute name, narrowed to a short
 */
//TODO the (short) cast is nasty - should consider using either short or int end to end
public synchronized short getSymbol(final Attr attr) {
    final String localName = attr.getLocalName();
    final int id = localNameSymbols.getId(ATTR_NAME_PREFIX + localName);
    return (short) id;
}
/**
 * Returns a unique id for the specified local name. If the name is
 * the local name of an attribute, it should start with a '@' character.
 *
 * @param name the local name; must not be empty
 * @return the id of the name, narrowed to a short
 * @throws IllegalArgumentException if {@code name} is the empty string
 */
//TODO the (short) cast is nasty - should consider using either short or int end to end
public synchronized short getSymbol(final String name) {
    if (name.isEmpty()) {
        throw new IllegalArgumentException("name is empty");
    }
    final int id = localNameSymbols.getId(name);
    return (short) id;
}
/**
 * Returns a unique id for the specified namespace URI.
 *
 * @param ns the namespace URI, may be null
 * @return 0 when {@code ns} is null or empty, otherwise the id of the
 *         namespace URI narrowed to a short
 */
//TODO the (short) cast is nasty - should consider using either short or int end to end
public synchronized short getNSSymbol(final String ns) {
    final boolean noNamespace = (ns == null) || ns.isEmpty();
    if (noNamespace) {
        return 0;
    }
    final int id = namespaceSymbols.getId(ns);
    return (short) id;
}
/**
 * Returns a unique id for the specified mime type, allocating one if needed.
 *
 * @param mimeType the mime type name
 * @return the id of the mime type
 */
public synchronized int getMimeTypeId(final String mimeType) {
    final int id = mimeTypeSymbols.getId(mimeType);
    return id;
}
/**
 * Tells whether symbols have been added since the table was last
 * written in full or read from a legacy file.
 *
 * @return true if the symbol table needs to be saved to persistent storage
 */
public synchronized boolean hasChanged() {
    return this.changed;
}
/**
 * Returns the local name registered for the id. Attribute names are
 * returned without the leading '@'.
 *
 * @param id the id of the local name
 * @return the local name; the empty string when the id is not positive or
 *         lies beyond the highest id handed out so far
 */
public synchronized String getName(final short id) {
    final String localName = localNameSymbols.getSymbol(id);
    return localName;
}
/**
 * Returns the mime type registered for the id.
 *
 * @param id the id of the mime type
 * @return the mime type name; the empty string when the id is not positive
 *         or lies beyond the highest id handed out so far
 */
public synchronized String getMimeType(final int id) {
    final String mimeType = mimeTypeSymbols.getSymbol(id);
    return mimeType;
}
/**
 * Returns the namespace URI registered for the id.
 *
 * @param id the id of the namespace URI
 * @return the namespace URI; the empty string when the id is not positive
 *         (id 0 stands for "no namespace") or lies beyond the highest id
 *         handed out so far
 */
public synchronized String getNamespace(final short id) {
    final String namespaceUri = namespaceSymbols.getSymbol(id);
    return namespaceUri;
}
/**
 * Serialises the complete symbol table: first the file format id, then
 * the local names, the namespaces and the mime types, in that order.
 * Clears the "changed" flag afterwards.
 *
 * @param os the stream receiving the serialised table
 * @throws IOException if writing to the stream fails
 */
private synchronized void writeAll(final VariableByteOutputStream os) throws IOException {
    os.writeFixedInt(FILE_FORMAT_VERSION_ID);

    final SymbolCollection[] inWriteOrder = { localNameSymbols, namespaceSymbols, mimeTypeSymbols };
    for (final SymbolCollection symbols : inWriteOrder) {
        symbols.write(os);
    }

    changed = false;
}
/**
 * Read the symbol table from disk. All three collections are cleared
 * first, then entries are consumed until the input reports that no more
 * bytes are available.
 *
 * @param is the input positioned just behind the file format id
 * @throws IOException if reading an entry fails
 */
protected final void read(final VariableByteInput is) throws IOException {
    final SymbolCollection[] collections = { localNameSymbols, namespaceSymbols, mimeTypeSymbols };
    for (final SymbolCollection symbols : collections) {
        symbols.clear();
    }

    boolean moreData = is.available() > 0;
    while (moreData) {
        readEntry(is);
        moreData = is.available() > 0;
    }
}
/**
 * Reads a single entry (type byte, id, name) and registers it with the
 * collection that matches its type.
 *
 * @param is the input positioned at the start of an entry
 * @throws IOException if the input cannot be read, or if the entry carries
 *                     a type byte that does not belong to any {@link SymbolType}
 */
private void readEntry(final VariableByteInput is) throws IOException {
    final byte type = is.readByte();
    final int id = is.readInt();
    final String key = is.readUTF();

    // An unknown type byte means the file is damaged. Report it as an I/O
    // problem so that it takes the same error path as any other read failure
    // instead of escaping as an unchecked exception.
    final SymbolType symbolType;
    try {
        symbolType = SymbolType.valueOf(type);
    } catch (final IllegalArgumentException e) {
        throw new IOException("Symbol table contains an entry with unknown symbol type id "
                + type + " (entry id: " + id + ")", e);
    }

    //symbol types can be written in any order by SymbolCollection.getById()->SymbolCollection.write()
    switch (symbolType) {
        case NAME:
            localNameSymbols.add(id, key);
            break;
        case NAMESPACE:
            namespaceSymbols.add(id, key);
            break;
        case MIMETYPE:
            mimeTypeSymbols.add(id, key);
            break;
        // no default clause: every SymbolType constant is handled above
    }
}
/**
 * Legacy method: read a symbol table written by a previous eXist version.
 * The legacy layout consists of two obsolete shorts followed by four
 * counted sections: local names, namespaces, default mappings (skipped)
 * and mime types.
 *
 * @param istream the input positioned just behind the file format id
 * @throws IOException if reading from the input fails
 */
protected final void readLegacy(final VariableByteInput istream) throws IOException {
    // former "max" and "nsMax" values, not needed anymore
    istream.readShort();
    istream.readShort();

    // section 1: local names, each stored as name followed by a short id
    final int localNameCount = istream.readInt();
    for (int n = 0; n < localNameCount; n++) {
        final String localName = istream.readUTF();
        final short localNameId = istream.readShort();
        localNameSymbols.add(localNameId, localName);
    }

    // section 2: namespaces, each stored as URI followed by a short id
    final int namespaceCount = istream.readInt();
    for (int n = 0; n < namespaceCount; n++) {
        final String namespaceUri = istream.readUTF();
        final short namespaceId = istream.readShort();
        namespaceSymbols.add(namespaceId, namespaceUri);
    }

    // section 3: default mappings have been removed;
    // they are still consumed for backwards compatibility
    final int defaultMappingCount = istream.readInt();
    for (int n = 0; n < defaultMappingCount; n++) {
        istream.readUTF();
        istream.readShort();
    }

    // section 4: mime types, each stored as name followed by an int id
    final int mimeTypeCount = istream.readInt();
    for (int n = 0; n < mimeTypeCount; n++) {
        final String mimeType = istream.readUTF();
        final int mimeTypeId = istream.readInt();
        mimeTypeSymbols.add(mimeTypeId, mimeType);
    }

    changed = false;
}
/**
 * @return the path of the underlying symbols file, as set by {@link #configure(Configuration)}
 */
public final Path getFile() {
    return this.file;
}
/**
 * Save the entire symbol table. Will only be called when initializing an
 * empty database or when upgrading an older dbx file. The table is first
 * serialised into memory and then written to the symbols file in one go.
 *
 * @throws EXistException if the symbols file cannot be created or written
 */
private void saveSymbols() throws EXistException {
    try (final VariableByteOutputStream buffer = new VariableByteOutputStream(256);
         final OutputStream fileOut = Files.newOutputStream(getFile())) {
        writeAll(buffer);
        final byte[] serialized = buffer.toByteArray();
        fileOut.write(serialized);
    } catch (final FileNotFoundException notFound) {
        final String location = this.getFile().toAbsolutePath().toString();
        throw new EXistException("File not found: " + location, notFound);
    } catch (final IOException ioFailure) {
        final String location = this.getFile().toAbsolutePath().toString();
        throw new EXistException("IO error occurred while creating " + location, ioFailure);
    }
}
/**
 * Read the global symbol table. The global symbol table stores QNames and
 * namespace/prefix mappings.
 *
 * The first fixed-width int of the file identifies its format:
 * the current format is read directly, the legacy format is read and then
 * immediately re-saved in the current format, and any other value aborts
 * loading.
 *
 * @throws EXistException if the file cannot be read or has an unsupported format id
 */
private synchronized void loadSymbols() throws EXistException {
    try (final InputStream fis = Files.newInputStream(getFile())) {
        final VariableByteInput is = new VariableByteInputStream(fis);
        final int magic = is.readFixedInt();

        if (magic == FILE_FORMAT_VERSION_ID) {
            read(is);
        } else if (magic == LEGACY_FILE_FORMAT_VERSION_ID) {
            LOG.info("Converting legacy symbols.dbx to new format...");
            readLegacy(is);
            saveSymbols();
        } else {
            // note the space after "older": the two fragments used to run together
            throw new EXistException("Symbol table was created by an older " +
                "or newer version of eXist" + " (file id: " + magic + "). " +
                "To avoid damage, the database will stop.");
        }
    } catch (final FileNotFoundException e) {
        throw new EXistException("Could not read " + this.getFile().toAbsolutePath().toString(), e);
    } catch (final IOException e) {
        throw new EXistException("IO error occurred while reading "
            + this.getFile().toAbsolutePath().toString() + ": " + e.getMessage(), e);
    }
}
/**
 * Copies the raw bytes of the symbols file to the given stream.
 * The stream is not closed by this method.
 *
 * @param os the stream receiving the file content
 * @throws IOException if reading the file or writing to the stream fails
 */
public void backupSymbolsTo(final OutputStream os) throws IOException {
    final Path symbolsFile = getFile();
    Files.copy(symbolsFile, os);
}
/**
 * Adds the symbols file to a raw data backup as an entry named after the file.
 * The entry is always closed, even when copying fails.
 *
 * @param backup the backup receiving the entry
 * @throws IOException if the entry cannot be created or written
 */
public void backupToArchive(final RawDataBackup backup) throws IOException {
    // The entry stream is deliberately NOT managed by try-with-resources:
    // closing it would close the entire backup, so only the entry is closed.
    try {
        final String entryName = FileUtils.fileName(getFile());
        final OutputStream entryStream = backup.newEntry(entryName);
        backupSymbolsTo(entryStream);
    } finally {
        backup.closeEntry();
    }
}
/**
 * Currently a no-op.
 *
 * @throws EXistException never thrown by the current implementation
 */
public void flush() throws EXistException {
    // Nothing to do ? -pb
    // (new symbols are appended to the file and flushed as soon as they are added)
}
/**
 * Returns the stream used to append new entries to the symbols file,
 * opening it in append mode on first use.
 *
 * @return the append-mode stream onto the symbols file
 * @throws IOException if the file cannot be opened for appending
 */
private OutputStream getOutputStream() throws IOException {
    if (os != null) {
        return os;
    }
    os = Files.newOutputStream(getFile(), StandardOpenOption.APPEND);
    return os;
}
/**
 * Closes the append-mode stream onto the symbols file, if one is open.
 * The cached stream reference is dropped even when closing fails, so that
 * a later write re-opens the file instead of re-using a closed stream.
 * Calling this method more than once is harmless.
 *
 * @throws IOException if closing the stream fails
 */
public void close() throws IOException {
    if (os != null) {
        try {
            os.close();
        } finally {
            // forget the stream so getOutputStream() opens a fresh one on next use
            os = null;
        }
    }
}
/**
 * Represents a distinct collection of symbols of a single {@link SymbolType}.
 * Names are mapped to ids and, for reverse lookup, ids back to names.
 * Ids handed out by this collection start at 1.
 *
 * @author wolf
 * @author Adam Retter <adam@exist-db.org>
 */
protected class SymbolCollection {

    private final SymbolType symbolType;

    /**
     * Capacity requested at construction; reused when the collection is cleared.
     */
    private final int initialSize;

    /**
     * Maps symbol names to an integer id (persisted to disk)
     */
    private Object2IntHashMap<String> symbolsByName;

    /**
     * Maps int ids to symbol names (transient map for fast reverse lookup of symbolsByName)
     */
    private String[] symbolsById;

    /**
     * contains the offset of the last symbol, i.e. the highest id in use
     */
    protected short offset = 0;

    /**
     * @param symbolType  the type of the symbols held by this collection
     * @param initialSize the initial capacity of both lookup structures
     */
    public SymbolCollection(final SymbolType symbolType, final int initialSize) {
        this.symbolType = symbolType;
        this.initialSize = initialSize;
        symbolsByName = new Object2IntHashMap<>(initialSize);
        symbolsById = new String[initialSize];
    }

    private SymbolType getSymbolType() {
        return symbolType;
    }

    /**
     * Registers a symbol under a given id in both lookup structures,
     * without writing anything to disk.
     *
     * @param id   the id of the symbol
     * @param name the symbol
     * @return the id that was passed in
     */
    private int add(final int id, final String name) {
        symbolsById = ensureCapacity(symbolsById, id);
        addSymbolById(id, name);
        addSymbolByName(name, id);
        if(id > offset) {
            offset = (short) id;
        }
        return id;
    }

    protected void addSymbolById(final int id, final String name) {
        symbolsById[id] = name;
    }

    protected void addSymbolByName(final String name, final int id) {
        symbolsByName.put(name, id);
    }

    /**
     * Makes sure that index {@code max} is addressable in the array.
     *
     * @param array the current array
     * @param max   the highest index that must be valid
     * @return {@code array} itself if it is already large enough, otherwise
     *         a larger copy
     */
    protected String[] ensureCapacity(final String[] array, final int max) {
        if(array.length <= max) {
            // Grow by half, but always to at least max + 1 slots: for max < 2
            // growing by half alone would not make index max addressable.
            final int newLength = Math.max(max + 1, max + (max / 2));
            final String[] newArray = new String[newLength];
            System.arraycopy(array, 0, newArray, 0, array.length);
            return newArray;
        }
        return array;
    }

    /**
     * Removes all symbols. Both lookup structures are replaced by empty
     * ones, so that no stale name or id survives.
     */
    private void clear() {
        offset = 0;
        symbolsByName = new Object2IntHashMap<>(initialSize);
        symbolsById = new String[initialSize];
    }

    /**
     * Returns the symbol registered under an id.
     *
     * @param id the id of the symbol
     * @return the symbol, or the empty string if the id is not positive or
     *         greater than the highest id in use
     */
    public synchronized String getSymbol(final int id) {
        if(id <= 0 || id > offset) {
            return ""; //TODO : raise an exception ? -pb
        }
        return symbolsById[id];
    }

    /**
     * Returns the id of a symbol. An unknown symbol is given the next free
     * id, appended to the symbols file and the table is marked as changed.
     *
     * @param name the symbol
     * @return the id of the symbol, or -1 if the symbol is unknown and no
     *         further ids are available
     */
    public synchronized int getId(final String name) {
        int id = symbolsByName.get(name);
        if(id != -1) {
            return id;
        }
        // symbol space exceeded. return -1 to indicate.
        if(offset == Short.MAX_VALUE) {
            return -1;
        }
        //we use "++offset" here instead of "offset++",
        //because the system expects id's to start at 1, not 0
        id = add(++offset, name);
        write(id, name);
        changed = true;
        return id;
    }

    /**
     * Writes every symbol of this collection to the stream.
     * A symbol with a negative id is logged as an error but still written.
     *
     * @param os the stream receiving the entries
     * @throws IOException if writing to the stream fails
     */
    protected final void write(final VariableByteOutputStream os) throws IOException {
        for(final Iterator<String> i = symbolsByName.iterator(); i.hasNext(); ) {
            final String symbol = i.next();
            final int id = symbolsByName.get(symbol);
            if(id < 0) {
                LOG.error("Symbol Table: symbolTypeId=" + getSymbolType() +
                    ", symbol='" + symbol + "', id=" + id);
                //TODO : raise exception ? -pb
            }
            writeEntry(id, symbol, os);
        }
    }

    /**
     * Append a new entry to the .dbx file and flush it immediately.
     * I/O failures are logged and otherwise ignored.
     *
     * @param id  the id of the new symbol
     * @param key the new symbol
     */
    private void write(final int id, final String key) {
        outBuffer.clear();
        try {
            writeEntry(id, key, outBuffer);
            getOutputStream().write(outBuffer.toByteArray());
            getOutputStream().flush();
        } catch(final FileNotFoundException e) {
            LOG.error("Symbol table: file not found!", e);
            //TODO :throw exception -pb
        } catch(final IOException e) {
            LOG.error("Symbol table: caught exception while writing!", e);
            //TODO : throw exception -pb
        }
    }

    /**
     * Serialises one entry as type byte, id and name.
     */
    private void writeEntry(final int id, final String key, final VariableByteOutputStream os) throws IOException {
        os.writeByte(getSymbolType().getTypeId());
        os.writeInt(id);
        os.writeUTF(key);
    }
}
/**
 * Symbol collection for local names, shared by element names and
 * attribute names.
 *
 * Attribute names are treated slightly differently from element names:
 * in the persistent name-to-id map the attribute name carries a leading
 * '@' so that it cannot clash with an element of the same name, whereas
 * the in-memory id-to-name map stores the attribute name without that
 * prefix.
 *
 * @author Adam Retter <adam@exist-db.org>
 */
private class LocalNameSymbolCollection extends SymbolCollection {

    public LocalNameSymbolCollection(final SymbolType symbolType, final int initialSize) {
        super(symbolType, initialSize);
    }

    /**
     * Stores the name for reverse lookup, dropping a leading '@' so that
     * attribute names can be returned without further processing.
     *
     * @param id   the id of the symbol
     * @param name the symbol as used in the name-to-id map; must not be empty
     */
    @Override
    protected void addSymbolById(final int id, final String name) {
        final boolean attributeName = name.charAt(0) == ATTR_NAME_PREFIX;
        final String storedName = attributeName ? name.substring(1) : name;
        super.addSymbolById(id, storedName);
    }
}
}