// Tables.java
// (C) 2010 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 14.01.2010 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

package net.yacy.kelondro.blob;

import java.io.File;
import java.io.IOException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;

import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.util.ByteArray;
import net.yacy.cora.util.ByteBuffer;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.cora.util.LookAheadIterator;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.data.ymark.YMarkUtil;
import net.yacy.kelondro.util.FileUtils;

/**
 * A collection of named tables stored in one directory. Every table is backed by
 * a {@link BEncodedHeap} file named <code>&lt;tablename&gt;.bheap</code> which is
 * opened lazily on first access. Iterating a <code>Tables</code> object yields the
 * names of the tables.
 */
public class Tables implements Iterable<String> {

    // regular expression fragments used by getByIndex(); they are combined to
    // (?:^|.*,)((?:v1|v2|...)(?:,.*|$)){n} where n is the number of values
    private final static String p1 = "(?:^|.*,)";
    private final static String p2 = "((?:";
    private final static String p3 = ")(?:,.*|$)){";

    // suffix appended to a table name to get the name of its column index table
    private final static String CIDX = "_cidx";
    // getIndex(String) refuses to create an index for tables with fewer rows than this
    private final static int NOINDEX = 50000;
    // getIndex(String) creates a RAM index below this row count and a BLOB index otherwise
    private final static int RAMINDEX = 100000;

    private static final String suffix = ".bheap";
    private static final String system_table_pkcounter = "pkcounter";
    private static final String system_table_pkcounter_counterName = "pk";

    private final File location;
    private final ConcurrentHashMap<String, BEncodedHeap> tables;
    private final ConcurrentHashMap<String, TablesColumnIndex> cidx;
    private int keymaxlen;

    // use our own formatter to prevent concurrency locks with other processes
    private final static GenericFormatter my_SHORT_MILSEC_FORMATTER = new GenericFormatter(GenericFormatter.FORMAT_SHORT_MILSEC, 1);

    /**
     * Create a table collection in the given directory. The directory is created
     * if it does not exist. Table files are not opened here (lazy initialization);
     * only zero-length table files are removed.
     * @param location the directory that holds the table files
     * @param keymaxlen the key length passed to every heap that is opened
     */
    public Tables(final File location, final int keymaxlen) {
        this.location = new File(location.getAbsolutePath());
        if (!this.location.exists()) this.location.mkdirs();
        this.keymaxlen = keymaxlen;
        this.tables = new ConcurrentHashMap<String, BEncodedHeap>();
        // lazy initialization: do not open the database files here
        // File.list() returns null if the path is not a directory or an I/O error occurs
        final String[] files = this.location.list();
        if (files != null) {
            for (final String f : files) {
                if (!f.endsWith(suffix)) continue;
                final File file = new File(this.location, f);
                if (file.length() == 0) file.delete();
            }
        }
        this.cidx = new ConcurrentHashMap<String, TablesColumnIndex>();
    }

    /**
     * Create a new column index of the requested type. The index is not
     * registered in this object.
     * @param tableName the table the index belongs to
     * @param indexType RAM for an in-memory index, BLOB for an index backed by the heap <code>tableName + "_cidx"</code>
     * @return a new index object
     * @throws TableColumnIndexException if the index type is not supported
     * @throws IOException if the heap for a BLOB index cannot be opened
     */
    public TablesColumnIndex getIndex(final String tableName, TablesColumnIndex.INDEXTYPE indexType) throws TableColumnIndexException, IOException {
        final TablesColumnIndex index;
        switch (indexType) {
            case RAM:
                index = new TablesColumnRAMIndex();
                break;
            case BLOB:
                final String idx_table = tableName + CIDX;
                final BEncodedHeap bheap = this.getHeap(idx_table);
                index = new TablesColumnBLOBIndex(bheap);
                break;
            default:
                throw new TableColumnIndexException("Unsupported TableColumnIndex: " + indexType.name());
        }
        return index;
    }

    /**
     * Get the registered column index of a table or create and register a new one.
     * The kind of a new index depends on the table size: a RAM index for tables
     * with less than RAMINDEX rows, otherwise a BLOB index (with a RAM index as
     * fallback if the index heap cannot be opened).
     * @param tableName
     * @return the index for the table
     * @throws TableColumnIndexException if no index is registered and the table has less than NOINDEX rows
     */
    public TablesColumnIndex getIndex(final String tableName) throws TableColumnIndexException {
        // return an existing index
        final TablesColumnIndex tci = this.cidx.get(tableName);
        if (tci != null) {
            return tci;
        }

        // create a new index
        int size;
        try {
            size = this.size(tableName);
        } catch (final IOException e) {
            size = 0;
        }

        if (size < NOINDEX) {
            throw new TableColumnIndexException("TableColumnIndex not available for tables with less than " + NOINDEX + " rows: " + tableName);
        }

        final TablesColumnIndex index;
        if (size < RAMINDEX) {
            index = new TablesColumnRAMIndex();
        } else {
            final String idx_table = tableName + CIDX;
            BEncodedHeap bheap;
            try {
                bheap = this.getHeap(idx_table);
            } catch (final IOException e) {
                bheap = null;
                ConcurrentLog.logException(e);
            }
            if (bheap != null) {
                index = new TablesColumnBLOBIndex(bheap);
            } else {
                index = new TablesColumnRAMIndex();
            }
        }
        this.cidx.put(tableName, index);
        return index;
    }

    /**
     * @param tableName
     * @return true if an index object is registered for the table
     */
    public boolean hasIndex(final String tableName) {
        return this.cidx.containsKey(tableName);
    }

    /**
     * Check if a column of a table is indexed. If an index object is registered
     * for the table, that object is asked; otherwise the index table
     * <code>tableName + "_cidx"</code> is checked for the key id of the column.
     * @param tableName
     * @param columnName
     * @return true if an index for the column exists
     */
    public boolean hasIndex(final String tableName, final String columnName) {
        final TablesColumnIndex tci = this.cidx.get(tableName);
        if (tci != null) {
            return tci.hasIndex(columnName);
        }
        try {
            if (this.has(tableName + CIDX, YMarkUtil.getKeyId(columnName))) {
                return true;
            }
        } catch (final IOException e) {
            ConcurrentLog.logException(e);
        }
        return false;
    }

    /**
     * Select the rows of a table where a column contains all of the given values.
     * If the column is indexed, the index is used and the primary key sets of all
     * values are intersected. Otherwise the table is scanned: with a non-empty
     * separator a case-insensitive pattern is matched against the column, with an
     * empty separator the column must be equal to <code>whereValue</code>.
     * Exceptions are logged and result in an empty iterator.
     * @param table the table name
     * @param whereColumn the column to be searched
     * @param separator regular expression that splits whereValue into single values; if empty, whereValue is one single value
     * @param whereValue one or more values, separated by the separator
     * @return an iterator over the matching rows
     */
    public Iterator<Row> getByIndex(final String table, final String whereColumn, final String separator, final String whereValue) {
        // an empty separator means "do not split"; String.split("") would cut the value into single characters
        final String[] values = separator.isEmpty() ? new String[] { whereValue } : whereValue.split(separator);

        if (this.hasIndex(table, whereColumn)) {
            final HashSet<Tables.Row> rows = new HashSet<Tables.Row>();
            final TreeSet<byte[]> matches = new TreeSet<byte[]>(TablesColumnIndex.NATURALORDER);
            try {
                final TablesColumnIndex index = this.getIndex(table);
                for (int i = 0; i < values.length; i++) {
                    final Collection<byte[]> b = index.get(whereColumn, values[i]);
                    if (b == null) continue;
                    final TreeSet<byte[]> current = new TreeSet<byte[]>(TablesColumnIndex.NATURALORDER);
                    current.addAll(b);
                    if (i == 0) {
                        matches.addAll(current);
                    } else {
                        matches.retainAll(current);
                    }
                }
                for (final byte[] pk : matches) {
                    final Row row = this.select(table, pk);
                    // the index may reference a row that does not exist any more
                    if (row != null) rows.add(row);
                }
            } catch (final Exception e) {
                ConcurrentLog.logException(e);
                return new HashSet<Row>().iterator();
            }
            return rows.iterator();
        }

        if (!separator.isEmpty()) {
            // splitting may remove everything (i.e. the value consists of separators only);
            // no pattern can be built from zero values
            if (values.length == 0) return new HashSet<Row>().iterator();
            final StringBuilder patternBuilder = new StringBuilder(256);
            patternBuilder.append(p1);
            patternBuilder.append(p2);
            for (final String value : values) {
                patternBuilder.append(Pattern.quote(value));
                patternBuilder.append('|');
            }
            patternBuilder.deleteCharAt(patternBuilder.length() - 1); // remove the trailing '|'
            patternBuilder.append(p3);
            patternBuilder.append(values.length);
            patternBuilder.append('}');
            final Pattern p = Pattern.compile(patternBuilder.toString(), Pattern.CASE_INSENSITIVE);
            try {
                return this.iterator(table, whereColumn, p);
            } catch (final IOException e) {
                ConcurrentLog.logException(e);
                return new HashSet<Row>().iterator();
            }
        }

        try {
            return this.iterator(table, whereColumn, UTF8.getBytes(whereValue));
        } catch (final IOException e) {
            ConcurrentLog.logException(e);
            return new HashSet<Row>().iterator();
        }
    }

    /**
     * @return an iterator over the names of all tables, see {@link #getTablenames()}
     */
    @Override
    public Iterator<String> iterator() {
        return getTablenames().iterator();
    }

    /**
     * Discover all tables that exist in the table directory. Every non-empty
     * table file is opened and registered as a side effect.
     * @return the key set of the registered tables
     */
    public Set<String> getTablenames() {
        // we did a lazy initialization, but here we must discover all actually existing tables
        // File.list() returns null if the path is not a directory or an I/O error occurs
        final String[] files = this.location.list();
        if (files != null) {
            for (final String f : files) {
                if (!f.endsWith(suffix)) continue;
                final File file = new File(this.location, f);
                if (file.length() == 0) continue;
                final String tablename = f.substring(0, f.length() - suffix.length());
                try {
                    getHeap(tablename);
                } catch (final IOException e) {
                    ConcurrentLog.logException(e);
                }
            }
        }
        // now the list of tables is enriched, return the table names
        return this.tables.keySet();
    }

    /**
     * Close a single table and remove it from the registered tables.
     * Does nothing if the table is not registered.
     * @param tablename
     */
    public void close(final String tablename) {
        final BEncodedHeap heap = this.tables.remove(tablename);
        if (heap == null) return;
        heap.close();
    }

    /**
     * Close all registered tables.
     */
    public synchronized void close() {
        for (final BEncodedHeap heap : this.tables.values()) heap.close();
        this.tables.clear();
    }

    /**
     * Clear all tables, see {@link #clear(String)}.
     */
    public void clear() {
        final Set<String> tablenames = this.getTablenames();
        for (final String tablename : tablenames) this.clear(tablename);
    }

    /**
     * Clear a table: the heap is cleared and closed, its file is deleted and
     * the table is removed from the registered tables.
     * @param tablename
     */
    public void clear(final String tablename) {
        try {
            final BEncodedHeap heap = getHeap(tablename);
            if (heap != null) {
                final File f = heap.getFile();
                heap.clear();
                heap.close();
                FileUtils.deletedelete(f);
            }
        } catch (final IOException e) {
            ConcurrentLog.logException(e);
        } finally {
            this.tables.remove(tablename);
        }
    }

    /**
     * @param tablename
     * @return true if table in use
     */
    public boolean hasHeap(final String tablename) {
        return this.tables.containsKey(tablename);
    }

    /**
     * Get or create a heap table/file
     * @param tablename
     * @return existing or created heap
     * @throws IOException
     */
    public BEncodedHeap getHeap(final String tablename) throws IOException {
        BEncodedHeap heap = this.tables.get(tablename);
        if (heap != null) return heap;

        // open a new heap and register it in the tables
        final File heapf = new File(this.location, tablename + suffix);
        heap = new BEncodedHeap(heapf, this.keymaxlen);
        this.tables.put(tablename, heap);
        return heap;
    }

    /**
     * get the total number of known tables
     * @return the number of registered (opened) tables
     */
    public int size() {
        return this.tables.size();
    }

    /**
     * @param table the table name
     * @return the size reported by the heap of the table
     * @throws IOException
     */
    public int size(final String table) throws IOException {
        final BEncodedHeap heap = getHeap(table);
        return heap.size();
    }

    /**
     * Compute an unused primary key for a table. The last used key is read from
     * the pkcounter table (an entry is created if none exists); starting with the
     * following number, the first key that does not exist in the table is returned.
     * @param tablename
     * @return a key that is not used in the table
     * @throws IOException
     * @throws SpaceExceededException
     */
    private byte[] ukey(final String tablename) throws IOException, SpaceExceededException {
        Row row = select(system_table_pkcounter, UTF8.getBytes(tablename));
        if (row == null) {
            // table counter entry in pkcounter table does not exist: make a new table entry
            row = new Row(UTF8.getBytes(tablename), system_table_pkcounter_counterName, UTF8.getBytes(int2key(0)));
            update(system_table_pkcounter, row);
        }
        byte[] pk = row.get(system_table_pkcounter_counterName);
        int pki;
        if (pk == null) {
            pki = size(tablename);
        } else {
            pki = (int) (ByteArray.parseDecimal(pk) + 1);
        }
        while (true) {
            pk = UTF8.getBytes(int2key(pki));
            if (!has(tablename, pk)) break;
            pki++;
        }
        return pk;
    }

    /**
     * Format a number as decimal string, left-padded with '0' to keymaxlen characters.
     * @param i
     * @return the padded decimal representation of i
     */
    private String int2key(final int i) {
        final StringBuilder sb = new StringBuilder(this.keymaxlen);
        final String is = Integer.toString(i);
        for (int j = 0; j < this.keymaxlen - is.length(); j++) sb.append('0');
        sb.append(is);
        return sb.toString();
    }

    /**
     * insert a map into a table using a new unique key
     * @param tablename
     * @param map
     * @return the new key
     * @throws IOException
     * @throws SpaceExceededException
     */
    public byte[] insert(final String tablename, final Map<String, byte[]> map) throws IOException, SpaceExceededException {
        final byte[] uk = ukey(tablename);
        update(tablename, uk, map);
        // remember the key as the last used key of the table
        final BEncodedHeap heap = getHeap(system_table_pkcounter);
        heap.insert(UTF8.getBytes(tablename), system_table_pkcounter_counterName, uk);
        return uk;
    }

    /**
     * Pass a map with the given primary key to the insert method of the table heap.
     * @param table
     * @param pk
     * @param map
     * @throws IOException also if the heap throws a SpaceExceededException, which is attached as cause
     */
    public void insert(final String table, final byte[] pk, final Map<String, byte[]> map) throws IOException {
        final BEncodedHeap heap = getHeap(table);
        try {
            heap.insert(pk, map);
        } catch (final SpaceExceededException e) {
            throw new IOException(e.getMessage(), e);
        }
    }

    /**
     * Pass a row with its own primary key to the insert method of the table heap.
     * @param table
     * @param row
     * @throws IOException also if the heap throws a SpaceExceededException, which is attached as cause
     */
    public void insert(final String table, final Row row) throws IOException {
        final BEncodedHeap heap = getHeap(table);
        try {
            heap.insert(row.pk, row);
        } catch (final SpaceExceededException e) {
            throw new IOException(e.getMessage(), e);
        }
    }

    /**
     * Pass a map with the given primary key to the update method of the table heap.
     * @param table
     * @param pk
     * @param map
     * @throws IOException also if the heap throws a SpaceExceededException, which is attached as cause
     */
    public void update(final String table, final byte[] pk, final Map<String, byte[]> map) throws IOException {
        final BEncodedHeap heap = getHeap(table);
        try {
            heap.update(pk, map);
        } catch (final SpaceExceededException e) {
            throw new IOException(e.getMessage(), e);
        }
    }

    /**
     * Pass a row with its own primary key to the update method of the table heap.
     * @param table
     * @param row
     * @throws IOException also if the heap throws a SpaceExceededException, which is attached as cause
     */
    public void update(final String table, final Row row) throws IOException {
        final BEncodedHeap heap = getHeap(table);
        try {
            heap.update(row.pk, row);
        } catch (final SpaceExceededException e) {
            throw new IOException(e.getMessage(), e);
        }
    }

    /**
     * Insert an empty row using a new unique key.
     * @param table
     * @return the key of the new row
     * @throws IOException
     * @throws SpaceExceededException
     */
    public byte[] createRow(final String table) throws IOException, SpaceExceededException {
        return this.insert(table, new ConcurrentHashMap<String, byte[]>());
    }

    /**
     * Read a row from a table.
     * @param table
     * @param pk
     * @return the row or null if the heap has no entry for the key
     * @throws IOException
     * @throws SpaceExceededException
     */
    public Row select(final String table, final byte[] pk) throws IOException, SpaceExceededException {
        final BEncodedHeap heap = getHeap(table);
        final Map<String, byte[]> b = heap.get(pk);
        if (b == null) return null;
        return new Row(pk, b);
    }

    /**
     * Delete a row from a table.
     * @param table
     * @param pk
     * @throws IOException
     */
    public void delete(final String table, final byte[] pk) throws IOException {
        final BEncodedHeap heap = getHeap(table);
        heap.delete(pk);
    }

    /**
     * @param table
     * @param key
     * @return true if the heap of the table contains the key
     * @throws IOException
     */
    public boolean has(final String table, final byte[] key) throws IOException {
        final BEncodedHeap heap = getHeap(table);
        return heap.containsKey(key);
    }

    /**
     * @param table
     * @return the key iterator of the table heap
     * @throws IOException
     */
    public Iterator<byte[]> keys(final String table) throws IOException {
        final BEncodedHeap heap = getHeap(table);
        return heap.keys();
    }

    /**
     * @param table
     * @param up passed to the heap
     * @param rotating passed to the heap
     * @return the key iterator of the table heap for the given arguments
     * @throws IOException
     */
    public Iterator<byte[]> keys(final String table, final boolean up, final boolean rotating) throws IOException {
        final BEncodedHeap heap = getHeap(table);
        return heap.keys(up, rotating);
    }

    /**
     * @return an iterator over all rows of the table
     */
    public Iterator<Row> iterator(final String table) throws IOException {
        return new HeapRowIterator(table);
    }

    /**
     * @return an iterator over all rows of the table where the column is equal to the value
     */
    public Iterator<Row> iterator(final String table, final String whereColumn, final byte[] whereValue) throws IOException {
        return new HeapRowIterator(table, whereColumn, whereValue);
    }

    /**
     * @return an iterator over all rows of the table where the column matches the pattern
     */
    public Iterator<Row> iterator(final String table, final String whereColumn, final Pattern wherePattern) throws IOException {
        return new HeapRowIterator(table, whereColumn, wherePattern);
    }

    /**
     * @return an iterator over all rows of the table where any column matches the pattern
     */
    public Iterator<Row> iterator(final String table, final Pattern wherePattern) throws IOException {
        return new HeapRowIterator(table, wherePattern);
    }

    /**
     * @return an iterator over all rows of the table, following the key iterator of the heap for the direction 'up'
     */
    public Iterator<Row> iterator(final String table, final boolean up) throws IOException {
        return new OrderedRowIterator(table, up);
    }

    /**
     * @return a key-ordered iterator over all rows of the table where the column is equal to the value
     */
    public Iterator<Row> iterator(final String table, final String whereColumn, final byte[] whereValue, final boolean up) throws IOException {
        return new OrderedRowIterator(table, whereColumn, whereValue, up);
    }

    /**
     * @return a key-ordered iterator over all rows of the table where the column matches the pattern
     */
    public Iterator<Row> iterator(final String table, final String whereColumn, final Pattern wherePattern, final boolean up) throws IOException {
        return new OrderedRowIterator(table, whereColumn, wherePattern, up);
    }

    /**
     * @return a key-ordered iterator over all rows of the table where any column matches the pattern
     */
    public Iterator<Row> iterator(final String table, final Pattern wherePattern, final boolean up) throws IOException {
        return new OrderedRowIterator(table, wherePattern, up);
    }

    /**
     * Sort rows by the string value of a column. The sort key is the column value
     * followed by the primary key; rows without the column use "0000" in place of
     * the column value.
     * @param rowIterator the rows to be sorted
     * @param maxcount the maximum number of rows taken from the iterator; a negative number means no limit
     * @param sortColumn the column that defines the order
     * @return the rows in ascending order of their sort key
     */
    public static Collection<Row> orderBy(final Iterator<Row> rowIterator, int maxcount, final String sortColumn) {
        final TreeMap<String, Row> sortTree = new TreeMap<String, Row>();
        Row row;
        byte[] r;
        while ((maxcount < 0 || maxcount-- > 0) && rowIterator.hasNext()) {
            row = rowIterator.next();
            r = row.get(sortColumn);
            if (r == null) {
                sortTree.put("0000" + UTF8.String(row.pk), row);
            } else {
                sortTree.put(UTF8.String(r) + UTF8.String(row.pk), row);
            }
        }
        return sortTree.values();
    }

    /**
     * @param table
     * @return the columns reported by the heap of the table
     * @throws IOException
     */
    public ArrayList<String> columns(final String table) throws IOException {
        final BEncodedHeap heap = getHeap(table);
        return heap.columns();
    }

    /**
     * Row filter shared by the row iterators.
     * If a value is given, the column must be equal to that value. Otherwise, if a
     * pattern is given, it must match the given column or, if no column is given,
     * any column. Without value and pattern every row matches.
     * @param r the row to be tested
     * @param whereColumn the column to be tested, may be null for pattern matching
     * @param whereValue the wanted value or null
     * @param wherePattern the wanted pattern or null
     * @return true if the row passes the filter
     */
    private static boolean rowMatches(final Row r, final String whereColumn, final byte[] whereValue, final Pattern wherePattern) {
        if (whereValue != null) {
            return ByteBuffer.equals(r.get(whereColumn), whereValue);
        }
        if (wherePattern == null) {
            return true;
        }
        if (whereColumn == null) {
            // shall match any column
            for (final byte[] b : r.values()) {
                if (wherePattern.matcher(UTF8.String(b)).matches()) return true;
            }
            return false;
        }
        // must match the given column
        return wherePattern.matcher(UTF8.String(r.get(whereColumn))).matches();
    }

    /**
     * Iterator over the rows of a table in the order of the heap iterator,
     * optionally filtered by a value or a pattern.
     */
    public class HeapRowIterator extends LookAheadIterator<Row> implements Iterator<Row> {

        private final String whereColumn;
        private final byte[] whereValue;
        private final Pattern wherePattern;
        private final Iterator<Map.Entry<byte[], Map<String, byte[]>>> i;

        /**
         * iterator that iterates all elements in the given table
         * @param table
         * @throws IOException
         */
        public HeapRowIterator(final String table) throws IOException {
            this.whereColumn = null;
            this.whereValue = null;
            this.wherePattern = null;
            final BEncodedHeap heap = getHeap(table);
            this.i = heap.iterator();
        }

        /**
         * iterator that iterates all elements in the given table
         * where a given column is equal to a given value
         * @param table
         * @param whereColumn
         * @param whereValue
         * @throws IOException
         */
        public HeapRowIterator(final String table, final String whereColumn, final byte[] whereValue) throws IOException {
            assert whereColumn != null || whereValue == null;
            this.whereColumn = whereColumn;
            this.whereValue = whereValue;
            this.wherePattern = null;
            final BEncodedHeap heap = getHeap(table);
            this.i = heap.iterator();
        }

        /**
         * iterator that iterates all elements in the given table
         * where a given column matches with a given value
         * @param table
         * @param whereColumn
         * @param wherePattern a null or empty pattern disables the filter
         * @throws IOException
         */
        public HeapRowIterator(final String table, final String whereColumn, final Pattern wherePattern) throws IOException {
            this.whereColumn = whereColumn;
            this.whereValue = null;
            this.wherePattern = wherePattern == null || wherePattern.toString().isEmpty() ? null : wherePattern;
            final BEncodedHeap heap = getHeap(table);
            this.i = heap.iterator();
        }

        /**
         * iterator that iterates all elements in the given table
         * where any column matches with a given value
         * @param table
         * @param pattern a null or empty pattern disables the filter
         * @throws IOException
         */
        public HeapRowIterator(final String table, final Pattern pattern) throws IOException {
            this.whereColumn = null;
            this.whereValue = null;
            this.wherePattern = pattern == null || pattern.toString().isEmpty() ? null : pattern;
            final BEncodedHeap heap = getHeap(table);
            this.i = heap.iterator();
        }

        /**
         * @return the next row that passes the filter or null if there is no such row
         */
        @Override
        protected Row next0() {
            if (this.i == null) return null;
            while (this.i.hasNext()) {
                final Row r = new Row(this.i.next());
                if (rowMatches(r, this.whereColumn, this.whereValue, this.wherePattern)) return r;
            }
            return null;
        }
    }

    /**
     * Iterator over the rows of a table in the order of the key iterator of the
     * heap, optionally filtered by a value or a pattern.
     */
    public class OrderedRowIterator extends LookAheadIterator<Row> implements Iterator<Row> {

        private final String whereColumn;
        private final byte[] whereValue;
        private final Pattern wherePattern;
        private final Iterator<byte[]> i;
        private final BEncodedHeap heap;

        /**
         * iterator that iterates all elements in the given table
         * @param table
         * @param up
         * @throws IOException
         */
        public OrderedRowIterator(final String table, final boolean up) throws IOException {
            this.whereColumn = null;
            this.whereValue = null;
            this.wherePattern = null;
            this.heap = getHeap(table);
            this.i = this.heap.keys(up, false);
        }

        /**
         * iterator that iterates all elements in the given table
         * where a given column is equal to a given value
         * @param table
         * @param whereColumn
         * @param whereValue
         * @param up
         * @throws IOException
         */
        public OrderedRowIterator(final String table, final String whereColumn, final byte[] whereValue, final boolean up) throws IOException {
            assert whereColumn != null || whereValue == null;
            this.whereColumn = whereColumn;
            this.whereValue = whereValue;
            this.wherePattern = null;
            this.heap = getHeap(table);
            this.i = this.heap.keys(up, false);
        }

        /**
         * iterator that iterates all elements in the given table
         * where a given column matches with a given value
         * @param table
         * @param whereColumn
         * @param wherePattern a null or empty pattern disables the filter
         * @param up
         * @throws IOException
         */
        public OrderedRowIterator(final String table, final String whereColumn, final Pattern wherePattern, final boolean up) throws IOException {
            this.whereColumn = whereColumn;
            this.whereValue = null;
            this.wherePattern = wherePattern == null || wherePattern.toString().isEmpty() ? null : wherePattern;
            this.heap = getHeap(table);
            this.i = this.heap.keys(up, false);
        }

        /**
         * iterator that iterates all elements in the given table
         * where any column matches with a given value
         * @param table
         * @param wherePattern a null or empty pattern disables the filter
         * @param up
         * @throws IOException
         */
        public OrderedRowIterator(final String table, final Pattern wherePattern, final boolean up) throws IOException {
            this.whereColumn = null;
            this.whereValue = null;
            this.wherePattern = wherePattern == null || wherePattern.toString().isEmpty() ? null : wherePattern;
            this.heap = getHeap(table);
            this.i = this.heap.keys(up, false);
        }

        /**
         * @return the next row that passes the filter or null if there is no such row
         */
        @Override
        protected Row next0() {
            if (this.i == null) return null;
            while (this.i.hasNext()) {
                final byte[] pk = this.i.next();
                try {
                    final Map<String, byte[]> map = this.heap.get(pk);
                    if (map == null) continue;
                    final Row r = new Row(pk, map);
                    if (rowMatches(r, this.whereColumn, this.whereValue, this.wherePattern)) return r;
                } catch (IOException | SpaceExceededException e) {
                    // a row that cannot be read is skipped, but the problem shall be visible in the log
                    ConcurrentLog.logException(e);
                }
            }
            return null;
        }
    }

    /**
     * The content of a row: a map from column names to values with typed
     * put and get methods. Values are stored as byte arrays.
     */
    public static class Data extends LinkedHashMap<String, byte[]> {

        private static final long serialVersionUID = 978426054043749337L;

        public Data() {
            super();
        }

        private Data(final Map<String, byte[]> map) {
            super();
            assert map != null;
            putAll(map);
        }

        /** store a string as UTF-8 bytes */
        public void put(final String colname, final String value) {
            super.put(colname, UTF8.getBytes(value));
        }

        /** store an int as decimal number */
        public void put(final String colname, final int value) {
            super.put(colname, ASCII.getBytes(Integer.toString(value)));
        }

        /** store a long as decimal number */
        public void put(final String colname, final long value) {
            super.put(colname, ASCII.getBytes(Long.toString(value)));
        }

        /** store a date in the short format with milliseconds */
        public void put(final String colname, final Date value) {
            super.put(colname, UTF8.getBytes(my_SHORT_MILSEC_FORMATTER.format(value)));
        }

        /**
         * @return the value of the column or dflt if the column has no value
         */
        public byte[] get(final String colname, final byte[] dflt) {
            final byte[] r = this.get(colname);
            if (r == null) return dflt;
            return r;
        }

        /**
         * @return the value of the column as string or dflt if the column has no value
         */
        public String get(final String colname, final String dflt) {
            final byte[] r = this.get(colname);
            if (r == null) return dflt;
            return UTF8.String(r);
        }

        /**
         * @return the value of the column as int or dflt if the column has no value or a NumberFormatException occurs
         */
        public int get(final String colname, final int dflt) {
            final byte[] r = this.get(colname);
            if (r == null) return dflt;
            try {
                return (int) ByteArray.parseDecimal(r);
            } catch (final NumberFormatException e) {
                return dflt;
            }
        }

        /**
         * @return the value of the column as long or dflt if the column has no value or a NumberFormatException occurs
         */
        public long get(final String colname, final long dflt) {
            final byte[] r = this.get(colname);
            if (r == null) return dflt;
            try {
                return ByteArray.parseDecimal(r);
            } catch (final NumberFormatException e) {
                return dflt;
            }
        }

        /**
         * @return the value of the column as date or dflt if the column has no value or cannot be parsed
         */
        public Date get(final String colname, final Date dflt) {
            final byte[] r = this.get(colname);
            if (r == null) return dflt;
            try {
                return my_SHORT_MILSEC_FORMATTER.parse(UTF8.String(r), 0).getTime();
            } catch (final ParseException e) {
                return dflt;
            }
        }

        /**
         * @return all columns in the form {name=value, name=value}
         */
        @Override
        public String toString() {
            final StringBuilder sb = new StringBuilder(this.size() * 40);
            sb.append('{');
            for (final Map.Entry<String, byte[]> entry : entrySet()) {
                sb.append(entry.getKey()).append('=').append(UTF8.String(entry.getValue())).append(", ");
            }
            // remove the trailing ", " (only present if there was at least one column)
            if (sb.length() > 1) sb.setLength(sb.length() - 2);
            sb.append('}');
            return sb.toString();
        }
    }

    /**
     * A row of a table: the column data together with the primary key.
     */
    public class Row extends Data {

        private static final long serialVersionUID = 978426054043749338L;

        private final byte[] pk;

        private Row(final Map.Entry<byte[], Map<String, byte[]>> entry) {
            super(entry.getValue());
            assert entry != null;
            assert entry.getKey() != null;
            assert entry.getValue() != null;
            this.pk = entry.getKey();
        }

        private Row(final byte[] pk, final Map<String, byte[]> map) {
            super(map);
            assert pk != null;
            assert map != null;
            this.pk = pk;
        }

        private Row(final byte[] pk, final String k0, final byte[] v0) {
            super();
            assert k0 != null;
            assert v0 != null;
            this.put(k0, v0);
            this.pk = pk;
        }

        /**
         * @return the primary key of the row
         */
        public byte[] getPK() {
            return this.pk;
        }

        /**
         * @return the row in the form pk:{name=value, name=value}
         */
        @Override
        public String toString() {
            final StringBuilder sb = new StringBuilder(Tables.this.keymaxlen + 20 * this.size());
            sb.append(UTF8.String(this.pk)).append(":").append(super.toString());
            return sb.toString();
        }
    }

    /**
     * Test the class: write three rows into the table "testdao" and print all rows.
     * @param args not used
     */
    public static void main(final String[] args) {
        final File f = new File(new File("maptest").getAbsolutePath());
        try {
            final Tables map = new Tables(f.getParentFile(), 4);
            // put some values into the map
            final Map<String, byte[]> m = new HashMap<String, byte[]>();
            m.put("k", UTF8.getBytes("000"));
            map.update("testdao", UTF8.getBytes("123"), m);
            m.put("k", UTF8.getBytes("111"));
            map.update("testdao", UTF8.getBytes("456"), m);
            m.put("k", UTF8.getBytes("222"));
            map.update("testdao", UTF8.getBytes("789"), m);
            // iterate over keys
            final Iterator<Row> i = map.iterator("testdao");
            while (i.hasNext()) {
                System.out.println(i.next().toString());
            }
            // clean up
            map.close();
        } catch (final IOException e) {
            ConcurrentLog.logException(e);
        }
    }
}