/**
* JsonDataset
* Copyright 16.07.2015 by Michael Peter Christen, @0rb1t3r
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package org.loklak.tools.storage;
import java.io.File;
import java.io.IOException;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

import org.eclipse.jetty.util.log.Log;
import org.json.JSONObject;
import org.loklak.data.DAO;
/**
 * A data set backed by a {@link JsonRepository} whose records are additionally
 * registered in in-memory search indexes, one index per declared column.
 *
 * On construction the repository's own dumps are read and every record is
 * put into the index of each declared column for which the record has a
 * value. Records added later with {@link #putUnique(JSONObject)} are written
 * to the repository and indexed as well.
 */
public class JsonDataset {

    private final JsonRepository indexDump; // a directory containing dump, import and imported subdirectories
    final Map<String, JsonFactoryIndex> index; // a mapping from a search key to the search index
    private final JsonMinifier minifier; // a minifier for json which learns about json mapping key names
    private final Map<String, Boolean> columns; // a mapping from the column key to a boolean which is true if the column value is case-insensitive
    private final String dateFieldName; // a name of a date field which shows the update time of the record; empty if there is none
    private final DateFormat dateFieldFormat; // null if no date field is defined; not thread-safe, access only while synchronized on it

    /**
     * Declaration of one indexed column: the json property key and whether
     * string values of that property are matched case-insensitively.
     */
    public static class Column {
        public String key;
        public boolean caseInsensitive;

        public Column(String key, boolean caseInsensitive) {
            this.key = key;
            this.caseInsensitive = caseInsensitive;
        }
    }

    /**
     * define a data set: an indexed JsonDump where the index is held in RAM
     * @param dump_dir the path where the subdirectories for this data set shall be stored
     * @param dump_file_prefix a prefix for the file names
     * @param columns the json property keys whose content shall be indexed, each with its case-sensitivity
     * @param dateFieldName the name of the json property holding the record date; null or empty if there is none
     * @param dateFieldFormat a {@link SimpleDateFormat} pattern for the date field; only used if dateFieldName is not empty
     * @param mode the indexing mode, either completely in RAM with Mode.COMPRESSED or with file handles with Mode.REWRITABLE
     * @param dailyDump handed to the {@link JsonRepository} unchanged
     * @param count handed to {@code JsonRepository.getOwnDumps}; presumably limits the number of dumps loaded (confirm there)
     * @throws IOException propagated from the underlying repository calls
     */
    public JsonDataset(
            File dump_dir, String dump_file_prefix,
            Column[] columns,
            String dateFieldName, String dateFieldFormat,
            JsonRepository.Mode mode, final boolean dailyDump,
            int count) throws IOException {

        // initialize class objects
        final int concurrency = Runtime.getRuntime().availableProcessors();
        this.indexDump = new JsonRepository(dump_dir, dump_file_prefix, null, mode, dailyDump, concurrency);
        this.index = new ConcurrentHashMap<>();
        this.minifier = new JsonMinifier();
        this.columns = new HashMap<>();
        this.dateFieldName = dateFieldName == null ? "" : dateFieldName;
        this.dateFieldFormat = this.dateFieldName.length() == 0 ? null : new SimpleDateFormat(dateFieldFormat);

        // remember the case-sensitivity of each column and assign one index to it
        for (Column column: columns) {
            this.columns.put(column.key, column.caseInsensitive);
            this.index.put(column.key, new JsonFactoryIndex());
        }

        // start reading of the JsonDump
        final Collection<File> dumps = indexDump.getOwnDumps(count);
        if (dumps == null) return;

        boolean interrupted = false;
        for (final File dump: dumps) {
            final JsonReader reader = indexDump.getDumpReader(dump);
            DAO.log("loading " + reader.getName());

            // for each reader several threads are started which do Json parsing and indexing
            Thread[] indexerThreads = new Thread[concurrency];
            for (int i = 0; i < concurrency; i++) {
                indexerThreads[i] = new Thread() {
                    @Override
                    public void run() {
                        JsonDataset.this.indexRecords(reader);
                    }
                };
                indexerThreads[i].start();
            }

            // wait for the completion of each task
            for (Thread indexerThread: indexerThreads) {
                try {
                    indexerThread.join();
                } catch (InterruptedException e) {
                    // keep going with the remaining threads, but do not lose the interrupt
                    interrupted = true;
                }
            }
        }
        // hand the interrupt on to the caller instead of silently swallowing it
        if (interrupted) Thread.currentThread().interrupt();
    }

    /**
     * Worker loop of one indexer thread: takes records from the reader until
     * the poison element arrives and registers each record in every column index.
     * @param reader the dump reader shared by all indexer threads of one dump
     */
    private void indexRecords(JsonReader reader) {
        try {
            JsonFactory jsonHandle;
            while ((jsonHandle = reader.take()) != JsonStreamReader.POISON_JSON_MAP) {
                JSONObject op = jsonHandle.getJSON();
                JsonFactory jsonFactory;
                if (jsonHandle instanceof JsonRandomAccessFile.JsonHandle) {
                    JsonRandomAccessFile.JsonHandle handle = (JsonRandomAccessFile.JsonHandle) jsonHandle;
                    assert reader instanceof JsonRandomAccessFile;
                    // create the file json handle which does not contain the json any more
                    // but only the file handle
                    jsonFactory = ((JsonRandomAccessFile) reader).getJsonFactory(handle.getIndex(), handle.getLength());
                } else {
                    assert this.indexDump.getMode() == JsonRepository.COMPRESSED_MODE;
                    // create the json minifier object which contains the json in minified version
                    // before we create the minifier, we remove the meta keys from the json to further minify it
                    for (String meta_key: JsonRepository.META_KEYS_STRINGS) {
                        op.remove(meta_key);
                    }
                    jsonFactory = this.minifier.minify(op);
                }

                // the resulting json factory is written to each search index
                for (Map.Entry<String, Boolean> column: this.columns.entrySet()) {
                    String searchKey = column.getKey();
                    JsonFactoryIndex factoryIndex = this.index.get(searchKey);
                    Object searchValue = op.opt(searchKey); // null if the record has no such property
                    if (searchValue == null) continue;
                    if (searchValue instanceof String) {
                        factoryIndex.put(indexKey((String) searchValue, column.getValue()), jsonFactory);
                    } else {
                        factoryIndex.put(searchValue, jsonFactory);
                    }
                }
            }
        } catch (InterruptedException e) {
            Log.getLog().warn(e);
            Thread.currentThread().interrupt(); // preserve the interrupt status for whoever inspects this thread
        } catch (IOException e) {
            Log.getLog().warn(e);
        }
    }

    /**
     * Compute the key under which a string value is stored in and looked up from an index.
     * Lower-casing uses {@link Locale#ROOT} so that matching does not depend on the
     * default locale of the JVM (e.g. the dotless i of a Turkish locale).
     * @param value the string value of an indexed property
     * @param caseInsensitive true if the column is case-insensitive
     * @return the lower-cased value for case-insensitive columns, the unchanged value otherwise
     */
    private static String indexKey(String value, boolean caseInsensitive) {
        return caseInsensitive ? value.toLowerCase(Locale.ROOT) : value;
    }

    /**
     * Write an object to the repository and register it in the index of every
     * declared column for which the object holds a string value. Columns that
     * are missing in the object, or that hold a non-string value, are skipped.
     * Note that an index entry which already exists for the same value is replaced.
     * @param obj the json object to store
     * @return the factory which was created by the repository for the written object
     * @throws IOException if writing to the repository fails
     */
    public JsonFactory putUnique(JSONObject obj) throws IOException {
        JsonFactory json = indexDump.write(obj, 'I');
        for (Map.Entry<String, Boolean> column: this.columns.entrySet()) {
            String searchKey = column.getKey();
            // opt() yields null for an absent key; get() would throw after the record was already written
            Object value = obj.opt(searchKey);
            if (value instanceof String) {
                this.index.get(searchKey).put(indexKey((String) value, column.getValue()), json);
            }
        }
        return json;
    }

    /**
     * Look up a record by the value of one of the declared columns.
     * @param column the key of a declared column
     * @param value the value to search for; must not be null
     * @return the indexed factory, or null if there is no record with that value
     * @throws RuntimeException if the column was not declared for this data set
     */
    public JsonFactory get(String column, String value) {
        Boolean insensitive = this.columns.get(column);
        if (insensitive == null) throw new RuntimeException("Column " + column + " was not declared");
        JsonFactoryIndex jfi = this.index.get(column);
        if (jfi == null) throw new RuntimeException("Column " + column + " was not defined");
        return jfi.get(indexKey(value, insensitive));
    }

    /**
     * Read the date of a record from the date field that was configured for this data set.
     * @param json the record
     * @return the value of the date field, parsed with the configured format if it is a string
     * @throws ParseException if no date field is configured, the record has no such field,
     *         the field is neither a String nor a Date, or the string cannot be parsed
     */
    public Date parseDate(JSONObject json) throws ParseException {
        if (this.dateFieldName.length() == 0 || this.dateFieldFormat == null) throw new ParseException("no date field defined", 0);
        // opt() yields null for an absent key so that the ParseException below is actually reachable
        Object d = json.opt(this.dateFieldName);
        if (d == null) throw new ParseException("no date field in json, expected field '" + this.dateFieldName + "'", 0);
        if (d instanceof Date) return (Date) d;
        if (!(d instanceof String)) throw new ParseException("date field in json must contain a String or Date, not " + d.getClass().getName(), 0);
        // SimpleDateFormat keeps parsing state in the instance and must not be used concurrently
        synchronized (this.dateFieldFormat) {
            return this.dateFieldFormat.parse((String) d);
        }
    }

    /**
     * Close the underlying repository.
     */
    public void close() {
        this.indexDump.close();
    }

    /**
     * The search index of one column: a concurrent map from the (possibly
     * lower-cased) property value to the factory of the record.
     */
    public static class JsonFactoryIndex extends ConcurrentHashMap<Object, JsonFactory> implements Map<Object, JsonFactory> {
        private static final long serialVersionUID = 4596787150066539880L;
    }

    /**
     * @return the number of entries of the largest column index; 0 if there is no index
     */
    public int size() {
        int size = 0;
        for (JsonFactoryIndex fi: this.index.values()) {
            size = Math.max(size, fi.size());
        }
        return size;
    }
}