/**
* JsonRepository
* Copyright 16.07.2015 by Michael Peter Christen, @0rb1t3r
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package org.loklak.tools.storage;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Random;
import java.util.SortedSet;
import java.util.TimeZone;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.zip.GZIPInputStream;
import org.eclipse.jetty.util.log.Log;
import org.json.JSONArray;
import org.json.JSONObject;
import org.loklak.data.DAO;
import org.loklak.tools.Compression;
/**
 * A directory-backed repository of JSON dump files.
 * <p>
 * The repository directory contains the sub-directories {@code own}, {@code import},
 * {@code imported} and {@code buffer}. New records are appended as single lines to the
 * "current" dump file inside {@code own}; the name of that file is made of the dump file
 * prefix, a date part (daily or monthly, always in GMT) and a random number.
 */
public class JsonRepository {

    // special keys which can be added to the data set to track changes
    public final static String OPERATION_KEY_STRING = "$P";
    public final static String MOD_DATE_KEY_STRING  = "$D";
    public final static String REFERRER_KEY_STRING  = "$U";
    public final static String[] META_KEYS_STRINGS = new String[]{OPERATION_KEY_STRING, MOD_DATE_KEY_STRING, REFERRER_KEY_STRING};

    public static final Mode COMPRESSED_MODE = Mode.COMPRESSED;
    public static final Mode REWRITABLE_MODE = Mode.REWRITABLE;

    // SimpleDateFormat is not thread-safe: every use of these shared instances must be
    // synchronized on the formatter object itself (see dateSuffix).
    private final static SimpleDateFormat dateFomatMonthly = new SimpleDateFormat("yyyyMM", Locale.US);
    private final static SimpleDateFormat dateFomatDaily = new SimpleDateFormat("yyyyMMdd", Locale.US);
    private final static SimpleDateFormat dateFomatHourly = new SimpleDateFormat("yyyyMMddHH", Locale.US);
    private final static SimpleDateFormat dateFomatMinutely = new SimpleDateFormat("yyyyMMddHHmm", Locale.US);

    static {
        // all date parts of file names are computed in GMT, independent of the local time zone
        final TimeZone gmt = TimeZone.getTimeZone("GMT");
        dateFomatMonthly.setTimeZone(gmt);
        dateFomatDaily.setTimeZone(gmt);
        dateFomatHourly.setTimeZone(gmt);
        dateFomatMinutely.setTimeZone(gmt);
    }

    public static enum Mode {
        COMPRESSED, // dump files are compressed but cannot be re-written. All data is cached in RAM.
        REWRITABLE; // dump files are not compressed but can be re-written. Data is only indexed in RAM and retrieved from file.
    }

    final File dump_dir, dump_dir_own, dump_dir_import, dump_dir_imported, dump_dir_buffer;
    final String dump_file_prefix;
    final JsonRandomAccessFile json_log;
    final Mode mode;
    final int concurrency;
    final Map<String, JsonRandomAccessFile> buffers;

    /**
     * Create the repository directory structure (if missing) and open the current dump file.
     * @param dump_dir the root directory of the repository
     * @param dump_file_prefix the file name prefix of all dump files of this repository
     * @param readme if not null, this text is written to {@code readme.txt} in the root directory unless that file already exists
     * @param mode the storage mode, see {@link Mode}
     * @param dailyDump if true, the current dump file is selected per day, otherwise per month
     * @param concurrency the concurrency value which is passed on to the dump file readers
     * @throws IOException if the readme file cannot be written or the current dump file cannot be opened
     */
    public JsonRepository(File dump_dir, String dump_file_prefix, String readme, final Mode mode, final boolean dailyDump, final int concurrency) throws IOException {
        this.dump_dir = dump_dir;
        this.dump_file_prefix = dump_file_prefix;
        this.dump_dir_own = new File(this.dump_dir, "own");
        this.dump_dir_import = new File(this.dump_dir, "import");
        this.dump_dir_imported = new File(this.dump_dir, "imported");
        this.dump_dir_buffer = new File(this.dump_dir, "buffer");
        this.dump_dir.mkdirs();
        this.dump_dir_own.mkdirs();
        this.dump_dir_import.mkdirs();
        this.dump_dir_imported.mkdirs();
        this.dump_dir_buffer.mkdirs();
        this.mode = mode;
        this.concurrency = concurrency;
        if (readme != null) {
            File message_dump_dir_readme = new File(this.dump_dir, "readme.txt");
            if (!message_dump_dir_readme.exists()) {
                // try-with-resources: the writer is closed even if write() fails
                try (BufferedWriter w = new BufferedWriter(new FileWriter(message_dump_dir_readme))) {
                    w.write(readme);
                }
            }
        }
        this.json_log = new JsonRandomAccessFile(getCurrentDump(dump_dir_own, this.dump_file_prefix, mode, dailyDump), this.concurrency);
        this.buffers = new TreeMap<>();
    }

    /**
     * @return the root directory of this repository
     */
    public File getDumpDir() {
        return this.dump_dir;
    }

    /**
     * @return the storage mode which was given to the constructor
     */
    public Mode getMode() {
        return this.mode;
    }

    /**
     * Compute the date part of a dump file name.
     * @param dailyDump if true the format is yyyyMMdd, otherwise yyyyMM
     * @param d the date to format
     * @return the formatted date in GMT
     */
    private static String dateSuffix(final boolean dailyDump, final Date d) {
        final SimpleDateFormat format = dailyDump ? dateFomatDaily : dateFomatMonthly;
        // the formatter is a shared static instance and not thread-safe
        synchronized (format) {
            return format.format(d);
        }
    }

    /**
     * Find (or name) the dump file for the current date. As a side effect all other dump
     * files with the given prefix are converted to the representation required by the mode:
     * gzipped in COMPRESSED mode (in a background thread), gunzipped in REWRITABLE mode.
     * @param path the directory which holds the dump files
     * @param prefix the dump file name prefix
     * @param mode the storage mode
     * @param dailyDump if true the current file is selected per day, otherwise per month
     * @return the existing current dump file or the name of a new one; a new file is not created here
     */
    private static File getCurrentDump(File path, String prefix, final Mode mode, final boolean dailyDump) {
        String currentDatePart = dateSuffix(dailyDump, new Date());
        final String currentPrefix = prefix + currentDatePart;

        // if there is already a dump, use it
        String[] existingDumps = path.list();
        if (existingDumps != null) {
            for (String d: existingDumps) {
                // first check if the file is the current file: we never compress that to enable a write to the end of the file
                if (d.startsWith(currentPrefix) && d.endsWith(".txt")) {
                    continue;
                }

                // according to the write mode, we either compress or uncompress the file on-the-fly
                if (mode == COMPRESSED_MODE) {
                    // all files should be compressed to enable small file sizes, but contents must be in RAM after reading
                    if (d.startsWith(prefix) && d.endsWith(".txt")) {
                        gzipInBackground(path, d);
                    }
                } else {
                    // all files should be uncompressed to enable random-access mode
                    if (d.startsWith(prefix) && d.endsWith(".gz")) {
                        gunzipOrInvalidate(path, d);
                    }
                }
            }
            // the latest file with the current date is the required one (and it should not be compressed)
            for (String d: existingDumps) {
                if (d.startsWith(currentPrefix) && d.endsWith(".txt")) {
                    return new File(path, d);
                }
            }
        }

        // no current file was found: create a new one, use a random number.
        // The random is used to make it possible to join many different dumps from different instances
        // without renaming them.
        // The sign bit is masked instead of using Math.abs because Math.abs(Long.MIN_VALUE) is negative.
        // The default Random seed is used because a time-based seed is identical for instances started
        // within the same millisecond.
        long nonNegative = new Random().nextLong() & Long.MAX_VALUE;
        String random = (Long.toString(nonNegative) + "00000000").substring(0, 8);
        return new File(path, currentPrefix + "_" + random + ".txt");
    }

    /**
     * Compress a plain dump file to a .gz file in a new background thread.
     * An already existing target file is deleted first.
     */
    private static void gzipInBackground(final File path, final String name) {
        final File source = new File(path, name);
        final File dest = new File(path, name + ".gz");
        if (dest.exists()) dest.delete();
        new Thread() {
            @Override
            public void run() {
                try {
                    DAO.log("starting gzip of " + source);
                    Compression.gzip(source, dest, true);
                    DAO.log("finished gzip of " + source);
                } catch (IOException e) {
                    DAO.log("gzip of " + source + " failed: " + e.getMessage());
                }
            }
        }.start();
    }

    /**
     * Uncompress a .gz dump file in the calling thread. If that fails, the partial result is
     * removed and the archive is renamed with the additional extension ".invalid".
     */
    private static void gunzipOrInvalidate(final File path, final String name) {
        final File source = new File(path, name);
        final File dest = new File(path, name.substring(0, name.length() - 3)); // cut off ".gz"
        if (dest.exists()) dest.delete();
        try {
            Compression.gunzip(source, dest, true);
        } catch (IOException e) {
            Log.getLog().warn(e);
            // mark the file as invalid
            if (dest.exists()) dest.delete();
            final File invalid = new File(path, name + ".invalid");
            if (!source.renameTo(invalid)) {
                DAO.log("cannot rename invalid dump " + source + " to " + invalid);
            }
        }
    }

    /**
     * Append the json object as one line to the current dump file.
     * @param json the object to be written
     * @return the JsonFactory which the current dump file provides for the written line
     * @throws IOException if the line cannot be appended
     */
    public JsonFactory write(JSONObject json) throws IOException {
        String line = json.toString(); // new ObjectMapper().writer().writeValueAsString(map);
        return appendToLog(line.getBytes(StandardCharsets.UTF_8));
    }

    /**
     * Append the json object as one line to the current dump file, with an additional first
     * attribute {@link #OPERATION_KEY_STRING} which carries the given operation key.
     * @param json the object to be written
     * @param opkey the operation key character
     * @return the JsonFactory which the current dump file provides for the written line
     * @throws IOException if the line cannot be appended
     */
    public JsonFactory write(JSONObject json, char opkey) throws IOException {
        String line = json.toString(); // new ObjectMapper().writer().writeValueAsString(map);
        String tail = line.substring(1); // everything after the opening '{'
        StringBuilder sb = new StringBuilder(line.length() + 10);
        sb.append('{').append('\"').append(OPERATION_KEY_STRING).append('\"').append(':').append('\"').append(opkey).append('\"');
        // an empty object has no further attributes: a separator here would produce invalid json
        if (!"}".equals(tail)) sb.append(',');
        sb.append(tail);
        return appendToLog(sb.toString().getBytes(StandardCharsets.UTF_8));
    }

    /**
     * Append a line to the current dump file and get the factory for the appended bytes.
     */
    private JsonFactory appendToLog(final byte[] b) throws IOException {
        long seekpos = this.json_log.appendLine(b);
        return this.json_log.getJsonFactory(seekpos, b.length);
    }

    /**
     * Buffer a data set in a buffer which is selected by the day of the created_at date.
     * TODO: THIS IS INCOMPLETE! Only the buffer lookup is done so far, nothing is stored.
     * @param created_at the date which selects the buffer
     * @param map the data set
     * @throws IOException
     */
    public void buffer(Date created_at, Map<String, Object> map) throws IOException {
        // compute a buffer name from the created_at date
        String bufferName = dateSuffix(true, created_at);
        synchronized (this.buffers) {
            this.buffers.get(bufferName);
        }
        // TODO: THIS IS INCOMPLETE!
    }

    /**
     * Not implemented yet.
     * @return always null
     */
    public JSONArray getBufferShard() {
        return null;
    }

    /**
     * Not implemented yet.
     * @return always 0
     */
    public int getBufferShardCount() {
        return 0;
    }

    /**
     * Close the current dump file. A failure is logged, not thrown.
     */
    public void close() {
        try {
            this.json_log.close();
        } catch (IOException e) {
            // closing is best-effort, but the failure should not vanish silently
            Log.getLog().warn(e);
        }
    }

    /**
     * @param count the maximum number of files
     * @return the last count dump files (in file order) of the "own" directory
     */
    public SortedSet<File> getOwnDumps(int count) {
        return getDumps(this.dump_dir_own, this.dump_file_prefix, null, count);
    }

    /**
     * @param count the maximum number of files
     * @return the last count dump files (in file order) of the "import" directory
     */
    public SortedSet<File> getImportDumps(int count) {
        return getDumps(this.dump_dir_import, this.dump_file_prefix, null, count);
    }

    /**
     * @param count the maximum number of files
     * @return the last count dump files (in file order) of the "imported" directory
     */
    public SortedSet<File> getImportedDumps(int count) {
        return getDumps(this.dump_dir_imported, this.dump_file_prefix, null, count);
    }

    /**
     * Get the last elements of a sorted set.
     * @param set the source set
     * @param count the maximum number of elements in the result
     * @return the given set itself if it has at most count elements, otherwise a new set
     *         with the last count elements; an empty set if count is zero or negative
     */
    private static SortedSet<File> tailSet(SortedSet<File> set, int count) {
        if (count >= set.size()) return set;
        TreeSet<File> t = new TreeSet<File>();
        if (count <= 0) return t; // nothing requested; also protects the skip loop below from overrunning the iterator
        Iterator<File> fi = set.iterator();
        final int skip = set.size() - count;
        for (int i = 0; i < skip; i++) fi.next();
        while (fi.hasNext()) t.add(fi.next());
        return t;
    }

    /**
     * List the files of a directory which match the given prefix and suffix.
     * @param path the directory
     * @param prefix required start of the file name, or null for any
     * @param suffix required end of the file name, or null for any
     * @param count the maximum number of files
     * @return the last count matching files in file order; empty if the directory cannot be listed
     */
    private static SortedSet<File> getDumps(final File path, final String prefix, final String suffix, int count) {
        String[] list = path.list();
        TreeSet<File> dumps = new TreeSet<File>(); // sort the names with a tree set
        if (list == null) return dumps; // File.list() returns null if path is not a directory or an I/O error occurs
        for (String s: list) {
            if ((prefix == null || s.startsWith(prefix)) &&
                (suffix == null || s.endsWith(suffix))) dumps.add(new File(path, s));
        }
        return tailSet(dumps, count);
    }

    /**
     * move a file from the import directory to the imported directory.
     * An already existing file with the same name in the imported directory is replaced.
     * @param dumpName only the name, not the full path. The file must be in the import file path
     * @return true if the file was shifted successfully, false if file did not exist or cannot be moved
     */
    public boolean shiftProcessedDump(String dumpName) {
        File f = new File(this.dump_dir_import, dumpName);
        if (!f.exists()) return false;
        File g = new File(this.dump_dir_imported, dumpName);
        if (g.exists()) g.delete();
        return f.renameTo(g);
    }

    /**
     * create a concurrent dump reader for the given file. The reader is either a JsonStreamReader if the
     * dump file is gzipped or a JsonRandomAccessFile if the file is a plain txt file. Both reader types
     * must be started as concurrent process which this method does on its own. The reader process dies
     * automatically when the file is read completely. When the reader thread dies, it pushes several
     * JsonReader.POISON_JSON_MAP objects to the reading queue, according to the concurrency defined with the
     * initializer of this class.
     * @param dump file
     * @return a concurrent JsonReader with started Thread wrapper
     * @throws IOException if the file does not exist, cannot be opened or has neither a txt nor a gz extension
     */
    public JsonReader getDumpReader(File dump) throws IOException {
        if (dump == null || !dump.exists()) throw new IOException("dump file " + dump + " does not exist");
        if (dump.getName().endsWith(".gz")) {
            assert this.mode == COMPRESSED_MODE;
            final FileInputStream fileStream = new FileInputStream(dump);
            final GZIPInputStream gzipStream;
            try {
                // the constructor reads the gzip header and fails on a corrupt file
                gzipStream = new GZIPInputStream(fileStream);
            } catch (IOException e) {
                // do not leak the file handle if the gzip header is invalid
                try {
                    fileStream.close();
                } catch (IOException closeFailure) {
                    e.addSuppressed(closeFailure);
                }
                throw e;
            }
            JsonStreamReader r = new JsonStreamReader(gzipStream, dump.getAbsolutePath(), this.concurrency);
            final Thread readerThread = new Thread(r);
            readerThread.start();
            return r;
        }
        if (dump.getName().endsWith(".txt")) {
            // no assert for the mode here because both mode would be valid
            final JsonRandomAccessFile r = new JsonRandomAccessFile(dump, this.concurrency);
            final Thread readerThread = new Thread(r);
            readerThread.start();
            return r;
        }
        throw new IOException("wrong file extension: must be txt or gz");
    }
}