package au.gov.amsa.mariweb;
import java.io.BufferedOutputStream;
import java.io.BufferedWriter;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.Charset;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Set;
import java.util.TimeZone;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import rx.Observable;
import rx.Observable.OnSubscribe;
import rx.Observer;
import rx.Scheduler;
import rx.Subscriber;
import rx.functions.Func1;
import au.gov.amsa.streams.Strings;
import au.gov.amsa.util.nmea.NmeaMessageParser;
import au.gov.amsa.util.nmea.NmeaUtil;
import com.google.common.base.Preconditions;
public class BackupReader {
private static final TimeZone UTC = TimeZone.getTimeZone("UTC");
private static final Set<String> TABLES = new HashSet<String>(Arrays.asList("ITU21_data",
"ITU411_data", "ITU123_data", "ITU5_data", "ITU18_data", "ITU19_data"));
static Observable<String> getNmea(InputStream is) {
final AtomicInteger lineNo = new AtomicInteger(0);
return Strings.split(Strings.from(new InputStreamReader(is)), "\n")
//
.doOnNext(line -> {
lineNo.incrementAndGet();
}).filter(line -> {
for (String table : TABLES)
if (line.startsWith("INSERT INTO `" + table + "`"))
return true;
return false;
})
// parse the insert statements
.lift(new OperatorExtractValuesFromInsertStatement())
// buffer on backpressure because above operator is not
// backpressure aware and was experiencing a hang here when
// expected a MissingBackpressureException.
.onBackpressureBuffer()
// use the bits from the row
.flatMap(toNmea());
}
private static Func1<List<String>, Observable<String>> toNmea() {
return row -> {
String aisMessage = row.get(5);
String[] items = aisMessage.split("\\|");
List<String> list = new ArrayList<String>();
String positionTime = row.get(1);
String arrivalTime = row.get(2);
String tagBlock = row.get(6);
final String tagBlockAmended;
if (tagBlock.length() == 0)
throw new RuntimeException("tag block is empty!");
else {
// now insert arrival time into the tag block with a tag of `at`
if (tagBlock.charAt(0) != '\\')
throw new RuntimeException("tag block should start with \\:" + tagBlock);
if (tagBlock.length() < 5)
throw new RuntimeException("tag block should be at least 5 characters:"
+ tagBlock);
LinkedHashMap<String, String> tags = NmeaMessageParser.extractTags(tagBlock
.substring(1, tagBlock.length() - 1));
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
sdf.setTimeZone(UTC);
try {
long unixMs = sdf.parse(arrivalTime).getTime();
StringBuilder s = new StringBuilder();
String source = tags.get("s");
if (source != null) {
source = source.trim();
s.append("s:");
s.append(source);
}
long positionTimeUnixSeconds = sdf.parse(positionTime).getTime() / 1000;
if (s.length() > 0)
s.append(',');
s.append("c:");
s.append(positionTimeUnixSeconds);
if (s.length() > 0)
s.append(',');
s.append("a:");// custom tag for arrival time
s.append(unixMs);
String checksum = NmeaUtil.getChecksum(s.toString());
s.append('*');
s.append(checksum);
s.append('\\');
s.insert(0, '\\');
tagBlockAmended = s.toString();
} catch (ParseException e) {
throw new RuntimeException(e);
}
}
for (String item : items) {
// list.add(toTagBlock(positionTime) + item);
list.add(tagBlockAmended + item);
}
return Observable.from(list);
};
}
public Observable<String> getNmea(String s) {
return getNmea(new ByteArrayInputStream(s.getBytes()));
}
public void extractNmea(File file, File output) {
GZIPInputStream is = null;
GZIPOutputStream fos = null;
try {
is = new GZIPInputStream(new FileInputStream(file));
fos = new GZIPOutputStream(new BufferedOutputStream(new FileOutputStream(output)));
extractNmea(is, fos);
} catch (IOException e) {
throw new RuntimeException(e);
} finally {
if (is != null)
try {
is.close();
} catch (IOException e) {
// do nothing
}
if (fos != null)
try {
fos.close();
} catch (IOException e) {
// do nothing
}
}
}
public void extractNmea(InputStream is, OutputStream os) {
final PrintWriter writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(os,
Charset.forName("UTF-8"))));
try {
final AtomicReference<Throwable> exception = new AtomicReference<Throwable>();
getNmea(is)
// subscribe
.subscribe(new Observer<String>() {
Long time = System.currentTimeMillis();
final int rateEvery = 1000000;
long count;
@Override
public void onCompleted() {
}
@Override
public void onError(Throwable e) {
e.printStackTrace();
exception.set(e);
}
@Override
public void onNext(String line) {
writer.println(line);
incrementCount();
}
private void incrementCount() {
long n = ++count;
if (n % rateEvery == 0) {
long t = System.currentTimeMillis();
double rate = rateEvery * 1000.0 / (t - time);
System.out.println(new Date() + ":" + n + " msgsPerSecond = "
+ rate);
time = t;
}
}
});
if (exception.get() != null)
throw new RuntimeException(exception.get());
} finally {
writer.close();
}
}
public static void convertDirectoryToNmea(File directory, final Scheduler scheduler,
boolean recurse) {
int count = Observable.from(getFilesToProcess(directory, recurse))
.flatMap(new Func1<File, Observable<File>>() {
@Override
public Observable<File> call(final File file) {
return Observable.create(new OnSubscribe<File>() {
@Override
public void call(Subscriber<? super File> subscriber) {
convertFileToNmea(file);
subscriber.onNext(file);
subscriber.onCompleted();
}
}).subscribeOn(scheduler);
}
}).count().toBlocking().single();
System.out.println(count + "files converted");
}
private static List<File> getFilesToProcess(File directory, boolean recurse) {
Preconditions.checkArgument(directory.exists(), "directory does not exist: " + directory);
Preconditions.checkArgument(directory.isDirectory(), "file is not a directory: "
+ directory);
File[] files = directory.listFiles((dir, name) -> {
return (name.startsWith("LSS_20") || name.startsWith("ITU_20"))
&& name.endsWith(".bu.gz");
});
// sort by ascending filename
Arrays.sort(files, (a, b) -> {
return a.getName().compareTo(b.getName());
});
List<File> list = new ArrayList<File>(Arrays.asList(files));
if (recurse) {
for (File d : directory.listFiles())
if (d.isDirectory() && !d.getName().startsWith("."))
list.addAll(getFilesToProcess(d, recurse));
}
return list;
}
static void convertFileToNmea(File file) {
String baseName = "NMEA_" + file.getName().replace(".bu", "").replace("LSS_", "ITU_");
File output = new File(file.getParentFile(), baseName + ".tmp");
File finalOutput = new File(file.getParentFile(), baseName);
if (!finalOutput.exists()) {
System.out.println("converting " + file);
try {
BackupReader b = new BackupReader();
b.extractNmea(file, output);
output.renameTo(finalOutput);
if (finalOutput.length() < 1000000)
throw new RuntimeException("file less than 1MB, deleting " + finalOutput);
System.out.println("converted " + file);
} catch (RuntimeException e) {
System.out.println("problem processing " + file);
e.printStackTrace(System.out);
finalOutput.delete();
}
} else
System.out.println("output exists: " + finalOutput);
}
}