package yuku.alkitabconverter.yet;
import gnu.trove.list.array.TIntArrayList;
import yuku.alkitab.model.FootnoteEntry;
import yuku.alkitab.model.XrefEntry;
import yuku.alkitab.util.Ari;
import yuku.alkitab.yes2.model.PericopeData;
import yuku.alkitabconverter.util.Rec;
import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Scanner;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class YetFileInput {
/**
 * Holder for everything collected from one yet file.
 *
 * <p>Every collection field starts out {@code null} and is only created by the first call to
 * the matching {@code add...} method, so a field is still {@code null} when nothing of that
 * kind was added.
 */
public static class YetFileInputResult {
    /** Verse records in the order they were added; {@code null} until {@link #addRec(Rec)} is called. */
    public List<Rec> recs;
    /** Pericope entries; {@code null} until {@link #addPericopeEntry(PericopeData.Entry)} is called. */
    public PericopeData pericopeData;
    /** Key/value info pairs in insertion order; {@code null} until {@link #addInfo(String, String)} is called. */
    public Map<String, String> infos;
    /** Value last passed to {@link #setNumberOfBooks(int)}; 0 if it was never called. */
    public int numberOfBooks;
    public Map<Integer, String> bookNames; // key is book_1
    public Map<Integer, String> bookAbbreviations; // key is book_1
    public LinkedHashMap<Integer /* arif */, XrefEntry> xrefEntries;
    public LinkedHashMap<Integer /* arif */, FootnoteEntry> footnoteEntries;

    /** Stores an info pair, replacing any earlier value for the same key. */
    void addInfo(String k, String v) {
        if (infos == null) infos = new LinkedHashMap<>();
        infos.put(k, v);
    }

    /** Appends a pericope entry, creating {@link #pericopeData} on first use. */
    void addPericopeEntry(PericopeData.Entry e) {
        if (pericopeData == null) pericopeData = new PericopeData();
        pericopeData.addEntry(e);
    }

    /** Appends a verse record, creating {@link #recs} on first use. */
    void addRec(Rec rec) {
        if (recs == null) recs = new ArrayList<>();
        recs.add(rec);
    }

    /**
     * Registers the name and abbreviation of a book. Both maps are created together on first use.
     *
     * @param book_1 1-based book number
     * @param bookName full name of the book
     * @param bookAbbreviation abbreviation of the book; stored as given, may be {@code null}
     */
    void addBookName(int book_1, String bookName, String bookAbbreviation) {
        if (bookNames == null) {
            bookNames = new TreeMap<>();
            bookAbbreviations = new TreeMap<>();
        }
        bookNames.put(book_1, bookName);
        bookAbbreviations.put(book_1, bookAbbreviation);
    }

    void setNumberOfBooks(int numberOfBooks) {
        this.numberOfBooks = numberOfBooks;
    }

    /** Stores a cross-reference entry under the given arif, replacing any earlier entry with that key. */
    public void addXrefEntry(final int arif, final XrefEntry xe) {
        if (xrefEntries == null) {
            xrefEntries = new LinkedHashMap<>();
        }
        xrefEntries.put(arif, xe);
    }

    /** Stores a footnote entry under the given arif, replacing any earlier entry with that key. */
    public void addFootnoteEntry(final int arif, final FootnoteEntry fe) {
        if (footnoteEntries == null) {
            footnoteEntries = new LinkedHashMap<>();
        }
        footnoteEntries.put(arif, fe);
    }

    /**
     * @return book names indexed from 0 (book_1 - 1); positions of books that were never
     * registered hold {@code null}. Empty when no book name was registered.
     */
    public List<String> getBookNamesAsList() {
        return toZeroBasedList(bookNames);
    }

    /**
     * @return book abbreviations indexed from 0 (book_1 - 1); positions of books without an
     * abbreviation hold {@code null}. Empty when no book name was registered.
     */
    public List<String> getBookAbbreviationsAsList() {
        return toZeroBasedList(bookAbbreviations);
    }

    /**
     * Spreads a map keyed by 1-based book number into a list indexed by 0-based book number,
     * padding gaps with {@code null}.
     *
     * @param byBook_1 map keyed by book_1, or {@code null} if nothing was registered
     * @return a new mutable list; empty when {@code byBook_1} is {@code null}
     * @throws IndexOutOfBoundsException if a key is smaller than 1
     */
    private static List<String> toZeroBasedList(Map<Integer, String> byBook_1) {
        final List<String> res = new ArrayList<>();
        if (byBook_1 == null) {
            // The maps are created lazily, so this is the "no book_name was added" case.
            return res;
        }
        for (Map.Entry<Integer, String> e : byBook_1.entrySet()) {
            final int index = e.getKey() - 1;
            // Grow the list until the target index exists.
            while (index + 1 > res.size()) {
                res.add(null);
            }
            res.set(index, e.getValue());
        }
        return res;
    }
}
// Reusable matchers for the markers "@<xN@>" (cross reference) and "@<fN@>" (footnote) that
// appear inside verse text; group 1 captures the decimal field number N.
// NOTE: a Matcher carries state and these instances are static and shared. parse() resets and
// advances them for every verse while verifying references, so concurrent parse() calls would
// interfere with each other.
static Matcher xrefMatcher = Pattern.compile("@<x([0-9]+)@>").matcher("");
static Matcher footnoteMatcher = Pattern.compile("@<f([0-9]+)@>").matcher("");
/**
 * Parses the yet file at {@code nf} into a {@link YetFileInputResult}.
 *
 * <p>Every line is split on tab characters and dispatched on its first field:
 * {@code info}, {@code pericope}, {@code parallel}, {@code book_name}, {@code verse},
 * {@code xref} or {@code footnote}. A line whose first field, once trimmed, is empty or starts
 * with {@code #} is ignored.
 *
 * <p>After the whole file is read, every {@code @<fN@>} and {@code @<xN@>} marker found in verse
 * text must have a matching footnote/xref entry. Finally the number of verses per book is
 * printed to {@code System.err} and the number of distinct books that had verses is stored in
 * the result.
 *
 * @param nf path of the file to read; the content is decoded as UTF-8
 * @return the parsed result, or {@code null} when a line with an unknown command is met (the
 * line is printed to {@code System.err})
 * @throws Exception anything thrown while reading or interpreting a line (for example a missing
 * field or a non-numeric number); the line number and text are printed to {@code System.err}
 * before it is rethrown
 * @throws RuntimeException if verses are out of order, if {@code parallel} appears before any
 * {@code pericope}, if a marker's field number is outside 1-255, or if a marker in verse text
 * has no matching entry
 */
public YetFileInputResult parse(String nf) throws Exception {
    final LinkedHashMap<Integer, Integer> nversePerBook = new LinkedHashMap<>();

    int lastBook_1 = 1;
    int lastChapter_1 = 1;
    int lastVerse_1 = 0;
    PericopeData.Entry lastPericopeEntry = null;

    final YetFileInputResult res = new YetFileInputResult();

    // Remembered only so the catch block can say where the failure happened.
    int report_line_number = 0;
    String report_line_text = null;

    // try-with-resources: the file is closed on normal completion, on the early
    // "return null" below, and when an exception escapes.
    try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(nf), 20*1024*1024);
         Scanner sc = new Scanner(in, "utf-8")) {
        while (sc.hasNextLine()) {
            String line = sc.nextLine();

            report_line_number++;
            report_line_text = line;

            // limit -1 keeps trailing empty fields
            String[] splits = line.split("\t", -1);
            String command = splits[0];

            if ("info".equals(command)) {
                String k = splits[1];
                String v = splits[2];
                res.addInfo(k, v);
            } else if ("pericope".equals(command)) {
                int book_1 = Integer.parseInt(splits[1]);
                int chapter_1 = Integer.parseInt(splits[2]);
                int verse_1 = Integer.parseInt(splits[3]);
                String text = splits[4];

                lastPericopeEntry = new PericopeData.Entry();
                res.addPericopeEntry(lastPericopeEntry);
                // 0-based book from bit 16 up, chapter from bit 8 up, verse in the low bits
                lastPericopeEntry.ari = ((book_1 - 1) << 16) | (chapter_1 << 8) | verse_1;
                lastPericopeEntry.block = new PericopeData.Block();
                lastPericopeEntry.block.title = text;
            } else if ("parallel".equals(command)) {
                String text = splits[1];
                if (lastPericopeEntry == null) {
                    throw new RuntimeException("parallel encountered before pericope title: " + line);
                }
                // a parallel always belongs to the most recently seen pericope
                lastPericopeEntry.block.addParallel(text);
            } else if ("book_name".equals(command)) {
                int book_1 = Integer.parseInt(splits[1]);
                String bookName = splits[2];
                // the abbreviation column is optional; missing or empty becomes null
                String bookAbbreviation;
                if (splits.length > 3 && splits[3] != null && !splits[3].isEmpty()) {
                    bookAbbreviation = splits[3];
                } else {
                    bookAbbreviation = null;
                }
                res.addBookName(book_1, bookName, bookAbbreviation);
            } else if ("verse".equals(command)) {
                int book_1 = Integer.parseInt(splits[1]);
                int chapter_1 = Integer.parseInt(splits[2]);
                int verse_1 = Integer.parseInt(splits[3]);
                String text = splits[4];

                // check verse ordering
                boolean validOrdering = false;
                if (verse_1 == lastVerse_1 + 1) {
                    // next verse (book and chapter are not compared in this case)
                    validOrdering = true;
                } else if (verse_1 == 1 && chapter_1 == lastChapter_1 + 1) {
                    // next chapter, verse 1
                    validOrdering = true;
                } else if (verse_1 == 1 && chapter_1 == 1) {
                    // new book, chapter 1 verse 1
                    validOrdering = true;
                    if (book_1 != lastBook_1 + 1) {
                        // skipped book(s)
                        System.err.println("warning: some book(s) skipped. current book_1: " + book_1 + " previous book_1: " + lastBook_1);
                    }
                }

                if (!validOrdering) {
                    throw new RuntimeException("wrong verse ordering at line: " + line);
                }

                Rec rec = new Rec();
                rec.book_1 = book_1;
                rec.chapter_1 = chapter_1;
                rec.verse_1 = verse_1;
                rec.text = text;
                res.addRec(rec);

                final Integer versesSoFar = nversePerBook.get(book_1);
                nversePerBook.put(book_1, (versesSoFar == null ? 0 : versesSoFar) + 1);

                lastBook_1 = book_1;
                lastChapter_1 = chapter_1;
                lastVerse_1 = verse_1;
            } else if ("xref".equals(command)) {
                int book_1 = Integer.parseInt(splits[1]);
                int chapter_1 = Integer.parseInt(splits[2]);
                int verse_1 = Integer.parseInt(splits[3]);
                int field_1 = Integer.parseInt(splits[4]);
                String content = splits[5];

                XrefEntry xe = new XrefEntry();
                xe.content = content;
                // NOTE(review): field_1 is not range-checked here; a value that does not fit in
                // 8 bits would spill into the ari part of the key.
                res.addXrefEntry((Ari.encode(book_1 - 1, chapter_1, verse_1) << 8) | field_1, xe);
            } else if ("footnote".equals(command)) {
                int book_1 = Integer.parseInt(splits[1]);
                int chapter_1 = Integer.parseInt(splits[2]);
                int verse_1 = Integer.parseInt(splits[3]);
                int field_1 = Integer.parseInt(splits[4]);
                String content = splits[5];

                FootnoteEntry fe = new FootnoteEntry();
                fe.content = content;
                // NOTE(review): same unchecked field_1 as for xref above.
                res.addFootnoteEntry((Ari.encode(book_1 - 1, chapter_1, verse_1) << 8) | field_1, fe);
            } else if (command.trim().startsWith("#") || command.trim().length() == 0) {
                // comment or blank line
            } else {
                System.err.println("unknown line encountered: " + line);
                return null;
            }
        }

        // Scanner hides read failures behind hasNextLine() == false; surface them instead of
        // silently returning a truncated result.
        final java.io.IOException readError = sc.ioException();
        if (readError != null) {
            throw readError;
        }
    } catch (Exception e) {
        System.err.println("Error in line " + report_line_number + ": " + report_line_text);
        throw e;
    }

    verifyFootnotesAndXrefs(res);

    for (Entry<Integer, Integer> e : nversePerBook.entrySet()) {
        System.err.println("book_1 " + e.getKey() + ": " + e.getValue() + " verses");
    }

    res.setNumberOfBooks(nversePerBook.size());

    return res;
}

/**
 * Verifies that every footnote/xref marker in verse text has a matching entry in {@code res}.
 * All unresolved markers are printed to {@code System.err} (footnotes first, then xrefs)
 * before a {@link RuntimeException} is thrown. Does nothing when there are no verses.
 */
private static void verifyFootnotesAndXrefs(YetFileInputResult res) {
    if (res.recs == null) {
        // recs is created lazily; no verse lines means there is no text that could hold a marker
        return;
    }

    final TIntArrayList footnoteArifs = new TIntArrayList();
    final TIntArrayList xrefArifs = new TIntArrayList();

    for (final Rec rec : res.recs) {
        final int ari = Ari.encode(rec.book_1 - 1, rec.chapter_1, rec.verse_1);
        collectArifs(footnoteMatcher, "footnote", rec.text, ari, footnoteArifs);
        collectArifs(xrefMatcher, "xref", rec.text, ari, xrefArifs);
    }

    final List<String> errors = new ArrayList<>();
    collectUnresolved("footnote", footnoteArifs, res.footnoteEntries, errors);
    collectUnresolved("xref", xrefArifs, res.xrefEntries, errors);

    if (errors.size() != 0) {
        for (final String error : errors) {
            System.err.println(error);
        }
        throw new RuntimeException("there are footnotes and/or xrefs not resolved");
    }
}

/**
 * Appends to {@code out} one arif ({@code ari << 8 | field}) for every marker that
 * {@code matcher} finds in {@code text}; throws if a field number is outside 1-255.
 */
private static void collectArifs(Matcher matcher, String kind, String text, int ari, TIntArrayList out) {
    matcher.reset(text);
    while (matcher.find()) {
        final int field = Integer.parseInt(matcher.group(1));
        if (field < 1 || field > 255) {
            throw new RuntimeException(kind + " field not in 1-255: " + text);
        }
        out.add(ari << 8 | field);
    }
}

/** Adds one message to {@code errors} for every arif in {@code arifs} that is not a key of {@code entries}. */
private static void collectUnresolved(String kind, TIntArrayList arifs, Map<Integer, ?> entries, List<String> errors) {
    for (final int arif : arifs.toArray()) {
        if (entries == null || !entries.containsKey(arif)) {
            final int ari = arif >>> 8;
            errors.add(String.format(kind + " referenced in verse text not found: arif 0x%08x (book_1=%d, chapter_1=%d, verse_1=%d, field=%d)", arif, Ari.toBook(ari) + 1, Ari.toChapter(ari), Ari.toVerse(ari), arif & 0xff));
        }
    }
}
}