package yuku.alkitabconverter.util;

import yuku.alkitab.util.IntArrayList;

import java.util.HashMap;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Parses free-text Bible verse addresses (Indonesian or English book names, e.g.
 * {@code "Kej 1:2-3"} or {@code "1 John 3:16, 18"}) into numeric start/end pairs.
 *
 * <p>Each returned value is encoded as {@code (book << 16) | (chapter << 8) | verse}, where
 * {@code book} is the zero-based index of the book in {@link #orderedBooks}.
 */
public class DesktopVerseParser {
    /** Regex alternation of the Indonesian book names and abbreviations. */
    private static final String bookNamesPattern_indonesian =
        "kejadian|kej|kel|keluaran|im|imamat|bil|bilangan|ul|ulangan|yos|yosua|hak|hakim-hakim|rut|ru|"
        + "1\\s+samuel|1samuel|1\\s+sam|1sam|1\\s+sa|1sa|i\\s+samuel|i\\s+sam|i\\s+sa|"
        + "2\\s+samuel|2samuel|2\\s+sam|2sam|2\\s+sa|2sa|ii\\s+samuel|ii\\s+sam|ii\\s+sa|"
        + "1\\s+raj|1\\s+raja|1raj|1raja|1\\s+raja-raja|1raja-raja|"
        + "2\\s+raj|2\\s+raja|2raj|2raja|2\\s+raja-raja|2raja-raja|"
        + "i\\s+raj|i\\s+raja|iraj|iraja|i\\s+raja-raja|iraja-raja|"
        + "ii\\s+raj|ii\\s+raja|iiraj|iiraja|ii\\s+raja-raja|iiraja-raja|"
        + "1\\s+tawarikh|1tawarikh|1\\s+taw|1taw|i\\s+tawarikh|i\\s+taw|"
        + "2\\s+tawarikh|2tawarikh|2\\s+taw|2taw|ii\\s+tawarikh|ii\\s+taw|"
        + "ezra|ezr|neh|nh|ne|nehemia|est|es|ester|ayub|ayb|ay|mazmur|maz|mzm|amsal|ams|pengkhotbah|pkh|"
        + "kidung\\s+agung|kidungagung|kid|yesaya|yes|yeremia|yer|ratapan|rat|yehezkiel|yeh|daniel|dan|dn|"
        + "hosea|hos|ho|yoel|yl|amos|amo|am|obaja|oba|ob|yunus|yun|mikha|mik|mi|nahum|nah|na|"
        + "habakkuk|habakuk|hab|zefanya|zef|haggai|hagai|hag|zakharia|zak|za|maleakhi|mal|"
        + "matius|mat|mt|markus|mark|mar|mrk|mr|mk|lukas|luk|lu|lk|yohanes|yoh|"
        + "kisah\\s+para\\s+rasul|kisah\\s+rasul|kis|roma|rom|rm|ro|"
        + "1\\s+korintus|1korintus|1\\s+kor|1kor|2\\s+korintus|2korintus|2\\s+kor|2kor|"
        + "i\\s+korintus|ikorintus|i\\s+kor|ikor|ii\\s+korintus|iikorintus|ii\\s+kor|iikor|"
        + "galatia|gal|ga|efesus|ef|filipi|flp|fil|kolose|kol|"
        + "1\\s+tesalonika|1tesalonika|1\\s+tes|1tes|i\\s+tesalonika|i\\s+tes|"
        + "2\\s+tesalonika|2tesalonika|2\\s+tes|2tes|ii\\s+tesalonika|ii\\s+tes|"
        + "1timotius|1\\s+timotius|1\\s+tim|1tim|1\\s+ti|1ti|i\\s+tim|i\\s+ti|i\\s+timotius|i\\s+tim|i\\s+ti|"
        + "2timotius|2\\s+timotius|2\\s+tim|2tim|2\\s+ti|2ti|ii\\s+timotius|ii\\s+tim|ii\\s+ti|"
        + "titus|tit|filemon|flm|ibrani|ibr|yakobus|yak|"
        + "1\\s+pet|1pet|1\\s+pe|1pe|i\\s+peter|i\\s+pet|i\\s+pe|1\\s+petrus|1petrus|1\\s+ptr|1ptr|"
        + "2\\s+pet|2pet|2\\s+pe|2pe|ii\\s+peter|ii\\s+pet|ii\\s+pe|2\\s+petrus|2petrus|2\\s+ptr|2ptr|"
        + "1\\s+yohanes|1yohanes|1yoh|1\\s+yoh|i\\s+yohanes|i\\s+yoh|"
        + "2\\s+yohanes|2yohanes|ii\\s+yohanes|ii\\s+yoh|2yoh|2\\s+yoh|"
        + "3\\s+yohanes|3yohanes|3yoh|3\\s+yoh|iii\\s+yohanes|iii\\s+yoh|"
        + "yudas|yud|wahyu|why|wah";

    /** Regex alternation of the English book names and abbreviations. */
    private static final String bookNamesPattern_english =
        "genesis|gen|ge|gn|exodus|exod|exo|ex|leviticus|lev|lv|le|numbers|num|nmb|nu|"
        + "deuteronomy|deut|deu|dt|de|joshua|josh|jos|judges|judg|jdg|ruth|rut|rth|ru|"
        + "1\\s+samuel|1samuel|1\\s+sam|1sam|1\\s+sa|1sa|i\\s+samuel|i\\s+sam|i\\s+sa|"
        + "2\\s+samuel|2samuel|2\\s+sam|2sam|2\\s+sa|2sa|ii\\s+samuel|ii\\s+sam|ii\\s+sa|"
        + "1\\s+kings|1kings|1\\s+kin|1kin|1\\s+kgs|1kgs|1\\s+ki|1ki|i\\s+kings|i\\s+kin|i\\s+kgs|i\\s+ki|"
        + "2\\s+kings|2kings|2\\s+kin|2kin|2\\s+kgs|2kgs|2\\s+ki|2ki|ii\\s+kings|ii\\s+kin|ii\\s+kgs|ii\\s+ki|"
        + "1\\s+chronicles|1chronicles|1\\s+chron|1chron|1\\s+chr|1chr|1\\s+ch|1ch|"
        + "i\\s+chronicles|i\\s+chron|i\\s+chr|i\\s+ch|"
        + "2\\s+chronicles|2chronicles|2\\s+chron|2chron|2\\s+chr|2chr|2\\s+ch|2ch|"
        + "ii\\s+chronicles|ii\\s+chron|ii\\s+chr|ii\\s+ch|"
        + "ezra|ezr|nehemiah|neh|nh|ne|nehemia|esther|esth|est|es|ester|job|jb|"
        + "psalms|psalm|psa|pss|ps|proverbs|proverb|prov|pro|pr|ecclesiastes|eccl|ecc|ec|"
        + "songs\\s+of\\s+solomon|songsofsolomon|song\\s+of\\s+solomon|songofsolomon|"
        + "song\\s+of\\s+songs|songofsongs|songs|song|son|sos|so|"
        + "isaiah|isa|is|jeremiah|jer|je|lamentations|lam|la|ezekiel|ezek|eze|daniel|dan|dn|da|"
        + "hosea|hos|ho|joel|joe|yl|amos|amo|am|obadiah|oba|ob|jonah|jon|micah|mikha|mic|mi|"
        + "nahum|nah|na|habakkuk|habakuk|hab|zephaniah|zeph|zep|haggai|hagai|hag|"
        + "zechariah|zech|zec|za|malachi|mal|"
        + "matthew|mathew|matt|mat|mt|markus|mark|mar|mrk|mr|mk|luke|luk|lu|lk|john|joh|jhn|jn|"
        + "acts\\s+of\\s+the\\s+apostles|actsoftheapostles|acts|act|ac|romans|rom|rm|ro|"
        + "1\\s+corinthians|1corinthians|1\\s+cor|1cor|1\\s+co|1co|i\\s+corinthians|i\\s+cor|i\\s+co|"
        + "2\\s+corinthians|2corinthians|2\\s+cor|2cor|2\\s+co|2co|ii\\s+corinthians|ii\\s+cor|ii\\s+co|"
        + "galatians|galatia|gal|ga|ephesians|eph|ep|"
        + "phillippians|philippians|phill|phil|phi|php|ph|colossians|col|co|"
        + "1\\s+thessalonians|1thessalonians|1\\s+thess|1thess|1\\s+thes|1thes|1\\s+the|1the|1\\s+th|1th|"
        + "i\\s+thessalonians|i\\s+thess|i\\s+thes|i\\s+the|i\\s+th|"
        + "2\\s+thessalonians|2thessalonians|2\\s+thess|2thess|2\\s+thes|2thes|2\\s+the|2the|2\\s+th|2th|"
        + "ii\\s+thessalonians|ii\\s+thess|ii\\s+thes|ii\\s+the|ii\\s+th|"
        + "1\\s+timothy|1timothy|1\\s+tim|1tim|1\\s+ti|1ti|i\\s+timothy|i\\s+tim|i\\s+ti|"
        + "2\\s+timothy|2timothy|2\\s+tim|2tim|2\\s+ti|2ti|ii\\s+timothy|ii\\s+tim|ii\\s+ti|"
        + "titus|tit|philemon|phile|phm|hebrews|heb|he|james|jam|jas|jms|ja|jm|"
        + "1\\s+peter|1peter|1\\s+pet|1pet|1\\s+pe|1pe|i\\s+peter|i\\s+pet|i\\s+pe|1\\s+ptr|1ptr|"
        + "2\\s+peter|2peter|2\\s+pet|2pet|2\\s+pe|2pe|ii\\s+peter|ii\\s+pet|ii\\s+pe|2\\s+ptr|2ptr|"
        + "1\\s+john|1john|1\\s+joh|1joh|1\\s+jhn|1jhn|1\\s+jo|1jo|1\\s+jn|1jn|"
        + "i\\s+john|i\\s+joh|i\\s+jhn|i\\s+jo|i\\s+jn|"
        + "2\\s+john|2john|2\\s+joh|2joh|2\\s+jhn|2jhn|2\\s+jo|2jo|2\\s+jn|2jn|"
        + "ii\\s+john|ii\\s+joh|ii\\s+jhn|ii\\s+jo|ii\\s+jn|"
        + "3\\s+john|3john|3\\s+joh|3joh|3\\s+jhn|3jhn|3\\s+jo|3jo|3\\s+jn|3jn|"
        + "iii\\s+john|iii\\s+joh|iii\\s+jhn|iii\\s+jo|iii\\s+jn|"
        + "jude|jud|ju|revelations|revelation|rev|re|rv";

    public static final String TAG = DesktopVerseParser.class.getSimpleName();

    // Capturing groups of the address pattern below:
    //   1  complete verse address (book chapter verse)
    //   2  book name with optional period and spaces after it
    //   3  book name
    //   4  numbers (chapter or chapter:verse, joined with ',' or ';' or 'dan')
    // NOTE(review): the original comment said group 4 is "not followed by nofollow", but the
    // pattern contains no such lookahead - confirm whether that restriction is still wanted.
    static Pattern reg = Pattern.compile("(((" + bookNamesPattern_indonesian + "|" + bookNamesPattern_english + ")(?:\\.?\\s+|\\.))(\\d+(?:(?:-|:|(?:;\\s*\\d+:\\s*)|,|\\.|\\d|dan|\\s)+\\d+)?))", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);

    /** Splits the numbers part into individual references or ranges. */
    static Pattern numberRangeSplitter = Pattern.compile("\\s*(;|,|dan)\\s*" /* NOT case insensitive */);

    /** Splits one range into its start and end around '-' or '--'. */
    static Pattern numberStartEndSplitter = Pattern.compile("\\s*--?\\s*");

    /** Matches "chapter:verse" or "chapter.verse". */
    static Pattern chapterVerse = Pattern.compile("(\\d+)\\s*[:.]\\s*(\\d+)");

    /** Matches a bare run of digits. */
    static Pattern numbersOnly = Pattern.compile("[0-9]+");

    /** Collapses the whitespace the address pattern tolerates inside book names. */
    private static final Pattern whitespaceRun = Pattern.compile("\\s+");

    /**
     * Accepted names of every book, '|'-separated; the array index is the zero-based book id.
     * Names are lower case and use a single space between words.
     */
    static String[] orderedBooks = {
        "kejadian|kej|genesis|gen|ge|gn",
        "kel|keluaran|exodus|exod|exo|ex",
        "im|imamat|leviticus|lev|lv|le",
        "bil|bilangan|numbers|num|nmb|nu",
        "ul|ulangan|deuteronomy|deut|deu|dt|de",
        "yos|yosua|joshua|josh|jos",
        "hak|hakim-hakim|judges|judg|jdg",
        "rut|ru|ruth|rut|rth|ru",
        // same for english: 1 samuel|1samuel|1 sam|1sam|1 sa|1sa|i samuel|i sam|i sa
        "1 samuel|1samuel|1 sam|1sam|1 sa|1sa|i samuel|i sam|i sa",
        // same for english: 2 samuel|2samuel|2 sam|2sam|2 sa|2sa|ii samuel|ii sam|ii sa
        "2 samuel|2samuel|2 sam|2sam|2 sa|2sa|ii samuel|ii sam|ii sa",
        "1 raj|1 raja|1raj|1raja|1 raja-raja|1raja-raja|i raj|i raja|iraj|iraja|i raja-raja|iraja-raja|1 kings|1kings|1 kin|1kin|1 kgs|1kgs|1 ki|1ki|i kings|i kin|i kgs|i ki",
        "2 raj|2 raja|2raj|2raja|2 raja-raja|2raja-raja|ii raj|ii raja|iiraj|iiraja|ii raja-raja|iiraja-raja|2 kings|2kings|2 kin|2kin|2 kgs|2kgs|2 ki|2ki|ii kings|ii kin|ii kgs|ii ki",
        "1 tawarikh|1tawarikh|1 taw|1taw|i tawarikh|i taw|1 chronicles|1chronicles|1 chron|1chron|1 chr|1chr|1 ch|1ch|i chronicles|i chron|i chr|i ch",
        "2 tawarikh|2tawarikh|2 taw|2taw|ii tawarikh|ii taw|2 chronicles|2chronicles|2 chron|2chron|2 chr|2chr|2 ch|2ch|ii chronicles|ii chron|ii chr|ii ch",
        // same for english: ezra|ezr
        "ezra|ezr",
        "neh|nh|ne|nehemia|nehemiah|neh|nh|ne|nehemia",
        "est|es|ester|esther|esth|est|es|ester",
        "ayub|ayb|ay|job|jb",
        "mazmur|maz|mzm|psalms|psalm|psa|pss|ps",
        "amsal|ams|proverbs|proverb|prov|pro|pr",
        "pengkhotbah|pkh|ecclesiastes|eccl|ecc|ec",
        "kidung agung|kidungagung|kid|songs of solomon|songsofsolomon|song of solomon|songofsolomon|song of songs|songofsongs|songs|song|son|sos|so",
        "yesaya|yes|isaiah|isa|is",
        "yeremia|yer|jeremiah|jer|je",
        "ratapan|rat|lamentations|lam|la",
        "yehezkiel|yeh|ezekiel|ezek|eze",
        "daniel|dan|dn|daniel|dan|dn|da",
        // same for english: hosea|hos|ho
        "hosea|hos|ho",
        "yoel|yl|joel|joe|yl",
        // same for english: amos|amo|am
        "amos|amo|am",
        "obaja|oba|ob|obadiah|oba|ob",
        "yunus|yun|jonah|jon",
        "mikha|mik|mi|micah|mikha|mic|mi",
        // same for english: nahum|nah|na
        "nahum|nah|na",
        // same for english: habakkuk|habakuk|hab
        "habakkuk|habakuk|hab",
        "zefanya|zef|zephaniah|zeph|zep",
        // same for english: haggai|hagai|hag
        "haggai|hagai|hag",
        "zakharia|zak|za|zechariah|zech|zec|za",
        "maleakhi|mal|malachi|mal",
        "matius|mat|mt|matthew|mathew|matt|mat|mt",
        "markus|mark|mar|mrk|mr|mk",
        "lukas|luk|lu|lk|luke|luk|lu|lk",
        "yohanes|yoh|john|joh|jhn|jn",
        "kisah para rasul|kisah rasul|kis|acts of the apostles|actsoftheapostles|acts|act|ac",
        "roma|rom|rm|ro|romans|rom|rm|ro",
        "1 korintus|1korintus|1 kor|1kor|i korintus|ikorintus|i kor|ikor|1 corinthians|1corinthians|1 cor|1cor|1 co|1co|i corinthians|i cor|i co|icor|ico",
        "2 korintus|2korintus|2 kor|2kor|ii korintus|iikorintus|ii kor|iikor|2 corinthians|2corinthians|2 cor|2cor|2 co|2co|ii corinthians|ii cor|ii co|iicor|iico",
        "galatia|gal|ga|galatians|galatia|gal|ga",
        "efesus|ef|ephesians|eph|ep",
        "filipi|flp|fil|phillippians|philippians|phill|phil|phi|php|ph",
        "kolose|kol|colossians|col|co",
        "1 tesalonika|1tesalonika|1 tes|1tes|i tesalonika|i tes|1 thessalonians|1thessalonians|1 thess|1thess|1 thes|1thes|1 the|1the|1 th|1th|i thessalonians|i thess|i thes|i the|i th",
        "2 tesalonika|2tesalonika|2 tes|2tes|ii tesalonika|ii tes|2 thessalonians|2thessalonians|2 thess|2thess|2 thes|2thes|2 the|2the|2 th|2th|ii thessalonians|ii thess|ii thes|ii the|ii th",
        "1timotius|1 timotius|1 tim|1tim|1 ti|1ti|i tim|i ti|i timotius|i tim|i ti|1 timothy|1timothy|1 tim|1tim|1 ti|1ti|i timothy|i tim|i ti|itim|iti",
        "2timotius|2 timotius|2 tim|2tim|2 ti|2ti|ii tim|ii ti|ii timotius|ii tim|ii ti|2 timothy|2timothy|2 tim|2tim|2 ti|2ti|ii timothy|ii tim|ii ti|iitim|iiti",
        // same for english: titus|tit
        "titus|tit",
        "filemon|flm|philemon|phile|phm",
        "ibrani|ibr|hebrews|heb|he",
        "yakobus|yak|james|jam|jas|jms|ja|jm",
        "1 pet|1pet|1 pe|1pe|i peter|i pet|i pe|1 petrus|1petrus|1 ptr|1ptr|1 peter|1peter|1 pet|1pet|1 pe|1pe|i peter|i pet|i pe|1 ptr|1ptr|ipet|ipe",
        "2 pet|2pet|2 pe|2pe|ii peter|ii pet|ii pe|2 petrus|2petrus|2 ptr|2ptr|2 peter|2peter|2 pet|2pet|2 pe|2pe|ii peter|ii pet|ii pe|2 ptr|2ptr|iipet|iipe",
        "1 yohanes|1yohanes|1yoh|1 yoh|i yohanes|i yoh|1 john|1john|1 joh|1joh|1 jhn|1jhn|1 jo|1jo|1 jn|1jn|i john|i joh|i jhn|i jo|i jn|ijoh|ijhn|ijo|ijn",
        "2 yohanes|2yohanes|ii yohanes|ii yoh|2yoh|2 yoh|2 john|2john|2 joh|2joh|2 jhn|2jhn|2 jo|2jo|2 jn|2jn|ii john|ii joh|ii jhn|ii jo|ii jn|iijoh|iijhn|iijo|iijn",
        // "iiijoh|iiijhn" used to be joined by a comma, which registered one bogus name instead of two.
        "3 yohanes|3yohanes|3yoh|3 yoh|iii yohanes|iii yoh|3 john|3john|3 joh|3joh|3 jhn|3jhn|3 jo|3jo|3 jn|3jn|iii john|iii joh|iii jhn|iii jo|iii jn|iiijoh|iiijhn|iiijo|iiijn",
        "yudas|yud|jude|jud|ju",
        "wahyu|why|wah|revelations|revelation|rev|re|rv",
    };

    /** Maps every accepted book name from {@link #orderedBooks} to its zero-based book id. */
    static HashMap<String, Integer> bookNameToId = new HashMap<String, Integer>(512);

    static {
        for (int i = 0, len = orderedBooks.length; i < len; i++) {
            for (String bookName : orderedBooks[i].split("\\|")) {
                bookNameToId.put(bookName, i);
            }
        }
    }

    /**
     * Parses the first verse address found in the given text.
     *
     * <p>If succeeded, will return start-end pairs [start, end, start, end, ...]. Single verses
     * will have the same values for both start and end. Ranges that cannot be parsed, or whose
     * end lies before their start, are skipped, so the returned list may be empty.
     *
     * @param verse text containing a verse address; may be null
     * @return null when failed (null input, no address found, or unknown book name)
     */
    public static IntArrayList verseStringToAri(String verse) {
        if (verse == null) {
            return null;
        }

        Matcher m = reg.matcher(verse);
        if (!m.find()) {
            return null;
        }

        // The pattern accepts any run of whitespace inside a book name, while the lookup table
        // only holds single-space names; Locale.ROOT keeps lower-casing locale independent.
        String bookName = whitespaceRun.matcher(m.group(3)).replaceAll(" ").toLowerCase(Locale.ROOT);
        Integer bookId = bookNameToId.get(bookName);
        if (bookId == null) {
            return null;
        }

        int book_0 = bookId;
        boolean singleChapterBook = (book_0 == 30 /* obaja */
            || book_0 == 56 /* filemon */
            || book_0 == 62 /* 2yoh */
            || book_0 == 63 /* 3yoh */
            || book_0 == 64 /* yudas */);

        int lastChapter = 0;
        int book_0_shifted = book_0 << 16;
        IntArrayList res = new IntArrayList();

        String numbers = m.group(4);
        for (String range : numberRangeSplitter.split(numbers)) {
            String[] startend = numberStartEndSplitter.split(range);

            if (startend.length == 1) {
                int cv = parseCv(startend[0], singleChapterBook, lastChapter);
                if (cv != 0) {
                    res.add(book_0_shifted | cv); // start
                    res.add(book_0_shifted | cv); // end same as start
                    lastChapter = (cv >> 8) & 0xff;
                }
            } else if (startend.length == 2) {
                int cvStart = parseCv(startend[0], singleChapterBook, lastChapter);
                if (cvStart == 0) {
                    continue;
                }

                int cvEnd = parseRangeEnd(startend[1], cvStart, singleChapterBook, lastChapter);
                if (cvEnd != 0 && cvEnd >= cvStart) {
                    res.add(book_0_shifted | cvStart);
                    res.add(book_0_shifted | cvEnd);
                    lastChapter = (cvEnd >> 8) & 0xff;
                }
            }
            // any other number of parts (e.g. "1-2-3") is not a valid range and is skipped
        }

        return res;
    }

    /**
     * Parses the end of a "start-end" range.
     *
     * @return the end as {@code (chapter << 8) | verse}, or 0 when it cannot be parsed
     */
    private static int parseRangeEnd(String end, int cvStart, boolean singleChapterBook, int lastChapter) {
        String endTrimmed = end.trim();
        if (!numbersOnly.matcher(endTrimmed).matches()) {
            return parseCv(end, singleChapterBook, lastChapter);
        }

        // cases like "2:3-17" (chapter 2 verse 3 to chapter 2 verse 17)
        // or "14-17" (chapter 14 to chapter 17)
        int endNumber = parseComponent(endTrimmed);
        if (endNumber < 0) {
            return 0;
        }
        if ((cvStart & 0xff) == 0) {
            // cvStart has no verse number, so this is for cases like "14-17"
            return endNumber << 8;
        }
        // for cases like "2:3-17": the end shares the chapter of the start
        return (cvStart & 0xff00) | endNumber;
    }

    /**
     * Parses a single "chapter", "verse" or "chapter:verse" token.
     *
     * @param cv the token, without surrounding whitespace
     * @param singleChapterBook when true a bare number is a verse of chapter 1
     * @param previousChapter chapter of the preceding reference, or 0 when there is none; when
     *     non-zero a bare number is a verse of that chapter
     * @return {@code (chapter << 8) | verse}, or 0 when the token cannot be parsed
     */
    private static int parseCv(String cv, boolean singleChapterBook, int previousChapter) {
        if (numbersOnly.matcher(cv).matches()) {
            // either c:0 or 1:v
            int n = parseComponent(cv);
            if (n < 0) {
                return 0;
            }
            if (singleChapterBook) {
                return 0x0100 | n;
            }
            if (previousChapter != 0) {
                return ((previousChapter & 0xff) << 8) | n;
            }
            return n << 8;
        }

        Matcher m = chapterVerse.matcher(cv);
        if (!m.matches()) {
            return 0;
        }
        int c = parseComponent(m.group(1));
        int v = parseComponent(m.group(2));
        if (c < 0 || v < 0) {
            return 0;
        }
        return (c << 8) | v;
    }

    /**
     * Parses a run of digits and keeps its low 8 bits, the width of a chapter or verse field.
     *
     * @return the masked value, or -1 when the digits do not fit in an int
     */
    private static int parseComponent(String digits) {
        try {
            return Integer.parseInt(digits) & 0xff;
        } catch (NumberFormatException tooLarge) {
            // More digits than an int can hold: not a usable chapter or verse number.
            return -1;
        }
    }
}