package com.door43.translationstudio.core;

import android.app.Activity;
import android.content.Context;
import android.content.pm.PackageInfo;
import android.net.Uri;
import android.text.TextUtils;

import com.door43.tools.reporting.Logger;
import com.door43.translationstudio.AppContext;
import com.door43.translationstudio.R;
import com.door43.translationstudio.spannables.USFMVerseSpan;
import com.door43.util.Zip;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * For processing USFM input file or zip files into importable package.
 *
 * Each book is split into chapter folders and chunk files below a temporary project
 * folder. Errors and warnings are accumulated per book (see getResultsString()) and the
 * whole import state can be saved to / restored from JSON (see toJson() / newInstance()).
 */
public class ImportUsfm {
    public static final String TAG = ImportUsfm.class.getSimpleName();

    /** regex for the "\toc1" marker; group 1 is the rest of the line (read as the book name) */
    public static final String BOOK_NAME_MARKER = "\\\\toc1\\s([^\\n]*)";
    private static final Pattern PATTERN_BOOK_NAME_MARKER = Pattern.compile(BOOK_NAME_MARKER);

    /** regex for the "\id" marker; the first space-separated word of group 1 is used as the book short name */
    public static final String ID_TAG = "\\\\id\\s([^\\n]*)";
    private static final Pattern ID_TAG_MARKER = Pattern.compile(ID_TAG);

    /** regex for the "\toc3" marker; group 1 is the rest of the line (read as the book short name) */
    public static final String BOOK_SHORT_NAME_MARKER = "\\\\toc3\\s([^\\n]*)";
    private static final Pattern PATTERN_BOOK_SHORT_NAME_MARKER = Pattern.compile(BOOK_SHORT_NAME_MARKER);

    /** regex for the "\s5" section marker; it is stripped from every chunk before the chunk is saved */
    public static final String SECTION_MARKER = "\\\\s5([^\\n]*)";
    private static final Pattern PATTERN_SECTION_MARKER = Pattern.compile(SECTION_MARKER);

    /** regex for the "\c" chapter marker; group 1 is the chapter number, optionally written as a range "n-m" */
    public static final String CHAPTER_NUMBER_MARKER = "\\\\c\\s(\\d+(-\\d+)?)\\s";
    private static final Pattern PATTERN_CHAPTER_NUMBER_MARKER = Pattern.compile(CHAPTER_NUMBER_MARKER);

    /** verse marker pattern; group 1 is parsed as a verse number or a "first-last" verse range */
    private static final Pattern PATTERN_USFM_VERSE_SPAN = Pattern.compile(USFMVerseSpan.PATTERN);

    /** sentinel "end verse" meaning: take everything up to the end of the chapter text */
    public static final int END_MARKER = 999999;

    /** JSON key of a chunk entry: slug of the first verse in the chunk */
    public static final String FIRST_VERSE = "first_verse";
    /** JSON key of a chunk entry: file name (frame slug) the chunk is saved under */
    public static final String FILE_NAME = "file_name";

    // working folders; the constructor nulls them and then calls createTempFolders(), which is
    // defined outside this view -- presumably it populates them (TODO confirm)
    private File mTempDir;
    private File mTempOutput; // base folder that receives one sub-folder per imported book
    private File mTempDest; // mTempOutput/<book short name>
    private File mTempSrce; // folder zip archives are expanded into
    private File mProjectFolder; // mTempDest/<book short name>-<target language id>

    private String mChapter; // chapter number (as text) that the section currently being collected belongs to
    private int mLastChapter; // number of the last chapter that was accepted
    private List<File> mSourceFiles; // raw list of files found in expanded package
    private HashMap<String, JSONArray> mChunks; // chapter slug -> array of chunk objects (FIRST_VERSE, FILE_NAME)
    private List<File> mImportProjects; // files that seem to be actual books.
    private List<String> mErrors; // one newline-separated message string per book, indexed by mCurrentBook
    private List<String> mFoundBooks; //descriptions of books from raw list
    private int mCurrentBook; // index of the book being processed (into mErrors / mFoundBooks)
    private String mBookName;
    private String mBookShortName;
    private TargetLanguage mTargetLanguage;
    private Context mContext;
    private boolean mProcessSuccess; // result of the most recent read/process call
    private UpdateStatusListener mStatusUpdateListener;
    private int mCurrentChapter; // used together with mChaperCount to compute the progress percentage
    private int mChaperCount; // number of chapters expected in the current book
    private List<MissingNameItem> mBooksMissingNames;
    private boolean mCancel = false; // set through setCancel(); checked between books and chapters
    private Chapter[] mChapters; // chapters of the source translation used to name chapter folders

    /**
     * constructor
     *
     * Starts with empty file/message lists and no current book; the temp folders are set up by
     * createTempFolders() (not visible here).
     *
     * @param context used to look up string resources
     * @param targetLanguage language the imported books are written for
     */
    public ImportUsfm(Context context, TargetLanguage targetLanguage) {
        mTempDir = null;
        mTempOutput = null;
        mTempDest = null;
        mTempSrce = null;
        mProjectFolder = null;
        createTempFolders();
        mStatusUpdateListener = null;
        mContext = context;
        mChunks = null;
        mSourceFiles = new ArrayList<>();
        mImportProjects = new ArrayList<>();
        mErrors = new ArrayList<>();
        mFoundBooks = new ArrayList<>();
        mTargetLanguage = targetLanguage;
        mCurrentBook = 0;
        mProcessSuccess = false;
        mBooksMissingNames = new ArrayList<>();
        mCurrentChapter = 0;
        mChaperCount = 1;
        mBookName = null;
        mBookShortName = null;
        mChapter = null;
    }

    /**
     * constructor used to create new instance from JSON
     *
     * Every argument is stored unchanged in the field of the same name; the status listener and
     * the chunk map are not restored.
     *
     * @param context
     * @param tempDir
     * @param tempOutput
     * @param tempDest
     * @param tempSrce
     * @param projectFolder
     * @param chapter
     * @param sourceFiles
     * @param importProjects
     * @param errors
     * @param foundBooks
     * @param currentBook
     * @param bookName
     * @param bookShortName
     * @param targetLanguage
     * @param success
     * @param
currentChapter * @param chaperCount * @param bookMissingNames */ private ImportUsfm(Activity context, File tempDir, File tempOutput, File tempDest, File tempSrce, File projectFolder, String chapter, List<File> sourceFiles, List<File> importProjects, List<String> errors, List<String> foundBooks, int currentBook, String bookName, String bookShortName, TargetLanguage targetLanguage, boolean success, int currentChapter, int chaperCount, List<MissingNameItem> bookMissingNames) { this.mStatusUpdateListener = null; this.mContext = context; this.mChunks = null; this.mTempDir = tempDir; this.mTempOutput = tempOutput; this.mTempDest = tempDest; this.mTempSrce = tempSrce; this.mProjectFolder = projectFolder; this.mChapter = chapter; this.mSourceFiles = sourceFiles; this.mImportProjects = importProjects; this.mErrors = errors; this.mFoundBooks = foundBooks; this.mCurrentBook = currentBook; this.mBookName = bookName; this.mBookShortName = bookShortName; this.mTargetLanguage = targetLanguage; this.mProcessSuccess = success; this.mCurrentChapter = currentChapter; this.mChaperCount = chaperCount; this.mBooksMissingNames = bookMissingNames; } /** * generate JSON from object * @return */ public JSONObject toJson() { try { JSONObject json = new JSONObject(); json.putOpt("TempDir", mTempDir); json.putOpt("TempOutput", mTempOutput); json.putOpt("TempDest", mTempDest); json.putOpt("TempSrce", mTempSrce); json.putOpt("ProjectFolder", mProjectFolder); json.putOpt("SourceFiles", toJsonFileArray(mSourceFiles)); json.putOpt("ImportProjects", toJsonFileArray(mImportProjects)); json.putOpt("Errors", toJsonStringArray(mErrors)); json.putOpt("FoundBooks", toJsonStringArray(mFoundBooks)); json.putOpt("TargetLanguage", mTargetLanguage.toApiFormatJson()); json.putOpt("CurrentBook", mCurrentBook); json.putOpt("Success", mProcessSuccess); json.putOpt("MissingNames", MissingNameItem.toJsonArray(mBooksMissingNames)); json.putOpt("CurrentChapter", mCurrentChapter); json.putOpt("ChaperCount", 
mChaperCount); json.putOpt("BookName", mBookName); json.putOpt("BookShortName", mBookShortName); json.putOpt("Chapter", mChapter); return json; } catch (Exception e) { e.printStackTrace(); return null; } } /** * rebuild object from JSON string * @param context * @param jsonStr * @return */ public static ImportUsfm newInstance(Activity context, String jsonStr) { try { JSONObject jsonObject = new JSONObject(jsonStr); return ImportUsfm.newInstance(context, jsonObject); } catch (Exception e) { e.printStackTrace(); return null; } } /** * cancel any processing * @param mCancel */ public void setCancel(boolean mCancel) { this.mCancel = mCancel; } /** * was processing successful overall * @return */ public boolean isProcessSuccess() { return mProcessSuccess; } /** * rebuild object from JSON * @param context * @param json * @return */ public static ImportUsfm newInstance(Activity context, JSONObject json) { try { return new ImportUsfm(context, getOptFile(json,"TempDir"), getOptFile(json,"TempOutput"), getOptFile(json,"TempDest"), getOptFile(json,"TempSrce"), getOptFile(json,"ProjectFolder"), getOptString(json,"Chapter"), fromJsonArrayToFiles(getOptJsonArray(json,"SourceFiles")), fromJsonArrayToFiles(getOptJsonArray(json,"ImportProjects")), fromJsonArrayToStrings(getOptJsonArray(json,"Errors")), fromJsonArrayToStrings(getOptJsonArray(json,"FoundBooks")), getOptInteger(json,"CurrentBook"), getOptString(json,"BookName"), getOptString(json,"BookShortName"), TargetLanguage.generate(getOptJsonObject(json,"TargetLanguage")), getOptBoolean(json,"Success"), getOptInteger(json,"CurrentChapter"), getOptInteger(json,"ChaperCount"), MissingNameItem.fromJsonArray(getOptJsonArray(json,"MissingNames"))); } catch (Exception e) { e.printStackTrace(); return null; } } /** * get list of books that we cant find valid names (resource IDs) for * @return */ public MissingNameItem[] getBooksMissingNames() { return mBooksMissingNames.toArray(new MissingNameItem[mBooksMissingNames.size()]); } /** * 
used to keep list of books that are missing names (valid resource IDs) * @param description * @param invalidName * @param contents */ public void addBookMissingName(String description, String invalidName, String contents) { mBooksMissingNames.add(new MissingNameItem(description, invalidName, contents)); } /** * set status listener * @param listener */ public void setUpdateStatusListener(UpdateStatusListener listener) { mStatusUpdateListener = listener; } /** * will update the status by calling listener. Will display text and update * the percent complete * @param text */ private void updateStatus(String text) { int fileCount = mSourceFiles.size(); if (fileCount < 1) { fileCount = 1; } float importAmountDone = (float) mCurrentBook / fileCount; float bookAmountDone = (float) mCurrentChapter / (mChaperCount + 2); float percentage = 100.0f * (importAmountDone + bookAmountDone / fileCount); int percentDone = Math.round(percentage); if (mStatusUpdateListener != null) { if (!isMissing(mBookShortName)) { text = mBookShortName + " - " + text; } mStatusUpdateListener.statusUpdate(text, percentDone); } } /** * will update the status by calling listener. Will display string resource and update * the percent complete * @param resource */ private void updateStatus(int resource) { String status = mContext.getResources().getString(resource); updateStatus(status); } /** * will update the status by calling listener. Will build status string using resource as string format * and applying data to it. Will also update the percent complete. 
* @param resource * @param data */ private void updateStatus(int resource, String data) { String format = mContext.getResources().getString(resource); updateStatus(String.format(format, data)); } /** * get processing results as multi-line string */ public String getResultsString() { normalizeBookQueue(); normalizeMessageQueue(); String results = ""; String format = mContext.getResources().getString(R.string.found_book); for (int i = 0; i <= mCurrentBook; i++) { String bookName = mFoundBooks.get(i); String bookNameCleaned = getCleanedBookName(format, bookName); String errors = mErrors.get(i); if(errors.isEmpty()) { errors = mContext.getResources().getString(R.string.no_error); } String currentResults = "\n" + (i+1) + " - " + bookNameCleaned + "\n" + errors; results = results + currentResults + "\n"; } return results; } /** * cleanup uri escape characters * @param format * @param bookName * @return */ private String getCleanedBookName(String format, String bookName) { String cleaned = bookName; String[] parts = bookName.split("%3A"); if(parts.length == 2) { //look for URI prefix cleaned = "SD_CARD/" + parts[1]; } cleaned = Uri.decode(cleaned); return String.format(format, cleaned); } /** * returns string to use for language title * @return */ public String getLanguageTitle() { String format; format = mContext.getResources().getString(R.string.selected_language); String language = String.format(format, mTargetLanguage.getId() + " - " + mTargetLanguage.name); return language; } /** * set book name * @param bookShortName * @param bookName */ private void setBookName(String bookShortName, String bookName) { normalizeBookQueue(); String description = bookName; if(!bookShortName.isEmpty()) { description = bookShortName + " = " + bookName; } mFoundBooks.set(mCurrentBook, description); } /** * add error to error list * * @param resource * @param error */ private void addError(int resource, String error) { String format = mContext.getResources().getString(resource); String 
newError = String.format(format, error); addError(newError); } /** * add error to error list * * @param resource * @param val1 * @param val2 * */ private void addError(int resource, String val1, String val2) { String format = mContext.getResources().getString(resource); String newError = String.format(format, val1, val2); addError(newError); } /** * add error to error list * * @param resource */ private void addError(int resource) { String newError = mContext.getResources().getString(resource); addError(newError); } /** * add error to error list * * @param error */ private void addError(String error) { addMessage(error, true); } /** * add message to error list * * @param message */ private void addMessage(String message, boolean error) { normalizeMessageQueue(); String errors = mErrors.get(mCurrentBook); if (!errors.isEmpty()) { errors += "\n"; } String format = mContext.getResources().getString(error ? R.string.error_prefix : R.string.warning_prefix); String newError = String.format(format, message); mErrors.set(mCurrentBook, errors + newError); if (error) { Logger.e(TAG, newError); } else { Logger.w(TAG, newError); } } private void normalizeMessageQueue() { while (mErrors.size() <= mCurrentBook) { mErrors.add(""); } } private void normalizeBookQueue() { while (mFoundBooks.size() <= mCurrentBook) { mFoundBooks.add(""); } } /** * add warning to error list * * @param error */ private void addWarning(String error) { addMessage(error, false); } /** * add warning to error list * * @param resource * @param error */ private void addWarning(int resource, String error) { String format = mContext.getResources().getString(resource); String newWarning = String.format(format, error); addWarning(newWarning); } /** * add warning to error list * * @param resource * @param val1 * @param val2 * */ private void addWarning(int resource, String val1, String val2) { String format = mContext.getResources().getString(resource); String newWarning = String.format(format, val1, val2); 
addWarning(newWarning); } /** * unpack and import documents from zip stream * * @param usfmStream * @return */ public boolean readZipStream(InputStream usfmStream) { boolean successOverall = true; boolean success; updateStatus(R.string.initializing_import); try { Zip.unzipFromStream(usfmStream, mTempSrce); File[] usfmFiles = mTempSrce.listFiles(); for (File usfmFile : usfmFiles) { addFilesInFolder(usfmFile); } Logger.i(TAG, "found files: " + TextUtils.join("\n", mSourceFiles)); for (mCurrentBook = 0; mCurrentBook < mSourceFiles.size(); mCurrentBook++) { mCurrentChapter = 0; File file = mSourceFiles.get(mCurrentBook); String name = file.getName(); updateStatus(R.string.found_book, name); success = processBook(file); if (!success) { addError(R.string.could_not_parse, getShortFilePath(file.toString())); } successOverall = successOverall && success; } mCurrentBook = mSourceFiles.size() - 1; // set to last book } catch (Exception e) { Logger.e(TAG, "error reading stream ", e); addError(R.string.zip_read_error); successOverall = false; } updateStatus(R.string.finished_loading); mProcessSuccess = successOverall; return successOverall; } /** * import single file * * @param file * @return */ public boolean readFile(File file) { boolean success = true; updateStatus(R.string.initializing_import); if (null == file) { addError(R.string.file_read_error); return false; } try { String ext = FilenameUtils.getExtension(file.toString()); boolean zip = "zip".equalsIgnoreCase(ext); if (!zip) { success = processBook(file); } else { InputStream usfmStream = new FileInputStream(file); success = readZipStream(usfmStream); } } catch (Exception e) { addError(R.string.file_read_error_detail, file.toString()); success = false; } updateStatus(R.string.finished_loading); mProcessSuccess = success; return success; } /** * import file from uri, if it is a zip file, then all files in zip will be imported * * @param uri * @return */ public boolean readUri(Uri uri) { boolean success = true; 
updateStatus(R.string.initializing_import); if (null == uri) { addError(R.string.file_read_error); return false; } String path = uri.toString(); try { String ext = FilenameUtils.getExtension(path); boolean zip = "zip".equalsIgnoreCase(ext); InputStream usfmStream = AppContext.context().getContentResolver().openInputStream(uri); if (!zip) { String text = IOUtils.toString(usfmStream, "UTF-8"); success = processBook(text, uri.toString()); } else { success = readZipStream(usfmStream); } } catch (Exception e) { addError(R.string.file_read_error_detail, path); success = false; } updateStatus(R.string.finished_loading); mProcessSuccess = success; return success; } /** * import file from resource. if it is a zip file, then all files in zip will be imported * * @param fileName * @return */ public boolean readResourceFile(Context context, String fileName) { boolean success = true; updateStatus(R.string.initializing_import); String ext = FilenameUtils.getExtension(fileName).toLowerCase(); boolean zip = "zip".equals(ext); try { InputStream usfmStream = context.getAssets().open(fileName); if (!zip) { String text = IOUtils.toString(usfmStream, "UTF-8"); success = processBook(text, fileName); } else { success = readZipStream(usfmStream); } } catch (Exception e) { Logger.e(TAG, "error reading " + fileName, e); success = false; } updateStatus(R.string.finished_loading); mProcessSuccess = success; return success; } /** * add chunk markers (contains verses and chapters) to map by chapter * * @param book * @param chunks * @return */ public boolean addChunks(String book, ChunkMarker[] chunks, SourceTranslation sourceTranslation) { try { for (ChunkMarker chunkMarker : chunks) { String chapter = chunkMarker.chapterSlug; String firstverse = chunkMarker.firstVerseSlug; JSONArray verses = null; if (mChunks.containsKey(chapter)) { verses = mChunks.get(chapter); } else { verses = new JSONArray(); mChunks.put(chapter, verses); } JSONObject chunk = new JSONObject(); chunk.put(FIRST_VERSE, 
firstverse); // chunk.put(FILE_NAME, firstverse); // default to the same, later cleanup verses.put(chunk); } for (int i = 1; i <= mChapters.length; i++) { // get file names for chunks String chapterId = getChapterFolderName(i + ""); String[] chapterFrameSlugs = AppContext.getLibrary().getFrameSlugs(sourceTranslation, chapterId); JSONArray verseBreaks = getVerseBreaksObj(i + ""); for (int j = 0; j < verseBreaks.length(); j++) { JSONObject chunk = verseBreaks.getJSONObject(j); chunk.put(FILE_NAME, chapterFrameSlugs[j]); } } } catch (Exception e) { Logger.e(TAG, "error parsing chunks " + book, e); return false; } return true; } /** * get the base folder for all the projects * * @return */ public File getProjectsFolder() { return mTempOutput; } /** * get array of the imported project folders * * @return */ public File[] getImportProjects() { if (mImportProjects != null) { return mImportProjects.toArray(new File[mImportProjects.size()]); } return new File[0]; } /** * process single document and create a project * * @param file * @return */ private boolean processBook(File file) { boolean success; try { String book = FileUtils.readFileToString(file); success = processBook(book, file.toString()); } catch (Exception e) { Logger.e(TAG, "error reading book " + file.toString(), e); addError(R.string.error_reading_file, file.toString()); success = false; } return success; } private boolean processBook(String book, String name) { return processBook(book, name, true, null); } public boolean processText(String book, String name, boolean promptForName, String useName) { mCurrentBook = mFoundBooks.size(); boolean success = processBook(book, name, promptForName, useName); mProcessSuccess = success; return success; } private boolean processBook(String book, String name, boolean promptForName, String useName) { if(mCancel) { return false; } boolean successOverall = true; boolean success; mBookShortName = ""; String description = getShortFilePath(name); setBookName("", description); 
try { mCurrentChapter = 0; mChaperCount = 1; extractBookID(book); // TODO: 4/12/16 verify book if (null == mTargetLanguage) { addError(R.string.missing_language); return false; } // boolean hasSections = isPresent(book, PATTERN_SECTION_MARKER); boolean hasVerses = isPresent(book, PATTERN_USFM_VERSE_SPAN); if (useName != null) { mBookShortName = useName; } if (isMissing(mBookShortName)) { addError(R.string.missing_book_short_name); addBookMissingName(name, null, book); return promptForName; } mBookShortName = mBookShortName.toLowerCase(); setBookName(mBookShortName, description); if (!hasVerses) { addError(R.string.no_verse); return false; } mTempDest = new File(mTempOutput, mBookShortName); mProjectFolder = new File(mTempDest, mBookShortName + "-" + mTargetLanguage.getId()); if (isMissing(mBookName)) { addError(R.string.missing_book_name); mBookName = mBookShortName; } ChunkMarker[] markers = AppContext.getLibrary().getChunkMarkers(mBookShortName); boolean haveChunksList = markers.length > 0; if (!haveChunksList) { // no chunk list // TODO: 4/13/16 add support for processing by sections addWarning(R.string.no_chunk_list, mBookShortName); addBookMissingName(mBookName, mBookShortName, book); return promptForName; } else { // has chunks SourceTranslation sourceTranslation = AppContext.getLibrary().getSourceTranslation(mBookShortName, "en", "ulb"); mChapters = AppContext.getLibrary().getChapters(sourceTranslation); mChunks = new HashMap<>(); // clear old map addChunks(mBookShortName, markers, sourceTranslation); mChaperCount = mChunks.size(); success = extractChaptersFromBook(book); successOverall = successOverall && success; } if(mCancel) { successOverall = false; } if (successOverall) { mCurrentChapter = (mChaperCount + 1); updateStatus(R.string.building_manifest); success = buildManifest(); successOverall = successOverall && success; } if (successOverall) { mImportProjects.add(mProjectFolder); } } catch (Exception e) { Logger.e(TAG, "error parsing book", e); return 
false; } return successOverall; } public String getShortFilePath(String name) { String filename = name; if(name != null) { int pos = name.indexOf(mTempSrce.toString()); // try to strip off temp folder path if (pos >= 0) { filename = name.substring(pos + mTempSrce.toString().length() + 1); } else { // otherwise we use just file name String[] parts = name.split("/"); if (parts.length > 0) { filename = parts[parts.length - 1]; } } } return filename; } private void extractBookID(String book) { mBookName = extractString(book, PATTERN_BOOK_NAME_MARKER); mBookShortName = extractString(book, PATTERN_BOOK_SHORT_NAME_MARKER); String idString = extractString(book, ID_TAG_MARKER); if (null != idString) { String[] tags = idString.split(" "); if (tags.length > 0) { mBookShortName = tags[0]; } } } /** * create the manifest for a project * * @throws JSONException */ private boolean buildManifest() throws JSONException { PackageInfo pInfo; TargetTranslation targetTranslation; try { Context context = AppContext.context(); pInfo = context.getPackageManager().getPackageInfo(context.getPackageName(), 0); String projectId = mBookShortName; String resourceSlug = Resource.REGULAR_SLUG; targetTranslation = TargetTranslation.create(context, AppContext.getProfile().getNativeSpeaker(), TranslationFormat.USFM, mTargetLanguage, projectId, TranslationType.TEXT, resourceSlug, pInfo, mProjectFolder); } catch (Exception e) { addError(R.string.file_write_error); Logger.e(TAG, "failed to build manifest", e); return false; } return true; } /** * extract chapters in book * * @param text * @return */ public boolean extractChaptersFromBook(CharSequence text) { Pattern pattern = PATTERN_CHAPTER_NUMBER_MARKER; Matcher matcher = pattern.matcher(text); int lastIndex = 0; CharSequence section; mChapter = null; mLastChapter = 0; boolean successOverall = true; boolean success; boolean foundChapter = false; while (matcher.find() && successOverall) { if(mCancel) { return false; } foundChapter = true; success = 
true; section = text.subSequence(lastIndex, matcher.start()); // get section before this chapter marker String chapter = matcher.group(1); // chapter number for next section mCurrentChapter = Integer.valueOf(chapter); if(mCurrentChapter > mChunks.size()) { //make sure in range break; } int expectedChapter = mLastChapter + 1; if(mCurrentChapter != expectedChapter) { // if out of order if (mCurrentChapter > expectedChapter) { // if gap success = processChapterGap(section, mLastChapter, mCurrentChapter); mLastChapter = mCurrentChapter - 1; } else if (mCurrentChapter == expectedChapter) { Logger.e(TAG, "duplicate chapter " + mChapter); addError(R.string.duplicate_chapter, mChapter); return false; } else { Logger.e(TAG, "out of order chapter " + mChapter + " after " + mLastChapter); addError(R.string.chapter_out_of_order, mChapter, mLastChapter + ""); return false; } } else { success = breakUpChapter( section, mChapter); } successOverall = successOverall && success; if(!success) { break; } mLastChapter++; mChapter = chapter; // chapter number for next section lastIndex = matcher.end(); } if(!foundChapter) { // if no chapters found Logger.e(TAG, "no chapters" ); addError(R.string.no_chapter); return false; } if (successOverall) { section = text.subSequence(lastIndex, text.length()); // get last section success = breakUpChapter(section, mChapter); mLastChapter = Integer.valueOf(mChapter); successOverall = successOverall && success; } if (successOverall) { mCurrentChapter = Integer.valueOf(mChapter); if ((mChapter == null) || (mCurrentChapter != mChunks.size())) { if(mCurrentChapter < mChunks.size()) { success = processChapterGap("", mCurrentChapter, mChunks.size() + 1); successOverall = successOverall && success; } else { String lastChapter = (mChapter != null) ? 
mChapter : "(null)"; addWarning(R.string.chapter_count_invalid, mChunks.size() + "", lastChapter); return false; } } } return successOverall; } /** * handle missing chapters in book * @param section * @param missingStart * @param missingEnd * @return */ private boolean processChapterGap(CharSequence section, int missingStart, int missingEnd) { boolean success; if(missingStart <= 0) { // if first chapter is missing, then we start processing there missingStart = 1; Logger.w(TAG, "missing chapter " + missingStart); addWarning(R.string.missing_chapter_n, missingStart + ""); } success = breakUpChapter(section, missingStart + ""); for(int i = missingStart + 1; i < missingEnd; i++) { // skip missing gaps Logger.w(TAG, "missing chapter " + i); addWarning(R.string.missing_chapter_n, i + ""); breakUpChapter("", i + ""); } return success; } /** * break up chapter into sections based on chunk list * * @param text * @return */ private boolean breakUpChapter(CharSequence text, String currentChapterStr) { boolean successOverall = true; boolean success = true; if (!isMissing(currentChapterStr)) { try { String chapter = getChapterFolderName(currentChapterStr); if (null == chapter) { addError(R.string.could_not_find_chapter, currentChapterStr); return false; } JSONArray versebreaks = getVerseBreaksObj(chapter); int currentChapter = Integer.valueOf(chapter); updateStatus(R.string.processing_chapter, new Integer(mChaperCount - currentChapter + 1).toString()); String lastFirst = null; for (int i = 0; (i < versebreaks.length()) && success; i++) { String first = versebreaks.getJSONObject(i).getString(FIRST_VERSE); success = extractVerses(chapter, text, lastFirst, first); successOverall = successOverall && success; lastFirst = first; } if (successOverall) { success = extractVerses(chapter, text, lastFirst, END_MARKER +""); successOverall = successOverall && success; } } catch (Exception e) { Logger.e(TAG, "error parsing chapter " + currentChapterStr, e); 
addError(R.string.could_not_parse_chapter, currentChapterStr); return false; } } else { // save stuff before first chapter String chapter1 = getChapterFolderName("1"); // to get width of chapters String chapter0 = "0000".substring(0, chapter1.length()); // match length of chapter 1 success = saveSection(".", "before", text); successOverall = successOverall && success; success = saveSection(".", "title", mBookName); successOverall = successOverall && success; } return successOverall; } /** * get the chapter name with the appropriate zero padding expected by app * @param findChapter * @return */ private String getChapterFolderName(String findChapter) { try { int chapter = Integer.valueOf(findChapter); if (chapter > 0) { // first check in expected location Chapter chapterN = mChapters[chapter - 1]; if (Integer.valueOf(chapterN.getId()) == chapter) { return chapterN.getId(); } } for (Chapter chapterN : mChapters) { //search for chapter match if (Integer.valueOf(chapterN.getId()) == chapter) { return chapterN.getId(); } } } catch (Exception e) { e.printStackTrace(); } addError(R.string.could_not_find_chapter, findChapter); return null; } /** * get the file name to use for verse chunk * @param findChapter * @param firstVerse * @return */ private String getChunkFileName(String findChapter, String firstVerse) { try { JSONArray chunks = getVerseBreaksObj(findChapter); for (int i = 0; i < chunks.length(); i++) { JSONObject chunk = chunks.getJSONObject(i); if (firstVerse.equals(chunk.getString(FIRST_VERSE))) { return chunk.getString(FILE_NAME); } } } catch (JSONException e) { e.printStackTrace(); } return firstVerse; // if not found, use same as chapter id } /** * get the array of verse chunks * @param findChapter * @return */ private JSONArray getVerseBreaksObj(String findChapter) { String chapter = findChapter; if (mChunks.containsKey(chapter)) { return mChunks.get(chapter); } chapter = "0" + chapter; if (mChunks.containsKey(chapter)) { return mChunks.get(chapter); } 
chapter = "0" + chapter; if (mChunks.containsKey(chapter)) { return mChunks.get(chapter); } //try removing leading spaces chapter = findChapter; while( !chapter.isEmpty() && (chapter.charAt(0) == '0') ) { chapter = chapter.substring(1); if (mChunks.containsKey(chapter)) { return mChunks.get(chapter); } } addError(R.string.could_not_find_chapter, findChapter); return null; } /** * extract verses in range of start to end into new section * * @param chapter * @param text * @param start * @param end * @return */ private boolean extractVerses(String chapter, CharSequence text, String start, String end) { boolean success = true; if (null == start) { // skip over stuff before verse 1 for now return true; } int startVerse = Integer.valueOf(start); int endVerse = Integer.valueOf(end); success = extractVerseRange(chapter, text, startVerse, endVerse, start); return success; } /** * extract verses in range of start to end into new section * * @param chapter * @param text * @param start * @param end * @param firstVerse * @return */ private boolean extractVerseRange(String chapter, CharSequence text, int start, int end, String firstVerse) { boolean successOverall = true; boolean success; if (!isMissing(chapter)) { Pattern pattern = PATTERN_USFM_VERSE_SPAN; Matcher matcher = pattern.matcher(text); int lastIndex = 0; String section = ""; int currentVerse = 0; int foundVerseCount = 0; int endVerseRange = 0; boolean done = false; boolean matchesFound = false; while (matcher.find()) { matchesFound = true; if (currentVerse >= end) { done = true; break; } if (currentVerse >= start) { if( (currentVerse == 1) && (start == 1) ){ // pick up initial content of chapter lastIndex = 0; // get everything before this first verse } if(end == END_MARKER) { // just include everything to end done = false; break; } while(true) { // find the end of the section if(endVerseRange > 0) { foundVerseCount += (endVerseRange - currentVerse + 1); } else { foundVerseCount++; } String verse = matcher.group(1); 
int[] verseRange = getVerseRange(verse); if(null == verseRange) { break; } currentVerse = verseRange[0]; endVerseRange = verseRange[1]; if (currentVerse >= end) { break; } boolean found = matcher.find(); if(!found) { break; } } section = section + text.subSequence(lastIndex, matcher.start()); // get section before this chunk marker done = true; break; } String verse = matcher.group(1); int[] verseRange = getVerseRange(verse); if(null == verseRange) { return false; } currentVerse = verseRange[0]; endVerseRange = verseRange[1]; lastIndex = matcher.start(); } if (!done && matchesFound && (currentVerse >= start) && (currentVerse < end)) { section = section + text.subSequence(lastIndex, text.length()); // get last section } if(start != 0) { // text before first verse is not a concern int delta = foundVerseCount - (end - start); if (section.isEmpty()) { String format = mContext.getResources().getString(R.string.could_not_find_verses_in_chapter); String msg = String.format(format, start, end - 1, chapter); addWarning(msg); } else if ((end != END_MARKER) && (delta != 0)) { String format; if(delta < 0) { delta = -delta; format = mContext.getResources().getString(R.string.missing_verses_in_chapter); } else { format = mContext.getResources().getString(R.string.extra_verses_in_chapter); } String msg = String.format(format, delta, start, end - 1, chapter); addWarning(msg); } } String chunkFileName = getChunkFileName(chapter, firstVerse); success = saveSection(getChapterFolderName(chapter), chunkFileName, section); successOverall = successOverall && success; } return successOverall; } /** * get verse range * @param verse * @return */ private int[] getVerseRange(String verse) { int[] verseRange; int currentVerse; int endVerseRange; try { int currentVers = Integer.valueOf(verse); verseRange = new int[] {currentVers, 0}; } catch (NumberFormatException e) { // might be a range in format 12-13 String[] range = verse.split("-"); if (range.length < 2) { verseRange = null; } else { 
currentVerse = Integer.valueOf(range[0]); endVerseRange = Integer.valueOf(range[1]); verseRange = new int[]{currentVerse, endVerseRange}; } } return verseRange; } /** * save section (chunk) to file in chapter folder * * @param chapter * @param fileName * @param section * @return */ private boolean saveSection(String chapter, String fileName, CharSequence section) { File chapterFolder = new File(mProjectFolder, chapter); try { String cleanChunk = removePattern(section, PATTERN_SECTION_MARKER); FileUtils.forceMkdir(chapterFolder); File output = new File(chapterFolder, fileName + ".txt"); FileUtils.write(output, cleanChunk); return true; } catch (Exception e) { Logger.e(TAG, "error parsing chapter " + mChapter, e); addError(R.string.file_write_for_verse, chapter + "/" + fileName); return false; } } /** * test if CharSequence is null or empty * * @param text * @return */ private boolean isMissing(CharSequence text) { if (null == text) { return true; } return text.length() == 0; } /** * extract chapters from document text (used for splitting by sections) * * @param text * @return */ private boolean extractChaptersFromDocument(CharSequence text) { Pattern pattern = PATTERN_CHAPTER_NUMBER_MARKER; Matcher matcher = pattern.matcher(text); int lastIndex = 0; int length = text.length(); CharSequence chapter; mChapter = null; while (matcher.find()) { chapter = text.subSequence(lastIndex, matcher.start()); // get section before this chapter marker extractSectionsFromChapter(chapter); mChapter = matcher.group(1); // chapter number for next section lastIndex = matcher.end(); mCurrentChapter = Integer.valueOf(mChapter); //estimate number of chapters - doesn't need to be exact if (mCurrentChapter > 1) { float percentIn = (float) lastIndex / length; if (percentIn != 0.0f) { mChaperCount = Math.round((mCurrentChapter - 1) / percentIn); if (mChaperCount < 1) { // sanity checks mChaperCount = 1; } else if (mChaperCount > 250) { mChaperCount = 250; } else if (mChaperCount < 
mCurrentChapter) { mChaperCount = mCurrentChapter; } updateStatus(R.string.processing_chapter, new Integer(mChaperCount - mCurrentChapter + 1).toString()); } } } chapter = text.subSequence(lastIndex, text.length()); // get last section extractSectionsFromChapter(chapter); return true; } /** * extract sections from chapter * * @param chapter */ private void extractSectionsFromChapter(CharSequence chapter) { if (!isMissing(mChapter)) { Pattern pattern = PATTERN_SECTION_MARKER; Matcher matcher = pattern.matcher(chapter); int lastIndex = 0; CharSequence section; while (matcher.find()) { section = chapter.subSequence(lastIndex, matcher.start()); // get section before this chunk marker if (lastIndex > 0) { // ignore what's before first section processSection(section); } lastIndex = matcher.end(); } section = chapter.subSequence(lastIndex, chapter.length()); // get last section processSection(section); } } /** * extract verses from section * * @param section * @return */ private boolean processSection(CharSequence section) { if (!isMissing(section)) { String firstVerse = extractString(section, PATTERN_USFM_VERSE_SPAN); if (null == firstVerse) { addError(R.string.missing_verses_in_section); return false; } saveSection(getChapterFolderName(mChapter), firstVerse, section); } return true; } /** * match regexPattern and get string in group 1 if present * * @param text * @param regexPattern * @return */ private String extractString(CharSequence text, Pattern regexPattern) { if (text.length() > 0) { // find instance Matcher matcher = regexPattern.matcher(text); String foundItem = null; if (matcher.find()) { foundItem = matcher.group(1); return foundItem.trim(); } } return null; } /** * remove pattern if present in text * * @param text * @param removePattern * @return */ private String removePattern(CharSequence text, Pattern removePattern) { String out = ""; Matcher matcher = removePattern.matcher(text); int lastIndex = 0; while (matcher.find()) { out = out + 
text.subSequence(lastIndex, matcher.start()); // get section before this chunk marker lastIndex = matcher.end(); } out = out + text.subSequence(lastIndex, text.length()); // get last section return out; } /** * test to see if regex pattern is present in text * * @param text * @param regexPattern * @return */ private boolean isPresent(CharSequence text, Pattern regexPattern) { if (text.length() > 0) { // find instance Matcher matcher = regexPattern.matcher(text); if (matcher.find()) { return true; } } return false; } /** * create the necessary temp folders for unzipped source and output */ private void createTempFolders() { mTempDir = new File(AppContext.context().getCacheDir(), System.currentTimeMillis() + ""); mTempDir.mkdirs(); mTempSrce = new File(mTempDir, "source"); mTempSrce.mkdirs(); mTempOutput = new File(mTempDir, "output"); mTempOutput.mkdirs(); } /** * cleanup working directory and values */ public void cleanup() { FileUtils.deleteQuietly(mTempDir); mTempDir = null; mTempSrce = null; mTempOutput = null; mTempDest = null; } /** * add file and files in sub-folders to list of files to process * * @param usfmFile * @return */ private boolean addFilesInFolder(File usfmFile) { Logger.i(TAG, "processing folder: " + usfmFile.toString()); if (usfmFile.isDirectory()) { File[] usfmSubFiles = usfmFile.listFiles(); for (File usfmSuile : usfmSubFiles) { addFilesInFolder(usfmSuile); } Logger.i(TAG, "found files: " + usfmSubFiles.toString()); } else { addFile(usfmFile); } return true; } /** * add file to list of files to process * * @param usfmFile * @return */ private boolean addFile(File usfmFile) { Logger.i(TAG, "processing file: " + usfmFile.toString()); mSourceFiles.add(usfmFile); return true; } public interface OnFinishedListener { void onFinished(boolean success); } public interface UpdateStatusListener { void statusUpdate(String textStatus, int percentStatus); } static JSONArray toJsonFileArray(List<File> array) { JSONArray jsonArray = new JSONArray(); for (File 
item : array) { jsonArray.put(item.toString()); } return jsonArray; } static List<File> fromJsonArrayToFiles(String jsonStr) { try { JSONArray jsonArray = new JSONArray(jsonStr); return fromJsonArrayToFiles(jsonArray); } catch (Exception e) { e.printStackTrace(); } return null; } static List<File> fromJsonArrayToFiles(JSONArray jsonArray) throws JSONException { List<File> array = new ArrayList<>(); for (int i = 0; i < jsonArray.length(); i++) { String path = jsonArray.getString(i); File file = new File(path); array.add(file); } return array; } static JSONArray toJsonStringArray(List<String> array) { JSONArray jsonArray = new JSONArray(); for (String item : array) { jsonArray.put(item); } return jsonArray; } static List<String> fromJsonArrayToStrings(String jsonStr) { try { JSONArray jsonArray = new JSONArray(jsonStr); return fromJsonArrayToStrings(jsonArray); } catch (Exception e) { e.printStackTrace(); } return null; } static List<String> fromJsonArrayToStrings(JSONArray jsonArray) throws JSONException { List<String> array = new ArrayList<>(); for (int i = 0; i < jsonArray.length(); i++) { String text = jsonArray.getString(i); array.add(text); } return array; } static Integer getOptInteger(JSONObject json, String key) { return (Integer) getOpt(json,key); } static Boolean getOptBoolean(JSONObject json, String key) { return (Boolean) getOpt(json,key); } static File getOptFile(JSONObject json, String key) { String path = getOptString(json, key); if(path != null) { return new File(path); } return null; } static String getOptString(JSONObject json, String key) { Object obj = getOpt(json, key); return (String) obj; } static JSONObject getOptJsonObject(JSONObject json, String key) { try { Object obj = getOpt(json, key); return (JSONObject) obj; } catch (Exception e) { return new JSONObject(); } } static JSONArray getOptJsonArray(JSONObject json, String key) { try { Object obj = getOpt(json, key); return (JSONArray) obj; } catch (Exception e) { return new JSONArray(); } } 
/**
 * Get optional value without throwing.
 *
 * @param json object to read from, may be null
 * @param key  name of the value
 * @return the value stored for key, or null if json is null or has no such key
 */
static Object getOpt(JSONObject json, String key) {
    if (json == null) {
        return null;
    }
    return json.opt(key); // opt() returns null instead of throwing when the key is absent
}
}