// ImportUsfm.java example
//
// Explorer
// ts-android-master
package com.door43.translationstudio.core;

import android.app.Activity;
import android.content.Context;
import android.content.pm.PackageInfo;
import android.net.Uri;
import android.text.TextUtils;

import com.door43.tools.reporting.Logger;
import com.door43.translationstudio.AppContext;
import com.door43.translationstudio.R;
import com.door43.translationstudio.spannables.USFMVerseSpan;
import com.door43.util.Zip;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Processes USFM input files or zip files into an importable package.
 */
public class ImportUsfm {
    /** Tag passed to {@link Logger} for every message logged by this class. */
    public static final String TAG = ImportUsfm.class.getSimpleName();
    /** Regex: a backslash, "toc1", one whitespace char, then group 1 = rest of the line. */
    public static final String BOOK_NAME_MARKER = "\\\\toc1\\s([^\\n]*)";
    private static final Pattern PATTERN_BOOK_NAME_MARKER = Pattern.compile(BOOK_NAME_MARKER);
    /** Regex: a backslash, "id", one whitespace char, then group 1 = rest of the line. */
    public static final String ID_TAG = "\\\\id\\s([^\\n]*)";
    private static final Pattern ID_TAG_MARKER = Pattern.compile(ID_TAG);
    /** Regex: a backslash, "toc3", one whitespace char, then group 1 = rest of the line. */
    public static final String BOOK_SHORT_NAME_MARKER = "\\\\toc3\\s([^\\n]*)";
    private static final Pattern PATTERN_BOOK_SHORT_NAME_MARKER = Pattern.compile(BOOK_SHORT_NAME_MARKER);
    /** Regex: a backslash, "s5", then group 1 = rest of the line. */
    public static final String SECTION_MARKER = "\\\\s5([^\\n]*)";
    private static final Pattern PATTERN_SECTION_MARKER = Pattern.compile(SECTION_MARKER);
    /**
     * Regex: a backslash, "c", one whitespace char, group 1 = a number optionally
     * followed by "-number", then one whitespace char.
     */
    public static final String CHAPTER_NUMBER_MARKER = "\\\\c\\s(\\d+(-\\d+)?)\\s";
    private static final Pattern PATTERN_CHAPTER_NUMBER_MARKER = Pattern.compile(CHAPTER_NUMBER_MARKER);
    /** Compiled form of {@link USFMVerseSpan#PATTERN}; used to detect whether a book has verses. */
    private static final Pattern PATTERN_USFM_VERSE_SPAN = Pattern.compile(USFMVerseSpan.PATTERN);
    // NOTE(review): usage is not visible in this part of the file; presumably a sentinel value - confirm
    public static final int END_MARKER = 999999;
    /** JSON key under which a chunk object stores its first verse slug. */
    public static final String FIRST_VERSE = "first_verse";
    /** JSON key under which a chunk object stores its frame slug (file name). */
    public static final String FILE_NAME = "file_name";

    // working folders; NOTE(review): presumably initialized by createTempFolders() - confirm
    private File mTempDir;
    private File mTempOutput; // base folder for all generated projects (see getProjectsFolder)
    private File mTempDest; // per-book folder: mTempOutput/<book short name>
    private File mTempSrce; // folder that zip input is unpacked into
    private File mProjectFolder; // mTempDest/<book short name>-<target language id>

    private String mChapter;
    private int mLastChapter;
    private List<File> mSourceFiles; // raw list of files found in expanded package
    private HashMap<String, JSONArray> mChunks; // chapter slug -> array of chunk objects

    private List<File> mImportProjects; // files that seem to be actual books.
    private List<String> mErrors; // accumulated messages, one entry per book, indexed by mCurrentBook
    private List<String> mFoundBooks; //descriptions of books from raw list
    private int mCurrentBook; // index of the book currently being processed

    private String mBookName;
    private String mBookShortName;
    private TargetLanguage mTargetLanguage;
    private Context mContext; // used to resolve string resources
    private boolean mProcessSuccess; // result of the most recent read/process call
    private UpdateStatusListener mStatusUpdateListener; // optional progress receiver, may be null
    private int mCurrentChapter; // progress within the current book
    private int mChaperCount; // (sic) chapter count for the current book, used for progress
    private List<MissingNameItem> mBooksMissingNames;
    private boolean mCancel = false; // when true, processBook() returns false without doing work
    private Chapter[] mChapters; // chapters of the source translation for the current book

    /**
     * Creates a fresh importer: sets up the temp folders, starts with empty
     * result lists and resets all progress counters.
     *
     * @param context        context used to resolve string resources
     * @param targetLanguage language the imported books are created for
     */
    public ImportUsfm(Context context, TargetLanguage targetLanguage) {
        // folder references start out cleared, then the temp folders are set up
        mProjectFolder = null;
        mTempSrce = null;
        mTempDest = null;
        mTempOutput = null;
        mTempDir = null;

        createTempFolders();

        // collaborators
        mContext = context;
        mTargetLanguage = targetLanguage;
        mStatusUpdateListener = null;

        // result collections
        mChunks = null;
        mSourceFiles = new ArrayList<>();
        mImportProjects = new ArrayList<>();
        mFoundBooks = new ArrayList<>();
        mErrors = new ArrayList<>();
        mBooksMissingNames = new ArrayList<>();

        // progress state
        mProcessSuccess = false;
        mCurrentBook = 0;
        mCurrentChapter = 0;
        mChaperCount = 1;

        // nothing has been parsed yet
        mChapter = null;
        mBookShortName = null;
        mBookName = null;
    }

    /**
     * Restores an importer from previously saved state (see {@code newInstance}).
     * Every argument is stored as-is; the status listener and chunk map are not
     * part of the saved state and start out as null.
     *
     * @param context          context used to resolve string resources
     * @param tempDir          saved temp folder
     * @param tempOutput       saved output folder
     * @param tempDest         saved destination folder
     * @param tempSrce         saved source folder
     * @param projectFolder    saved project folder
     * @param chapter          saved chapter
     * @param sourceFiles      saved list of source files
     * @param importProjects   saved list of import projects
     * @param errors           saved per-book messages
     * @param foundBooks       saved per-book descriptions
     * @param currentBook      saved index of the current book
     * @param bookName         saved book name
     * @param bookShortName    saved book short name
     * @param targetLanguage   saved target language
     * @param success          saved overall success flag
     * @param currentChapter   saved current chapter counter
     * @param chaperCount      saved chapter count
     * @param bookMissingNames saved list of books that are missing names
     */
    private ImportUsfm(Activity context, File tempDir, File tempOutput, File tempDest,
                       File tempSrce, File projectFolder, String chapter, List<File> sourceFiles,
                       List<File> importProjects, List<String> errors, List<String> foundBooks,
                       int currentBook, String bookName, String bookShortName, TargetLanguage targetLanguage,
                       boolean success, int currentChapter, int chaperCount, List<MissingNameItem> bookMissingNames) {
        // not persisted
        mChunks = null;
        mStatusUpdateListener = null;
        mContext = context;

        // folders
        mTempDir = tempDir;
        mTempOutput = tempOutput;
        mTempDest = tempDest;
        mTempSrce = tempSrce;
        mProjectFolder = projectFolder;

        // collected results
        mSourceFiles = sourceFiles;
        mImportProjects = importProjects;
        mErrors = errors;
        mFoundBooks = foundBooks;
        mBooksMissingNames = bookMissingNames;

        // current book details
        mTargetLanguage = targetLanguage;
        mBookName = bookName;
        mBookShortName = bookShortName;
        mChapter = chapter;

        // progress
        mCurrentBook = currentBook;
        mCurrentChapter = currentChapter;
        mChaperCount = chaperCount;
        mProcessSuccess = success;
    }

    /**
     * Serializes the importer state to JSON so it can later be rebuilt with
     * {@code newInstance}. The status listener and chunk map are not saved.
     *
     * @return the JSON representation, or null if serialization failed (the
     *         failure is logged)
     */
    public JSONObject toJson() {
        try {
            JSONObject json = new JSONObject();
            json.putOpt("TempDir", mTempDir);
            json.putOpt("TempOutput", mTempOutput);
            json.putOpt("TempDest", mTempDest);
            json.putOpt("TempSrce", mTempSrce);
            json.putOpt("ProjectFolder", mProjectFolder);
            json.putOpt("SourceFiles", toJsonFileArray(mSourceFiles));
            json.putOpt("ImportProjects", toJsonFileArray(mImportProjects));
            json.putOpt("Errors", toJsonStringArray(mErrors));
            json.putOpt("FoundBooks", toJsonStringArray(mFoundBooks));
            // the target language may legitimately be unset; putOpt skips null values,
            // so a missing language no longer aborts the whole serialization
            json.putOpt("TargetLanguage", mTargetLanguage == null ? null : mTargetLanguage.toApiFormatJson());
            json.putOpt("CurrentBook", mCurrentBook);
            json.putOpt("Success", mProcessSuccess);
            json.putOpt("MissingNames", MissingNameItem.toJsonArray(mBooksMissingNames));
            json.putOpt("CurrentChapter", mCurrentChapter);
            json.putOpt("ChaperCount", mChaperCount);
            json.putOpt("BookName", mBookName);
            json.putOpt("BookShortName", mBookShortName);
            json.putOpt("Chapter", mChapter);

            return json;
        } catch (Exception e) {
            // report through the app logger like the rest of this class
            Logger.e(TAG, "error serializing import state to JSON", e);
            return null;
        }
    }

    /**
     * Rebuilds an importer from a JSON string, as produced by serializing the
     * result of {@link #toJson()}.
     *
     * @param context context used by the restored instance to resolve string resources
     * @param jsonStr JSON text holding the saved state
     * @return the restored instance, or null if the text could not be parsed or
     *         the instance could not be rebuilt (the failure is logged)
     */
    public static ImportUsfm newInstance(Activity context, String jsonStr) {
        try {
            JSONObject jsonObject = new JSONObject(jsonStr);
            return ImportUsfm.newInstance(context, jsonObject);
        } catch (Exception e) {
            // report through the app logger like the rest of this class
            Logger.e(TAG, "error parsing saved import state", e);
            return null;
        }
    }

    /**
     * Sets the cancel flag. While the flag is true, book processing returns
     * failure immediately instead of doing any work.
     *
     * @param mCancel true to cancel processing, false to allow it
     */
    public void setCancel(boolean mCancel) {
        final boolean requested = mCancel;
        this.mCancel = requested;
    }

    /**
     * Reports the outcome recorded by the most recent read/process call.
     *
     * @return true if that call succeeded overall
     */
    public boolean isProcessSuccess() {
        return this.mProcessSuccess;
    }

    /**
     * Rebuilds an importer from the JSON produced by {@link #toJson()}. Each
     * saved value is read with the matching optional getter and handed to the
     * private restore constructor.
     *
     * @param context context used by the restored instance to resolve string resources
     * @param json    saved state
     * @return the restored instance, or null if any value could not be read
     *         (the failure is logged)
     */
    public static ImportUsfm newInstance(Activity context, JSONObject json) {
        try {
            return new ImportUsfm(context,
                    getOptFile(json,"TempDir"),
                    getOptFile(json,"TempOutput"),
                    getOptFile(json,"TempDest"),
                    getOptFile(json,"TempSrce"),
                    getOptFile(json,"ProjectFolder"),
                    getOptString(json,"Chapter"),
                    fromJsonArrayToFiles(getOptJsonArray(json,"SourceFiles")),
                    fromJsonArrayToFiles(getOptJsonArray(json,"ImportProjects")),
                    fromJsonArrayToStrings(getOptJsonArray(json,"Errors")),
                    fromJsonArrayToStrings(getOptJsonArray(json,"FoundBooks")),
                    getOptInteger(json,"CurrentBook"),
                    getOptString(json,"BookName"),
                    getOptString(json,"BookShortName"),
                    TargetLanguage.generate(getOptJsonObject(json,"TargetLanguage")),
                    getOptBoolean(json,"Success"),
                    getOptInteger(json,"CurrentChapter"),
                    getOptInteger(json,"ChaperCount"),
                    MissingNameItem.fromJsonArray(getOptJsonArray(json,"MissingNames")));

        } catch (Exception e) {
            // report through the app logger like the rest of this class
            Logger.e(TAG, "error rebuilding import state from JSON", e);
            return null;
        }
    }

    /**
     * Returns a snapshot of the books recorded as missing a usable name.
     *
     * @return a new array holding the recorded items, in insertion order
     */
    public MissingNameItem[] getBooksMissingNames() {
        final MissingNameItem[] snapshot = new MissingNameItem[mBooksMissingNames.size()];
        return mBooksMissingNames.toArray(snapshot);
    }

    /**
     * Records a book for which no usable name could be determined.
     *
     * @param description description of the book
     * @param invalidName the name that was found but could not be used, may be null
     * @param contents    text of the book
     */
    public void addBookMissingName(String description, String invalidName, String contents) {
        final MissingNameItem item = new MissingNameItem(description, invalidName, contents);
        mBooksMissingNames.add(item);
    }

    /**
     * Registers the receiver of progress updates.
     *
     * @param listener receiver to notify, or null to stop sending updates
     */
    public void setUpdateStatusListener(UpdateStatusListener listener) {
        this.mStatusUpdateListener = listener;
    }

    /**
     * Sends a progress update to the registered listener, if any. The percentage
     * combines the books already handled with the fraction of the current book
     * that is done; the message is prefixed with the book short name when one
     * is known.
     *
     * @param text message to show
     */
    private void updateStatus(String text) {
        // avoid dividing by zero when no source files are known
        final int totalFiles = Math.max(mSourceFiles.size(), 1);

        final float booksDone = (float) mCurrentBook / totalFiles;
        final float chaptersDone = (float) mCurrentChapter / (mChaperCount + 2);
        final int percentDone = Math.round(100.0f * (booksDone + chaptersDone / totalFiles));

        if (mStatusUpdateListener == null) {
            return;
        }

        final String message = isMissing(mBookShortName) ? text : mBookShortName + " - " + text;
        mStatusUpdateListener.statusUpdate(message, percentDone);
    }

    /**
     * Sends a progress update whose message is a string resource.
     *
     * @param resource id of the string resource to show
     */
    private void updateStatus(int resource) {
        updateStatus(mContext.getResources().getString(resource));
    }

    /**
     * Sends a progress update whose message is built by applying {@code data}
     * to a format-string resource.
     *
     * @param resource id of the format-string resource
     * @param data     value substituted into the format
     */
    private void updateStatus(int resource, String data) {
        final String template = mContext.getResources().getString(resource);
        final String status = String.format(template, data);
        updateStatus(status);
    }

    /**
     * Builds a multi-line report covering every book up to and including the
     * current one: a numbered, cleaned-up book description followed by the
     * messages recorded for it, or the "no error" text when there are none.
     *
     * @return the report text
     */
    public String getResultsString() {
        // make sure both lists have an entry for every index visited below
        normalizeBookQueue();
        normalizeMessageQueue();

        final String foundBookFormat = mContext.getResources().getString(R.string.found_book);
        final StringBuilder report = new StringBuilder();
        for (int index = 0; index <= mCurrentBook; index++) {
            final String cleanedName = getCleanedBookName(foundBookFormat, mFoundBooks.get(index));
            String messages = mErrors.get(index);
            if (messages.isEmpty()) {
                messages = mContext.getResources().getString(R.string.no_error);
            }
            report.append("\n").append(index + 1).append(" - ").append(cleanedName)
                    .append("\n").append(messages)
                    .append("\n");
        }
        return report.toString();
    }

    /**
     * Makes a book description readable: when the text splits into exactly two
     * parts around "%3A", the first part is replaced by "SD_CARD/"; the result
     * is URI-decoded and applied to {@code format}.
     *
     * @param format   format string that receives the cleaned name
     * @param bookName raw book description
     * @return the formatted, cleaned description
     */
    private String getCleanedBookName(String format, String bookName) {
        final String[] pieces = bookName.split("%3A");
        final boolean hasUriPrefix = (pieces.length == 2);
        final String readable = hasUriPrefix ? "SD_CARD/" + pieces[1] : bookName;
        return String.format(format, Uri.decode(readable));
    }

    /**
     * Builds the title line for the selected target language by applying
     * "id - name" to the selected-language format resource.
     *
     * @return the language title
     */
    public String getLanguageTitle() {
        final String template = mContext.getResources().getString(R.string.selected_language);
        final String idAndName = mTargetLanguage.getId() + " - " + mTargetLanguage.name;
        return String.format(template, idAndName);
    }

    /**
     * Stores the description of the current book: "short = name" when a short
     * name is given, otherwise just the name.
     *
     * @param bookShortName short name of the book, empty if not known
     * @param bookName      name or description of the book
     */
    private void setBookName(String bookShortName, String bookName) {
        // guarantee a slot exists for the current book
        normalizeBookQueue();
        final String entry = bookShortName.isEmpty()
                ? bookName
                : bookShortName + " = " + bookName;
        mFoundBooks.set(mCurrentBook, entry);
    }

    /**
     * Records an error for the current book, built by applying one value to a
     * format-string resource.
     *
     * @param resource id of the format-string resource
     * @param error    value substituted into the format
     */
    private void addError(int resource, String error) {
        final String template = mContext.getResources().getString(resource);
        addError(String.format(template, error));
    }

    /**
     * Records an error for the current book, built by applying two values to a
     * format-string resource.
     *
     * @param resource id of the format-string resource
     * @param val1     first value substituted into the format
     * @param val2     second value substituted into the format
     */
    private void addError(int resource, String val1, String val2) {
        final String template = mContext.getResources().getString(resource);
        addError(String.format(template, val1, val2));
    }

    /**
     * Records an error for the current book using a string resource as the text.
     *
     * @param resource id of the string resource
     */
    private void addError(int resource) {
        addError(mContext.getResources().getString(resource));
    }

    /**
     * Records an error for the current book.
     *
     * @param error error text
     */
    private void addError(String error) {
        final boolean isError = true;
        addMessage(error, isError);
    }

    /**
     * Appends a message to the current book's entry in the message list and
     * logs it. The message is wrapped in the error or warning prefix format and
     * separated from earlier messages by a newline.
     *
     * @param message message text
     * @param error   true to record and log as an error, false as a warning
     */
    private void addMessage(String message, boolean error) {
        // guarantee a slot exists for the current book
        normalizeMessageQueue();

        final String existing = mErrors.get(mCurrentBook);
        final String separator = existing.isEmpty() ? "" : "\n";

        final int prefixResource = error ? R.string.error_prefix : R.string.warning_prefix;
        final String entry = String.format(mContext.getResources().getString(prefixResource), message);

        mErrors.set(mCurrentBook, existing + separator + entry);

        if (!error) {
            Logger.w(TAG, entry);
            return;
        }
        Logger.e(TAG, entry);
    }

    /**
     * Pads the message list with empty strings until it has an entry at the
     * index of the current book.
     */
    private void normalizeMessageQueue() {
        for (int size = mErrors.size(); size <= mCurrentBook; size++) {
            mErrors.add("");
        }
    }

    /**
     * Pads the book description list with empty strings until it has an entry
     * at the index of the current book.
     */
    private void normalizeBookQueue() {
        for (int size = mFoundBooks.size(); size <= mCurrentBook; size++) {
            mFoundBooks.add("");
        }
    }

    /**
     * Records a warning for the current book.
     *
     * @param error warning text
     */
    private void addWarning(String error) {
        final boolean isError = false;
        addMessage(error, isError);
    }

    /**
     * Records a warning for the current book, built by applying one value to a
     * format-string resource.
     *
     * @param resource id of the format-string resource
     * @param error    value substituted into the format
     */
    private void addWarning(int resource, String error) {
        final String template = mContext.getResources().getString(resource);
        addWarning(String.format(template, error));
    }

    /**
     * Records a warning for the current book, built by applying two values to a
     * format-string resource.
     *
     * @param resource id of the format-string resource
     * @param val1     first value substituted into the format
     * @param val2     second value substituted into the format
     */
    private void addWarning(int resource, String val1, String val2) {
        final String template = mContext.getResources().getString(resource);
        addWarning(String.format(template, val1, val2));
    }

    /**
     * Unpacks a zip stream into the temp source folder and processes every file
     * found in it as a book. Files that fail to parse get an error recorded
     * against them; processing continues with the remaining files.
     *
     * The stream is not closed here; it remains the caller's responsibility.
     *
     * @param usfmStream zip content to unpack
     * @return true only if unpacking worked and every book was processed
     *         successfully
     */
    public boolean readZipStream(InputStream usfmStream) {
        boolean successOverall = true;
        boolean success;
        updateStatus(R.string.initializing_import);
        try {
            Zip.unzipFromStream(usfmStream, mTempSrce);
            // a null listing (folder could not be read) raises an exception in the
            // loop below and is reported by the catch block as a zip read error
            File[] usfmFiles = mTempSrce.listFiles();

            for (File usfmFile : usfmFiles) {
                addFilesInFolder(usfmFile);
            }
            Logger.i(TAG, "found files: " + TextUtils.join("\n", mSourceFiles));

            for (mCurrentBook = 0; mCurrentBook < mSourceFiles.size(); mCurrentBook++) {
                mCurrentChapter = 0;
                File file = mSourceFiles.get(mCurrentBook);
                String name = file.getName();
                updateStatus(R.string.found_book, name);
                success = processBook(file);
                if (!success) {
                    addError(R.string.could_not_parse, getShortFilePath(file.toString()));
                }
                successOverall = successOverall && success;
            }

            // point at the last book; never go below 0, otherwise an archive with no
            // files would leave a negative index that yields a negative percentage
            // and breaks later list access by mCurrentBook
            mCurrentBook = Math.max(0, mSourceFiles.size() - 1);

        } catch (Exception e) {
            Logger.e(TAG, "error reading stream ", e);
            addError(R.string.zip_read_error);
            successOverall = false;
        }

        updateStatus(R.string.finished_loading);
        mProcessSuccess = successOverall;
        return successOverall;
    }

    /**
     * Imports a single file. A file whose extension is "zip" (any case) is
     * unpacked and every file inside is imported; any other file is processed
     * as one book.
     *
     * @param file file to import; null is reported as a read error
     * @return true if the import succeeded
     */
    public boolean readFile(File file) {
        boolean success = true;
        updateStatus(R.string.initializing_import);
        if (null == file) {
            addError(R.string.file_read_error);
            return false;
        }

        InputStream usfmStream = null;
        try {
            String ext = FilenameUtils.getExtension(file.toString());
            boolean zip = "zip".equalsIgnoreCase(ext);
            if (!zip) {
                success = processBook(file);
            } else {
                usfmStream = new FileInputStream(file);
                success = readZipStream(usfmStream);
            }
        } catch (Exception e) {
            // keep the cause in the log; the user-facing error only names the file
            Logger.e(TAG, "error reading " + file.toString(), e);
            addError(R.string.file_read_error_detail, file.toString());
            success = false;
        } finally {
            // the stream is opened here, so it is released here (null-safe)
            IOUtils.closeQuietly(usfmStream);
        }
        updateStatus(R.string.finished_loading);
        mProcessSuccess = success;
        return success;
    }

    /**
     * Imports the content behind a URI. When the URI text ends in a "zip"
     * extension (any case) the content is unpacked and every file inside is
     * imported; otherwise the content is read as UTF-8 text and processed as
     * one book.
     *
     * @param uri location of the content; null is reported as a read error
     * @return true if the import succeeded
     */
    public boolean readUri(Uri uri) {
        boolean success = true;
        updateStatus(R.string.initializing_import);
        if (null == uri) {
            addError(R.string.file_read_error);
            return false;
        }

        String path = uri.toString();

        InputStream usfmStream = null;
        try {
            String ext = FilenameUtils.getExtension(path);
            boolean zip = "zip".equalsIgnoreCase(ext);

            usfmStream = AppContext.context().getContentResolver().openInputStream(uri);
            if (!zip) {
                String text = IOUtils.toString(usfmStream, "UTF-8");
                success = processBook(text, uri.toString());
            } else {
                success = readZipStream(usfmStream);
            }
        } catch (Exception e) {
            // keep the cause in the log; the user-facing error only names the path
            Logger.e(TAG, "error reading " + path, e);
            addError(R.string.file_read_error_detail, path);
            success = false;
        } finally {
            // the stream is opened here, so it is released here (null-safe)
            IOUtils.closeQuietly(usfmStream);
        }
        updateStatus(R.string.finished_loading);
        mProcessSuccess = success;
        return success;
    }

    /**
     * Imports a file from the application assets. When the file name has a
     * "zip" extension (any case) the content is unpacked and every file inside
     * is imported; otherwise the content is read as UTF-8 text and processed as
     * one book.
     *
     * @param context  context whose assets contain the file
     * @param fileName asset path of the file to import
     * @return true if the import succeeded; failures to open or read the asset
     *         are logged and reported as false
     */
    public boolean readResourceFile(Context context, String fileName) {
        boolean success = true;
        updateStatus(R.string.initializing_import);
        // case-insensitive compare instead of toLowerCase(): independent of the
        // default locale and safe when there is no extension to inspect
        String ext = FilenameUtils.getExtension(fileName);
        boolean zip = "zip".equalsIgnoreCase(ext);

        InputStream usfmStream = null;
        try {
            usfmStream = context.getAssets().open(fileName);
            if (!zip) {
                String text = IOUtils.toString(usfmStream, "UTF-8");
                success = processBook(text, fileName);
            } else {
                success = readZipStream(usfmStream);
            }
        } catch (Exception e) {
            Logger.e(TAG, "error reading " + fileName, e);
            success = false;
        } finally {
            // the stream is opened here, so it is released here (null-safe)
            IOUtils.closeQuietly(usfmStream);
        }
        updateStatus(R.string.finished_loading);
        mProcessSuccess = success;
        return success;
    }

    /**
     * Fills the chunk map from the given chunk markers and then attaches a file
     * name to every chunk.
     *
     * First, each marker contributes a chunk object holding its first verse
     * slug to the array stored under the marker's chapter slug. Then, for every
     * chapter number from 1 to the number of known chapters, the frame slugs of
     * that chapter are stored, position by position, as the file names of the
     * chapter's chunk objects.
     *
     * @param book              book identifier, used only in the failure log message
     * @param chunks            chunk markers to add
     * @param sourceTranslation source translation whose frame slugs name the chunks
     * @return true on success, false if anything failed (the failure is logged)
     */
    public boolean addChunks(String book, ChunkMarker[] chunks, SourceTranslation sourceTranslation) {
        try {
            // pass 1: group the first-verse entries by chapter
            for (ChunkMarker marker : chunks) {
                final String chapterSlug = marker.chapterSlug;

                final JSONArray chapterChunks;
                if (!mChunks.containsKey(chapterSlug)) {
                    chapterChunks = new JSONArray();
                    mChunks.put(chapterSlug, chapterChunks);
                } else {
                    chapterChunks = mChunks.get(chapterSlug);
                }

                final JSONObject entry = new JSONObject();
                entry.put(FIRST_VERSE, marker.firstVerseSlug);
                chapterChunks.put(entry);
            }

            // pass 2: look up the file name of every chunk, chapter by chapter
            for (int index = 0; index < mChapters.length; index++) {
                final int chapterNumber = index + 1;
                final String chapterFolder = getChapterFolderName(String.valueOf(chapterNumber));
                final String[] frameSlugs = AppContext.getLibrary().getFrameSlugs(sourceTranslation, chapterFolder);
                final JSONArray chapterChunks = getVerseBreaksObj(String.valueOf(chapterNumber));
                for (int position = 0; position < chapterChunks.length(); position++) {
                    chapterChunks.getJSONObject(position).put(FILE_NAME, frameSlugs[position]);
                }
            }

            return true;
        } catch (Exception e) {
            Logger.e(TAG, "error parsing chunks " + book, e);
            return false;
        }
    }

    /**
     * Returns the folder under which all generated projects are created.
     *
     * @return the output base folder
     */
    public File getProjectsFolder() {
        return this.mTempOutput;
    }

    /**
     * Returns the imported project folders as a new array.
     *
     * @return the project folders, or an empty array when none are recorded
     */
    public File[] getImportProjects() {
        if (mImportProjects == null) {
            return new File[0];
        }
        final File[] projects = new File[mImportProjects.size()];
        return mImportProjects.toArray(projects);
    }

    /**
     * Reads one file as UTF-8 text and processes it as a book.
     *
     * @param file file holding the book
     * @return true if the book was processed successfully; read failures are
     *         logged, recorded as an error for the current book and reported
     *         as false
     */
    private boolean processBook(File file) {
        try {
            // decode explicitly as UTF-8, matching the stream-based import paths,
            // instead of relying on the platform default charset
            String book = FileUtils.readFileToString(file, "UTF-8");
            return processBook(book, file.toString());
        } catch (Exception e) {
            Logger.e(TAG, "error reading book " + file.toString(), e);
            addError(R.string.error_reading_file, file.toString());
            return false;
        }
    }

    /**
     * Processes book text with the default options: prompting for a missing
     * name is allowed and no name override is supplied.
     *
     * @param book text of the book
     * @param name name or path describing where the text came from
     * @return result of the full {@code processBook} call
     */
    private boolean processBook(String book, String name) {
        final boolean promptForName = true;
        final String useName = null;
        return processBook(book, name, promptForName, useName);
    }

    /**
     * Processes book text as a new entry after the books found so far and
     * records the outcome as the overall result.
     *
     * @param book          text of the book
     * @param name          name or path describing where the text came from
     * @param promptForName value returned when the book name or chunk list is missing
     * @param useName       book short name to use instead of the detected one, or null
     * @return true if processing succeeded
     */
    public boolean processText(String book, String name, boolean promptForName, String useName) {
        // the new book goes after everything recorded so far
        mCurrentBook = mFoundBooks.size();
        mProcessSuccess = processBook(book, name, promptForName, useName);
        return mProcessSuccess;
    }

    /**
     * Process the text of a single book: identify the book, split it into chapters and chunks,
     * build the manifest and, on success, add the project folder to the list of imported projects.
     *
     * @param book          full text of the book
     * @param name          name used to describe the book in messages (e.g. the source file path)
     * @param promptForName value returned when the book cannot be identified (no short name, or no
     *                      chunk list for the short name) after it was recorded with
     *                      addBookMissingName
     * @param useName       book short name to use instead of the one found in the text, or null
     * @return true if the project was created; false on cancel or failure; promptForName when the
     *         book could not be identified
     */
    private boolean processBook(String book, String name, boolean promptForName, String useName) {
        if(mCancel) {
            return false;
        }
        boolean successOverall = true;
        boolean success;
        mBookShortName = "";
        String description = getShortFilePath(name);
        setBookName("", description);
        try {
            mCurrentChapter = 0;
            mChaperCount = 1;

            extractBookID(book);

            // TODO: 4/12/16 verify book

            if (null == mTargetLanguage) {
                addError(R.string.missing_language);
                return false;
            }

            boolean hasVerses = isPresent(book, PATTERN_USFM_VERSE_SPAN);

            if (useName != null) { // caller supplied name overrides what was found in the text
                mBookShortName = useName;
            }

            if (isMissing(mBookShortName)) {
                addError(R.string.missing_book_short_name);
                addBookMissingName(name, null, book);
                return promptForName;
            }

            // lower-case independent of the device locale, otherwise an id such as "TIT" is
            // converted with a dotless i under a Turkish locale and no longer matches
            mBookShortName = mBookShortName.toLowerCase(java.util.Locale.ROOT);

            setBookName(mBookShortName, description);

            if (!hasVerses) {
                addError(R.string.no_verse);
                return false;
            }

            mTempDest = new File(mTempOutput, mBookShortName);
            mProjectFolder = new File(mTempDest, mBookShortName + "-" + mTargetLanguage.getId());

            if (isMissing(mBookName)) { // fall back to the short name as the title
                addError(R.string.missing_book_name);
                mBookName = mBookShortName;
            }

            ChunkMarker[] markers = AppContext.getLibrary().getChunkMarkers(mBookShortName);
            boolean haveChunksList = markers.length > 0;

            if (!haveChunksList) { // no chunk list
                // TODO: 4/13/16 add support for processing by sections

                addWarning(R.string.no_chunk_list, mBookShortName);
                addBookMissingName(mBookName, mBookShortName, book);
                return promptForName;
            } else { // has chunks
                SourceTranslation sourceTranslation = AppContext.getLibrary().getSourceTranslation(mBookShortName, "en", "ulb");
                mChapters = AppContext.getLibrary().getChapters(sourceTranslation);

                mChunks = new HashMap<>(); // clear old map
                addChunks(mBookShortName, markers, sourceTranslation);
                mChaperCount = mChunks.size();

                success = extractChaptersFromBook(book);
                successOverall = successOverall && success;
            }

            if(mCancel) {
                successOverall = false;
            }

            if (successOverall) {
                mCurrentChapter = (mChaperCount + 1); // past the last chapter while building manifest
                updateStatus(R.string.building_manifest);

                success = buildManifest();
                successOverall = successOverall && success;
            }

            if (successOverall) {
                mImportProjects.add(mProjectFolder);
            }

        } catch (Exception e) {
            Logger.e(TAG, "error parsing book", e);
            return false;
        }
        return successOverall;
    }

    /**
     * Get a short description of a file path. If the path contains the temporary source folder,
     * the part after that folder is returned; otherwise only the last path segment is returned.
     *
     * @param name path to shorten, may be null
     * @return the shortened path, or null if name is null
     */
    public String getShortFilePath(String name) {
        if (name == null) {
            return null;
        }

        if (mTempSrce != null) { // temp folder may not exist (not created yet or already cleaned up)
            String tempPath = mTempSrce.toString();
            int pos = name.indexOf(tempPath); // try to strip off temp folder path
            if (pos >= 0) {
                int start = pos + tempPath.length() + 1; // also skip the path separator
                if (start <= name.length()) {
                    return name.substring(start);
                }
                // name ends at the temp folder itself, fall through to use the last segment
            }
        }

        // otherwise we use just file name
        String[] parts = name.split("/");
        return (parts.length > 0) ? parts[parts.length - 1] : name;
    }

    /**
     * Extract the book name (\toc1) and book short name from the text. The short name is taken
     * from \toc3, but the first word of the \id line takes precedence when it is present.
     *
     * @param book text of the book
     */
    private void extractBookID(String book) {
        mBookName = extractString(book, PATTERN_BOOK_NAME_MARKER);
        mBookShortName = extractString(book, PATTERN_BOOK_SHORT_NAME_MARKER);

        String idString = extractString(book, ID_TAG_MARKER);
        if (null != idString) {
            String[] tags = idString.split("\\s+"); // id may be followed by space or tab separated text
            // an empty \id line must not wipe out the short name found in \toc3
            if ((tags.length > 0) && !tags[0].isEmpty()) {
                mBookShortName = tags[0];
            }
        }
    }

    /**
     * Create the target translation for the current book in the project folder (described by the
     * original author as creating the manifest for the project).
     *
     * @return true if the target translation was created; false if anything failed, in which
     *         case the failure is logged and a file write error is recorded
     * @throws JSONException declared for callers; exceptions raised inside are caught here
     */
    private boolean buildManifest() throws JSONException {
        try {
            Context context = AppContext.context();
            PackageInfo pInfo = context.getPackageManager().getPackageInfo(context.getPackageName(), 0);
            // the book short name serves as the project id
            TargetTranslation.create(context, AppContext.getProfile().getNativeSpeaker(), TranslationFormat.USFM, mTargetLanguage, mBookShortName, TranslationType.TEXT, Resource.REGULAR_SLUG, pInfo, mProjectFolder);
            return true;
        } catch (Exception e) {
            addError(R.string.file_write_error);
            Logger.e(TAG, "failed to build manifest", e);
            return false;
        }
    }

    /**
     * Split the book text at its chapter markers and break each chapter up into chunk files.
     * Chapters of the chunk list that are missing from the text are reported with a warning and
     * processed with empty text.
     *
     * @param text full text of the book
     * @return true if all chapters were processed; false on cancel, when no chapter marker was
     *         found, when a chapter is duplicated or out of order, or when a chapter failed
     */
    public boolean extractChaptersFromBook(CharSequence text) {
        Matcher matcher = PATTERN_CHAPTER_NUMBER_MARKER.matcher(text);
        int lastIndex = 0;
        CharSequence section;
        mChapter = null;
        mLastChapter = 0;
        boolean successOverall = true;
        boolean success;
        boolean foundChapter = false;
        while (matcher.find() && successOverall) {
            if(mCancel) {
                return false;
            }

            foundChapter = true;
            section = text.subSequence(lastIndex, matcher.start()); // get section before this chapter marker

            String chapter = matcher.group(1); // chapter number for next section
            mCurrentChapter = Integer.valueOf(chapter);
            if(mCurrentChapter > mChunks.size()) { //make sure in range
                break;
            }

            int expectedChapter = mLastChapter + 1;
            if (mCurrentChapter == expectedChapter) {
                // section is the text of the previous chapter (or the text before chapter 1)
                success = breakUpChapter(section, mChapter);
            } else if (mCurrentChapter > expectedChapter) { // if gap
                success = processChapterGap(section, mLastChapter, mCurrentChapter);
                mLastChapter = mCurrentChapter - 1;
            } else if ((mLastChapter > 0) && (mCurrentChapter == mLastChapter)) {
                // same chapter number as the previous marker
                Logger.e(TAG, "duplicate chapter " + chapter);
                addError(R.string.duplicate_chapter, chapter);
                return false;
            } else {
                // report the chapter just found, not the one before it
                Logger.e(TAG, "out of order chapter " + chapter + " after " + mLastChapter);
                addError(R.string.chapter_out_of_order, chapter, mLastChapter + "");
                return false;
            }

            successOverall = successOverall && success;
            if(!success) {
                break;
            }

            mLastChapter++;
            mChapter = chapter; // chapter number for next section
            lastIndex = matcher.end();
        }

        if(!foundChapter) { // if no chapters found
            Logger.e(TAG, "no chapters" );
            addError(R.string.no_chapter);
            return false;
        }

        if (successOverall) {
            section = text.subSequence(lastIndex, text.length()); // get last section
            success = breakUpChapter(section, mChapter);
            if (mChapter != null) {
                mLastChapter = Integer.valueOf(mChapter);
            }
            successOverall = successOverall && success;
        }

        if (successOverall) {
            if (mChapter == null) {
                // the only chapter markers found were beyond the range of the chunk list
                addWarning(R.string.chapter_count_invalid, mChunks.size() + "", "(null)");
                return false;
            }

            mCurrentChapter = Integer.valueOf(mChapter);
            if (mCurrentChapter != mChunks.size()) {

                if(mCurrentChapter < mChunks.size()) {
                    // Fill in only the chapters after the last one found. The last chapter was
                    // already saved above and must not be processed again with empty text, as
                    // that would overwrite its chunk files. As in processChapterGap, the results
                    // of processing the empty chapters are not checked.
                    for (int i = mCurrentChapter + 1; i <= mChunks.size(); i++) {
                        Logger.w(TAG, "missing chapter " + i);
                        addWarning(R.string.missing_chapter_n, i + "");
                        breakUpChapter("", i + "");
                    }
                } else  {
                    addWarning(R.string.chapter_count_invalid, mChunks.size() + "", mChapter);
                    return false;
                }
            }
        }
        return successOverall;
    }

    /**
     * Handle chapters that are missing from the book. The given section is processed as chapter
     * missingStart, then every chapter after missingStart and before missingEnd is reported with
     * a warning and processed with empty text.
     *
     * @param section      text to process as chapter missingStart
     * @param missingStart chapter the section belongs to; a value of zero or less means chapter 1
     *                     itself is missing, so the section is processed as chapter 1
     * @param missingEnd   first chapter after the gap (exclusive)
     * @return result of processing the section; results for the empty chapters are not checked
     */
    private boolean processChapterGap(CharSequence section, int missingStart, int missingEnd) {
        int firstChapter = missingStart;
        if (firstChapter <= 0) { // if first chapter is missing, then we start processing there
            firstChapter = 1;
            Logger.w(TAG, "missing chapter " + firstChapter);
            addWarning(R.string.missing_chapter_n, String.valueOf(firstChapter));
        }

        final boolean sectionProcessed = breakUpChapter(section, String.valueOf(firstChapter));

        int missing = firstChapter + 1;
        while (missing < missingEnd) { // skip missing gaps
            Logger.w(TAG, "missing chapter " + missing);
            addWarning(R.string.missing_chapter_n, String.valueOf(missing));
            breakUpChapter("", String.valueOf(missing));
            missing++;
        }
        return sectionProcessed;
    }

    /**
     * Break up the text of a chapter into chunk files based on the chunk list. When no chapter is
     * given, the text is content found before the first chapter marker and is saved as "before",
     * together with the book name as "title", in the project folder itself.
     *
     * @param text              text of the chapter
     * @param currentChapterStr chapter number, or null/empty for text before the first chapter
     * @return true if all sections were saved
     */
    private boolean breakUpChapter(CharSequence text, String currentChapterStr) {
        if (isMissing(currentChapterStr)) { // save stuff before first chapter
            boolean beforeSaved = saveSection(".", "before", text);
            boolean titleSaved = saveSection(".", "title", mBookName);
            return beforeSaved && titleSaved;
        }

        boolean successOverall = true;
        boolean success = true;
        try {
            String chapter = getChapterFolderName(currentChapterStr);
            if (null == chapter) {
                addError(R.string.could_not_find_chapter, currentChapterStr);
                return false;
            }

            JSONArray versebreaks = getVerseBreaksObj(chapter);

            int currentChapter = Integer.valueOf(chapter);
            updateStatus(R.string.processing_chapter, String.valueOf(mChaperCount - currentChapter + 1));

            // each chunk runs from the first verse of one entry up to the first verse of the next
            String lastFirst = null;
            for (int i = 0; (i < versebreaks.length()) && success; i++) {
                String first = versebreaks.getJSONObject(i).getString(FIRST_VERSE);
                success = extractVerses(chapter, text, lastFirst, first);
                successOverall = successOverall && success;
                lastFirst = first;
            }
            if (successOverall) { // last chunk runs to the end of the chapter
                success = extractVerses(chapter, text, lastFirst, END_MARKER +"");
                successOverall = successOverall && success;
            }

        } catch (Exception e) {
            Logger.e(TAG, "error parsing chapter " + currentChapterStr, e);
            addError(R.string.could_not_parse_chapter, currentChapterStr);
            return false;
        }
        return successOverall;
    }

    /**
     * Get the id of the chapter as it is stored in the chapter list (with the zero padding
     * expected by the app).
     *
     * @param findChapter chapter number to look up
     * @return the id of the matching chapter, or null if it could not be found (an error is
     *         recorded in that case)
     */
    private String getChapterFolderName(String findChapter) {
        try {
            int chapter = Integer.parseInt(findChapter);
            if (mChapters != null) {
                // first check in expected location, but only when the index is inside the list so
                // that an out of range index does not abort the search below
                int index = chapter - 1;
                if ((index >= 0) && (index < mChapters.length)) {
                    Chapter expected = mChapters[index];
                    if (Integer.parseInt(expected.getId()) == chapter) {
                        return expected.getId();
                    }
                }

                for (Chapter chapterN : mChapters) { //search for chapter match
                    if (Integer.parseInt(chapterN.getId()) == chapter) {
                        return chapterN.getId();
                    }
                }
            }
        } catch (Exception e) {
            Logger.e(TAG, "error looking up chapter " + findChapter, e);
        }

        addError(R.string.could_not_find_chapter, findChapter);
        return null;
    }

    /**
     * Get the file name to use for a verse chunk.
     *
     * @param findChapter chapter the chunk belongs to
     * @param firstVerse  first verse of the chunk
     * @return the file name from the chunk list entry whose first verse matches; firstVerse itself
     *         when there is no chunk list for the chapter or no matching entry
     */
    private String getChunkFileName(String findChapter, String firstVerse)  {
        try {
            JSONArray chunks = getVerseBreaksObj(findChapter);
            if (chunks != null) { // null when the chapter has no chunk list
                for (int i = 0; i < chunks.length(); i++) {
                    JSONObject chunk = chunks.getJSONObject(i);
                    if (firstVerse.equals(chunk.getString(FIRST_VERSE))) {
                        return chunk.getString(FILE_NAME);
                    }
                }
            }
        } catch (JSONException e) {
            Logger.e(TAG, "error reading chunk list for chapter " + findChapter, e);
        }

        return firstVerse; // if not found, use the first verse as the file name
    }

    /**
     * Get the array of verse chunks for a chapter. The chapter is looked up as given, then with
     * one and with two leading zeros added, and finally with its leading zeros removed one at a
     * time.
     *
     * @param findChapter chapter to look up
     * @return the chunk array, or null if none matched (an error is recorded in that case)
     */
    private JSONArray getVerseBreaksObj(String findChapter) {
        // as given, then padded with "0" and "00"
        String padded = findChapter;
        for (int attempt = 0; attempt < 3; attempt++) {
            if (mChunks.containsKey(padded)) {
                return mChunks.get(padded);
            }
            padded = "0" + padded;
        }

        // try removing leading zeros
        String stripped = findChapter;
        while (!stripped.isEmpty() && (stripped.charAt(0) == '0')) {
            stripped = stripped.substring(1);
            if (mChunks.containsKey(stripped)) {
                return mChunks.get(stripped);
            }
        }

        addError(R.string.could_not_find_chapter, findChapter);
        return null;
    }

    /**
     * Extract the verses from start up to end into a new section. Nothing is extracted when start
     * is null, i.e. text before the first chunk is skipped for now.
     *
     * @param chapter chapter the verses belong to
     * @param text    text of the chapter
     * @param start   first verse of the section, or null to do nothing
     * @param end     verse at which the section stops
     * @return true if start is null, otherwise the result of extractVerseRange
     */
    private boolean extractVerses(String chapter, CharSequence text, String start, String end) {
        if (start == null) { // skip over stuff before verse 1 for now
            return true;
        }

        final int firstVerse = Integer.valueOf(start);
        final int limitVerse = Integer.valueOf(end);
        return extractVerseRange(chapter, text, firstVerse, limitVerse, start);
    }

    /**
     * Extract the verses from start (inclusive) up to end (exclusive) of the chapter text and
     * save them as one chunk file. When end is END_MARKER everything from the start verse to the
     * end of the text is included. A warning is added when no text was found for the range, or
     * when the number of verses found differs from the size of the range.
     *
     * @param chapter    chapter the verses belong to; nothing is done when null or empty
     * @param text       text of the chapter
     * @param start      first verse of the range
     * @param end        verse at which the range stops
     * @param firstVerse first verse as text, used to look up the chunk file name
     * @return false if a verse marker could not be parsed while searching for the start verse or
     *         the section could not be saved, otherwise true
     */
    private boolean extractVerseRange(String chapter, CharSequence text, int start, int end, String firstVerse) {
        boolean successOverall = true;
        boolean success;
        if (!isMissing(chapter)) {
            Pattern pattern = PATTERN_USFM_VERSE_SPAN;
            Matcher matcher = pattern.matcher(text);
            int lastIndex = 0;
            String section = "";
            int currentVerse = 0; // verse of the previously parsed marker
            int foundVerseCount = 0;
            int endVerseRange = 0;
            boolean done = false;
            boolean matchesFound = false;
            while (matcher.find()) {
                matchesFound = true;

                if (currentVerse >= end) {
                    done = true;
                    break;
                }

                if (currentVerse >= start) { // previously parsed marker starts the range
                    if( (currentVerse == 1) && (start == 1) ){ // pick up initial content of chapter
                        lastIndex = 0; // get everything before this first verse
                    }

                    if(end == END_MARKER) { // just include everything to end
                        done = false; // section is appended after the loop
                        break;
                    }

                    boolean moreMarkers = true;
                    while(true) { // find the end of the section

                        if(endVerseRange > 0) {
                            foundVerseCount += (endVerseRange - currentVerse + 1);
                        } else {
                            foundVerseCount++;
                        }

                        String verse = matcher.group(1);
                        int[] verseRange = getVerseRange(verse);
                        if(null == verseRange) {
                            break;
                        }
                        currentVerse = verseRange[0];
                        endVerseRange = verseRange[1];

                        if (currentVerse >= end) {
                             break;
                        }

                        moreMarkers = matcher.find();
                        if(!moreMarkers) {
                            break;
                        }
                    }

                    // When the text ran out of verse markers there is no current match and
                    // matcher.start() would throw IllegalStateException, so the section then
                    // extends to the end of the text.
                    int sectionEnd = moreMarkers ? matcher.start() : text.length();
                    section = section + text.subSequence(lastIndex, sectionEnd); // get section before this chunk marker
                    done = true;
                    break;
                }


                String verse = matcher.group(1);
                int[] verseRange = getVerseRange(verse);
                if(null == verseRange) {
                    return false;
                }
                currentVerse = verseRange[0];
                endVerseRange = verseRange[1];

                lastIndex = matcher.start();
            }

            if (!done && matchesFound && (currentVerse >= start) && (currentVerse < end)) {
                section = section + text.subSequence(lastIndex, text.length()); // get last section
            }

            if(start != 0) { // text before first verse is not a concern
                int delta = foundVerseCount - (end - start);
                if (section.isEmpty()) {
                    String format = mContext.getResources().getString(R.string.could_not_find_verses_in_chapter);
                    String msg = String.format(format, start, end - 1, chapter);
                    addWarning(msg);
                } else if ((end != END_MARKER) && (delta != 0)) {
                    String format;
                    if(delta < 0) {
                        delta = -delta;
                        format = mContext.getResources().getString(R.string.missing_verses_in_chapter);
                    } else {
                        format = mContext.getResources().getString(R.string.extra_verses_in_chapter);
                    }
                    String msg = String.format(format, delta, start, end - 1, chapter);
                    addWarning(msg);
                }
            }

            String chunkFileName = getChunkFileName(chapter, firstVerse);
            success = saveSection(getChapterFolderName(chapter), chunkFileName, section);
            successOverall = successOverall && success;
        }
        return successOverall;
    }

    /**
     * Parse a verse reference that is either a single number or a range in the format 12-13.
     *
     * @param verse verse reference to parse
     * @return array of {verse, 0} for a single verse, {first, last} for a range, or null when the
     *         text is not a number and has fewer than two parts separated by "-"
     */
    private int[] getVerseRange(String verse) {
        try {
            int single = Integer.valueOf(verse);
            return new int[] {single, 0};
        } catch (NumberFormatException e) { // might be a range in format 12-13
            String[] bounds = verse.split("-");
            if (bounds.length >= 2) {
                int first = Integer.valueOf(bounds[0]);
                int last = Integer.valueOf(bounds[1]);
                return new int[] {first, last};
            }
            return null;
        }
    }

    /**
     * Save a section (chunk) as a text file in the chapter folder of the project. Section markers
     * are removed from the text before it is written and the folder is created if necessary.
     *
     * @param chapter  name of the chapter folder, relative to the project folder
     * @param fileName name of the file, without the ".txt" extension
     * @param section  text to save
     * @return true if the file was written; false if it failed (logged and recorded as an error)
     */
    private boolean saveSection(String chapter, String fileName, CharSequence section) {
        final File chapterFolder = new File(mProjectFolder, chapter);
        boolean saved;
        try {
            final String cleanChunk = removePattern(section, PATTERN_SECTION_MARKER);
            FileUtils.forceMkdir(chapterFolder);
            final File output = new File(chapterFolder, fileName + ".txt");
            FileUtils.write(output, cleanChunk);
            saved = true;
        } catch (Exception e) {
            Logger.e(TAG, "error parsing chapter " + mChapter, e);
            addError(R.string.file_write_for_verse, chapter + "/" + fileName);
            saved = false;
        }
        return saved;
    }

    /**
     * Test whether a CharSequence is null or empty.
     *
     * @param text text to test
     * @return true if text is null or has a length of zero
     */
    private boolean isMissing(CharSequence text) {
        return (text == null) || (text.length() == 0);
    }

    /**
     * Extract chapters from document text (used for splitting by sections). The text before each
     * chapter marker, and the text after the last one, is handed to extractSectionsFromChapter.
     * While scanning, the total number of chapters is estimated from the position in the text for
     * the status display.
     *
     * @param text text of the document
     * @return always true
     */
    private boolean extractChaptersFromDocument(CharSequence text) {
        final Matcher chapterMarkers = PATTERN_CHAPTER_NUMBER_MARKER.matcher(text);
        final int length = text.length();
        int sectionStart = 0;
        mChapter = null;
        while (chapterMarkers.find()) {
            // text up to this marker belongs to the chapter found previously
            extractSectionsFromChapter(text.subSequence(sectionStart, chapterMarkers.start()));
            mChapter = chapterMarkers.group(1); // chapter number for next section
            sectionStart = chapterMarkers.end();
            mCurrentChapter = Integer.valueOf(mChapter);

            if (mCurrentChapter <= 1) {
                continue;
            }

            //estimate number of chapters - doesn't need to be exact
            final float percentIn = (float) sectionStart / length;
            if (percentIn == 0.0f) {
                continue;
            }

            int estimate = Math.round((mCurrentChapter - 1) / percentIn);
            if (estimate < 1) { // sanity checks
                estimate = 1;
            } else if (estimate > 250) {
                estimate = 250;
            } else if (estimate < mCurrentChapter) {
                estimate = mCurrentChapter;
            }
            mChaperCount = estimate;

            updateStatus(R.string.processing_chapter, String.valueOf(mChaperCount - mCurrentChapter + 1));
        }

        // get last section
        extractSectionsFromChapter(text.subSequence(sectionStart, text.length()));
        return true;
    }

    /**
     * Split the text of the current chapter at its section markers and process each section.
     * Text before the first section marker is ignored; when there are no section markers the
     * whole text is processed as one section. Nothing is done if no chapter has been found yet.
     *
     * @param chapter text of the chapter
     */
    private void extractSectionsFromChapter(CharSequence chapter) {
        if (isMissing(mChapter)) {
            return;
        }

        final Matcher sectionMarkers = PATTERN_SECTION_MARKER.matcher(chapter);
        int sectionStart = 0;
        while (sectionMarkers.find()) {
            if (sectionStart > 0) { // ignore what's before first section
                processSection(chapter.subSequence(sectionStart, sectionMarkers.start()));
            }
            sectionStart = sectionMarkers.end();
        }

        // get last section
        processSection(chapter.subSequence(sectionStart, chapter.length()));
    }

    /**
     * Save a section of the current chapter to a file named after the first verse found in it.
     *
     * @param section text of the section
     * @return false if the section is not empty but contains no verse marker (an error is
     *         recorded), otherwise true; the result of saving is not checked
     */
    private boolean processSection(CharSequence section) {
        if (isMissing(section)) {
            return true;
        }

        final String firstVerse = extractString(section, PATTERN_USFM_VERSE_SPAN);
        if (firstVerse == null) {
            addError(R.string.missing_verses_in_section);
            return false;
        }

        saveSection(getChapterFolderName(mChapter), firstVerse, section);
        return true;
    }

    /**
     * Find the first match of a pattern and get the trimmed text of its group 1.
     *
     * @param text         text to search
     * @param regexPattern pattern with at least one capturing group
     * @return trimmed text of group 1 of the first match, or null if the text is empty or the
     *         pattern does not match
     */
    private String extractString(CharSequence text, Pattern regexPattern) {
        if (text.length() == 0) {
            return null;
        }

        final Matcher matcher = regexPattern.matcher(text);
        if (!matcher.find()) {
            return null;
        }
        return matcher.group(1).trim();
    }

    /**
     * Remove every match of a pattern from the text.
     *
     * @param text          text to clean
     * @param removePattern pattern whose matches are removed
     * @return the text without the matches; the unchanged text if the pattern does not match
     */
    private String removePattern(CharSequence text, Pattern removePattern) {
        final StringBuilder kept = new StringBuilder();
        final Matcher matcher = removePattern.matcher(text);
        int keepFrom = 0;
        while (matcher.find()) {
            kept.append(text, keepFrom, matcher.start()); // keep what is before this match
            keepFrom = matcher.end();
        }
        kept.append(text, keepFrom, text.length()); // keep what is after the last match
        return kept.toString();
    }

    /**
     * Test whether a regex pattern is present in the text.
     *
     * @param text         text to search
     * @param regexPattern pattern to look for
     * @return true if the text is not empty and contains a match of the pattern
     */
    private boolean isPresent(CharSequence text, Pattern regexPattern) {
        if (text.length() == 0) {
            return false;
        }
        return regexPattern.matcher(text).find();
    }

    /**
     * Create the temporary working folder, named after the current time in milliseconds, in the
     * app's cache folder, with a "source" sub-folder for unzipped source and an "output"
     * sub-folder.
     */
    private void createTempFolders() {
        final File cacheDir = AppContext.context().getCacheDir();
        mTempDir = new File(cacheDir, String.valueOf(System.currentTimeMillis()));
        mTempSrce = new File(mTempDir, "source");
        mTempOutput = new File(mTempDir, "output");

        mTempDir.mkdirs();
        mTempSrce.mkdirs();
        mTempOutput.mkdirs();
    }

    /**
     * Delete the temporary working folder and clear all references to the temporary folders.
     */
    public void cleanup() {
        // deleteQuietly ignores a null folder and any failure to delete
        FileUtils.deleteQuietly(mTempDir);

        // all of these referred to locations inside the deleted folder
        mTempDest = null;
        mTempOutput = null;
        mTempSrce = null;
        mTempDir = null;
    }

    /**
     * Add a file, or all the files in a folder and its sub-folders, to the list of files to
     * process.
     *
     * @param usfmFile file or folder to add
     * @return always true; a folder whose contents cannot be listed is logged and skipped
     */
    private boolean addFilesInFolder(File usfmFile) {
        Logger.i(TAG, "processing folder: " + usfmFile.toString());

        if (!usfmFile.isDirectory()) {
            addFile(usfmFile);
            return true;
        }

        File[] usfmSubFiles = usfmFile.listFiles();
        if (usfmSubFiles == null) { // listFiles returns null on an I/O error
            Logger.w(TAG, "could not list files in folder: " + usfmFile.toString());
            return true;
        }

        for (File usfmSubFile : usfmSubFiles) {
            addFilesInFolder(usfmSubFile);
        }
        // log the entries themselves, toString() of an array only gives its identity
        Logger.i(TAG, "found files: " + java.util.Arrays.toString(usfmSubFiles));
        return true;
    }

    /**
     * Add a file to the list of files to process.
     *
     * @param usfmFile file to add
     * @return always true
     */
    private boolean addFile(File usfmFile) {
        final String path = usfmFile.toString();
        Logger.i(TAG, "processing file: " + path);
        mSourceFiles.add(usfmFile);
        return true;
    }

    /**
     * Callback that receives the outcome of an operation.
     * NOTE(review): presumably invoked when an import completes - confirm against the callers,
     * which are outside this section.
     */
    public interface OnFinishedListener {
        /**
         * @param success outcome reported by the caller
         */
        void onFinished(boolean success);
    }

    /**
     * Callback that receives status updates.
     * NOTE(review): presumably fed by updateStatus() during import - confirm against the callers,
     * which are outside this section.
     */
    public interface UpdateStatusListener {
        /**
         * @param textStatus    status text
         * @param percentStatus status as a number; presumably a percentage - TODO confirm range
         */
        void statusUpdate(String textStatus, int percentStatus);
    }

    /**
     * Convert a list of files to a JSON array of their path strings.
     *
     * @param array files to convert
     * @return JSON array holding File.toString() of each file, in list order
     */
    static JSONArray toJsonFileArray(List<File> array) {
        final JSONArray paths = new JSONArray();
        final int count = array.size();
        for (int i = 0; i < count; i++) {
            paths.put(array.get(i).toString());
        }
        return paths;
    }

    /**
     * Parse a JSON array of path strings into a list of files.
     *
     * @param jsonStr JSON text of the array
     * @return the files, or null if the text could not be parsed
     */
    static List<File> fromJsonArrayToFiles(String jsonStr) {
        List<File> files = null;
        try {
            files = fromJsonArrayToFiles(new JSONArray(jsonStr));
        } catch (Exception e) {
            e.printStackTrace();
        }
        return files;
    }

    /**
     * Convert a JSON array of path strings into a list of files.
     *
     * @param jsonArray array of path strings
     * @return a new list with one File per entry, in array order
     * @throws JSONException if an entry cannot be read as a string
     */
    static List<File> fromJsonArrayToFiles(JSONArray jsonArray) throws JSONException {
        final List<File> files = new ArrayList<>();
        final int count = jsonArray.length();
        for (int i = 0; i < count; i++) {
            files.add(new File(jsonArray.getString(i)));
        }
        return files;
    }

    /**
     * Convert a list of strings to a JSON array.
     *
     * @param array strings to convert
     * @return JSON array holding the strings in list order
     */
    static JSONArray toJsonStringArray(List<String> array) {
        final JSONArray strings = new JSONArray();
        final int count = array.size();
        for (int i = 0; i < count; i++) {
            strings.put(array.get(i));
        }
        return strings;
    }

    /**
     * Parse a JSON array of strings into a list.
     *
     * @param jsonStr JSON text of the array
     * @return the strings, or null if the text could not be parsed
     */
    static List<String> fromJsonArrayToStrings(String jsonStr) {
        List<String> strings = null;
        try {
            strings = fromJsonArrayToStrings(new JSONArray(jsonStr));
        } catch (Exception e) {
            e.printStackTrace();
        }
        return strings;
    }

    /**
     * Convert a JSON array of strings into a list.
     *
     * @param jsonArray array of strings
     * @return a new list with the entries in array order
     * @throws JSONException if an entry cannot be read as a string
     */
    static List<String> fromJsonArrayToStrings(JSONArray jsonArray) throws JSONException {
        final List<String> strings = new ArrayList<>();
        final int count = jsonArray.length();
        for (int i = 0; i < count; i++) {
            strings.add(jsonArray.getString(i));
        }
        return strings;
    }

    /**
     * Get an optional value from a JSON object, cast to Integer.
     *
     * @param json object to read from
     * @param key  key of the value
     * @return the value, or null if getOpt finds nothing
     */
    static Integer getOptInteger(JSONObject json, String key) {
        final Object value = getOpt(json, key);
        return (Integer) value;
    }

    /**
     * Get an optional value from a JSON object, cast to Boolean.
     *
     * @param json object to read from
     * @param key  key of the value
     * @return the value, or null if getOpt finds nothing
     */
    static Boolean getOptBoolean(JSONObject json, String key) {
        final Object value = getOpt(json, key);
        return (Boolean) value;
    }

    /**
     * Get an optional path string from a JSON object as a File.
     *
     * @param json object to read from
     * @param key  key of the path
     * @return a File for the path, or null if there is no path
     */
    static File getOptFile(JSONObject json, String key) {
        final String path = getOptString(json, key);
        return (path == null) ? null : new File(path);
    }

    /**
     * Get an optional value from a JSON object, cast to String.
     *
     * @param json object to read from
     * @param key  key of the value
     * @return the value, or null if getOpt finds nothing
     */
    static String getOptString(JSONObject json, String key) {
        return (String) getOpt(json, key);
    }

    /**
     * Get an optional JSON object from a JSON object.
     *
     * @param json object to read from
     * @param key  key of the value
     * @return the value, null if getOpt finds nothing, or a new empty JSONObject if fetching or
     *         casting the value throws
     */
    static JSONObject getOptJsonObject(JSONObject json, String key) {
        JSONObject result;
        try {
            result = (JSONObject) getOpt(json, key);
        } catch (Exception e) {
            result = new JSONObject();
        }
        return result;
    }

    /**
     * Get an optional JSON array from a JSON object.
     *
     * @param json object to read from
     * @param key  key of the value
     * @return the value, null if getOpt finds nothing, or a new empty JSONArray if fetching or
     *         casting the value throws
     */
    static JSONArray getOptJsonArray(JSONObject json, String key) {
        JSONArray result;
        try {
            result = (JSONArray) getOpt(json, key);
        } catch (Exception e) {
            result = new JSONArray();
        }
        return result;
    }

    /**
     * Get an optional value from a JSON object.
     *
     * @param json object to read from
     * @param key  key of the value
     * @return the value stored under the key, or null if the key is not present or reading it
     *         throws (the stack trace is printed in that case)
     */
    static Object getOpt(JSONObject json, String key) {
        Object value = null;
        try {
            if (json.has(key)) {
                value = json.get(key);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return value;
    }
}