PathManager.java example

Explorer

Xponents-master
- Basics
  - src
    - main
      - java
        org
        opensextant
        ConfigException.java
        data
        Country.java
        DocInput.java
        GeoBase.java
        Geocoding.java
        Language.java
        LatLon.java
        Place.java
        Taxon.java
        TextInput.java
        extraction
        ExtractionException.java
        ExtractionMetrics.java
        ExtractionResult.java
        Extractor.java
        MatchFilter.java
        NormalizationException.java
        TextEntity.java
        TextMatch.java
        processing
        Parameters.java
        ProcessingException.java
        util
        AnyFilenameFilter.java
        FileUtility.java
        GeodeticUtility.java
        GeonamesUtility.java
        TextUtils.java
    - test
      - java
        MetricsTest.java
        TestGeoUtils.java
        TestGeonamesLanguages.java
        TestGeonamesMeta.java
        TestTextUtils.java
- Examples
  - src
    - main
      - java
        org
        opensextant
        examples
        BasicGeoTemporalProcessing.java
        TaxonomicTagger.java
        WebCrawl.java
        twitter
        MicroMessage.java
        Tweet.java
        TweetGeocoder.java
- Extraction
  - src
    - main
      - java
        org
        opensextant
        extraction
        SolrMatcherSupport.java
        SolrTaggerRequest.java
        extractors
        geo
        BoundaryObserver.java
        CountryCount.java
        CountryObserver.java
        GazetteerMatcher.java
        GazetteerUpdateProcessorFactory.java
        LocationObserver.java
        PlaceCandidate.java
        PlaceCount.java
        PlaceEvidence.java
        PlaceGeocoder.java
        ScoredPlace.java
        SolrGazetteer.java
        TagFilter.java
        rules
        ContextualOrganizationRule.java
        CoordinateAssociationRule.java
        CountryRule.java
        GeocodeRule.java
        LocationChooserRule.java
        MajorPlaceRule.java
        NameCodeRule.java
        NameRule.java
        NonsenseFilter.java
        PersonNameFilter.java
        ProvinceAssociationRule.java
        xtax
        TaxonMatch.java
        TaxonMatcher.java
        output
        AbstractFormatter.java
        CSVFormatter.java
        FormatterFactory.java
        GDBFormatter.java
        GISDataFormatter.java
        GISDataModel.java
        GeoCSVFormatter.java
        KMLFormatter.java
        OpenSextantSchema.java
        ResultsFormatter.java
        ShapefileFormatter.java
        WKTFormatter.java
        processing
        ResultsUtility.java
        XtractorGroup.java
        progress
        ProgressListener.java
        ProgressMonitor.java
        ProgressMonitorBase.java
        util
        SolrProxy.java
        SolrUtil.java
    - test
      - java
        org
        opensextant
        extractors
        test
        TestExtraction.java
        TestGazFactory.java
        TestGazMatcher.java
        TestGazetteer.java
        TestGazetteerConflationKey.java
        TestPersonFilter.java
        TestPlaceGeocoder.java
        TestPlaceGeocoderLanguages.java
        TestStopFilters.java
        TestUtils.java
        TestXTax.java
- MapReduce
  - src
    - main
      - java
        org
        opensextant
        mapreduce
        AbstractMapper.java
        GeoTaggerMapper.java
        KeywordTaggerMapper.java
        Log4JUtils.java
        LoggingUtilities.java
        XponentsTaggerDemo.java
    - test
      - java
        org
        apache
        solr
        core
        CoreContainer.java
- Patterns
  - src
    - main
      - java
        org
        opensextant
        extractors
        flexpat
        AbstractFlexPat.java
        PatternTestCase.java
        RegexPattern.java
        RegexPatternManager.java
        TextMatchResult.java
        poli
        PatternsOfLife.java
        PoliMatch.java
        PoliPatternManager.java
        TestCase.java
        data
        MACAddress.java
        Money.java
        TelephoneNumber.java
        xcoord
        DMSFilter.java
        DMSOrdinate.java
        GeocoordMatch.java
        GeocoordMatchFilter.java
        GeocoordNormalization.java
        GeocoordPattern.java
        GeocoordPrecision.java
        GeocoordTestCase.java
        Hemisphere.java
        MGRSFilter.java
        MGRSParser.java
        PatternManager.java
        PrecisionScales.java
        UTMParser.java
        XConstants.java
        XCoord.java
        xtemporal
        DateMatch.java
        DateNormalization.java
        DateTimePattern.java
        PatternManager.java
        TestCase.java
        XTConstants.java
        XTemporal.java
    - test
      - java
        org
        opensextant
        extractors
        test
        DateNormalizationTest.java
        PrecisionScalesTest.java
        TestPoLi.java
        TestPoLiReporter.java
        TestXCoord.java
        TestXCoordReporter.java
        TestXTemporal.java
        TestXTemporalReporter.java
- XText
  - examples
  - src
    - main
      - java
        org
        opensextant
        xtext
        Content.java
        ConversionListener.java
        ConvertedDocument.java
        Converter.java
        ExclusionFilter.java
        PathManager.java
        XText.java
        collectors
        ArchiveNavigator.java
        CollectionListener.java
        Collector.java
        mailbox
        DefaultMailCrawl.java
        MailClient.java
        MailConfig.java
        NTLMAuth.java
        OutlookPSTCrawler.java
        sharepoint
        DefaultSharepointCrawl.java
        SPLink.java
        SharepointClient.java
        web
        CrawlFilter.java
        DefaultWebCrawl.java
        HyperLink.java
        WebClient.java
        converters
        ConverterAdapter.java
        DefaultConverter.java
        EmbeddedContentConverter.java
        ImageMetadataConverter.java
        MessageConverter.java
        TextTranscodingConverter.java
        TikaHTMLConverter.java
        WebArchiveConverter.java
    - test
      - java
        org
        opensextant
        xtext
        converters
        test
        MessageConverterTest.java
        test
        Decomposer.java
        ImageGroper.java
        MailClientTest.java
        SharepointClientTest.java
        SharepointCrawlTest.java
        TestPST.java
        TestSPLinks.java
        TestTikaPST.java
        Tests.java
        TextTranscodingTest.java
        WebLinkTest.java
- Xlayer
  - src
    - main
      - java
        org
        opensextant
        xlayer
        Transforms.java
        XlayerClient.java
        server
        RequestParameters.java
        TaggerResource.java
        XlayerApp.java
        xgeo
        XlayerRestlet.java
        XlayerServer.java
        XponentsGeotagger.java
    - test
      - java
        XlayerClientTest.java

package org.opensextant.xtext;

import java.io.File;
import java.io.IOException;
import java.util.Date;

import net.sf.json.JSONObject;

import org.apache.commons.codec.binary.Base64;
import org.apache.commons.io.FilenameUtils;
import org.opensextant.ConfigException;
import org.opensextant.util.FileUtility;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * PathManager -- a group of routines related to caching conversions and archive collections.
 * It manages the path decisions given a variety of output parameters and the input object.
 *
 * @author ubaldino
 *
 */
public class PathManager {

    /** Logger for this instance, obtained from SLF4J using the runtime class (getClass()). */
    private final Logger log = LoggerFactory.getLogger(getClass());

    /**
     * The folder where conversions are saved.
     */
    private String conversionCache = null;

    /**
     * A prefix path the caller wishes to remove from input files and archives.  This helps shorten paths in the cache.
     */
    private String stripPrefixPath = null;

    /**
     * inputRootName = the name of the input in the output cache and export cache.
     * Assigned by setInputRoot(File): the folder's own name for a directory input,
     * otherwise the name of the file's parent folder.
     */
    private String inputRootName = null;

    /**
     * Embedded mode: when true, saveConversion() stores the conversion through
     * ConvertedDocument.saveEmbedded() instead of saving under the output node.
     */
    private boolean saveConversionsWithOriginals = false;

    /**
     * saveExtractedChildrenWithOriginals  - determines how embedded items are archived, e.g., Email attachments, or embedded images.
     * They are children to some parent container -- XText yields two things:  the original child, and the conversion of the child.
     *
     * Example:  a.doc (child) saved from A.eml (parent)
     *
     * saveExtractedChildrenWithOriginals = True;    a is saved in same folder where A exists
     * saveExtractedChildrenWithOriginals = False;   a is saved in a separate output archive.
     */
    private boolean saveExtractedChildrenWithOriginals = false;

    /**
     * Overall switch for saving conversions; remains false until enableSaving(true) is called.
     */
    private boolean saving = false;

    /**
     * Overall switch for saving (caching) converted output.  Switching it on
     * is not sufficient by itself: the caller must also say where output goes,
     * either with the input ({@link #enableSaveWithInput(boolean)}) or in a
     * conversion cache ({@link #setConversionCache(String)}); otherwise
     * {@link #configure()} reports an error.
     *
     * @param b true to allow saving/caching conversions
     */
    public void enableSaving(boolean b) {
        this.saving = b;
    }

    /**
     * @return true if saving of conversions is currently enabled
     */
    public boolean isSaving() {
        return this.saving;
    }

    /**
     * @return the folder where conversions are cached, or null if none has been set
     */
    public String getConversionCache() {
        return this.conversionCache;
    }

    /**
     * Set the prefix that will be removed from the leading part of paths as conversions are cached.
     * Have a long, long file path?  And want to shorten it in your cache... choose this prefix after thinking about it.
     * If you strip too much you may end up with name conflicts or not enough organization to the cached stuff.
     *
     * NOTE: a warning is logged if the prefix does not exist on disk.  This is deliberately not an
     * exception or error, as you might get the paths to items from some other method and they may
     * not actually exist physically on disk.  The prefix is stored either way.
     *
     * @param p a prefix path that would be found in the absolute path of documents being converted;
     *          null clears the prefix.
     */
    public void setStripPrefixPath(String p) {
        stripPrefixPath = p;
        if (p != null && !new File(p).exists()) {
            // A missing prefix is tolerated by design, so report it at WARN rather than ERROR.
            log.warn("Warning prefix Path does not exist: {}", p);
        }
    }

    /**
     * Returns the prefix set through {@link #setStripPrefixPath(String)}.
     * The method name is misspelled ("Stip"); it is kept as-is so existing callers keep working.
     *
     * @return the prefix path to strip, or null if none is set
     */
    public String getStipPrefixPath() {
        return this.stripPrefixPath;
    }

    /**
     * @return true once an input root name has been recorded by {@link #setInputRoot(File)}
     */
    public boolean hasInputRoot() {
        return this.inputRootName != null;
    }

    /**
     * From the provided caching parameters set ahead of time, infer the location
     * where this input should be located within the archive, relatively.  This
     * should only be set once at the top level, that is
     * <ul><li>if the call is to convert a single file, set it once for the file.
     * <li>if the call is to convert a folder, set it once.
     * <li>... for archives, etc. set it once!
     * </ul>
     * Do not set it for each file when traversing a folder contents.
     * <br>
     * NOTA BENE: Set conversion cache location first.  Nothing is changed at all
     * when saving is disabled.
     *
     * <p>What is recorded:
     * <ul>
     * <li>the input root name: the folder's own name for a directory, otherwise
     *     the name of the file's parent folder;
     * <li>the output node, only when a conversion cache is set: the cache plus the
     *     stripped path of the folder when a strip-prefix is set; else the cache
     *     plus "." for a directory input; else the cache itself.
     * </ul>
     *
     * Intended layout of saved items (the final item paths are produced by
     * ConvertedDocument when saving, not by this method):
     * <pre>
     *   cache =  /output/converted/
     *
     *   input =  /my/original/abc.zip  ==> /output/converted/abc_zip
     *   input =  /my/original/abc.doc  ==> /output/converted/abc.doc.txt
     *   input =  /my/original/abc/     ==> /output/converted/abc/
     *
     *
     *   prefix set, as prefix=/my
     *
     *   input =  /my/original/abc.zip  ==> /output/converted/original/abc_zip
     *   input =  /my/original/abc.doc  ==> /output/converted/original/abc.doc.txt
     *   input =  /my/original/abc/     ==> /output/converted/original/abc/
     *
     *  if saved-in-input, none of this applies.
     * </pre>
     *
     * @param input  root of your input which will be crawled
     * @throws IOException if input is null or the folder containing it cannot be determined
     */
    public void setInputRoot(File input) throws IOException {
        if (!saving) {
            return;
        }
        if (input == null) {
            throw new IOException("Input root cannot be null");
        }

        final boolean isDir = input.isDirectory();

        // The folder that anchors this input: the input itself when it is a
        // directory, otherwise the folder containing the file.
        File container = input;
        if (!isDir) {
            container = input.getParentFile();
            if (container == null) {
                // A bare relative name such as new File("abc.doc") has no parent
                // component; resolve it against the working directory instead of failing.
                container = input.getAbsoluteFile().getParentFile();
            }
            if (container == null) {
                throw new IOException("Cannot determine the folder containing input=" + input);
            }
        }

        // Reset globals.
        //
        inputRootName = container.getName();
        outputNode = null;

        if (conversionCache == null) {
            return;
        }

        if (stripPrefixPath != null) {
            outputNode = createPath(conversionCache, this.getStrippedInputPath(container));
        } else if (isDir) {
            outputNode = createPath(conversionCache, ".");
        } else {
            // DEFAULT: for files and when not using strip path, the cache folder is literally used.
            outputNode = conversionCache;
        }
    }

    /**
     * This enables saving in an archive and disables saving with input.
     * The cache path is recorded only after it has been validated, so a failed
     * call leaves any previously configured cache untouched.  The stored path is
     * absolute and uses "/" separators like the other paths managed here.
     *
     * @param root  container where conversions will be cached; must be an existing directory.
     * @throws IOException  if root is null, does not exist or is not a directory
     * @see #enableSaveInCache(boolean)
     * @see #enableSaveWithInput(boolean)
     */
    public void setConversionCache(String root) throws IOException {
        if (root == null) {
            throw new IOException("Archive cannot be null");
        }

        // User tried setting a non-null archive... so implicitly they are not saving with input
        //
        this.enableSaveInCache(true);

        File test = new File(fixPath(root));
        if (!test.exists() || !test.isDirectory()) {
            throw new IOException("Archive root directory must exist. Non-existant DIR=" + test);
        }

        // getAbsolutePath() yields platform separators; normalize again so the
        // cache path follows the same "/" convention as createPath() results.
        conversionCache = fixPath(test.getAbsolutePath());
    }

    /**
     * Choose whether conversions are saved with the input, i.e. in the embedded
     * "xtext" folder next to each original.  Setting a conversion cache through
     * {@link #setConversionCache(java.lang.String)} switches this off again.
     *
     * <pre>
     * input is:     a/b/c.doc
     * saved as:     a/b/xtext/c.doc.txt
     *
     * DEFAULT: do not save in input folder
     * </pre>
     *
     * @param b flag, true = save conversions close to given input
     * @see #setConversionCache(java.lang.String)
     */
    public void enableSaveWithInput(boolean b) {
        this.saveConversionsWithOriginals = b;
    }

    /**
     * @return true if conversions are saved with the original input rather than in a cache
     */
    public boolean isSaveWithInput() {
        return this.saveConversionsWithOriginals;
    }

    /**
     * Experimental.
     *
     * Controls where children extracted from a container are written.  The flag is
     * off until set here.  If you have email, for example, folder/A.eml
     * then children will appear at folder/A_eml/child.doc  for some child.doc attachment.
     * Behavior may differ in each case.  But essentially, this flag directs XText to write back to inputRoot
     *
     * Embedded parent/child docs (email, compound docs, etc) are special cases.
     *
     * @param b true if children objects should be extracted and saved near input
     */
    public void enableSaveChildrenWithInput(boolean b) {
        this.saveExtractedChildrenWithOriginals = b;
    }

    /**
     * Choose saving into the caller-specified archive.  This is the exact
     * inverse of {@link #enableSaveWithInput(boolean)}: enabling one disables
     * the other.  It is switched on implicitly by
     * {@link #setConversionCache(java.lang.String)}.
     *
     * <pre>
     * input is:   a/b/c.doc
     * output is:  archiveRoot/a/b/c.doc.txt
     * </pre>
     *
     * @param b true if save in cache, not with input.
     * @see #setConversionCache(java.lang.String)
     */
    public void enableSaveInCache(boolean b) {
        this.saveConversionsWithOriginals = !b;
    }

    /**
     * Folder under which conversions for the current input root are saved and looked up.
     * Computed by setInputRoot(File); stays null when no conversion cache is set.
     */
    private String outputNode;

    /**
     * Drops a single leading "/" from a path, if there is one; any other
     * path (including the empty string) comes back unchanged.
     * Caller is responsible for checking null.
     *
     * @param path a path, not null
     * @return the path without its first character when that character is "/"
     */
    public static String trimLeadingSlash(String path) {
        return path.startsWith("/") ? path.substring(1) : path;
    }

    /**
     * Prepares a relative path, stripped of the prefix if one is provided.
     * Otherwise, the input path is returned less a leading slash.
     *
     * Both the file's absolute path and the prefix are normalized to "/"
     * separators before they are compared, and the prefix is removed only when
     * it ends on a path-segment boundary: prefix "/my" strips "/my/original"
     * down to "original" but leaves "/myfiles/x" alone.
     *
     * @param obj a file.
     * @return stripped path, using "/" separators and without a leading "/"
     */
    public String getStrippedInputPath(File obj) {
        String root = fixPath(obj.getAbsolutePath());
        String prefix = fixPath(stripPrefixPath);

        if (prefix != null && root.startsWith(prefix)) {
            int cut = prefix.length();
            // Only strip whole path segments, never part of a folder name.
            boolean onBoundary = prefix.endsWith("/") || root.length() == cut
                    || root.charAt(cut) == '/';
            if (onBoundary) {
                root = root.substring(cut);
            }
        }
        return trimLeadingSlash(root);
    }

    /**
     * Most of the path mechanics are string-based, rather than file-system based,
     * so path adjustments are best done to be sure all paths from configuration
     * or from inputs should conform to a common convention.  paths will be more like URLs, using
     * "/" as the standard path separator.
     *
     * Specifically: every "\" becomes "/", every "/./" segment is collapsed to "/"
     * (including consecutive ones such as "a/././b"), and any leading "./" is removed.
     *
     * TODO: commons-io FilenameUtils.normalize()  does not work quite right across platforms. Review, Retest.
     *
     * @param p path
     * @return  fixed path, or null if p is null
     */
    protected static String fixPath(String p) {
        if (p == null) {
            return null;
        }
        String relPath = p.replace('\\', '/');

        // String.replace() makes one left-to-right pass and does not rescan what it
        // produced, so "a/././b" would keep one "/./".  Repeat until none is left.
        while (relPath.contains("/./")) {
            relPath = relPath.replace("/./", "/");
        }
        while (relPath.startsWith("./")) {
            relPath = relPath.substring(2);
        }
        return relPath;
    }

    /**
     * Joins a folder and an item with "/" and returns the absolute form of the
     * result, normalized by {@link #fixPath(String)}.
     *
     * NOTE: Use of File() or FilenameUtils.concat() are OS dependent, here
     * what we want is more like a URL string representation always using /a/b/c/
     * Instead of potentially \ and/or / mixed.
     *
     * @param dir  containing dir
     * @param item  item to save in subfolder
     * @return  resulting absolute path
     * @throws IOException declared for callers; nothing in this method raises it directly
     */
    protected static String createPath(String dir, String item) throws IOException {
        final String joined = dir + "/" + item;
        final String absolute = new File(joined).getAbsolutePath();
        return fixPath(absolute);
    }

    /**
     * Folder that receives child items extracted from containers; null when unset.
     */
    private String extractedChildrenCache = null;

    /**
     * @param folder folder for extracted child items; its existence is verified later by {@link #configure()}
     */
    public void setExtractedChildrenCache(String folder) {
        this.extractedChildrenCache = folder;
    }

    /**
     * @return the folder for extracted child items, or null if none was set
     */
    public String getExtractedChildrenCache() {
        return this.extractedChildrenCache;
    }

    /**
     * Run by XText.setup() to verify path issues.  Two things are checked:
     * a conversion cache must be set when saving is enabled without saving
     * with the input, and the extracted-children cache, when set, must exist.
     *
     * @throws IOException if either check fails
     */
    public void configure() throws IOException {
        final boolean cacheRequired = saving && !this.saveConversionsWithOriginals;
        if (cacheRequired && this.conversionCache == null) {
            throw new IOException(
                    "If not saving conversions with your input folders, you must provide an archive path");
        }

        if (extractedChildrenCache == null) {
            return;
        }

        final File childrenDir = new File(extractedChildrenCache);
        if (!childrenDir.exists()) {
            throw new IOException(
                    "If saving child items from archives or PST files, you must create the parent folder first. Dir does not exist:"
                            + extractedChildrenCache);
        }
    }

    /**
     * Saves a conversion according to the configured mode: with the original
     * input (embedded), or under the output node chosen by {@link #setInputRoot(File)}.
     *
     * @param textDoc converted doc to save
     * @throws IOException on err
     */
    public void saveConversion(ConvertedDocument textDoc) throws IOException {

        log.debug("FILE={}, cache-in={}", textDoc.getFile(), outputNode);

        if (this.saveConversionsWithOriginals) {
            // Embedded mode: the document saves itself close to its original.
            textDoc.saveEmbedded();
            return;
        }

        // Cache mode: make the document path relative to "/<inputRootName>/",
        // then save it under the output node.
        final String searchPath = "/" + inputRootName + "/";
        textDoc.setPathRelativeTo(searchPath, this.saveExtractedChildrenWithOriginals);
        textDoc.save(outputNode);
    }

    /**
     * Looks up a previously saved conversion for the given input, using the
     * configured mode.
     *
     * @param input original file whose conversion is sought
     * @return the cached conversion; null if no cache applies or nothing was found.
     *         An item may be missing because of an earlier error or because it was
     *         already plain text.
     * @throws IOException on err
     */
    public ConvertedDocument getCachedConversion(File input) throws IOException {
        if (this.saveConversionsWithOriginals) {
            // Uncache a file close to the original F <== ./xtext/F.txt
            return getEmbeddedConversion(input);
        }

        if (this.inputRootName == null) {
            // No input root was recorded, so there is no cache tree to consult.
            return null;
        }

        // This only works when an input root has been set, as the XText
        // extraction API does.  When convertFile(path) is called directly,
        // these optimizations will not necessarily make sense.
        //
        // Uncache a file in some other tree of archives that aligns
        // with the tree of the original source.
        // .../mine/source/path/F <==== /archive/source/path/F.txt
        return getCachedConversion(this.outputNode, this.inputRootName, input);
    }

    /**
     * Determines, and creates if needed, the folder into which the contents of
     * an archive file are exported.  The folder is named "&lt;base&gt;_&lt;ext&gt;" after
     * the archive (A.zip gives A_zip) and is placed in the extracted-children
     * cache when one is set, otherwise next to the archive when children are
     * saved with originals.
     *
     * @param input the archive file
     * @return the export folder
     * @throws ConfigException if no target for child items is configured, or the
     *         folder containing the archive cannot be determined
     * @throws IOException on err
     */
    public File getArchiveExportDir(File input) throws ConfigException, IOException {

        String aName = FilenameUtils.getBaseName(input.getName());
        String aExt = FilenameUtils.getExtension(input.getName());
        // Locale.ROOT keeps the folder name independent of the default locale
        // (e.g. "ZIP" must not become "zıp" under a Turkish locale).
        String outputName = String.format("%s_%s", aName, aExt.toLowerCase(java.util.Locale.ROOT));

        // Set output name to input name.  That is, once we extract A.zip to ./(originals)/A_zip/   this de-archived folder will
        // Also exist in ./(converted)/A_zip/  or ./(originals)/A_zip/xtext/ embedded.

        String saveTo;
        // unpack, traverse, convert, save
        if (extractedChildrenCache != null) {
            // Save converted items in a parallel archive for this zip archive.
            saveTo = PathManager.createPath(extractedChildrenCache, outputName);
        } else if (this.saveExtractedChildrenWithOriginals) {
            File parent = input.getParentFile();
            if (parent == null) {
                // Bare relative name such as new File("A.zip"): resolve against the working directory.
                parent = input.getAbsoluteFile().getParentFile();
            }
            if (parent == null) {
                throw new ConfigException(
                        "Cannot determine the folder containing archive FILE=" + input);
            }
            saveTo = PathManager.createPath(parent.getAbsolutePath(), outputName);
        } else {
            throw new ConfigException(
                    "Archive Files cannot be dearchived without a target folder to store child binaries");
        }

        File saveFolder = new File(saveTo);
        if (!saveFolder.exists()) {
            FileUtility.makeDirectory(saveFolder);
        }

        log.debug("ARCHIVE FILE={}, node-in={}, cache-in={}, export={}", input, outputName,
                outputNode, saveFolder);

        return saveFolder;
    }

    /**
     * Checks that some place is configured to hold items exported from an
     * archive: saving with input, saving children with input, or a conversion cache.
     * When none is configured an error is logged and the file should be ignored.
     *
     * NOTE(review): this test looks at the conversion cache, while
     * getArchiveExportDir() consults the extracted-children cache -- confirm
     * that the two are meant to differ.
     *
     * @param input name of the archive being considered; used only in the log message
     * @return true if archive export can proceed
     */
    public boolean verifyArchiveExport(String input) {
        final boolean hasTarget = this.saveConversionsWithOriginals
                || this.saveExtractedChildrenWithOriginals
                || this.conversionCache != null;
        if (hasTarget) {
            return true;
        }

        log.error(
                "Sorry -- if not saving in input folder, you must provide a separate "
                        + "archive to contain ZIP and other archives that are extracted.  Ignoring FILE={}",
                        input);
        return false;
    }

    /**
     * Name of the folder, next to an original file, that holds embedded conversions.
     * NOTE(review): this field is public and not final, while DEFAULT_EMBED_FOLDER_IN_PATH and
     * DEFAULT_EMBED_FOLDER_IN_WINPATH are derived from it in static initializers; reassigning it
     * later would leave those two out of step.
     */
    public static String DEFAULT_EMBED_FOLDER = "xtext";

    /**
     * Retrieves a previously converted file saved in embedded mode, so the
     * original does not have to be converted again.  The conversion is looked
     * for in the "xtext" folder beside the original.
     *
     * @param obj item to retrieve from cache
     * @return doc ConvertedDocument from cache, otherwise null
     * @throws IOException  on err
     */
    public static ConvertedDocument getEmbeddedConversion(File obj) throws IOException {

        String parent = obj.getParent();
        if (parent == null) {
            // A bare relative name has no parent component.  Resolve it against the
            // working directory rather than building a path under a folder named "null".
            parent = obj.getAbsoluteFile().getParent();
        }
        String cacheFolder = makePath(PathManager.fixPath(parent), DEFAULT_EMBED_FOLDER);

        // I now have a path name that was likely the one stored in cache.
        // Return the ConvertedDocument if exists at this path.
        // Otherwise it is not in cache.... so converter must convert and save.
        //
        // This instance finds file:./xtext/F.ext.txt  for a file:./F.ext
        //
        return _uncacheConversion(cacheFolder, obj.getName());
    }

    /**
     * Returns the part of a path that starts at the first occurrence of
     * <code>root</code>, with "/" separators and without a leading "/".  The
     * root itself is part of the result:<pre>
     *
     * getRelativePath( "b",  "/a/b/c.txt") ===> b/c.txt
     * getRelativePath( "/a", "/a/b/c.txt") ===> a/b/c.txt</pre>
     *
     * The root is matched as plain text anywhere in the path.  If it does not
     * occur at all, the path argument is handed back exactly as given.
     *
     * @param root text to locate in the path, typically a folder name
     * @param p full path to an item.
     * @return path starting at root, or p itself when root does not occur in it
     */
    public static String getRelativePath(String root, String p) {
        final String normalized = PathManager.fixPath(p);
        final int at = normalized.indexOf(root); // Possibly a relative root.
        if (at >= 0) {
            return trimLeadingSlash(normalized.substring(at));
        }
        // "root" not found in p. No relation between root and path given.
        return p;
    }

    /**
     * Looks in a folder for the saved conversion of a named original.
     * An original "F.ext" is sought as "F.ext.txt"; an original that already
     * ends in ".txt" is sought as "&lt;base name&gt;-utf8.txt".
     *
     * @param path  containing folder
     * @param fname  original file name sought
     * @return previously converted document or null if not found.
     * @throws IOException on error, likely from getCachedDocument
     */
    private static ConvertedDocument _uncacheConversion(String path, String fname)
            throws IOException {
        final String cachedName = fname.endsWith(".txt")
                ? FilenameUtils.getBaseName(fname) + "-utf8.txt"
                : fname + ".txt";

        final File target = new File(path + "/" + cachedName);
        return target.exists() ? getCachedDocument(target) : null;
    }

    /**
     * This provides some means for retrieving previously converted files, to
     * avoid converting them again.  This method takes the arguments and tries to infer the
     * actual location of a cached item.
     * TODO:  For compound documents this needs more work.
     *
     * @param cacheDir  shadow dir or separate archive path
     * @param inputDir  original input folder where this item came from
     * @param obj  the requested file.
     * @return the cached version of the conversion; null if not found, if no conversion was made,
     *         or if the folder containing obj cannot be determined.
     * @throws IOException on err
     */
    public static ConvertedDocument getCachedConversion(String cacheDir, String inputDir, File obj)
            throws IOException {
        File parent = obj.getParentFile();
        if (parent == null) {
            // A bare relative name has no parent component; resolve it against
            // the working directory instead of failing on a null parent.
            parent = obj.getAbsoluteFile().getParentFile();
        }
        if (parent == null) {
            // No containing folder at all (a file-system root): nothing can be cached for it.
            return null;
        }
        String rel_path = getRelativePath(inputDir, parent.getAbsolutePath());

        // This folder contains the cached Item.
        String cacheFolder = makePath(cacheDir, rel_path);

        // I now have a path name that was likely the one stored in cache.
        // Return the ConvertedDocument if exists at this path.
        // Otherwise it is not in cache.... so converter must convert and save.
        //
        // This instance finds file:/<output-path>/<input-dir-name>/<relative-path-to-file>.txt
        //                 (shorter: /O/D/relpath/file.ext.txt )
        //
        //         for     binary /inputpath/D/relpath/file.ext
        //
        //   you gave me:  C:\data\source\
        //   you said output goes to
        //                 D:\archives\
        //
        //   I found file   C:\data\source\something\file.doc
        //
        //   Which is to be cached at:
        //                 D:\archives\source\something\file.doc.txt
        //                 ^^^^^^^^^^^|inputdir|relpath^^^^^^^^^^^^^^
        //                 outputdir  |        |
        //
        //   IFFF a conversion happened.
        //   If no conversion was made, then the original file is either
        //   unconvertable or it is already valid UTF-8 or ASCII-only text/plain.
        //
        return _uncacheConversion(cacheFolder, obj.getName());
    }

    /**
     * Joins the absolute path of a folder and a file name with the universal
     * path separator.  See {@link #makePath(String, String)}.
     *
     * @param dir containing dir
     * @param fname file name
     * @return full path.
     */
    protected static String makePath(File dir, String fname) {
        final String base = dir.getAbsolutePath();
        return makePath(base, fname);
    }

    /**
     * Joins a folder and a file name with the universal path separator.
     * Apache Commons file utils "concat(dir, file)" makes a mess of file names,
     * and Java supports "/" equally well on all platforms, so there is no
     * apparent need for platform specific separators in this context.
     *
     * @param dir containing dir
     * @param fname file name
     * @return full path.
     */
    protected static String makePath(String dir, String fname) {
        return dir + ConvertedDocument.UNIVERSAL_PATH_SEP + fname;
    }

    /**
     * Builds the path of an item inside the embedded conversion folder of a
     * container: container + separator + "xtext" + separator + item.
     *
     * @param container folder holding the original
     * @param item name of the item inside the embedded folder
     * @return the combined path
     */
    public static String getEmbeddedPath(String container, String item) {
        final String embedFolder = makePath(container, DEFAULT_EMBED_FOLDER);
        return makePath(embedFolder, item);
    }

    /** The embedded folder name wrapped in "/" on both sides, for matching inside "/"-separated paths. */
    public final static String DEFAULT_EMBED_FOLDER_IN_PATH = String.format("/%s/",
            DEFAULT_EMBED_FOLDER);
    /** The embedded folder name wrapped in "\" on both sides, for matching inside Windows-style paths. */
    public final static String DEFAULT_EMBED_FOLDER_IN_WINPATH = String.format("\\%s\\",
            DEFAULT_EMBED_FOLDER);

    /**
     * Simple test to see if a file path contains the embedded folder, written
     * either as "/xtext/" (unix style) or as "\xtext\" (windows style, less
     * often used).  The match is case sensitive and may occur anywhere in the path.
     *
     * @param filepath path to test
     * @return true if the path contains the embedded folder as a path segment
     */
    public final static boolean isXTextCache(String filepath) {
        return filepath.contains(DEFAULT_EMBED_FOLDER_IN_PATH)
                || filepath.contains(DEFAULT_EMBED_FOLDER_IN_WINPATH);
    }

    /**
     * If a File is provided, this only checks the immediate parent folder.
     * A file given without a parent component (a bare relative name) is
     * resolved against the working directory first.
     *
     * @param obj path to test.
     * @return  true if file parent is "xtext", case sensitive; false if the
     *          file has no parent folder at all.
     */
    public final static boolean isXTextCache(File obj) {
        File parent = obj.getParentFile();
        if (parent == null) {
            // new File("F.txt").getParentFile() is null; look at the folder it actually lives in.
            parent = obj.getAbsoluteFile().getParentFile();
        }
        return parent != null && DEFAULT_EMBED_FOLDER.equals(parent.getName());
    }

    /**
     * Given a path, retrieve a document.  Convenience form of
     * {@link #getCachedDocument(File)}.
     *
     * @param filepath file to retrieve.
     * @return the cached document
     * @throws IOException on err
     */
    public static ConvertedDocument getCachedDocument(String filepath) throws IOException {
        final File cached = new File(filepath);
        return getCachedDocument(cached);
    }

    /**
     * Given a path, retrieve a document parsing out the XText format: the
     * plain text comes first, then a blank line, then a trailing header made of
     * the XText label followed by Base64-encoded JSON metadata.
     *
     * @param fconv file conversion path to check
     * @return the cached document; null if the file does not end with an XText header
     * @throws IOException on err
     */
    public static ConvertedDocument getCachedDocument(File fconv) throws IOException {
        String buf = FileUtility.readFile(fconv);
        int x = buf.lastIndexOf("\n\n");
        if (x < 0) {
            // No blank line anywhere, hence no trailing header: NOT an XText cache.
            // (substring(-1) would otherwise throw.)
            return null;
        }

        // Get Base64 encoded header
        String header = buf.substring(x).trim();
        if (!header.startsWith(ConvertedDocument.XT_LABEL)) {
            // NOT an XText cache
            return null;
        }
        // Decode JSON
        // NOTE(review): the decoded bytes become a String using the platform default
        // charset; confirm this matches the charset used when the header is written.
        String json = new String(Base64.decodeBase64(header.substring(ConvertedDocument.XT_LABEL
                .length())));
        JSONObject doc_meta = JSONObject.fromObject(json);
        String fpath = doc_meta.getString("filepath");

        ConvertedDocument doc = new ConvertedDocument(new File(fpath));
        doc.meta = doc_meta;

        // Set plain text buffer
        doc.buffer = buf.substring(0, x);

        // Retrieve values for useful attrs.
        doc.encoding = doc.getProperty("encoding");
        doc.filepath = fpath; /* note: path should already have been normalized, using "/" */
        doc.filesize = Long.parseLong(doc.getProperty("filesize"));
        doc.textpath = fconv.getAbsolutePath();
        doc.is_cached = true;
        doc.is_converted = true;

        doc.filetime = new Date(Long.parseLong(doc.getProperty("filetime")));
        doc.setCreateDate();

        // DocInput requirement: provided id + file paths
        // If there is another identifier to use, the caller will have an opportunity to set it
        // when they get the instance.
        //
        // optString() yields an empty string, not null, when the key is absent,
        // so test for emptiness to fall back to the file path.
        String idvalue = doc.meta.optString("xtext_id");
        boolean hasId = idvalue != null && !idvalue.isEmpty();
        doc.setId(hasId ? idvalue : doc.filepath);

        return doc;
    }

}