// RDFCompare.java example

package org.nines;

import java.io.IOException;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.apache.log4j.xml.DOMConfigurator;

import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;

/**
 * RDF Compare will perform comparisons on the target archive and the main SOLR index.
 * 
 * @author loufoster
 *
 */
public class RDFCompare {

    // Configuration handed to the constructor; read for the archive name, field lists,
    // page size, core name and SOLR base URL.
    private RDFIndexerConfig config;
    // Set by compareArchive() when the requested field list contains "text" or is "*".
    private boolean includesText = false;
    // log4j logger named "compare"; used for general compare output.
    private Logger log;
    // log4j logger named "compareTxt"; used for text-field comparison output.
    private Logger txtLog;
    // UTF-8 wrapper around System.out, or null if the wrapper could not be created.
    private PrintStream sysOut;
    // Pending error messages keyed by document uri (or the special key "txt"), kept in
    // insertion order; written out and cleared by logErrors().
    private LinkedHashMap<String, List<String>> errors = new LinkedHashMap<String, List<String>>();
    // Number of errors recorded through addError() under keys other than "txt".
    private int errorCount = 0;
    // Number of text-comparison errors written to txtLog.
    private int txtErrorCount = 0;
    // Client used to page documents out of the SOLR cores.
    private SolrClient solrClient;

    //  private static final ArrayList<String> LARGE_TEXT_ARCHIVES = new ArrayList<String>( Arrays.asList(
    //      "PQCh-EAF", "amdeveryday", "amdecj", "oldBailey" ));

    // Fields checked by validateRequiredFields() for archives that are not pages archives.
    private static final ArrayList<String> REQUIRED_FIELDS = new ArrayList<String>(Arrays.asList("title_sort", "title",
        "genre", "archive", "url", "federation", "year_sort", "year_sort_asc", "year_sort_desc", "freeculture", "is_ocr"));
    // Fields checked by validateRequiredFields() when config.isPagesArchive() is true.
    private static final ArrayList<String> REQUIRED_PAGES_FIELDS = new ArrayList<String>(Arrays.asList("text", "page_num", "page_of"));

    /**
     * Build a comparer for the archive described by the given configuration.
     * Publishes the three log file names as system properties, configures log4j
     * from the <code>log4j-compare.xml</code> system resource, wraps System.out in a
     * UTF-8 print stream and creates the SOLR client.
     *
     * @param config indexer configuration; supplies the log file base name and the SOLR base URL
     */
    public RDFCompare(RDFIndexerConfig config) {
        this.config = config;

        // The properties are set before log4j is configured; presumably
        // log4j-compare.xml refers to them for its file appenders (not visible here).
        final String baseName = this.config.getLogfileBaseName("");
        System.setProperty("compare.log.file", baseName + "_compare.log");
        System.setProperty("compare.text.log.file", baseName + "_compare_text.log");
        System.setProperty("skipped.log.file", baseName + "_skipped.log");
        final URL log4jConfig = ClassLoader.getSystemResource("log4j-compare.xml");
        DOMConfigurator.configure(log4jConfig);

        // loggers used by the compare run
        this.log = Logger.getLogger("compare");
        this.txtLog = Logger.getLogger("compareTxt");

        // console output must cope with utf-8; logInfo() falls back to
        // plain System.out when the wrapper is null
        PrintStream utf8Out;
        try {
            utf8Out = new PrintStream(System.out, true, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            utf8Out = null;
        }
        this.sysOut = utf8Out;

        // connection to solr
        this.solrClient = new SolrClient(this.config.solrBaseURL);
    }

    /**
     * Perform the comparison based on the config passed into the c'tor.
     * <p>
     * Documents are read one page at a time from the reindexed core
     * (<code>config.coreName()</code>) and from the main index core ("pages" for
     * pages archives, "resources" otherwise). After each page the documents that
     * are present on both sides are compared and dropped from the working lists.
     * When both sides are exhausted, summary statistics are logged and the
     * skipped-document report is produced.
     * <p>
     * Text size statistics are accumulated as <code>long</code> values so that
     * archives holding more than 2GB of text do not overflow the counters.
     */
    public void compareArchive() {

        // log start time
        Date start = new Date();
        this.log.info("Started compare at " + start);
        logInfo("====== Scanning archive \"" + config.archiveName + "\" ====== ");

        // get the list of fields and determine if we are to include text
        String fl = config.getFieldList();
        if (fl.contains("text") || fl.equals("*")) {
            includesText = true;
        }

        // Start at beginning of list and return config.pageSize hits at a time
        int page = 0;
        int size = this.config.pageSize;
        List<JsonObject> archiveDocs = new ArrayList<JsonObject>();
        HashMap<String, JsonObject> indexHash = new HashMap<String, JsonObject>();
        Set<String> indexUris = new HashSet<String>();
        Set<String> archiveUris = new HashSet<String>();
        String reindexCore = config.coreName();
        String indexCore = this.config.isPagesArchive() ? "pages" : "resources";

        // Text size statistics. For every window size N, the text length of each
        // consecutive group of N documents is summed and the largest sum is kept.
        final int[] windows = { 2, 5, 10, 50, 100, 200, 500, 1000, 2000, 5000, 10000 };
        final long[] runningText = new long[windows.length];
        final long[] maxRunningText = new long[windows.length];
        long totalText = 0;
        int maxTextSize = 0;
        int docsWithText = 0;
        int count = 0;
        DecimalFormat df = new DecimalFormat();

        // read a page of docs back from index and archive. Compare the page hits.
        // If comparisons were complete, remove the docs from lists.
        // Repeat til all lists are gone.
        boolean archiveDone = false;
        boolean indexDone = false;
        while (true) {

            // get hits from archive, tally totals and check for end
            if (!archiveDone) {
                List<JsonObject> pageHits = this.solrClient.getResultsPage(reindexCore, config.archiveName, page,
                    size, fl, null, null);
                // an empty page always ends paging, even for a non-positive page size
                if (pageHits.isEmpty() || pageHits.size() < size) {
                    archiveDone = true;
                }

                for (JsonObject doc : pageHits) {
                    int thisSize = 0;
                    if (doc.has("text")) {
                        docsWithText++;
                        thisSize = doc.get("text").getAsString().length();
                        totalText += thisSize;
                        if (thisSize > maxTextSize) {
                            maxTextSize = thisSize;
                        }
                    }

                    // add this doc to every window; close each window that is now full
                    count++;
                    for (int i = 0; i < windows.length; i++) {
                        runningText[i] += thisSize;
                        if (count % windows[i] == 0) {
                            maxRunningText[i] = Math.max(maxRunningText[i], runningText[i]);
                            runningText[i] = 0;
                        }
                    }

                    // save off the doc and its uri
                    archiveDocs.add(doc);
                    archiveUris.add(doc.get("uri").getAsString());
                }
            }

            // get index docs
            if (!indexDone) {
                List<JsonObject> pageHits = this.solrClient.getResultsPage(indexCore, config.archiveName, page,
                    size, fl, null, null);
                if (pageHits.isEmpty() || pageHits.size() < size) {
                    indexDone = true;
                }

                // hash the indexed docs by uri to speed stuff up
                for (JsonObject doc : pageHits) {
                    String uri = doc.get("uri").getAsString();
                    indexHash.put(uri, doc);
                    indexUris.add(uri);
                }
            }

            // compare. This will also remove processed docs from each
            compareLists(indexHash, archiveDocs);

            // next page?
            if (archiveDone && indexDone) {
                break;
            }
            page++;
        }

        // account for the final, partially filled window of each size
        for (int i = 0; i < windows.length; i++) {
            maxRunningText[i] = Math.max(maxRunningText[i], runningText[i]);
        }

        // if there's stuff left in the archiveDocs, and we are looking at text, dump it
        if (archiveDocs.size() > 0 && this.includesText) {
            final String rule = "---------------------------------------------------------------------------------------------------------------";
            this.txtLog.info(" ============================= TEXT ADDED TO ARCHIVE ===========================");
            for (JsonObject doc : archiveDocs) {
                this.txtLog.info(rule);
                this.txtLog.info(" --- " + doc.get("uri").getAsString() + " ---");
                if (doc.has("text")) {
                    this.txtLog.info(doc.get("text").getAsString());
                    this.txtErrorCount++;
                }
            }
            this.txtLog.info(rule);
        }

        // done log some stats
        this.log.info("Total Docs Scanned: " + archiveUris.size() + ". Total Errors: " + this.errorCount + ".");
        this.log.info("  retrieved " + archiveUris.size() + " new objects;");
        this.log.info("  retrieved " + indexUris.size() + " old objects;");
        if (this.includesText) {
            this.txtLog.info("Total Docs Scanned: " + archiveUris.size() + ". Total Errors: " + this.txtErrorCount
                + ".");
        }
        this.txtLog.info("Largest Text Size: " + df.format(maxTextSize) + ".");
        this.txtLog.info("Number of Docs with Text: " + df.format(docsWithText) + ".");
        this.txtLog.info("Total Text Size: " + df.format(totalText) + ".");
        StringBuilder runningReport = new StringBuilder("Running Text Sizes:");
        for (int i = 0; i < windows.length; i++) {
            runningReport.append("\n").append(windows[i]).append("=").append(df.format(maxRunningText[i]));
        }
        this.txtLog.info(runningReport.toString());

        Date end = new Date();
        double durationSec = (end.getTime() - start.getTime()) / 1000.0;
        if (durationSec >= 60) {
            logInfo(String.format("JAVA Finished in %3.2f minutes.", (durationSec / 60.0)));
        } else {
            logInfo(String.format("JAVA Finished in %3.2f seconds.", durationSec));
        }

        // now check for skipped stuff
        doSkippedTest(indexUris, archiveUris);
    }

    /**
     * Write every pending error message through logInfo() and then empty the
     * pending error map. Messages stored under the key "txt" are written as-is;
     * messages for any other key are written indented beneath a
     * "---uri---" heading.
     */
    private void logErrors() {
        for (Map.Entry<String, List<String>> pending : this.errors.entrySet()) {
            final String key = pending.getKey();
            final boolean isTextBucket = key.equals("txt");

            // only real uris get a heading and indented messages
            if (!isTextBucket) {
                logInfo("---" + key + "---");
            }
            final String indent = isTextBucket ? "" : "    ";
            for (String line : pending.getValue()) {
                logInfo(indent + line);
            }
        }
        this.errors.clear();
    }

    /**
     * Compare the set of URIs in the index and the archive. Lists every URI that is
     * only in the original index ("Old") and every URI that is only in the reindexed
     * archive ("New"), followed by the totals. A skipped document is one that is in
     * the original index but not in the archive.
     * <p>
     * Neither argument is modified; the differences are computed on copies.
     *
     * @param indexUris Set of URIs from the main index
     * @param archiveUris Set of URIs from the reindexed archive
     */
    private void doSkippedTest(Set<String> indexUris, Set<String> archiveUris) {

        // set up logger just for skipped files
        Logger skippedLog = Logger.getLogger("skipped");

        Date started = new Date();
        skippedLog.info("Started: " + started);
        skippedLog.info("====== Scanning archive \"" + config.archiveName + "\" ====== ");
        skippedLog.info("retrieved " + archiveUris.size() + " new objects;");
        skippedLog.info("retrieved " + indexUris.size() + " old objects;");

        // work on copies so the caller's sets keep their full content
        Set<String> oldOnly = new HashSet<String>(indexUris);
        oldOnly.removeAll(archiveUris);
        Set<String> newOnly = new HashSet<String>(archiveUris);
        newOnly.removeAll(indexUris);

        for (String uri : oldOnly) {
            skippedLog.info("    Old: " + uri);
        }
        for (String uri : newOnly) {
            skippedLog.info("    New: " + uri);
        }

        skippedLog.info("Total not indexed: " + oldOnly.size() + ". Total new: " + newOnly.size() + ".");
    }

    /**
     * Walk the reindexed documents and, for each one that also exists in the
     * original index, validate and compare it, flush the collected errors and
     * drop the pair from both collections. Documents without a counterpart in
     * <code>indexHash</code> are left in <code>archiveDocs</code>.
     *
     * @param indexHash original index docs keyed by uri; compared entries are removed
     * @param archiveDocs docs from the reindexed archive; compared entries are removed
     */
    private void compareLists(HashMap<String, JsonObject> indexHash, List<JsonObject> archiveDocs) {

        for (Iterator<JsonObject> docs = archiveDocs.iterator(); docs.hasNext();) {

            final JsonObject archiveDoc = docs.next();
            final String uri = archiveDoc.get("uri").getAsString();
            final JsonObject indexDoc = indexHash.get(uri);

            // nothing to compare against (yet); keep the doc for a later pass
            if (indexDoc == null) {
                continue;
            }

            // required fields are only validated when every field is being compared
            final boolean fullCompare = this.config.ignoreFields.length() == 0
                && this.config.includeFields.equals("*");
            if (fullCompare) {
                validateRequiredFields(archiveDoc);
            }

            // field by field comparison; a failure is recorded as an error on this uri
            try {
                compareFields(uri, indexDoc, archiveDoc);
            } catch (Exception e) {
                addError(uri, "Threw exception during compareFields: "+e.toString() );
                final StringWriter trace = new StringWriter();
                e.printStackTrace(new PrintWriter(trace));
                addError(uri, "Stack Trace:\n\n"+trace.toString());
            }

            // write out what was found, then forget this pair
            logErrors();
            indexHash.remove(uri);
            docs.remove();
        }
    }

    /**
     * Walk through each field in the new doc and compare it with the
     * old. Log any differences.
     * <p>
     * The "text" field is delegated to compareText(). Every other field that exists
     * in both docs is removed from <code>indexDoc</code> as it is visited, so whatever
     * remains in <code>indexDoc</code> afterwards was not reindexed and is reported
     * (unless it is an ignored old field).
     *
     * @param uri uri of the document being compared; used as the error key
     * @param indexDoc document from the original index; visited fields are removed from it
     * @param doc document from the reindexed archive
     */
    private void compareFields(String uri, JsonObject indexDoc, JsonObject doc) {

        // loop over all keys in doc
        for (Entry<String, JsonElement> entry : doc.entrySet()) {

            // get key and do special handling for text fields
            String key = entry.getKey();
            if (key.equals("text")) {
                compareText(uri, indexDoc, doc);
                continue;
            }

            // grab new val
            String newVal = toSolrString(entry.getValue());

            // is this a new key?
            if (indexDoc.has(key) == false) {
                if (isIgnoredNewField(key) == false) {
                    addError(uri, key + " " + newVal.replaceAll("\n", " / ") + " introduced in reindexing.");
                }
                continue;
            }

            // get parallel val in indexDoc
            String oldVal = toSolrString(indexDoc.get(key));

            // dump the key from indexDoc so we can detect
            // unindexed values later
            indexDoc.remove(key);

            // don't compare score or indexing dates.
            if (key.equals("score") || key.equals("date_updated") || key.equals("date_created")
                || key.equals("_version_")) {
                continue;
            }

            // identical values need no further work
            if (newVal.equals(oldVal)) {
                continue;
            }

            // normalize whitespace on both sides and check again
            String escapedOrig = getProcessedOrigField(oldVal);
            String escapedNew = getProcessedReindexedField(newVal);
            if (escapedNew.equals(escapedOrig)) {
                continue;
            }

            // short values are dumped in full on a single error line
            if (oldVal.length() <= 30) {
                addError(uri, key + " mismatched: \"" + newVal.replaceAll("\n", " / ") + "\" (new)" + " vs. \""
                    + oldVal.replaceAll("\n", " / ") + "\" (old)");
                continue;
            }

            // too long for one line: log a summary
            addError(uri, key + " mismatched: length= " + newVal.length() + " (new)" + " vs. " + oldVal.length()
                + " (old)", true);

            // then find the first differing line and log it. The two values can have
            // different line counts, so a line missing on one side is treated as
            // empty instead of being indexed past the end of the shorter array.
            String[] oldLines = oldVal.split("\n");
            String[] newLines = newVal.split("\n");
            int lineCount = Math.max(oldLines.length, newLines.length);
            for (int i = 0; i < lineCount; i++) {
                String oldLine = i < oldLines.length ? oldLines[i] : "";
                String newLine = i < newLines.length ? newLines[i] : "";
                if (oldLine.equals(newLine) == false) {
                    addError(uri, "        at line " + i + ":\n" + "\"" + newLine + "\" vs.\n" + "\"" + oldLine
                        + "\"", true);
                    break;
                }
            }
        }

        // now see if there are any leftover fields in indexDoc
        // log them as not reindexed
        for (Entry<String, JsonElement> entry : indexDoc.entrySet()) {
            String key = entry.getKey();
            String val = toSolrString(entry.getValue());
            if (isIgnoredOldField(key) == false) {
                // keep the log readable: only the first 100 chars of the value
                if (val.length() > 100) {
                    val = val.substring(0, 100);
                }
                addError(uri, "Key not reindexed: " + key + "=" + val, true);
            }
        }
    }

    /**
     * Render a solr field value as a single string. A JSON array is rendered as
     * the string values of its elements joined with " | "; anything else is
     * returned through <code>getAsString()</code>.
     *
     * @param obj the JSON value of a field
     * @return The string data represented by the object
     */
    private final String toSolrString(final JsonElement obj) {
        if (!obj.isJsonArray()) {
            return obj.getAsString();
        }

        final StringBuilder joined = new StringBuilder();
        for (JsonElement item : (JsonArray) obj) {
            // the separator is only added once something has been written
            if (joined.length() > 0) {
                joined.append(" | ");
            }
            joined.append(item.getAsString());
        }
        return joined.toString();
    }

    /**
     * Compare just the TEXT field of the index and archive docs. Problems are
     * written to the text log and counted in <code>txtErrorCount</code>.
     * <p>
     * The "text" field is removed from <code>indexDoc</code> so that it is not
     * reported later as a field that was not reindexed.
     * <p>
     * For pages archives, text must exist on both sides. For other archives, a
     * missing new text is an error when the doc is flagged as having text
     * (<code>has_full_text</code> or <code>is_ocr</code> is true), and text that
     * appears or disappears between the two indexes is an error. When text exists
     * on both sides it is compared, first exactly and then after normalization.
     *
     * @param uri uri of the document being compared
     * @param indexDoc document from the original index; its "text" field is removed
     * @param doc document from the reindexed archive
     */
    private void compareText(String uri, JsonObject indexDoc, JsonObject doc) {

        String newTxt = null;
        if (doc.has("text")) {
            newTxt = doc.get("text").getAsString();
        }
        String oldTxt = null;
        if (indexDoc.has("text")) {
            oldTxt = indexDoc.get("text").getAsString();
            indexDoc.remove("text");
        }

        // log additional errors if no new text and doc is flagged
        // such that it must have text (ocr or full text)
        boolean compareTexts = true;
        if (this.config.isPagesArchive()) {
            if (newTxt == null) {
                this.txtLog.error(uri + ": is page data, but is missing page text in the new index.");
                this.txtErrorCount++;
                compareTexts = false;
            }
            if (oldTxt == null) {
                this.txtLog.error(uri + ": is page data, but is missing page text in the pages core.");
                this.txtErrorCount++;
                compareTexts = false;
            }
        } else {
            if (newTxt == null) {
                for (String flag : new String[] { "has_full_text", "is_ocr" }) {
                    // a doc may not carry the flag at all; that is not an error here
                    JsonElement flagVal = doc.get(flag);
                    if (flagVal == null || flagVal.isJsonNull()) {
                        continue;
                    }
                    // getAsString() gives the bare value; toString() would wrap a
                    // string-typed flag in quotes and never match
                    String val = flagVal.isJsonPrimitive() ? flagVal.getAsString() : flagVal.toString();
                    if (val.equalsIgnoreCase("true")) {
                        this.txtLog.error(uri + ": field " + flag + " is " + val
                            + " but full text does not exist.");
                        this.txtErrorCount++;
                        compareTexts = false;
                    }
                }
            }

            if (newTxt == null && oldTxt != null) {
                this.txtLog.error(uri + ":text field has disappeared from the new index. (old text size = "
                    + oldTxt.length() + ")");
                this.txtErrorCount++;
                compareTexts = false;
            } else if (newTxt != null && oldTxt == null) {
                this.txtLog.error(uri + ":text field has appeared in the new index.");
                this.txtErrorCount++;
                compareTexts = false;
            }
        }

        // nothing to compare unless text exists on both sides
        if (compareTexts && newTxt != null && oldTxt != null && newTxt.equals(oldTxt) == false) {

            // normalize both sides before declaring a mismatch
            String processedNew = getProcessedReindexedText(newTxt);
            String processedOld = getProcessedOrigText(oldTxt);
            if (processedOld.equals(processedNew) == false) {
                logMismatchedText(uri, processedOld, processedNew);
            }
        }
    }

    /**
     * Write a text mismatch report to the text log and count it as a text error.
     * The report shows up to 51 characters of each text, starting 4 characters
     * before the first difference (or at 0), plus the byte values of both excerpts.
     *
     * @param uri uri of the mismatched document
     * @param oldTxt text from the original index
     * @param newTxt text from the reindexed archive
     */
    private void logMismatchedText(final String uri, final String oldTxt, final String newTxt) {
        // back up a little from the first difference to give some context
        final int firstDiff = StringUtils.indexOfDifference(newTxt, oldTxt);
        final int from = Math.max(0, firstDiff - 4);

        final int newTo = Math.min(from + 51, newTxt.length());
        final int oldTo = Math.min(from + 51, oldTxt.length());
        final String newSnippet = newTxt.substring(from, newTo);
        final String oldSnippet = oldTxt.substring(from, oldTo);

        this.txtLog.error("==== " + uri + " mismatch at line 0 col " + from + ":");
        this.txtLog.error("(new " + newTxt.length() + ")");
        this.txtLog.error(newSnippet);
        this.txtLog.error("-- vs --");
        this.txtLog.error("(old " + oldTxt.length() + ")");
        this.txtLog.error(oldSnippet);
        this.txtLog.error("NEW: " + getBytesString(newSnippet));
        this.txtLog.error("OLD: " + getBytesString(oldSnippet));

        this.txtErrorCount++;
    }

    /**
     * Render the UTF-8 bytes of a string as space separated unsigned decimal
     * values. Output stops after the first byte that takes the result past
     * 45 characters.
     *
     * @param text the text to dump
     * @return the byte values, or "** ERROR **" if the text could not be encoded
     *         (an error is also recorded under the "txt" key)
     */
    private String getBytesString(String text) {
        final byte[] encoded;
        try {
            encoded = text.getBytes("UTF-8");
        } catch (Exception e) {
            addError("txt", "Invalid bytes in text: " + e.getMessage());
            return "** ERROR **";
        }

        final StringBuilder dump = new StringBuilder();
        for (byte b : encoded) {
            // mask to get the unsigned value of the byte
            dump.append(b & 0xFF).append(" ");
            if (dump.length() > 45) {
                break;
            }
        }
        return dump.toString();
    }

    /**
     * Normalize a field value from the original index before comparison.
     *
     * @param origVal value as read from the original index
     * @return the value after removeExtraWhiteSpace()
     */
    private String getProcessedOrigField(String origVal) {
        final String normalized = removeExtraWhiteSpace(origVal);
        return normalized;
    }

    /**
     * Normalize a field value from the reindexed archive before comparison.
     *
     * @param origVal value as read from the reindexed archive
     * @return the value after removeExtraWhiteSpace()
     */
    private String getProcessedReindexedField(String origVal) {
        final String normalized = removeExtraWhiteSpace(origVal);
        return normalized;
    }

    /**
     * Normalize text from the original index before comparison: doubled opening
     * quotes, doubled closing quotes and doubled daggers are each reduced to a
     * single character, runs of em dashes are reduced to one em dash, and
     * whitespace is normalized.
     *
     * @param origTxt text as read from the original index
     * @return the normalized text
     */
    private String getProcessedOrigText(String origTxt) {
        String val = origTxt.replaceAll("““", "“");
        // collapse to a single closing quote, mirroring the opening quote rule above
        val = val.replaceAll("””", "”");
        val = val.replaceAll("††", "†");
        val = val.replaceAll("\\—+", "—");
        return removeExtraWhiteSpace(val);
    }

    /**
     * Normalize text from the reindexed archive before comparison: doubled
     * opening quotes, doubled closing quotes and doubled daggers are each reduced
     * to a single character, runs of em dashes are reduced to one em dash, and
     * whitespace is normalized.
     *
     * @param srcTxt text as read from the reindexed archive
     * @return the normalized text
     */
    private String getProcessedReindexedText(String srcTxt) {
        String val = srcTxt.replaceAll("““", "“");
        // collapse to a single closing quote, mirroring the opening quote rule above
        val = val.replaceAll("””", "”");
        val = val.replaceAll("††", "†");
        val = val.replaceAll("\\—+", "—");
        return removeExtraWhiteSpace(val);
    }

    /**
     * Collapse every run of whitespace (spaces, tabs, line breaks) into a single
     * space. Leading and trailing whitespace is reduced to one space, not removed.
     *
     * @param srcTxt the text to normalize
     * @return the text with each whitespace run replaced by one space
     */
    private String removeExtraWhiteSpace(final String srcTxt) {
        // \s already covers tabs and newlines, so one pass is enough; once it has
        // run no tab or newline remains for any further replacement to match
        return srcTxt.replaceAll("\\s+", " ");
    }

    /**
     * EXCEPTION case. Fields that are known to be newly added by reindexing are
     * not reported as introduced.
     *
     * @param key name of a field present only in the reindexed doc
     * @return true if the field is "date_created" or "date_updated"
     */
    private boolean isIgnoredNewField(String key) {
        return key.equals("date_created") || key.equals("date_updated");
    }

    /**
     * Fields of the original index that are not reported when they are absent
     * from the reindexed doc.
     *
     * @param key name of a field left over in the original doc
     * @return true if the field is "batch"
     */
    private boolean isIgnoredOldField(String key) {
        return key.equals("batch");
    }

    /**
     * Record an error for a uri without asking for it to be appended at the tail.
     *
     * @param uri uri the error belongs to, or "txt"
     * @param err the error message
     */
    private void addError(String uri, String err) {
        final boolean tail = false;
        addError(uri, err, tail);
    }

    /**
     * Record an error message under the given key. Messages for the key "txt",
     * and messages with <code>tail</code> set, are appended; all others are
     * inserted at the front of the key's list. Every message not stored under
     * "txt" increments the error count.
     *
     * @param uri uri the error belongs to, or "txt"
     * @param err the error message
     * @param tail true to append the message instead of inserting it at the front
     */
    private void addError(String uri, String err, boolean tail) {
        // fetch the message list for this key, creating it on first use
        List<String> messages = this.errors.get(uri);
        if (messages == null) {
            messages = new ArrayList<String>();
            this.errors.put(uri, messages);
        }

        final boolean isTextBucket = uri.equals("txt");
        if (isTextBucket || tail) {
            messages.add(err);
        } else {
            messages.add(0, err);
        }

        if (!isTextBucket) {
            this.errorCount++;
        }
    }

    /**
     * Ensure that all required fields are present and contain data. The pages
     * field list is used for pages archives, the general list otherwise. A field
     * that is absent or JSON null is reported as missing; a field whose content
     * (for arrays, the concatenated content of all elements) is empty or only
     * whitespace is reported as all spaces.
     *
     * @param doc document from the reindexed archive
     */
    private void validateRequiredFields(JsonObject doc) {

        final List<String> reqFields = this.config.isPagesArchive() ? REQUIRED_PAGES_FIELDS : REQUIRED_FIELDS;
        final String uri = doc.get("uri").getAsString();

        for (String fieldName : reqFields) {
            final JsonElement docField = doc.get(fieldName);

            // make sure field is present
            if (docField == null || docField.isJsonNull()) {
                addError(uri, "required field: " + fieldName + " missing in new index");
                continue;
            }

            if (docField.isJsonArray()) {
                // an array needs children whose concatenated content has length
                final StringBuilder content = new StringBuilder();
                for (JsonElement item : docField.getAsJsonArray()) {
                    content.append(requiredFieldText(item));
                }
                if (content.toString().trim().length() == 0) {
                    addError(uri, "required ARR field: " + fieldName + " is all spaces in new index");
                }
            } else if (requiredFieldText(docField).trim().length() == 0) {
                addError(uri, "required STR field: " + fieldName + " is all spaces in new index");
            }
        }
    }

    /**
     * Text content of a JSON value for the purpose of the emptiness check:
     * empty for JSON null, the bare value for primitives (no surrounding quotes),
     * and the JSON rendering for nested arrays and objects.
     */
    private static String requiredFieldText(JsonElement element) {
        if (element.isJsonNull()) {
            return "";
        }
        return element.isJsonPrimitive() ? element.getAsString() : element.toString();
    }

    /**
     * Write a message to the compare log and echo it to the console. The UTF-8
     * console stream is used when it is available, plain System.out otherwise.
     *
     * @param msg the message to write
     */
    private void logInfo(final String msg) {
        log.info(msg);

        final PrintStream console = (this.sysOut != null) ? this.sysOut : System.out;
        console.println(msg);
    }
}