/** * The HMM-Based Speech Synthesis System (HTS) * HTS Working Group * * Department of Computer Science * Nagoya Institute of Technology * and * Interdisciplinary Graduate School of Science and Engineering * Tokyo Institute of Technology * * Portions Copyright (c) 2001-2006 * All Rights Reserved. * * Portions Copyright 2000-2007 DFKI GmbH. * All Rights Reserved. * * Permission is hereby granted, free of charge, to use and * distribute this software and its documentation without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of this work, and to permit persons to whom this * work is furnished to do so, subject to the following conditions: * * 1. The source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Any modifications to the source code must be clearly * marked as such. * * 3. Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the * following disclaimer in the documentation and/or other * materials provided with the distribution. Otherwise, one * must contact the HTS working group. * * NAGOYA INSTITUTE OF TECHNOLOGY, TOKYO INSTITUTE OF TECHNOLOGY, * HTS WORKING GROUP, AND THE CONTRIBUTORS TO THIS WORK DISCLAIM * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT * SHALL NAGOYA INSTITUTE OF TECHNOLOGY, TOKYO INSTITUTE OF * TECHNOLOGY, HTS WORKING GROUP, NOR THE CONTRIBUTORS BE LIABLE * FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTUOUS * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR * PERFORMANCE OF THIS SOFTWARE. 
*/

package marytts.cart.io;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.util.Scanner;
import java.util.StringTokenizer;

import marytts.cart.CART;
import marytts.cart.DecisionNode;
import marytts.cart.DecisionNode.BinaryByteDecisionNode;
import marytts.cart.LeafNode;
import marytts.cart.LeafNode.PdfLeafNode;
import marytts.cart.Node;
import marytts.exceptions.MaryConfigurationException;
import marytts.features.FeatureDefinition;
import marytts.htsengine.HMMData.PdfFileFormat;
import marytts.htsengine.PhoneTranslator;
import marytts.util.MaryUtils;

import org.apache.log4j.Logger;

/**
 * Reader functions for CARTs in HTS format.
 *
 * Loads one CART per HMM state from an HTS tree text file (e.g. tree-mgc.inf) and fills the
 * leaf nodes with the mean/variance vectors read from the corresponding HTS pdf binary file
 * (e.g. mgc.pdf).
 *
 * @author Marcela Charfuelan
 */
public class HTSCARTReader {

    private FeatureDefinition featDef;
    private PhoneTranslator phTrans;
    private Logger logger = MaryUtils.getLogger("HTSCARTReader");

    // The vector size of the mean and variance on the leaves of the tree;
    // set as a side effect of load() / loadPdfs().
    private int vectorSize;

    /**
     * @return the size of the mean/variance vectors on the leaves, as read from the pdf header
     */
    public int getVectorSize() {
        return vectorSize;
    }

    /**
     * Load the CARTs from the given streams, one tree per HMM state.
     *
     * @param numStates
     *            number of states in the HTS model; one CART is created per state.
     * @param treeStream
     *            the HTS tree text file, example tree-mgc.inf.
     * @param pdfStream
     *            the corresponding HTS pdf binary file, example mgc.pdf.
     * @param fileFormat
     *            which pdf layout to expect (dur, join, lf0, mgc, str, mag).
     * @param featDefinition
     *            the feature definition.
     * @param phTranslator
     *            a phone translator, used to map HTS-safe names back to MARY values.
     * @return one CART per state; call {@link #getVectorSize()} afterwards for the
     *         size of the mean and variance vectors on the leaves.
     * @throws IOException
     *             if a problem occurs while reading the streams
     * @throws MaryConfigurationException
     *             if the tree or pdf content is inconsistent
     */
    public CART[] load(int numStates, InputStream treeStream, InputStream pdfStream, PdfFileFormat fileFormat,
            FeatureDefinition featDefinition, PhoneTranslator phTranslator) throws IOException, MaryConfigurationException {

        featDef = featDefinition;
        phTrans = phTranslator;

        // create the number of carts it is going to read
        CART[] treeSet = new CART[numStates];
        for (int i = 0; i < numStates; i++)
            treeSet[i] = new CART();

        /*
         * First load the pdfs, so that when the trees are created the leaf nodes can be
         * filled with the corresponding means and variances.
         *
         * pdf format: pdf[numStates][numPdfs][numStreams][2*vectorSize]
         * -------------------------------------------------------------------
         * for dur       : pdf[ 1 ][numPdfs][ 1 ][2*numStates]
         * for mgc,str,mag: pdf[numStates][numPdfs][ 1 ][2*vectorSize]
         * for joinModel : pdf[ 1 ][numPdfs][ 1 ][2*vectorSize]
         * for lf0       : pdf[numStates][numPdfs][numStreams][ 4 ]
         * for gv-switch : pdf[ 1 ][ 1 ][ 1 ][ 1 ]
         * -------------------------------------------------------------------
         * - numPdf corresponds to the unique leaf node id.
         * - 2*vectorSize means that mean and variance are in the same vector.
         * - 4 in lf0 means 0: mean, 1: variance, 2: voiced weight, 3: unvoiced weight.
         */
        double[][][][] pdf = loadPdfs(numStates, pdfStream, fileFormat);

        assert featDefinition != null : "Feature Definition was not set";

        /* read lines of the tree-*.inf file; close the reader even if parsing fails */
        BufferedReader s = new BufferedReader(new InputStreamReader(treeStream, "UTF-8"));
        try {
            String line;
            // skip the questions section; a new state is indicated by {*}[2], {*}[3], ...
            while ((line = s.readLine()) != null) {
                if (line.indexOf("QS") < 0)
                    break;
            }
            while ((line = s.readLine()) != null) {
                if (line.indexOf("{*}") >= 0) { /* this is the indicator of a new state-tree */
                    String aux = line.substring(line.indexOf("[") + 1, line.indexOf("]"));
                    int state = Integer.parseInt(aux);
                    // loads one cart tree per state; HTS state numbering starts at 2
                    treeSet[state - 2].setRootNode(loadStateTree(s, pdf[state - 2]));

                    // Now count all data once, so that getNumberOfData()
                    // will return the correct figure.
                    if (treeSet[state - 2].getRootNode() instanceof DecisionNode)
                        ((DecisionNode) treeSet[state - 2].getRootNode()).countData();

                    logger.debug("load: CART[" + (state - 2) + "], total number of nodes in this CART: "
                            + treeSet[state - 2].getNumNodes());
                }
            }
        } finally {
            s.close();
        }

        /* check that the tree was correctly loaded */
        // NOTE(review): treeSet.length == numStates by construction, so this can only
        // trigger for numStates == 0; kept for compatibility with the original check.
        if (treeSet.length == 0) {
            throw new IOException("LoadTreeSet: error no trees loaded");
        }

        return treeSet;
    }

    /**
     * Load one tree for a single state.
     *
     * Each tree section in the .inf file is delimited by "{" and "}", and each inner line
     * contains four fields: node index, question (featureName=featureValue), NO node and
     * YES node.
     *
     * @param s
     *            text reader positioned at the start of the state tree (whole tree-*.inf file)
     * @param pdf
     *            the pdfs for this state, pdf[numPdfs][numStreams][2*vectorSize]
     * @return the root node of the parsed tree
     * @throws IOException
     *             if reading fails
     * @throws MaryConfigurationException
     *             if the tree content is malformed or truncated
     */
    private Node loadStateTree(BufferedReader s, double[][][] pdf) throws IOException, MaryConfigurationException {

        // create an empty binary decision node with unique id=0; this will be the root node
        Node rootNode = new DecisionNode.BinaryByteDecisionNode(0, featDef);
        rootNode.setIsRoot(true);

        int ndec = 0; // number of decision nodes parsed
        int nleaf = 0; // number of leaf nodes created

        /* next line for this state tree must be { */
        String aux = s.readLine();
        if (aux == null)
            throw new MaryConfigurationException("LoadStateTree: unexpected end of tree file, expected '{'");

        if (aux.indexOf("{") >= 0) {
            /* last line for this state tree must be } */
            while ((aux = s.readLine()) != null && aux.indexOf("}") < 0) {
                /* parse this line, it contains 4 fields: */
                /* 1: node index  2: question name  3: NO node  4: YES node */
                StringTokenizer sline = new StringTokenizer(aux);

                /* 1: gets index node and looks for the node whose idx = buf */
                String buf = sline.nextToken();
                int id;
                if (buf.startsWith("-")) {
                    id = Integer.parseInt(buf.substring(1));
                    ndec++;
                } else if (buf.contentEquals("0"))
                    id = 0;
                else
                    throw new MaryConfigurationException("LoadStateTree: line does not start with a decision node (-id), line="
                            + aux);

                // 1. find the node in the tree; it has to exist already.
                Node node = findDecisionNode(rootNode, id);
                if (node == null)
                    throw new MaryConfigurationException("LoadStateTree: Node not found, index = " + buf);

                /* 2: gets question name and question value */
                buf = sline.nextToken();
                String[] fea_val = buf.split("="); /* splits featureName=featureValue */
                // validates the feature name against the feature definition
                featDef.getFeatureIndex(fea_val[0]);

                /* Replace back punctuation values. */
                /* what about tricky phones, if using halfphones it would not be necessary */
                if (fea_val[0].contentEquals("sentence_punc") || fea_val[0].contentEquals("prev_punctuation")
                        || fea_val[0].contentEquals("next_punctuation")) {
                    fea_val[1] = phTrans.replaceBackPunc(fea_val[1]);
                } else if (fea_val[0].contains("tobi_")) {
                    fea_val[1] = phTrans.replaceBackToBI(fea_val[1]);
                } else if (fea_val[0].contains("phone")) {
                    fea_val[1] = phTrans.replaceBackTrickyPhones(fea_val[1]);
                }

                // add featureName and featureValue to the decision node
                ((BinaryByteDecisionNode) node).setFeatureAndFeatureValue(fea_val[0], fea_val[1]);

                // attach the daughter nodes: NO is daughter index 1, YES is daughter index 0
                if (addDaughter(node, sline.nextToken(), 1, pdf)) /* NO index */
                    nleaf++;
                if (addDaughter(node, sline.nextToken(), 0, pdf)) /* YES index */
                    nleaf++;
            } /* while there is another line and the line does not contain } */
        } /* if not "{" */

        logger.debug("loadStateTree: loaded CART contains " + (ndec + 1) + " Decision nodes and " + nleaf + " Leaf nodes.");
        return rootNode;
    } /* method loadStateTree() */

    /**
     * Attach one daughter to a decision node, parsed from the token {@code buf}.
     *
     * A token starting with "-" names a decision node ("-id"); otherwise the token is a
     * quoted leaf name ending in "_NN"" whose trailing number is the 1-based pdf index.
     *
     * @param node
     *            the parent decision node
     * @param buf
     *            the NO or YES field of the tree line
     * @param daughterIndex
     *            1 for the NO branch, 0 for the YES branch
     * @param pdf
     *            the pdfs for this state
     * @return true if a leaf node was created, false for a decision node
     */
    private boolean addDaughter(Node node, String buf, int daughterIndex, double[][][] pdf)
            throws MaryConfigurationException {
        if (buf.startsWith("-")) { // Decision node
            int iaux = Integer.parseInt(buf.substring(1));
            // create an empty binary decision node with this unique id
            BinaryByteDecisionNode auxnode = new DecisionNode.BinaryByteDecisionNode(iaux, featDef);
            ((DecisionNode) node).replaceDaughter(auxnode, daughterIndex);
            return false;
        } else { // LeafNode
            int iaux = Integer.parseInt(buf.substring(buf.lastIndexOf("_") + 1, buf.length() - 1));
            // create a PdfLeafNode filled with the mean/variance vector for this leaf id
            PdfLeafNode auxnode = new LeafNode.PdfLeafNode(iaux, pdf[iaux - 1]);
            ((DecisionNode) node).replaceDaughter(auxnode, daughterIndex);
            return true;
        }
    }

    /**
     * Depth-first search for the decision node with the given unique id.
     *
     * @param node
     *            root of the (sub)tree to search
     * @param numId
     *            unique decision node id to look for
     * @return the matching decision node, or null if not found in this subtree
     */
    private Node findDecisionNode(Node node, int numId) {
        if (node instanceof DecisionNode) {
            DecisionNode dnode = (DecisionNode) node;
            if (dnode.getUniqueDecisionNodeId() == numId)
                return node;
            for (int i = 0; i < dnode.getNumberOfDaugthers(); i++) {
                Node aux = findDecisionNode(dnode.getDaughter(i), numId);
                if (aux != null)
                    return aux;
            }
        }
        return null;
    } /* method findDecisionNode */

    /**
     * Load pdfs (means and variances); the leaf index corresponds to the unique leaf node id:
     * pdf --&gt; [#states][#leaves][#streams][vectorsize]
     *
     * All values are read big-endian via DataInputStream. The common header is:
     *   4-byte int: MSD flag
     *   4-byte int: number of streams
     *   4-byte int: dimension of the feature vector
     * followed, per state, by a 4-byte int with the number of leaf nodes for that state.
     *
     * Body layout by format:
     *   mgc/str/mag : per state and leaf, 2*vsize floats -- mean and variance stored
     *                 consecutively (mean[0], vari[0], mean[1], vari[1], ...), unpacked
     *                 here into [0..vsize-1] mean, [vsize..2*vsize-1] variance.
     *   dur/join    : one state only; per leaf 2*numStates floats, same mean/variance
     *                 interleaving (the vector size is the number of HMM states).
     *   lf0         : per state, leaf and stream, 4 floats: mean, variance, voiced
     *                 weight, unvoiced weight.
     *
     * @param numState
     *            number of HMM states (overridden from the header for dur/join)
     * @param pdfStream
     *            binary pdf stream
     * @param fileFormat
     *            which layout to expect
     * @return pdf[numState][numPdf][stream][vsize], or null for an unhandled format
     * @throws IOException
     *             if reading fails
     * @throws MaryConfigurationException
     *             if header values or weights are out of range
     */
    private double[][][][] loadPdfs(int numState, InputStream pdfStream, PdfFileFormat fileFormat)
            throws IOException, MaryConfigurationException {

        double[][][][] pdf = null; // pdf[numState][numPdf][stream][vsize]

        // TODO: how to make this loading more general? Different files have different formats.
        if (fileFormat == PdfFileFormat.dur || fileFormat == PdfFileFormat.join) {
            /*-------------------- load pdfs for duration --------------------*/
            DataInputStream data_in = new DataInputStream(new BufferedInputStream(pdfStream));
            logger.debug("loadPdfs reading model of type " + fileFormat);

            /* header: MSD flag, number of streams, vector size */
            int numMSDFlag = data_in.readInt(); // consumed but not used
            int numStream = data_in.readInt();
            vectorSize = data_in.readInt();
            // for duration the stream count from the header is used as the number of HMM states
            numState = numStream;

            /* check number of states */
            if (numState < 0)
                throw new MaryConfigurationException("loadPdfs: #HMM states must be positive value.");

            /* read the number of duration pdfs */
            int numDurPdf = data_in.readInt();
            logger.debug("loadPdfs: numPdf[state:0]=" + numDurPdf);

            /* Just one state and one stream; the vector size is 2*numState because the   */
            /* first numState entries hold the mean and the second numState entries hold  */
            /* the diagonal variance -- mean and variance are copied into one vector.     */
            pdf = new double[1][numDurPdf][1][2 * numState];

            /* read pdfs (mean & variance) */
            // NOTE: in hts_engine v1.04 mean and variance are saved consecutively
            for (int i = 0; i < numDurPdf; i++) {
                for (int j = 0; j < numState; j++) {
                    pdf[0][i][0][j] = data_in.readFloat(); // read mean
                    pdf[0][i][0][j + numState] = data_in.readFloat(); // read variance
                }
            }
            data_in.close();

        } else if (fileFormat == PdfFileFormat.lf0) {
            /*-------------------- load pdfs for Log F0 --------------------*/
            DataInputStream data_in = new DataInputStream(new BufferedInputStream(pdfStream));
            logger.debug("loadPdfs reading model of type " + fileFormat);

            /* header: MSD flag, number of streams for f0 modelling, vector size */
            int numMSDFlag = data_in.readInt(); // consumed but not used
            int numStream = data_in.readInt();
            vectorSize = data_in.readInt();
            int lf0Stream = numStream;

            if (lf0Stream < 0)
                throw new MaryConfigurationException("loadPdfs: #stream for log f0 part must be positive value.");

            /* read the number of pdfs for each state position */
            pdf = new double[numState][][][];
            int[] numPdf = new int[numState];
            for (int i = 0; i < numState; i++) {
                numPdf[i] = data_in.readInt();
                logger.debug("loadPdfs: numPdf[state:" + i + "]=" + numPdf[i]);
                if (numPdf[i] < 0)
                    throw new MaryConfigurationException("loadPdfs: #lf0 pdf at state " + i + " must be positive value.");
                /* lf0 vector size = 4: mean, variance, voiced weight, and unvoiced weight */
                pdf[i] = new double[numPdf[i]][lf0Stream][4];
            }

            /* read lf0 pdfs (mean, variance and weights) */
            for (int i = 0; i < numState; i++) {
                for (int j = 0; j < numPdf[i]; j++) {
                    for (int k = 0; k < lf0Stream; k++) {
                        for (int l = 0; l < 4; l++) {
                            pdf[i][j][k][l] = data_in.readFloat();
                        }
                        /* pdf[i][j][k][0]: mean, [1]: variance */
                        double vw = pdf[i][j][k][2]; /* voiced weight */
                        double uvw = pdf[i][j][k][3]; /* unvoiced weight */
                        if (vw < 0.0 || uvw < 0.0 || vw + uvw < 0.99 || vw + uvw > 1.01)
                            throw new MaryConfigurationException("loadPdfs: voiced/unvoiced weights must be within 0.99 to 1.01.");
                    }
                }
            }
            data_in.close();

        } else if (fileFormat == PdfFileFormat.mgc || fileFormat == PdfFileFormat.str || fileFormat == PdfFileFormat.mag) {
            /*-------------------- load pdfs for mgc, str or mag --------------------*/
            DataInputStream data_in = new DataInputStream(new BufferedInputStream(pdfStream));
            logger.debug("loadPdfs reading model of type " + fileFormat);

            /* header: MSD flag, number of streams, vector size for spectrum */
            int numMSDFlag = data_in.readInt(); // consumed but not used
            int numStream = data_in.readInt();
            vectorSize = data_in.readInt();
            int vsize = vectorSize;

            if (vsize < 0)
                throw new MaryConfigurationException("loadPdfs: vector size of pdf must be positive.");

            /* now we need the number of pdfs for each state */
            pdf = new double[numState][][][];
            int[] numPdf = new int[numState];
            for (int i = 0; i < numState; i++) {
                numPdf[i] = data_in.readInt();
                logger.debug("loadPdfs: numPdf[state:" + i + "]=" + numPdf[i]);
                if (numPdf[i] < 0)
                    throw new MaryConfigurationException("loadPdfs: #pdf at state " + i + " must be positive value.");
                /* 2*vsize because mean and diagonal variance are allocated in one vector */
                pdf[i] = new double[numPdf[i]][numStream][2 * vsize];
            }

            /* read pdfs (mean, variance) */
            // NOTE: in hts_engine v1.04 mean and variance are saved consecutively:
            // mean[0], vari[0], mean[1], vari[1], ...; unpacked here into
            // [0..vsize-1] = mean, [vsize..2*vsize-1] = variance.
            for (int i = 0; i < numState; i++) {
                for (int j = 0; j < numPdf[i]; j++) {
                    for (int k = 0; k < vsize; k++) {
                        pdf[i][j][0][k] = data_in.readFloat(); // [0] corresponds to stream, just one here
                        pdf[i][j][0][k + vsize] = data_in.readFloat();
                    }
                }
            }
            data_in.close();
        }

        return pdf;
    } /* method loadPdfs */

    /**
     * Stand-alone smoke test: loads a duration tree/pdf pair from hard-coded paths and
     * prints the resulting vector size.
     */
    public static void main(String[] args) throws IOException, InterruptedException {
        /* configure log info */
        org.apache.log4j.BasicConfigurator.configure();

        String contextFile = "/project/mary/marcela/openmary/lib/voices/hsmm-slt/cmu_us_arctic_slt_a0001.pfeats";

        /* read the whole context features file into a string */
        Scanner context = new Scanner(new BufferedReader(new FileReader(contextFile)));
        StringBuilder strContext = new StringBuilder();
        while (context.hasNext()) {
            strContext.append(context.nextLine());
            strContext.append("\n");
        }
        context.close();

        FeatureDefinition feaDef = new FeatureDefinition(new BufferedReader(new StringReader(strContext.toString())), false);

        int numStates = 5;
        String trickyPhones = "/project/mary/marcela/openmary/lib/voices/hsmm-slt/trickyPhones.txt";
        String treefile = "/project/mary/marcela/openmary/lib/voices/hsmm-slt/tree-dur.inf";
        String pdffile = "/project/mary/marcela/openmary/lib/voices/hsmm-slt/dur.pdf";

        // Check if there are tricky phones, and create a PhoneTranslator object
        PhoneTranslator phTranslator = new PhoneTranslator(new FileInputStream(trickyPhones));

        HTSCARTReader htsReader = new HTSCARTReader();
        try {
            htsReader.load(numStates, new FileInputStream(treefile), new FileInputStream(pdffile), PdfFileFormat.dur,
                    feaDef, phTranslator);
            int vSize = htsReader.getVectorSize();
            System.out.println("loaded " + pdffile + " vector size=" + vSize);
        } catch (Exception e) {
            // print the full stack trace so failures are diagnosable (was: message only)
            e.printStackTrace();
        }
    }
}