/** * Copyright 2000-2009 DFKI GmbH. * All Rights Reserved. Use is subject to license terms. * * This file is part of MARY TTS. * * MARY TTS is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, version 3 of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * */ package marytts.tools.transcription; import java.io.BufferedOutputStream; import java.io.BufferedReader; import java.io.DataOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.FileWriter; import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.io.PrintWriter; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Set; import javax.swing.table.AbstractTableModel; import marytts.fst.AlignerTrainer; import marytts.fst.FSTLookup; import marytts.fst.TransducerTrie; import marytts.modules.phonemiser.AllophoneSet; import marytts.util.MaryUtils; import marytts.util.io.FileUtils; /** * TranscriptionTableModel, an AbstractTableModel, stores transcription data * * @author sathish pammi * */ public class TranscriptionTableModel extends AbstractTableModel { private String[] columnNames = { "No.", "Word", "Transcription", "Functional" }; private Object[][] data; private Object[][] lastSavedData; // Data at the time of loading private boolean[] hasManualVerification; private boolean[] hasCorrectSyntax; private int editableColumns = 2; public TranscriptionTableModel() { this.data = new Object[20][4]; this.hasManualVerification = new boolean[20]; this.hasCorrectSyntax = new boolean[20]; for (int i = 0; i < 20; i++) { data[i][0] = ""; data[i][1] = ""; data[i][2] = ""; data[i][3] = Boolean.FALSE; setAsManualVerify(i, false); setAsCorrectSyntax(i, true); } lastSavedData = storeLastSavedData(); } public Object getDataAt(int x, int y) { return this.data[x][y]; } public TranscriptionTableModel(String fileName) throws Exception { String fileData = FileUtils.getFileAsString(new File(fileName), "UTF-8"); String[] words = fileData.split("\n"); this.data = new Object[words.length][4]; this.hasManualVerification = new boolean[words.length]; this.hasCorrectSyntax = new boolean[words.length]; for (int i = 0; i < words.length; i++) { data[i][0] = Integer.toString(i); data[i][1] = words[i]; data[i][2] = ""; data[i][3] = Boolean.FALSE; setAsManualVerify(i, false); setAsCorrectSyntax(i, true); } lastSavedData = storeLastSavedData(); } public Object[][] getData() { return this.data; } public void setAsManualVerify(int x, boolean t) { this.hasManualVerification[x] = t; } public void setAsCorrectSyntax(int x, boolean t) { this.hasCorrectSyntax[x] = t; } public boolean[] getManualVerifiedList() { return this.hasManualVerification; } public boolean[] getCorrectSyntaxList() { return this.hasCorrectSyntax; } /** * Save transcription to a file * * @param fileName * fileName * @throws Exception * Exception */ public void saveTranscription(String fileName) throws Exception { PrintWriter out = new PrintWriter(new OutputStreamWriter(new FileOutputStream(fileName), "UTF-8")); // Save copyright notice first MaryUtils.writeCopyrightNotice(out, "#"); // If any transcriptions include spaces, save using | as separator, else use space boolean transContainsSpace = false; for (int i = 0; i < data.length; i++) { String trans = (String) data[i][2]; if (trans.contains(" ")) { transContainsSpace = true; break; } } String separatorChar = transContainsSpace ? "|" : " "; for (int i = 0; i < data.length; i++) { String word = (String) data[i][1]; String trans = (String) data[i][2]; boolean isFunctional = (Boolean) data[i][3]; StringBuilder line = new StringBuilder(); line.append(word); if (!trans.equals("") && this.hasManualVerification[i] && this.hasCorrectSyntax[i]) { line.append(separatorChar).append(trans); } if (isFunctional) { line.append(separatorChar).append("functional"); } out.println(line); } out.flush(); out.close(); // Saved the data - so, stored data have to modify lastSavedData = storeLastSavedData(); } /** * Load transcription from file, either replacing or adding to any existing data. If adding, only words not contained in the * existing data will be added. * * @param fileName * fileName * @param keepCurrentData * whether to keep any current data and add, or use only the loaded data. * @throws Exception * Exception */ public void loadTranscription(String fileName, boolean keepCurrentData) throws Exception { List<String> lines = new ArrayList<String>(); BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), "UTF-8")); String line; while ((line = br.readLine()) != null) { line = line.trim(); if (line.equals("") || line.startsWith("#")) continue; lines.add(line); } if (lines.size() == 0) { return; } int offset; Set<String> currentWords = new HashSet<String>(); if (keepCurrentData && this.data != null) { Object[][] oldData = this.data; boolean[] oldHasManualVerification = this.hasManualVerification; boolean[] oldHasCorrectSyntax = this.hasCorrectSyntax; this.data = new Object[oldData.length + lines.size()][4]; for (int i = 0; i < oldData.length; i++) { System.arraycopy(oldData[i], 0, this.data[i], 0, 4); currentWords.add((String) oldData[i][1]); } this.hasManualVerification = new boolean[this.data.length]; System.arraycopy(oldHasManualVerification, 0, this.hasManualVerification, 0, oldData.length); this.hasCorrectSyntax = new boolean[this.data.length]; System.arraycopy(oldHasCorrectSyntax, 0, this.hasCorrectSyntax, 0, oldData.length); offset = oldData.length; } else { this.data = new Object[lines.size()][4]; this.hasManualVerification = new boolean[data.length]; this.hasCorrectSyntax = new boolean[data.length]; offset = 0; } int pos = offset; // Accept input in two types of format: // 1. space-separated (in which case the transcription is not supposed to contain any space characters); // 2. fields separated by a "pipe" symbol ("|"), as in userdict. String separatorRE; if (lines.get(0).contains("|")) { separatorRE = "\\|"; } else { separatorRE = "\\s+"; } for (int i = 0; i < lines.size(); i++) { String[] words = lines.get(i).split(separatorRE); String word = words[0].trim(); if (currentWords.contains(word)) continue; // do not add new words already contained in old list data[pos][0] = Integer.toString(pos); data[pos][1] = words[0].trim(); if (lines.get(i).endsWith("functional")) { data[pos][3] = Boolean.TRUE; if (words.length == 3) { data[pos][2] = words[1].trim(); setAsManualVerify(pos, true); setAsCorrectSyntax(pos, true); } else { data[pos][2] = ""; setAsManualVerify(pos, false); } } else { data[pos][3] = Boolean.FALSE; if (words.length >= 2) { data[pos][2] = words[1].trim(); setAsManualVerify(pos, true); setAsCorrectSyntax(pos, true); } else { data[pos][2] = ""; setAsManualVerify(pos, false); } } pos++; } lastSavedData = storeLastSavedData(); } /** * Load transcription from HashMap * * @param wordList * wordList * @throws Exception * Exception */ @Deprecated // doesn't seem to get used -- remove? public void loadTranscription(HashMap<String, Integer> wordList) throws Exception { int length = wordList.size(); this.data = new Object[length][4]; this.hasManualVerification = new boolean[length]; this.hasCorrectSyntax = new boolean[length]; Iterator<String> it = wordList.keySet().iterator(); for (int i = 0; it.hasNext(); i++) { data[i][0] = Integer.toString(i); data[i][1] = (String) it.next(); // wordList.get(i); data[i][2] = ""; data[i][3] = Boolean.FALSE; setAsManualVerify(i, false); setAsCorrectSyntax(i, true); } lastSavedData = storeLastSavedData(); } public void loadTranscription(ArrayList<String> wordList) { int length = wordList.size(); this.data = new Object[length][4]; this.hasManualVerification = new boolean[length]; this.hasCorrectSyntax = new boolean[length]; Iterator<String> it = wordList.iterator(); for (int i = 0; it.hasNext(); i++) { data[i][0] = Integer.toString(i); data[i][1] = (String) it.next(); // wordList.get(i); data[i][2] = ""; data[i][3] = Boolean.FALSE; setAsManualVerify(i, false); setAsCorrectSyntax(i, true); } lastSavedData = storeLastSavedData(); } /** * Save user entered and verified transcription in to lexicon format * * @param fileName * fileName * @throws IOException * IOException */ public void saveSampaLexiconFormat(String fileName) throws IOException { if (!hasLexiconData()) return; PrintWriter out = new PrintWriter(new FileWriter(fileName)); for (int i = 0; i < data.length; i++) { String line; if (!((String) data[i][2]).equals("") && this.hasManualVerification[i] && this.hasCorrectSyntax[i]) { line = (String) data[i][1]; // line += "\\"+(String)data[i][2]+"\\"; line += "|" + (String) data[i][2]; out.println(line); } } out.flush(); out.close(); // Saved the data - so, stored data have to modify lastSavedData = storeLastSavedData(); } /** * Save user entered and verified transcription in to lexicon format * * @param fileName * fileName * @param phoneSet * phoneSet * @throws IOException * IOException */ public void saveSampaLexiconFormat(String fileName, AllophoneSet phoneSet) throws IOException { if (!hasLexiconData()) return; PrintWriter out = new PrintWriter(new OutputStreamWriter(new FileOutputStream(fileName), "UTF-8")); for (int i = 0; i < data.length; i++) { String line; if (!((String) data[i][2]).equals("") && this.hasManualVerification[i] && this.hasCorrectSyntax[i]) { line = (String) data[i][1]; String grapheme = phoneSet.splitAllophoneString((String) data[i][2]); line += "|" + grapheme; out.println(line); } } out.flush(); out.close(); // Saved the data - so, stored data have to modify lastSavedData = storeLastSavedData(); } /** * Save all functional words into text file * * @param fileName * fileName * @throws IOException * IOException */ public void saveFunctionalWords(String fileName) throws IOException { if (!hasFunctionalData()) return; PrintWriter out = new PrintWriter(new OutputStreamWriter(new FileOutputStream(fileName), "UTF-8")); for (int i = 0; i < data.length; i++) { if ((Boolean) data[i][3]) { out.println((String) data[i][1] + "|functional"); } } out.flush(); out.close(); // Saved the data - so, stored data have to modify lastSavedData = storeLastSavedData(); } public boolean hasFunctionalData() { int countData = 0; for (int i = 0; i < data.length; i++) { if ((Boolean) data[i][3]) { countData++; } } return countData != 0; } public boolean hasLexiconData() { int countData = 0; for (int i = 0; i < data.length; i++) { if (!((String) data[i][2]).equals("") && this.hasManualVerification[i] && this.hasCorrectSyntax[i]) { countData++; } } return countData != 0; } /** * For all words in lexicon, verify if they can be looked up in fst file. * * @param lexiconFilename * lexiconFilename * @param fstFilename * fstFilename * @throws IOException * IOException */ public void testFST(String lexiconFilename, String fstFilename) throws IOException { System.err.println("Testing FST..."); FSTLookup fst = new FSTLookup(fstFilename); BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(lexiconFilename), "UTF-8")); String line; int nCorrect = 0; int nFailed = 0; while ((line = br.readLine()) != null) { String[] parts = line.split("\\s*\\|\\s*"); String key = parts[0]; String value = parts[1]; String[] lookupResult = fst.lookup(key); assert lookupResult != null; if (lookupResult.length == 1 && value.equals(lookupResult[0])) { nCorrect++; } else { nFailed++; System.err.print("Problem looking up key '" + key + "': Expected value '" + value + "', but got "); if (lookupResult.length == 0) { System.err.println("no result"); } else if (lookupResult.length == 1) { System.err.println("result '" + lookupResult[0] + "'"); } else { System.err.print(+lookupResult.length + " results:"); for (String res : lookupResult) { System.err.print(" '" + res + "'"); } System.err.println(); } } } br.close(); System.err.println("Testing complete. " + (nCorrect + nFailed) + " entries (" + nCorrect + " correct, " + nFailed + " failed)"); } public void createPOSFst(String posFilename, String fstFilename) throws Exception { if (!hasFunctionalData()) return; BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(posFilename), "UTF-8")); AlignerTrainer at = new AlignerTrainer(false, true); at.readLexicon(br, "\\s*\\|\\s*"); br.close(); // make some alignment iterations for (int i = 0; i < 4; i++) { at.alignIteration(); } TransducerTrie t = new TransducerTrie(); for (int i = 0, size = at.lexiconSize(); i < size; i++) { t.add(at.getAlignment(i)); t.add(at.getInfoAlignment(i)); } t.computeMinimization(); File of = new File(fstFilename); DataOutputStream os = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(of))); t.writeFST(os, "UTF-8"); os.flush(); os.close(); // testFST(fstFilename); } /** * Creates lexicon in FST format and letter-to-sound models * * @param lexiconFilename * lexiconFilename * @param fstFilename * fstFilename * @throws Exception * Exception */ public void createLexicon(String lexiconFilename, String fstFilename) throws Exception { if (!hasLexiconData()) return; BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(lexiconFilename), "UTF-8")); AlignerTrainer at = new AlignerTrainer(false, true); at.readLexicon(br, "\\s*\\|\\s*"); br.close(); // make some alignment iterations for (int i = 0; i < 4; i++) { at.alignIteration(); } TransducerTrie t = new TransducerTrie(); for (int i = 0, size = at.lexiconSize(); i < size; i++) { t.add(at.getAlignment(i)); t.add(at.getInfoAlignment(i)); } t.computeMinimization(); File of = new File(fstFilename); DataOutputStream os = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(of))); t.writeFST(os, "UTF-8"); os.flush(); os.close(); testFST(lexiconFilename, fstFilename); } /** * get column count * * @return columnNames.length */ public int getColumnCount() { return columnNames.length; } /** * get row count * * @return data.length */ public int getRowCount() { return data.length; } /** * get column name * * @param col * col * @return columnNames[col] */ public String getColumnName(int col) { return columnNames[col]; } /** * get value at given location * * @param row * row * @param col * col * @return data[row][col] */ public Object getValueAt(int row, int col) { return data[row][col]; } /* * JTable uses this method to determine the default renderer/ editor for each cell. If we didn't implement this method, then * the last column would contain text ("true"/"false"), rather than a check box. */ public Class getColumnClass(int c) { return getValueAt(0, c).getClass(); } /* * Don't need to implement this method unless your table's editable. */ public boolean isCellEditable(int row, int col) { // Note that the data/cell address is constant, // no matter where the cell appears onscreen. return col >= editableColumns; } /* * Don't need to implement this method unless your table's data can change. */ public void setValueAt(Object value, int row, int col) { data[row][col] = value; fireTableCellUpdated(row, col); } public boolean isDataModified() { for (int i = 0; i < data.length; i++) { for (int j = 0; j < data[0].length; j++) { Object k1 = data[i][j]; Object k2 = lastSavedData[i][j]; if (!data[i][j].equals(lastSavedData[i][j])) { return true; } } } return false; } /** * Store last saved data * * @return newData */ private Object[][] storeLastSavedData() { Object[][] newData = new Object[data.length][data[0].length]; for (int i = 0; i < data.length; i++) { for (int j = 0; j < data[0].length; j++) { newData[i][j] = data[i][j]; } } return newData; } }