package org.maltparser.core.syntaxgraph.writer; import org.maltparser.core.config.ConfigurationDir; import org.maltparser.core.exception.MaltChainedException; import org.maltparser.core.flow.FlowChartInstance; import org.maltparser.core.flow.item.ChartItem; import org.maltparser.core.flow.spec.ChartItemSpecification; import org.maltparser.core.io.dataformat.DataFormatException; import org.maltparser.core.io.dataformat.DataFormatInstance; import org.maltparser.core.io.dataformat.DataFormatManager; import org.maltparser.core.options.OptionManager; import org.maltparser.core.symbol.SymbolTableHandler; import org.maltparser.core.syntaxgraph.TokenStructure; /** * * * @author Johan Hall */ public class WriteChartItem extends ChartItem { private String idName; private String outputFormatName; private String outputFileName; private String outputCharSet; private String writerOptions; private Class<? extends SyntaxGraphWriter> graphWriterClass; private String nullValueStrategy; private SyntaxGraphWriter writer; private String sourceName; private String optiongroupName; private DataFormatInstance outputDataFormatInstance; private TokenStructure cachedGraph = null; public WriteChartItem() { super(); } @Override public void initialize(FlowChartInstance flowChartinstance, ChartItemSpecification chartItemSpecification) throws MaltChainedException { super.initialize(flowChartinstance, chartItemSpecification); for (String key : chartItemSpecification.getChartItemAttributes().keySet()) { if (key.equals("id")) { idName = chartItemSpecification.getChartItemAttributes().get(key); } else if (key.equals("source")) { sourceName = chartItemSpecification.getChartItemAttributes().get(key); } else if (key.equals("optiongroup")) { optiongroupName = chartItemSpecification.getChartItemAttributes().get(key); } } if (idName == null) { idName = getChartElement("write").getAttributes().get("id").getDefaultValue(); } else if (sourceName == null) { sourceName = getChartElement("write").getAttributes().get("source").getDefaultValue(); } else if (optiongroupName == null) { optiongroupName = getChartElement("write").getAttributes().get("optiongroup").getDefaultValue(); } setOutputFormatName(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "format").toString()); setOutputFileName(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "outfile").toString()); setOutputCharSet(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "charset").toString()); setWriterOptions(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "writer_options").toString()); setSyntaxGraphWriterClass((Class<?>) OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "writer")); setNullValueStrategy(OptionManager.instance().getOptionValue(getOptionContainerIndex(), "singlemalt", "null_value").toString()); initOutput(getNullValueStrategy()); initWriter(getSyntaxGraphWriterClass(), getOutputFileName(), getOutputCharSet(), getWriterOptions()); } public int preprocess(int signal) throws MaltChainedException { return signal; } public int process(int signal) throws MaltChainedException { if (cachedGraph == null) { cachedGraph = (TokenStructure) flowChartinstance.getFlowChartRegistry(org.maltparser.core.syntaxgraph.TokenStructure.class, sourceName); writer.writeProlog(); } writer.writeSentence(cachedGraph); if (signal == ChartItem.TERMINATE) { writer.writeEpilog(); } return signal; } public int postprocess(int signal) throws MaltChainedException { return signal; } public void terminate() throws MaltChainedException { if (writer != null) { writer.close(); writer = null; } outputDataFormatInstance = null; cachedGraph = null; } public String getOutputFormatName() { if (outputFormatName == null) { return "/appdata/dataformat/conllx.xml"; } return outputFormatName; } public void setOutputFormatName(String outputFormatName) { this.outputFormatName = outputFormatName; } public String getOutputFileName() { if (outputFileName == null) { return "/dev/stdout"; } return outputFileName; } public void setOutputFileName(String outputFileName) { this.outputFileName = outputFileName; } public String getOutputCharSet() { if (outputCharSet == null) { return "UTF-8"; } return outputCharSet; } public void setOutputCharSet(String outputCharSet) { this.outputCharSet = outputCharSet; } public String getWriterOptions() { if (writerOptions == null) { return ""; } return writerOptions; } public void setWriterOptions(String writerOptions) { this.writerOptions = writerOptions; } public Class<? extends SyntaxGraphWriter> getSyntaxGraphWriterClass() { return graphWriterClass; } public void setSyntaxGraphWriterClass(Class<?> graphWriterClass) throws MaltChainedException { try { if (graphWriterClass != null) { this.graphWriterClass = graphWriterClass.asSubclass(org.maltparser.core.syntaxgraph.writer.SyntaxGraphWriter.class); } } catch (ClassCastException e) { throw new DataFormatException("The class '" + graphWriterClass.getName() + "' is not a subclass of '" + org.maltparser.core.syntaxgraph.writer.SyntaxGraphWriter.class.getName() + "'. ", e); } } public String getNullValueStrategy() { if (nullValueStrategy == null) { return "one"; } return nullValueStrategy; } public void setNullValueStrategy(String nullValueStrategy) { this.nullValueStrategy = nullValueStrategy; } public void initOutput(String nullValueStategy) throws MaltChainedException { ConfigurationDir configDir = (ConfigurationDir) flowChartinstance.getFlowChartRegistry(org.maltparser.core.config.ConfigurationDir.class, idName); DataFormatManager dataFormatManager = configDir.getDataFormatManager(); SymbolTableHandler symbolTables = configDir.getSymbolTables(); if (configDir.sizeDataFormatInstance() == 0 || dataFormatManager.getInputDataFormatSpec() != dataFormatManager.getOutputDataFormatSpec()) { outputDataFormatInstance = dataFormatManager.getOutputDataFormatSpec().createDataFormatInstance(symbolTables, nullValueStategy); configDir.addDataFormatInstance(dataFormatManager.getInputDataFormatSpec().getDataFormatName(), outputDataFormatInstance); } else { outputDataFormatInstance = configDir.getDataFormatInstance(dataFormatManager.getInputDataFormatSpec().getDataFormatName()); //dataFormatInstances.get(dataFormatManager.getInputDataFormatSpec().getDataFormatName()); } } public void initWriter(Class<? extends SyntaxGraphWriter> syntaxGraphWriterClass, String outputFile, String outputCharSet, String writerOption) throws MaltChainedException { try { writer = syntaxGraphWriterClass.newInstance(); if (outputFile == null || outputFile.length() == 0 || outputFile.equals("/dev/stdout")) { writer.open(System.out, outputCharSet); } else { writer.open(outputFile, outputCharSet); } writer.setDataFormatInstance(outputDataFormatInstance); writer.setOptions(writerOption); } catch (InstantiationException e) { throw new DataFormatException("The data writer '" + syntaxGraphWriterClass.getName() + "' cannot be initialized. ", e); } catch (IllegalAccessException e) { throw new DataFormatException("The data writer '" + syntaxGraphWriterClass.getName() + "' cannot be initialized. ", e); } } public Class<? extends SyntaxGraphWriter> getGraphWriterClass() { return graphWriterClass; } public SyntaxGraphWriter getWriter() { return writer; } public String getSourceName() { return sourceName; } public DataFormatInstance getOutputDataFormatInstance() { return outputDataFormatInstance; } @Override public boolean equals(Object obj) { if (this == obj) { return true; } if (obj == null) { return false; } if (getClass() != obj.getClass()) { return false; } return obj.toString().equals(this.toString()); } @Override public int hashCode() { return 217 + (null == toString() ? 0 : toString().hashCode()); } @Override public String toString() { StringBuilder sb = new StringBuilder(); sb.append(" write "); sb.append("id:"); sb.append(idName); sb.append(' '); sb.append("source:"); sb.append(sourceName); sb.append(' '); sb.append("optiongroup:"); sb.append(optiongroupName); return sb.toString(); } }