/**
* @version $Id: TemplateDecoder.java 1839 2014-04-16 02:33:51Z yukihiro-kinjyo $
*
* 2012/11/28 22:40:41
* @author s.takuro
*
* Copyright 2011-2014 TIDAコンソーシアム All Rights Reserved.
*/
package com.tida_okinawa.corona.correction.template;
import static com.tida_okinawa.corona.correction.template.TemplateUtil.ATTR_BASE;
import static com.tida_okinawa.corona.correction.template.TemplateUtil.ATTR_CLASS;
import static com.tida_okinawa.corona.correction.template.TemplateUtil.ATTR_FIX;
import static com.tida_okinawa.corona.correction.template.TemplateUtil.ATTR_FIX_TRUE;
import static com.tida_okinawa.corona.correction.template.TemplateUtil.ATTR_ID;
import static com.tida_okinawa.corona.correction.template.TemplateUtil.ATTR_KIND;
import static com.tida_okinawa.corona.correction.template.TemplateUtil.ATTR_KIND_LABEL;
import static com.tida_okinawa.corona.correction.template.TemplateUtil.ATTR_KIND_WORD;
import static com.tida_okinawa.corona.correction.template.TemplateUtil.ATTR_LABEL;
import static com.tida_okinawa.corona.correction.template.TemplateUtil.ATTR_PART;
import static com.tida_okinawa.corona.correction.template.TemplateUtil.ATTR_QUANT;
import static com.tida_okinawa.corona.correction.template.TemplateUtil.ATTR_SEARCH;
import static com.tida_okinawa.corona.correction.template.TemplateUtil.ATTR_TYPE;
import static com.tida_okinawa.corona.correction.template.TemplateUtil.ATTR_VALUE;
import static com.tida_okinawa.corona.correction.template.TemplateUtil.TAG_AND;
import static com.tida_okinawa.corona.correction.template.TemplateUtil.TAG_COMPARE;
import static com.tida_okinawa.corona.correction.template.TemplateUtil.TAG_DEST;
import static com.tida_okinawa.corona.correction.template.TemplateUtil.TAG_LINK;
import static com.tida_okinawa.corona.correction.template.TemplateUtil.TAG_MODIFICATION;
import static com.tida_okinawa.corona.correction.template.TemplateUtil.TAG_NOT;
import static com.tida_okinawa.corona.correction.template.TemplateUtil.TAG_OR;
import static com.tida_okinawa.corona.correction.template.TemplateUtil.TAG_ORDER;
import static com.tida_okinawa.corona.correction.template.TemplateUtil.TAG_SEQUENCE;
import static com.tida_okinawa.corona.correction.template.TemplateUtil.TAG_SOURCE;
import static com.tida_okinawa.corona.correction.template.TemplateUtil.TAG_TERM;
import static com.tida_okinawa.corona.correction.template.TemplateUtil.getIntValue;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.StringReader;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.eclipse.core.runtime.Assert;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import com.tida_okinawa.corona.correction.parsing.model.CompType;
import com.tida_okinawa.corona.correction.parsing.model.IModelParser.ModelEncoder;
import com.tida_okinawa.corona.correction.parsing.model.QuantifierType;
import com.tida_okinawa.corona.correction.parsing.model.SearchScopeType;
import com.tida_okinawa.corona.io.model.dic.TermClass;
import com.tida_okinawa.corona.io.model.dic.TermPart;
/**
 * Converts a template written as an XML string into a {@link Template} tree.
 * <p>
 * Ticket #187: automatic generation of syntax patterns.
 *
 * @author s.takuro
 */
public class TemplateDecoder implements ModelEncoder<String, Template> {
    /*
     * Created once and reused by every encode() call.
     * NOTE(review): SAXParser is not documented as thread-safe; confirm that a
     * single decoder instance is never used from several threads at once.
     */
    private final SAXParser parser;

    /**
     * Creates a decoder that converts strings (in XML form) into templates.
     *
     * @throws IllegalStateException
     *             if no SAX parser can be created; failing here avoids a later
     *             NullPointerException inside {@link #encode(String)}
     */
    public TemplateDecoder() {
        try {
            parser = SAXParserFactory.newInstance().newSAXParser();
        } catch (ParserConfigurationException e) {
            throw new IllegalStateException("Unable to create a SAX parser", e); //$NON-NLS-1$
        } catch (SAXException e) {
            throw new IllegalStateException("Unable to create a SAX parser", e); //$NON-NLS-1$
        }
    }

    /**
     * Parses the given XML text and returns the root of the template tree built from it.
     * <p>
     * The text is handed to the parser as a character stream, so neither the platform
     * default charset nor the <code>encoding</code> pseudo-attribute of the XML
     * declaration influences how the characters are read. (Converting the string to
     * bytes first made the result depend on the platform charset whenever the
     * declaration was missing.)
     * <p>
     * Parse errors are reported on the console and do not propagate; in that case the
     * template built before the error occurred (possibly <code>null</code>) is returned.
     *
     * @param element
     *            pattern in XML form; must not be <code>null</code>
     * @return the template created for the first recognised element, or
     *         <code>null</code> if no element was recognised
     */
    @Override
    public Template encode(String element) {
        Assert.isNotNull(element);
        ParseHandler handler = new ParseHandler();
        /*
         * A byte stream lets the parser drop a leading byte order mark; a character
         * stream does not, so remove it here to keep such input parseable.
         */
        String xml = element;
        if (xml.length() > 0 && xml.charAt(0) == '\uFEFF') {
            xml = xml.substring(1);
        }
        try {
            parser.parse(new InputSource(new StringReader(xml)), handler);
        } catch (SAXException e) {
            System.out.println(e);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return handler.getTopTemplate();
    }

    /**
     * SAX handler that turns element events into templates and links them into a tree.
     */
    class ParseHandler extends DefaultHandler {
        /* First non-null template produced; returned as the root. */
        private Template topTemplate;
        /* Container that receives the templates created for subsequent elements. */
        private TemplateContainer currentContainer;
        /* Template produced for the most recently opened (or re-entered) element. */
        private Template currentElement;

        /**
         * Creates the template for the opened element, attaches it to the current
         * container and, if it is itself a container, makes it the current container.
         */
        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
            super.startElement(uri, localName, qName, attributes);
            /*
             * NOTE(review): createTemplate returns null for an unrecognised tag, and that
             * null is still passed to addChild below - confirm addChild tolerates it.
             */
            currentElement = createTemplate(qName, attributes, currentContainer);
            if (topTemplate == null) {
                topTemplate = currentElement;
            }
            if (currentContainer != null) {
                /* Establish the parent-child relation between the patterns. */
                currentContainer.addChild(currentElement);
            }
            if (currentElement instanceof TemplateContainer) {
                /* Children that follow belong to this new container. */
                currentContainer = (TemplateContainer) currentElement;
            }
        }

        /**
         * Steps back out of the element that was just closed.
         * <p>
         * When the current element is a container that has a parent, both the current
         * element and the current container move up to that parent; a container
         * without a parent leaves the state unchanged. In every other case the current
         * element is reset to the current container.
         */
        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            super.endElement(uri, localName, qName);
            if (currentElement instanceof TemplateContainer) {
                if (currentElement.getParent() != null) {
                    currentElement = currentContainer = currentElement.getParent();
                }
            } else {
                currentElement = currentContainer;
            }
        }

        /**
         * @return the first pattern that was created (the root of the pattern)
         */
        public Template getTopTemplate() {
            return topTemplate;
        }
    }

    /**
     * Builds the template that corresponds to one XML element.
     * <p>
     * The tag name is compared case-insensitively with the known tag constants.
     *
     * @param tag
     *            qualified name of the element
     * @param attr
     *            attributes of the element
     * @param parent
     *            container passed to the constructor of the new template; may be
     *            <code>null</code>
     * @return the new template, or <code>null</code> if the tag is not recognised
     * @throws NumberFormatException
     *             if a modification element has a missing or non-numeric type attribute
     */
    Template createTemplate(String tag, Attributes attr, TemplateContainer parent) {
        if (tagMatches(TAG_TERM, tag)) {
            return createTerm(attr, parent);
        }
        if (tagMatches(TAG_ORDER, tag)) {
            TemplateOrder order = new TemplateOrder(parent);
            order.setScope(SearchScopeType.valueOf(getIntValue(attr, ATTR_SEARCH, 0)));
            return order;
        }
        if (tagMatches(TAG_SEQUENCE, tag)) {
            TemplateSequence sequence = new TemplateSequence(parent);
            sequence.setScope(SearchScopeType.valueOf(getIntValue(attr, ATTR_SEARCH, 0)));
            return sequence;
        }
        if (tagMatches(TAG_OR, tag)) {
            TemplateOr or = new TemplateOr(parent);
            or.setScope(SearchScopeType.valueOf(getIntValue(attr, ATTR_SEARCH, 0)));
            return or;
        }
        if (tagMatches(TAG_AND, tag)) {
            TemplateAnd and = new TemplateAnd(parent);
            and.setScope(SearchScopeType.valueOf(getIntValue(attr, ATTR_SEARCH, 0)));
            return and;
        }
        if (tagMatches(TAG_NOT, tag)) {
            return new TemplateNot(parent);
        }
        if (tagMatches(TAG_LINK, tag)) {
            TemplateLink link = new TemplateLink(parent);
            link.setId(getIntValue(attr, ATTR_ID, -1));
            applyFixCheck(link, attr);
            return link;
        }
        if (tagMatches(TAG_SOURCE, tag)) {
            return new TemplateModificationElement(parent, TemplateModificationElement.TYPE_SOURCE);
        }
        if (tagMatches(TAG_DEST, tag)) {
            return new TemplateModificationElement(parent, TemplateModificationElement.TYPE_DEST);
        }
        if (tagMatches(TAG_MODIFICATION, tag)) {
            TemplateModification modification = new TemplateModification(parent, false);
            /*
             * NOTE(review): unlike the other numeric attributes this one is parsed
             * without a default, so a missing or malformed value throws
             * NumberFormatException - confirm whether a default would be appropriate.
             */
            modification.setType(Integer.parseInt(attr.getValue(ATTR_TYPE)));
            return modification;
        }
        if (tagMatches(TAG_COMPARE, tag)) {
            TemplateCompare compare = new TemplateCompare(parent);
            compare.setLabel(attr.getValue(ATTR_LABEL));
            compare.setType(CompType.getValue(getIntValue(attr, ATTR_TYPE, -1)));
            compare.setValue(getIntValue(attr, ATTR_VALUE, 0));
            return compare;
        }
        return null;
    }

    /**
     * Builds a term template from the attributes of a term element.
     *
     * @param attr
     *            attributes of the element
     * @param parent
     *            container passed to the constructor of the term; may be <code>null</code>
     * @return the populated term
     */
    private static TemplateTerm createTerm(Attributes attr, TemplateContainer parent) {
        TemplateTerm term = new TemplateTerm(parent);
        term.setWord(attr.getValue(ATTR_BASE));
        term.setLabel(attr.getValue(ATTR_LABEL));
        term.setPart(TermPart.valueOf(getIntValue(attr, ATTR_PART, -1)));
        term.setWordClass(TermClass.valueOf(getIntValue(attr, ATTR_CLASS, -1)));
        term.setQuant(QuantifierType.valueOf(getIntValue(attr, ATTR_QUANT, -1)));

        boolean fixed = applyFixCheck(term, attr);
        /* The kind (word or label) is only looked at for variable, i.e. non-fixed, terms. */
        String kind = fixed ? null : attr.getValue(ATTR_KIND);
        if (ITemplateTermType.TYPE_WORD.equals(kind)) {
            term.setState(ATTR_KIND_WORD);
        } else if (ITemplateTermType.TYPE_LABEL.equals(kind)) {
            term.setState(ATTR_KIND_LABEL);
        } else {
            /* Fixed term, no kind attribute, or a kind that is neither word nor label. */
            term.setState(null);
        }
        return term;
    }

    /**
     * Reads the fix attribute and stores the result in the given template.
     * <p>
     * A template counts as fixed when the attribute is absent or equals
     * {@code ATTR_FIX_TRUE}.
     *
     * @param target
     *            template that receives the flag
     * @param attr
     *            attributes of the element
     * @return the value that was passed to <code>setFixCheck</code>
     */
    private static boolean applyFixCheck(VariableTemplate target, Attributes attr) {
        String fixValue = attr.getValue(ATTR_FIX);
        boolean fixed = (fixValue == null) || ATTR_FIX_TRUE.equals(fixValue);
        target.setFixCheck(fixed);
        return fixed;
    }

    /**
     * Compares a known tag constant with an element name, ignoring case.
     *
     * @param expected
     *            tag constant; must not be <code>null</code>
     * @param actual
     *            element name read from the XML; may be <code>null</code>
     * @return <code>true</code> if both names are equal ignoring case
     */
    private static boolean tagMatches(String expected, String actual) {
        return expected.equalsIgnoreCase(actual);
    }
}