/*
* Copyright 2005-2015 by BerryWorks Software, LLC. All rights reserved.
*
* This file is part of EDIReader. You may obtain a license for its use directly from
* BerryWorks Software, and you may also choose to use this software under the terms of the
* GPL version 3. Other products in the EDIReader software suite are available only by licensing
* with BerryWorks. Only those files bearing the GPL statement below are available under the GPL.
*
* EDIReader is free software: you can redistribute it and/or modify it under the terms of the
* GNU General Public License as published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* EDIReader is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with EDIReader. If not,
* see <http://www.gnu.org/licenses/>.
*/
package com.berryworks.edireader;
import com.berryworks.edireader.error.ErrorMessages;
import com.berryworks.edireader.tokenizer.Token;
import com.berryworks.edireader.util.ContentHandlerBase64Encoder;
import org.xml.sax.SAXException;
import java.io.IOException;
import java.util.List;
import static com.berryworks.edireader.util.FixedLength.emptyIfNull;
/**
* Reads and parses EDIFACT EDI interchanges. This class is not normally
* constructed explicitly from outside the package, although it is declared
* public for special cases. The recommended use of this class is to first
* establish an EDIReader using one of the factory techniques; when the
* EDIReader is called upon to parse the EDI data, it determines which EDI
* standard applies and internally constructs the proper subclass to continue
* with parsing.
*/
public class EdifactReader extends StandardReader {
/**
 * Upper bound on the number of elements in an ordinary segment. It is not referenced
 * by any code in this class; presumably it is consulted elsewhere in the hierarchy
 * (TODO confirm).
 */
protected static final int ELEMENTS_IN_SEGMENT_MAXIMUM = 50;
/**
 * Upper bound on the number of elements tolerated in a UNB segment. While draining the
 * tail of the UNB, {@link #remainderOfUNB()} raises an EDISyntaxException once the
 * tokenizer reports more elements than this, on the theory that the segment terminator
 * was not recognized.
 */
protected static final int ELEMENTS_IN_UNB_MAXIMUM = 30;
/**
 * Set to true at the top of every pass through the UNG/UNH/UNZ loop in
 * parseInterchange. It is never read within this class.
 */
protected boolean ungExplicit;
/**
 * True once recognizeBeginning has seen a UNA segment at the start of the input;
 * exposed through {@link #isUNA()}.
 */
protected boolean witnessedUNA;
/**
 * Consumes the opening tokens of an EDIFACT interchange and returns the token that
 * starts the UNB segment.
 * <p>
 * An optional UNA segment is stepped over (and remembered in {@code witnessedUNA});
 * the segment that follows must be UNB.
 *
 * @return the SEGMENT_START token of the UNB segment
 * @throws IOException  for problem reading EDI data
 * @throws SAXException if the input does not begin with UNA or UNB, or if the UNA is
 *                      not followed by a UNB segment
 */
@Override
protected Token recognizeBeginning() throws IOException, SAXException {
    Token current = getTokenizer().nextToken();
    if (current.getType() != Token.TokenType.SEGMENT_START) {
        throw new EDISyntaxException(FIRST_SEGMENT_MUST_BE_UNA_OR_UNB);
    }

    String tag = current.getValue();
    if ("UNA".equals(tag)) {
        witnessedUNA = true;
        // The UNA content was already interpreted during preview; just step past it.
        getTokenizer().skipSegment();
        current = getTokenizer().nextToken();
        if (current.getType() != Token.TokenType.SEGMENT_START) {
            throw new EDISyntaxException(INVALID_UNA, getTokenizer());
        }
        tag = current.getValue();
    }

    if ("UNB".equals(tag)) {
        return current;
    }
    if (witnessedUNA) {
        throw new EDISyntaxException(
                "Mandatory UNB segment was not recognized after UNA. Terminator problem?");
    }
    throw new EDISyntaxException(FIRST_SEGMENT_MUST_BE_UNA_OR_UNB, getTokenizer());
}
/**
 * Parses one EDIFACT interchange, from the UNB segment through the matching UNZ.
 * <p>
 * The leading UNB elements (syntax identifier, sender, receiver, date/time, control
 * number and the optional trailing elements) are collected as interchange attributes,
 * the interchange is started, and then each UNG group or run of ungrouped UNH messages
 * is parsed until UNZ is reached. The count and control number carried by the UNZ are
 * checked against what was observed.
 *
 * @param token parsed token that caused this method to be called
 * @return the token returned by the tokenizer after skipping the UNZ segment
 * @throws SAXException for problem emitting SAX events, or for invalid EDI
 * @throws IOException  for problem reading EDI data
 */
@Override
protected Token parseInterchange(Token token) throws SAXException, IOException {
    getInterchangeAttributes().clear();
    getInterchangeAttributes().addCDATA(getXMLTags().getStandard(), "EDIFACT");
    setGroupCount(0);

    // Syntax identifier and version, for example UNOA:2
    List<String> syntaxParts = getTokenizer().nextCompositeElement();
    String syntaxIdentifier = getSubElement(syntaxParts, 0);
    String syntaxVersionNumber = getSubElement(syntaxParts, 1);
    if (!syntaxIdentifier.isEmpty()) {
        getInterchangeAttributes().addCDATA(getXMLTags().getSyntaxIdentifier(), syntaxIdentifier);
        // The version is only reported when an identifier is present.
        if (!syntaxVersionNumber.isEmpty()) {
            getInterchangeAttributes().addCDATA(getXMLTags().getSyntaxVersion(), syntaxVersionNumber);
        }
    }

    // Sender address
    List<String> senderParts = getTokenizer().nextCompositeElement();
    String fromId = getSubElement(senderParts, 0);
    String fromQual = getSubElement(senderParts, 1);
    String fromExtra = getSubElement(senderParts, 2);

    // Receiver address
    List<String> receiverParts = getTokenizer().nextCompositeElement();
    String toId = getSubElement(receiverParts, 0);
    String toQual = getSubElement(receiverParts, 1);
    String toExtra = getSubElement(receiverParts, 2);

    // Date and time (UNB0401 and UNB0402)
    List<String> timestampParts = getTokenizer().nextCompositeElement();
    String interchangeDate = getSubElement(timestampParts, 0);
    String interchangeTime = getSubElement(timestampParts, 1);
    getInterchangeAttributes().addCDATA(getXMLTags().getDate(), interchangeDate);
    getInterchangeAttributes().addCDATA(getXMLTags().getTime(), interchangeTime);

    // Control number (UNB05)
    setInterchangeControlNumber(getTokenizer().nextSimpleValue());
    getInterchangeAttributes().addCDATA(getXMLTags().getControl(), getInterchangeControlNumber());

    remainderOfUNB();

    // Decimal notation: the character used as the decimal mark in numbers,
    // typically "." (as in 3.14159) or sometimes ",".
    getInterchangeAttributes().addCDATA(getXMLTags().getDecimal(), String.valueOf(getDecimalMark()));

    startInterchange(getInterchangeAttributes());
    generatedSenderAndReceiver(fromId, fromQual, fromExtra, toId, toQual, toExtra);

    // Each pass handles one explicit group (UNG) or one run of ungrouped messages (UNH).
    boolean trailerReached = false;
    while (!trailerReached) {
        ungExplicit = true;
        token = getTokenizer().nextToken();
        if (token.getType() != Token.TokenType.SEGMENT_START) {
            throw new EDISyntaxException(
                    "Invalid beginning of UNG|UNH|UNZ segment", getTokenizer());
        }
        String tag = token.getValue();
        switch (tag) {
            case "UNZ":
                trailerReached = true;
                break;
            case "UNG":
                setGroupCount(1 + getGroupCount());
                parseFunctionalGroup(token);
                break;
            case "UNH":
                impliedFunctionalGroup(token);
                break;
            default:
                throw new EDISyntaxException(UNEXPECTED_SEGMENT_IN_CONTEXT,
                        "UNH, UNZ, or UNG", tag, getTokenizer());
        }
    }

    // UNZ carries the group/message count followed by the interchange control number.
    checkGroupCount(getGroupCount(), getTokenizer().nextIntValue(), COUNT_UNZ);
    checkInterchangeControlNumber(getInterchangeControlNumber(), getTokenizer().nextSimpleValue(), CONTROL_NUMBER_UNZ);
    endInterchange();
    return getTokenizer().skipSegment();
}
/**
 * Reads whatever follows the control number in the UNB segment.
 * <p>
 * Up to six further elements are offered, in order, to {@link #hitEndOfSegment(String)}
 * under the recipient reference, application reference, processing priority,
 * acknowledgement request, interchange agreement identifier and test indicator
 * attribute names. Reading stops as soon as the segment ends. Any tokens beyond those
 * six are discarded, with a sanity limit of {@code ELEMENTS_IN_UNB_MAXIMUM} elements.
 *
 * @throws IOException        for problem reading EDI data
 * @throws EDISyntaxException if the UNB appears to run on past the element limit
 */
protected void remainderOfUNB() throws IOException, EDISyntaxException {
    if (hitEndOfSegment(getXMLTags().getRecipientReference())) {
        return;
    }
    if (hitEndOfSegment(getXMLTags().getApplicationReference())) {
        return;
    }
    if (hitEndOfSegment(getXMLTags().getProcessingPriority())) {
        return;
    }
    if (hitEndOfSegment(getXMLTags().getAcknowledgementRequest())) {
        return;
    }
    if (hitEndOfSegment(getXMLTags().getInterchangeAgreementIdentifier())) {
        return;
    }
    if (hitEndOfSegment(getXMLTags().getTestIndicator())) {
        return;
    }

    // Drain anything left in the segment, watching for a runaway UNB.
    Token leftover = getTokenizer().nextToken();
    while (leftover.getType() != Token.TokenType.SEGMENT_END) {
        if (getTokenizer().getElementInSegmentCount() > ELEMENTS_IN_UNB_MAXIMUM) {
            throw new EDISyntaxException("Too many ("
                    + getTokenizer().getElementInSegmentCount()
                    + ") elements for a UNB. Segment terminator problem?",
                    getTokenizer());
        }
        leftover = getTokenizer().nextToken();
    }
}
/**
 * Reads the next token and, when it is a simple element, records its value as an
 * interchange attribute.
 *
 * @param attributeName name under which a simple element value is stored
 * @return true if the token read was the end of the segment, false otherwise
 *         (tokens that are neither SEGMENT_END nor SIMPLE are consumed and ignored)
 * @throws EDISyntaxException if invalid EDI is detected by the tokenizer
 * @throws IOException        for problem reading EDI data
 */
protected boolean hitEndOfSegment(String attributeName)
        throws EDISyntaxException, IOException {
    Token next = getTokenizer().nextToken();
    if (next.getType() == Token.TokenType.SEGMENT_END) {
        return true;
    }
    if (next.getType() == Token.TokenType.SIMPLE) {
        getInterchangeAttributes().addCDATA(attributeName, next.getValue());
    }
    return false;
}
/**
 * Parses an explicit EDIFACT functional group, from UNG through UNE.
 * <p>
 * The UNG elements are gathered as group attributes and the group element is opened.
 * Every UNH inside the group is handed to {@link #parseDocument(Token)}. On UNE the
 * message count and group control number are checked and the group element is closed.
 *
 * @param token parsed token that caused this method to be called
 * @return the token returned by the tokenizer after skipping the UNE segment
 * @throws SAXException for problem emitting SAX events, or for invalid EDI
 * @throws IOException  for problem reading EDI data
 */
protected Token parseFunctionalGroup(Token token) throws SAXException, IOException {
    getGroupAttributes().clear();

    // Group type, for example INVOIC
    getGroupAttributes().addCDATA("GroupType", getTokenizer().nextSimpleValue());

    // Application sender
    List<String> applSenderParts = getTokenizer().nextCompositeElement();
    String applSender = getSubElement(applSenderParts, 0);
    getGroupAttributes().addCDATA(getXMLTags().getApplSender(), applSender);

    // Application receiver
    List<String> applReceiverParts = getTokenizer().nextCompositeElement();
    String applReceiver = getSubElement(applReceiverParts, 0);
    getGroupAttributes().addCDATA(getXMLTags().getApplReceiver(), applReceiver);

    // Date and time
    List<String> timestampParts = getTokenizer().nextCompositeElement();
    String groupDate = getSubElement(timestampParts, 0);
    String groupTime = getSubElement(timestampParts, 1);
    getGroupAttributes().addCDATA(getXMLTags().getDate(), groupDate);
    getGroupAttributes().addCDATA(getXMLTags().getTime(), groupTime);

    // Control number
    setGroupControlNumber(getTokenizer().nextSimpleValue());
    getGroupAttributes().addCDATA(getXMLTags().getControl(), getGroupControlNumber());

    // Standard code, for example UN
    getGroupAttributes().addCDATA("StandardCode", getTokenizer().nextSimpleValue());

    // Standard version and release, reported concatenated (for example D02B)
    List<String> versionParts = getTokenizer().nextCompositeElement();
    String versionAndRelease = getSubElement(versionParts, 0) + getSubElement(versionParts, 1);
    getGroupAttributes().addCDATA(getXMLTags().getStandardVersion(), versionAndRelease);

    startElement(getXMLTags().getGroupTag(), getGroupAttributes());
    getTokenizer().skipSegment();

    int messagesSeen = 0;
    boolean trailerReached = false;
    while (!trailerReached) {
        token = getTokenizer().nextToken();
        if (token.getType() != Token.TokenType.SEGMENT_START) {
            throw new EDISyntaxException(
                    "Invalid beginning of UNH|UNE segment",
                    getTokenizer().getSegmentCount());
        }
        String tag = token.getValue();
        switch (tag) {
            case "UNE":
                trailerReached = true;
                break;
            case "UNH":
                messagesSeen++;
                parseDocument(token);
                break;
            default:
                throw new EDISyntaxException(
                        "Expected UNE or UNH segment instead of " + tag,
                        getTokenizer());
        }
    }

    // UNE carries the message count followed by the group control number.
    checkTransactionCount(messagesSeen, getTokenizer().nextIntValue(), COUNT_UNE);
    checkGroupControlNumber(getGroupControlNumber(), getTokenizer().nextSimpleValue(), CONTROL_NUMBER_UNE);
    endElement(getXMLTags().getGroupTag());
    return getTokenizer().skipSegment();
}
/**
 * Handles messages that appear directly inside the interchange, without an enclosing
 * UNG/UNE pair.
 * <p>
 * A group element with no attributes is emitted around the consecutive UNH messages.
 * Each message increments the group count, which parseInterchange later compares with
 * the count in the UNZ. When UNZ is reached its token is pushed back to the tokenizer
 * so the caller can read it again.
 *
 * @param token parsed token that caused this method to be called; expected to start a
 *              UNH (or UNZ) segment
 * @return the UNZ token that ended the run of messages
 * @throws SAXException for problem emitting SAX events, or for invalid EDI
 * @throws IOException  for problem reading EDI data
 */
protected Token impliedFunctionalGroup(Token token) throws SAXException, IOException {
    getGroupAttributes().clear();
    startElement(getXMLTags().getGroupTag(), getGroupAttributes());

    boolean trailerReached = false;
    while (!trailerReached) {
        if (token.getType() != Token.TokenType.SEGMENT_START) {
            throw new EDISyntaxException(
                    "Invalid beginning of UNH|UNZ segment",
                    getTokenizer().getSegmentCount());
        }
        String tag = token.getValue();
        switch (tag) {
            case "UNZ":
                // Leave the UNZ for the caller to process.
                getTokenizer().ungetToken();
                trailerReached = true;
                break;
            case "UNH":
                setGroupCount(1 + getGroupCount());
                parseDocument(token);
                token = getTokenizer().nextToken();
                break;
            default:
                throw new EDISyntaxException(UNEXPECTED_SEGMENT_IN_CONTEXT,
                        "UNH or UNZ", tag, getTokenizer());
        }
    }

    endElement(getXMLTags().getGroupTag());
    return token;
}
/**
 * Parses one EDIFACT message, from UNH through UNT.
 * <p>
 * The message reference number, the sub-elements of the message identifier composite
 * and the optional access reference are collected as document attributes. The message
 * is started and every segment up to UNT is parsed. A UNO segment is handed to
 * {@link #parseUNOUNPSequence()}, which also consumes the matching UNP. Finally the
 * segment count and reference number carried by the UNT are checked.
 *
 * @param token parsed token that triggered call to this method
 * @return the token returned by the tokenizer after skipping the UNT segment
 * @throws IOException  if problem reading EDI data
 * @throws SAXException if invalid EDI is detected
 */
protected Token parseDocument(Token token) throws SAXException, IOException {
    // Starts at 2, presumably to account for the UNH and UNT segments themselves.
    int segCount = 2;

    getDocumentAttributes().clear();
    String control = getTokenizer().nextSimpleValue();
    getDocumentAttributes().addCDATA(getXMLTags().getControl(), control);

    // Message identifier composite: type, version, release, agency, association.
    String messageType = "";
    String messageVersion = "";
    String messageRelease = "";
    List<String> identifier = getTokenizer().nextCompositeElement();
    if (identifier != null) {
        messageType = addUnhSubElement(identifier, 0, getXMLTags().getDocumentType());
        messageVersion = addUnhSubElement(identifier, 1, getXMLTags().getMessageVersion());
        messageRelease = addUnhSubElement(identifier, 2, getXMLTags().getMessageRelease());
        addUnhSubElement(identifier, 3, getXMLTags().getAgency());
        addUnhSubElement(identifier, 4, getXMLTags().getAssociation());
    }

    // NOTE(review): the meaning of the two boolean flags is defined by the tokenizer;
    // the value may come back null or empty, in which case no attribute is added.
    String accessReference = getTokenizer().nextSimpleValue(false, true);
    if (emptyIfNull(accessReference).length() > 0) {
        getDocumentAttributes().addCDATA(getXMLTags().getAccessReference(), accessReference);
    }

    PluginController pluginController =
            getPluginControllerFactory().create("EDIFACT", messageType, messageVersion, messageRelease, getTokenizer());
    if (pluginController.isEnabled()) {
        getDocumentAttributes().addCDATA(getXMLTags().getName(), pluginController.getDocumentName());
    }

    startMessage(getDocumentAttributes());

    String segmentType;
    while (!(segmentType = getTokenizer().nextSegment()).equals("UNT")) {
        segCount++;
        if ("UNO".equals(segmentType)) {
            parseUNOUNPSequence();
            // The UNP that closes the object was consumed as well, so count it here.
            segCount++;
            continue;
        }
        parseSegment(pluginController, segmentType);
    }

    // Close any loop elements still open according to the plugin controller.
    for (int toClose = pluginController.getNestingLevel(); toClose > 0; toClose--) {
        endElement(getXMLTags().getLoopTag());
    }

    checkSegmentCount(segCount, getTokenizer().nextIntValue(), COUNT_UNT);
    checkTransactionControlNumber(control, getTokenizer().nextSimpleValue(), CONTROL_NUMBER_UNT);
    endElement(getXMLTags().getDocumentTag());

    // Skip over this UNT segment and return the SEGMENT_END token
    return getTokenizer().skipSegment();
}

/**
 * Adds one sub-element of the UNH message identifier as a document attribute when it
 * is present.
 *
 * @param parts         sub-elements of the message identifier composite; never null
 * @param index         position of the sub-element of interest
 * @param attributeName attribute name to use when the sub-element is present
 * @return the sub-element value, or an empty string when the composite is too short
 *         or the value at that position is null (no attribute is added in that case)
 */
private String addUnhSubElement(List<String> parts, int index, String attributeName) {
    if (index >= parts.size()) {
        return "";
    }
    String value = parts.get(index);
    if (value == null) {
        return "";
    }
    getDocumentAttributes().addCDATA(attributeName, value);
    return value;
}
/**
 * Parses a UNO segment, the raw object data that follows it, and the closing UNP
 * segment, then emits the object as Base64-encoded content inside a package element.
 * <p>
 * The UNO supplies the object length (in characters, as read by the tokenizer) and a
 * package reference. The UNP must repeat both values exactly. The document attributes
 * are cleared and reused to carry the package reference on the emitted element.
 *
 * @throws SAXException if the length is missing, non-numeric or negative, if the UNP
 *                      is missing or does not match the UNO, or for problem emitting
 *                      SAX events
 * @throws IOException  for problem reading EDI data
 */
protected void parseUNOUNPSequence() throws SAXException, IOException {
    String lengthField = "";
    int length;
    try {
        lengthField = parseStringFromNextElement();
        length = Integer.parseInt(lengthField);
    } catch (EDISyntaxException e) {
        throw new EDISyntaxException(ErrorMessages.MISSING_UNO_LENGTH, getTokenizer());
    } catch (NumberFormatException e) {
        throw new EDISyntaxException("UNO object length must be numeric instead of " + lengthField, getTokenizer());
    }
    // Integer.parseInt accepts a leading minus sign; a negative length can never
    // describe an object, so reject it before asking the tokenizer for that many chars.
    if (length < 0) {
        throw new EDISyntaxException("UNO object length must not be negative: " + lengthField, getTokenizer());
    }

    String packageReference = parseStringFromNextElement();
    getTokenizer().skipSegment();

    // The object data sits between the UNO and UNP segments and is read verbatim.
    char[] dataObject = getTokenizer().getChars(length);

    Token token = getTokenizer().nextToken();
    if (token.getType() == Token.TokenType.SEGMENT_START && "UNP".equals(token.getSegmentType())) {
        String unpLengthField = parseStringFromNextElement();
        int unpLength;
        try {
            unpLength = Integer.parseInt(unpLengthField);
        } catch (NumberFormatException e) {
            throw new EDISyntaxException("UNP object length must be numeric instead of " + unpLengthField);
        }
        if (length != unpLength) {
            throw new EDISyntaxException(ErrorMessages.MISMATCHED_UNP_LENGTH, length, unpLength, getTokenizer());
        }
        String unpPackageReference = parseStringFromNextElement();
        if (unpPackageReference == null || !unpPackageReference.equals(packageReference)) {
            throw new EDISyntaxException(ErrorMessages.MISMATCHED_PACKAGE_REF, packageReference, unpPackageReference, getTokenizer());
        }
        getTokenizer().skipSegment();
    } else {
        throw new EDISyntaxException(ErrorMessages.MISSING_UNP);
    }

    getDocumentAttributes().clear();
    getDocumentAttributes().addCDATA(getXMLTags().getIdAttribute(), packageReference);
    startElement(getXMLTags().getPackageTag(), getDocumentAttributes());
    new ContentHandlerBase64Encoder().encode(dataObject, getContentHandler());
    endElement(getXMLTags().getPackageTag());
}
/**
 * Previews the EDI input before it is tokenized in order to discover the syntax
 * characters: sub-element delimiter, element delimiter, decimal mark, release
 * character, repetition separator, segment terminator and terminator suffix.
 * <p>
 * When a UNA segment is present its six service characters are taken as given. A
 * space in the release position means that no release processing is performed for
 * this interchange, and a space in the repetition position means no repetition
 * separator. This is not the only reasonable reading of the EDIFACT standards, but it
 * is a commonly used one. The UNB is consulted only for whatever the UNA did not
 * establish, which includes the case of a UNA that is followed immediately by the UNB
 * with no intervening suffix characters.
 *
 * @throws EDISyntaxException if invalid EDI is detected
 * @throws IOException        for problem reading EDI data
 */
@Override
public void preview() throws EDISyntaxException, IOException {
    char[] buf = getTokenizer().lookahead(128);
    if (buf[0] != 'U' || buf[1] != 'N') {
        throw new EDISyntaxException(
                "EDIFACT interchange must begin with UN");
    }
    if (isPreviewed()) {
        throw new EDISyntaxException(
                "Internal error: EDIFACT interchange previewed more than once");
    }

    setTerminatorSuffix("");

    // A UNA establishes the delimiter, sub-delimiter, decimal mark, release and
    // terminator all at once, so a single flag stands for all five of them.
    boolean unaPresent = buf[2] == 'A';
    boolean suffixKnown = false;
    if (unaPresent) {
        // UNA......
        // 012345678
        setSubDelimiter(buf[3]);
        setDelimiter(buf[4]);
        setDecimalMark(buf[5]);
        if (buf[6] == ' ') {
            // no release processing
            setRelease(-1);
        } else {
            setRelease(buf[6]);
        }
        // a space means that no repetition character was specified
        setRepetitionSeparator(buf[7] == ' ' ? '\000' : buf[7]);
        setTerminator(buf[8]);
        // Moves the UNB to the front of the buffer and notes any suffix characters.
        suffixKnown = shiftUNBoverUNA(buf);
    }

    // Look at the UNB only if something is still undetermined.
    if (!(unaPresent && suffixKnown)) {
        previewUNB(buf, unaPresent, unaPresent, unaPresent, unaPresent, unaPresent, suffixKnown);
    }
    setPreviewed(true);
}
/**
 * Shifts the preview buffer left so that the UNB segment starts at index 0, taking
 * the place of the UNA.
 * <p>
 * The distance to shift is not fixed because whitespace or other non-letter
 * characters may sit between the UNA terminator and the UNB. Up to five positions
 * after the terminator (indexes 9 to 13) are examined. Each non-letter found there is
 * appended to the terminator suffix so that generated segments can follow the same
 * convention, and the first letter found marks the start of the UNB. If none of the
 * five is a letter, the buffer is shifted by 9.
 *
 * @param buf buffer containing chars to be shifted; the trailing positions vacated by
 *            the shift keep their previous contents
 * @return true if at least one terminator suffix character was recognized
 */
private boolean shiftUNBoverUNA(char[] buf) {
    boolean suffixSeen = false;

    // buf[9] is the first char after the UNA terminator
    int probe = 9;
    while (probe < 14 && !Character.isLetter(buf[probe])) {
        setTerminatorSuffix(getTerminatorSuffix() + buf[probe]);
        suffixSeen = true;
        probe++;
    }

    // Fall back to the position right after the terminator when no letter was found.
    int unbOffset = probe < 14 ? probe : 9;
    System.arraycopy(buf, unbOffset, buf, 0, buf.length - unbOffset);
    return suffixSeen;
}
/**
 * Establishes, from the start of the UNB segment, whichever syntax characters have
 * not already been determined from a UNA.
 * <p>
 * The fourth character of the syntax identifier (buf[7], for example the B in UNOB)
 * selects the defaults. Level B uses the control characters 1D, 1F and 1C with no
 * release character, and all other recognized levels use + : ' . and ?. A setting
 * whose "determined" flag is true is left untouched.
 *
 * @param buf                        preview buffer with the UNB segment at index 0
 * @param delimiterDetermined        true if the element delimiter is already known
 * @param subDelimiterDetermined     true if the sub-element delimiter is already known
 * @param decimalMarkDetermined      true if the decimal mark is already known
 * @param releaseDetermined          true if the release character is already known
 * @param terminatorDetermined       true if the segment terminator is already known
 * @param terminatorSuffixDetermined true if the terminator suffix is already known
 * @throws EDISyntaxException if the segment is not a UNB, the syntax identifier is
 *                            unknown, or the tag is not followed by the delimiter
 */
private void previewUNB(char[] buf, boolean delimiterDetermined,
                        boolean subDelimiterDetermined, boolean decimalMarkDetermined, boolean releaseDetermined,
                        boolean terminatorDetermined, boolean terminatorSuffixDetermined)
        throws EDISyntaxException {
    // UNB+UNOA...
    // 01234567
    if (buf[2] != 'B') {
        throw new EDISyntaxException(
                "Required UNB segment not found in EDIFACT interchange");
    }
    switch (buf[7]) {
        case 'B':
            if (!delimiterDetermined && buf[3] == '+') {
                // Strange data. It seems that there was no UNA to determine the syntax characters, and
                // the UNB segment looked like UNB+UNOB....
                // The B in UNOB says that the syntax characters are supposed to be hex 1D, 1F, and 1C,
                // but the actual delimiter appears to be a +. Which one should we believe?
                // Let's believe the actual data for the delimiter, and guess that if the delimiter is a +
                // then the other two will be the values that traditionally go with a +. We achieve this
                // by simply falling through this case.
            } else {
                if (!delimiterDetermined) {
                    setDelimiter('\u001D');
                    delimiterDetermined = true;
                }
                if (!subDelimiterDetermined) {
                    setSubDelimiter('\u001F');
                    subDelimiterDetermined = true;
                }
                if (!terminatorDetermined) {
                    setTerminator('\u001C');
                    terminatorDetermined = true;
                }
                // Only default to "no release character" when a UNA has not already
                // settled the matter; otherwise a release character declared in the
                // UNA would be discarded here.
                if (!releaseDetermined) {
                    setRelease(-1);
                    releaseDetermined = true;
                }
                // NOTE(review): there is no "determined" flag for the repetition
                // separator, so this still replaces a value taken from a UNA. Confirm
                // whether that is intended.
                setRepetitionSeparator('\u0019');
            }
            // Deliberately fall into the sequence below
        case 'A':
        case 'C':
        case 'D':
        case 'E':
        case 'F':
        case 'G':
        case 'H':
        case 'I':
        case 'J':
        case 'K':
            if (!delimiterDetermined) {
                setDelimiter('+');
            }
            if (buf[3] != getDelimiter()) {
                throw new EDISyntaxException(
                        "Expected data element separator after UNB segment tag");
            }
            if (!terminatorDetermined) {
                setTerminator('\'');
            }
            if (!subDelimiterDetermined) {
                setSubDelimiter(':');
            }
            if (!decimalMarkDetermined) {
                setDecimalMark('.');
            }
            if (!releaseDetermined) {
                setRelease('?');
            }
            break;
        default:
            throw new EDISyntaxException(
                    "Unknown Syntax Identifier in UNB segment: "
                            + new String(buf, 4, 4));
    }
    if (!terminatorSuffixDetermined) {
        // We still have not observed a terminator suffix
        // following the first terminator in the interchange.
        // Therefore, we must scan the buffer until we see the
        // segment terminator, and then note suffix characters
        // following.
        setTerminatorSuffix(scanForSuffix(buf, 3));
    }
}
/**
 * Finds the first segment terminator at or after the given index and collects the
 * suffix characters that follow it.
 * <p>
 * Only the two positions immediately after the terminator are considered; each one
 * that lies inside the buffer and is not a letter is added to the suffix.
 *
 * @param buffer characters to scan
 * @param index  position at which to begin looking for the terminator
 * @return the suffix characters found, or an empty string if there are none or the
 *         terminator does not occur
 */
protected String scanForSuffix(char[] buffer, int index) {
    // Locate the first terminator.
    int terminatorAt = index;
    while (terminatorAt < buffer.length && buffer[terminatorAt] != getTerminator()) {
        terminatorAt++;
    }

    StringBuilder suffix = new StringBuilder();
    if (terminatorAt < buffer.length) {
        for (int k = terminatorAt + 1; k <= terminatorAt + 2; k++) {
            if (k < buffer.length && !Character.isLetter(buffer[k])) {
                suffix.append(buffer[k]);
            }
        }
    }
    return suffix.toString();
}
/**
 * Reports whether a UNA segment was seen at the beginning of the input.
 *
 * @return true if recognizeBeginning encountered a UNA segment
 */
public boolean isUNA() {
    return this.witnessedUNA;
}
}