/* * Copyright 2016 Christoph Böhme * * Licensed under the Apache License, Version 2.0 the "License"; * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.culturegraph.mf.biblio.iso2709; import static org.culturegraph.mf.biblio.iso2709.Iso2709Constants.FIELD_SEPARATOR; import static org.culturegraph.mf.biblio.iso2709.Iso2709Constants.IDENTIFIER_MARKER; import static org.culturegraph.mf.biblio.iso2709.Iso2709Constants.MIN_BASE_ADDRESS; import static org.culturegraph.mf.biblio.iso2709.Iso2709Constants.MIN_RECORD_LENGTH; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import org.culturegraph.mf.commons.Require; import org.culturegraph.mf.framework.FormatException; /** * Reads a record in ISO 2709:2008 format from a byte array. * * @author Christoph Böhme */ public final class Record { private static final int RECORD_ID_MISSING = -1; private static final char[] EMPTY_IDENTIFIER = new char[0]; private static final byte[] DATA_SEPARATORS = { FIELD_SEPARATOR, IDENTIFIER_MARKER }; private final Iso646ByteBuffer buffer; private final Label label; private final DirectoryEntry directoryEntry; private final int baseAddress; private final int indicatorLength; private final int identifierLength; private final int recordIdFieldStart; private Charset charset = StandardCharsets.UTF_8; private FieldHandler fieldHandler; /** * Creates an instance of {@code Record} which provides access to the record * stored in the array passed as argument. * * @param recordData a byte array containing a record in ISO 2709:2008 format. */ public Record(final byte[] recordData) { Require.notNull(recordData); checkRecordDataLength(recordData); buffer = new Iso646ByteBuffer(recordData); label = new Label(buffer); baseAddress = label.getBaseAddress(); checkBaseAddress(); directoryEntry = new DirectoryEntry(buffer, label.getRecordFormat(), baseAddress); indicatorLength = label.getIndicatorLength(); identifierLength = label.getIdentifierLength(); recordIdFieldStart = findRecordIdFieldStart(); } private void checkRecordDataLength(final byte[] recordData) { if (recordData.length < MIN_RECORD_LENGTH) { throw new FormatException("record is too short"); } } private void checkBaseAddress() { if (baseAddress < MIN_BASE_ADDRESS || baseAddress > buffer.getLength() - 1) { throw new FormatException("base address is out of range"); } } private int findRecordIdFieldStart() { directoryEntry.rewind(); while (!directoryEntry.endOfDirectoryReached()) { if (directoryEntry.isRecordIdField()) { return directoryEntry.getFieldStart(); } directoryEntry.gotoNext(); } return RECORD_ID_MISSING; } public RecordFormat getRecordFormat() { return label.getRecordFormat(); } public char getRecordStatus() { return label.getRecordStatus(); } public char[] getImplCodes() { return label.getImplCodes(); } public char[] getSystemChars() { return label.getSystemChars(); } public char getReservedChar() { return label.getReservedChar(); } /** * Sets the character encoding used for reading the data values. The encoding * should be set before calling {@link #getRecordId()} or * {@link #processFields(FieldHandler)}. If it is called while fields are * being processed, the new encoding becomes effective on the next invocation * of {@link FieldHandler#data(char[], String)}. * * @param charset the character encoding of the data values */ public void setCharset(final Charset charset) { this.charset = Require.notNull(charset); } /** * Returns the current character encoding used for reading the data values. * The default encoding is UTF-8. * * @return the current character encoding. */ public Charset getCharset() { return charset; } /** * Returns the contents of the record identifier field. The record identifier * field has the tag <i>001</i>. It must be the first field in the record. * <p> * Defined in section 4.5.2 of the ISO 2709:2008 standard. * * @return a string which identifies the record or null if the record has * no record identifier. */ public String getRecordId() { if (recordIdFieldStart == RECORD_ID_MISSING) { return null; } final int dataStart = baseAddress + recordIdFieldStart; final int dataLength = buffer.distanceTo(DATA_SEPARATORS, dataStart); return buffer.stringAt(dataStart, dataLength, charset); } /** * Iterates through all fields in the record and calls the appropriate method * on the supplied {@link FieldHandler} instance. * * @param fieldHandler instance of field handler. Must not be null. */ public void processFields(final FieldHandler fieldHandler) { this.fieldHandler = Require.notNull(fieldHandler); boolean continuedField = false; directoryEntry.rewind(); while (!directoryEntry.endOfDirectoryReached()) { if (continuedField) { fieldHandler.additionalImplDefinedPart( directoryEntry.getImplDefinedPart()); } else { processField(); } continuedField = directoryEntry.isContinuedField(); directoryEntry.gotoNext(); } this.fieldHandler = null; } private void processField() { if (directoryEntry.isReferenceField()) { processReferenceField(); } else { processDataField(); } } private void processReferenceField() { final int fieldStart = baseAddress + directoryEntry.getFieldStart(); final int fieldLength = buffer.distanceTo(FIELD_SEPARATOR, fieldStart); final String value = buffer.stringAt(fieldStart, fieldLength, charset); fieldHandler.referenceField(directoryEntry.getTag(), directoryEntry.getImplDefinedPart(), value); } private void processDataField() { final int fieldStart = baseAddress + directoryEntry.getFieldStart(); final char[] indicators = buffer.charsAt(fieldStart, indicatorLength); fieldHandler.startDataField(directoryEntry.getTag(), directoryEntry.getImplDefinedPart(), indicators); processDataValues(fieldStart + indicatorLength); fieldHandler.endDataField(); } private void processDataValues(final int fromIndex) { int start = fromIndex; while (buffer.byteAt(start) != FIELD_SEPARATOR) { start = processDataValue(start); } } /** * Reads the field value starting at {@code fromIndex} and calls * {@link FieldHandler#data(char[], String)}. * * @param fromIndex index at which the identifier of the field value starts. * @return the index of the end of field marker. This is the position write * after the data field in the buffer. It can be used as the next starting * position when processing multiple subfields. */ private int processDataValue(final int fromIndex) { final char[] identifier = getIdentifier(fromIndex); final int dataStart = fromIndex + identifierLength; final int dataLength = buffer.distanceTo(DATA_SEPARATORS, dataStart); final String data = buffer.stringAt(dataStart, dataLength, charset); fieldHandler.data(identifier, data); return dataStart + dataLength; } private char[] getIdentifier(final int fromIndex) { if (identifierLength > 1) { return buffer.charsAt(fromIndex + 1, identifierLength - 1); } return EMPTY_IDENTIFIER; } }