/** * Copyright Intellectual Reserve, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.gedcomx.util; import com.fasterxml.jackson.databind.ObjectMapper; import org.gedcomx.Gedcomx; import org.gedcomx.rt.json.GedcomJacksonModule; import java.io.BufferedInputStream; import java.io.ByteArrayOutputStream; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.util.zip.GZIPInputStream; /** * Class for iterating through the 'record' elements (GedcomX documents) in a RecordSet one at a time * from a stream (e.g., a gzipped byte array) without having to inflate all the records at once. * This reads JSON-formatted Records. * * User: Brent Hale * Date: 6/3/2015 */ public class JsonRecordSetIterator implements RecordSetIterator { private InputStream inputStream; private Gedcomx nextRecord; private Gedcomx metadata; private ObjectMapper objectMapper; private String id; private boolean noMoreRecords = false; /** * Constructor for a record iterator that takes a filename of a RecordSet file and iterates through its record elements. * @param filename - Filename to read a GedcomX RecordSet file from. * @throws IOException If there's an I/O problem. */ public JsonRecordSetIterator(String filename) throws IOException { this(new FileInputStream(filename), filename.toLowerCase().endsWith(".gz")); } public JsonRecordSetIterator(InputStream inputStream, boolean isGzipped) throws IOException { this(isGzipped ? new GZIPInputStream(inputStream) : inputStream); } /** * Constructor for a record iterator that takes an InputStream of a RecordSet file and * iterates through its record elements. This creates a BufferedInputStream on the * InputStream. * * @param inputStream - InputStream to read a GedcomX RecordSet file from. * @throws IOException If there's an I/O problem. */ public JsonRecordSetIterator(InputStream inputStream) throws IOException { this.inputStream = new BufferedInputStream(inputStream); objectMapper = GedcomJacksonModule.createObjectMapper(); // Read the beginning object brace { plus label: {"metadata": int character = inputStream.read(); assert character == '{'; // Read until you get to the "records": [ section of the stream noMoreRecords = false; readUntil(inputStream, "records"); // Read the opening array bracket [ readUntilChar(inputStream, '['); prepareNext(); } private void readUntil(InputStream inputStream, String untilLabel) throws IOException { // Read until we see the first label (meaning opening quote String name; while (! (name = getName(inputStream)).equals(untilLabel)) { if (name.equals(untilLabel)) { break; } // Otherwise look for some other objects. if (name.equals("metadata")) { readMetadata(inputStream); } else if (name.equals("id")) { id = getName(inputStream, false); } } } private void readMetadata(InputStream inputStream) throws IOException { // I have to do it this way since if I pass the inputStream to objectMapper.readValue() it leaves // the inputStream past the end of the actual metadata object. Then the next getName() is lost. byte[] object = getObjectAsBytes(inputStream); metadata = objectMapper.readValue(object, Gedcomx.class); } /** * Read in the current Json object from the stream. It will read the opening brace til the end brace * and return that in a string. If the first character is a comma then it will ignore it. */ private byte[] getObjectAsBytes(InputStream inputStream) throws IOException { int character; ByteArrayOutputStream bos = new ByteArrayOutputStream(); int openingBraces = 0; int closingBraces = 0; boolean firstTime = true; while ((character = inputStream.read()) >= 0) { if (character == ',') { if (firstTime) { firstTime = false; continue; // Ignore comma if it is the first read character. } } firstTime = false; bos.write(character); if (character == '{') { openingBraces++; } else if (character == '}') { closingBraces++; } if ((openingBraces > 0) && (openingBraces == closingBraces)) { break; } } return bos.toByteArray(); } /** * Read from the stream until it finds an opening quote. Then read the characters until the next quote mark * as the name. * This will also consume the trailing colon character (:). */ private String getName(InputStream inputStream) throws IOException { return getName(inputStream, true); } private String getName(InputStream inputStream, boolean consumeColon) throws IOException { StringBuilder name = new StringBuilder(); readUntilChar(inputStream, '"'); int character; while ((character = inputStream.read()) != '"') { name.append((char)character); } if (consumeColon) { readUntilChar(inputStream, ':'); } return name.toString(); } private void readUntilChar(InputStream inputStream, char c) throws IOException { int character; while ((character = inputStream.read()) != c) { if (character < 0) { close(); break; // End of file. } } } /** * Tell whether the RecordIterator has another GedcomX record to return. * @return true if there is another record to read; false otherwise. */ @Override synchronized public boolean hasNext() { return nextRecord != null; } /** * Prepare the next record to be retrieved. Sets 'nextRecord' to the parsed record, if any, or null * if there are no more. Consumes bytes from the xmlStreamReader. * This does not close the inputStream once there are no more records to read. The metadata * may be after the Records. */ synchronized private void prepareNext() throws IOException { // I have to do it this way since if I pass the inputStream to objectMapper.readValue() it leaves // the inputStream past the end of the actual record object. Then the next get is lost. if (noMoreRecords) { nextRecord = null; return; } byte[] object = getObjectAsBytes(inputStream); nextRecord = objectMapper.readValue(object, Gedcomx.class); // Need to read past the next comma separating records. // We also might see the end of the array bracket ]. int character; while ((character = inputStream.read()) != ',') { if (character == ']') { noMoreRecords = true; break; } } } @Override synchronized public Gedcomx next() { try { if (nextRecord == null) { return null; } Gedcomx record = nextRecord; prepareNext(); return record; } catch (Exception e) { throw new RuntimeException(e); } } /** * This should be read last as you cannot be sure of when the metadata will appear in the data stream. * * @return The Metadata document. */ synchronized public Gedcomx getMetadata() { if (metadata == null) { try { readUntil(inputStream, JsonRecordSetWriter.METADATA_STR); readMetadata(inputStream); } catch (IOException e) { // Do nothing. } } return metadata; } @Override public void remove() { throw new UnsupportedOperationException(); } /** * Close the input stream and accompanying reader if they are still open. * If you want to get the metadata and id of the RecordSet, then get them before you close(). */ public void close() { if (inputStream != null) { try { inputStream.close(); } catch (IOException e) { inputStream = null; } finally { inputStream = null; } } } public String getId() throws IOException { if (id == null) { readUntil(inputStream, JsonRecordSetWriter.ID_STR); id = getName(inputStream, false); } return id; } }