/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.ignite.igfs.mapreduce.records;
import java.io.Externalizable;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;
import org.apache.ignite.IgniteException;
import org.apache.ignite.IgniteFileSystem;
import org.apache.ignite.igfs.IgfsInputStream;
import org.apache.ignite.igfs.mapreduce.IgfsFileRange;
import org.apache.ignite.igfs.mapreduce.IgfsRecordResolver;
import org.apache.ignite.internal.util.tostring.GridToStringExclude;
import org.apache.ignite.internal.util.typedef.F;
import org.apache.ignite.internal.util.typedef.internal.S;
import org.apache.ignite.internal.util.typedef.internal.U;
import org.apache.ignite.lang.IgniteBiTuple;
import org.jetbrains.annotations.Nullable;
/**
* Record resolver which adjusts records based on provided delimiters. Both start position and length are
* shifted to the right, based on delimiter positions.
* <p>
* Note that you can use {@link IgfsStringDelimiterRecordResolver} if your delimiter is a plain string.
*/
public class IgfsByteDelimiterRecordResolver implements IgfsRecordResolver, Externalizable {
    /** */
    private static final long serialVersionUID = 0L;

    /** Delimiters. */
    private byte[][] delims;

    /** Maximum delimiter length. */
    @GridToStringExclude
    private int maxDelimLen;

    /**
     * Empty constructor required for {@link Externalizable} support.
     */
    public IgfsByteDelimiterRecordResolver() {
        // No-op.
    }

    /**
     * Creates delimiter-based record resolver.
     *
     * @param delims Delimiters.
     * @throws IllegalArgumentException If no delimiters are provided or if any of them is {@code null} or empty.
     */
    public IgfsByteDelimiterRecordResolver(byte[]... delims) {
        if (delims == null || delims.length == 0)
            throw new IllegalArgumentException("Delimiters cannot be null or empty.");

        // Validate first, so that a resolver is never left with delimiters it cannot match.
        this.maxDelimLen = maxDelimiterLength(delims);
        this.delims = delims;
    }

    /**
     * {@inheritDoc}
     * <p>
     * The resulting range starts right after the first delimiter which ends inside the suggested range
     * (or at position {@code 0} if the suggested range starts at {@code 0}) and ends at the end of the first
     * delimiter which ends at or after the suggested end (or at the stream position reached when no such
     * delimiter exists). Returns {@code null} if no start delimiter ends inside the suggested range.
     */
    @Override public IgfsFileRange resolveRecords(IgniteFileSystem fs, IgfsInputStream stream,
        IgfsFileRange suggestedRecord) throws IgniteException, IOException {
        long suggestedStart = suggestedRecord.start();
        long suggestedEnd = suggestedStart + suggestedRecord.length();

        IgniteBiTuple<State, Delimiter> firstDelim = findFirstDelimiter(stream, suggestedStart);

        State state = firstDelim.getKey();
        Delimiter curDelim = firstDelim.getValue();

        // Skip delimiters which end before the suggested start.
        while (curDelim != null && curDelim.end < suggestedStart)
            curDelim = nextDelimiter(stream, state);

        boolean startDelimFound = curDelim != null && curDelim.end >= suggestedStart && curDelim.end < suggestedEnd;

        // A range starting at the very beginning of the file does not need a start delimiter.
        if (!startDelimFound && suggestedStart != 0)
            return null;

        long start = suggestedStart == 0 ? 0 : curDelim.end;

        if (curDelim == null || curDelim.end < suggestedEnd) {
            IgniteBiTuple<State, Delimiter> lastDelim = findFirstDelimiter(stream, suggestedEnd);

            // The stream has been re-positioned, so continue with the state produced by this second search:
            // it holds the delimiters already queued for the new position.
            state = lastDelim.getKey();
            curDelim = lastDelim.getValue();

            while (curDelim != null && curDelim.end < suggestedEnd)
                curDelim = nextDelimiter(stream, state);
        }

        // No closing delimiter means the record lasts up to the position the stream has been read to.
        long end = curDelim != null ? curDelim.end : stream.position();

        return new IgfsFileRange(suggestedRecord.path(), start, end - start);
    }

    /**
     * Validates delimiters and calculates maximum delimiter length.
     *
     * @param delims Delimiters.
     * @return Maximum delimiter length.
     * @throws IllegalArgumentException If any delimiter is {@code null} or empty.
     */
    private static int maxDelimiterLength(byte[][] delims) {
        int maxLen = 0;

        for (byte[] delim : delims) {
            if (delim == null)
                throw new IllegalArgumentException("Delimiter cannot be null.");

            // An empty delimiter can never be matched against a byte read from the stream.
            if (delim.length == 0)
                throw new IllegalArgumentException("Delimiter cannot be empty.");

            if (maxLen < delim.length)
                maxLen = delim.length;
        }

        return maxLen;
    }

    /**
     * Find first delimiter. In order to achieve this we have to rewind the stream until we find the delimiter
     * which stands at least [maxDelimLen] from the start search position or until we faced stream start.
     * Otherwise we cannot be sure that delimiter position is determined correctly.
     *
     * @param stream IGFS input stream.
     * @param startPos Start search position.
     * @return Tuple (never {@code null}) of the state used for the search and the first found delimiter;
     *      the delimiter is {@code null} if none was found.
     * @throws IOException In case of IO exception.
     */
    private IgniteBiTuple<State, Delimiter> findFirstDelimiter(IgfsInputStream stream, long startPos)
        throws IOException {
        State state;
        Delimiter delim;

        long curPos = Math.max(0, startPos - maxDelimLen);

        while (true) {
            stream.seek(curPos);

            // Every attempt starts from scratch: partial matches from another position are meaningless here.
            state = new State();

            delim = nextDelimiter(stream, state);

            if (curPos == 0 || delim == null || delim.start - curPos >= maxDelimLen)
                break;

            // Delimiter starts too close to the scan start, step back and retry.
            curPos = Math.max(0, curPos - maxDelimLen);
        }

        return F.t(state, delim);
    }

    /**
     * Resolve next delimiter. Reads the stream byte by byte, tracking partial matches of every delimiter in
     * the passed state, and returns the head of the queue of found delimiters as soon as at least
     * [maxDelimLen] bytes have been read past its end, or when the end of the stream is reached.
     *
     * @param is IGFS input stream.
     * @param state Current state (updated by this call).
     * @return Next delimiter or {@code null} if the end of the stream is reached and no delimiter is queued.
     * @throws IOException In case of exception.
     */
    @Nullable private Delimiter nextDelimiter(IgfsInputStream is, State state) throws IOException {
        assert is != null;
        assert state != null;

        Map<Integer, Integer> parts = state.parts;
        LinkedList<Delimiter> delimQueue = state.delims;

        int nextByte = is.read();

        while (nextByte != -1) {
            // Process read byte.
            for (int idx = 0; idx < delims.length; idx++) {
                byte[] delim = delims[idx];

                Integer prevMatched = parts.get(idx);

                int matched = advanceMatch(delim, prevMatched != null ? prevMatched : 0, nextByte);

                if (matched == delim.length) {
                    // Full delimiter is found.
                    parts.remove(idx);

                    long pos = is.position();

                    enqueueDelimiter(delimQueue, new Delimiter(pos - delim.length, pos));
                }
                else if (matched > 0)
                    parts.put(idx, matched);
                else
                    // Delimiter sequence is totally broken.
                    parts.remove(idx);
            }

            // Check whether we can be sure that the first delimiter will not change.
            if (!delimQueue.isEmpty()) {
                Delimiter first = delimQueue.getFirst();

                if (is.position() - first.end >= maxDelimLen)
                    return delimQueue.poll();
            }

            nextByte = is.read();
        }

        return delimQueue.poll();
    }

    /**
     * Calculates how many leading bytes of the delimiter are matched after the next byte is consumed.
     * If the byte does not extend the current match, falls back to the longest shorter prefix of the
     * delimiter which is still a suffix of the consumed data, so that matches overlapping the broken one
     * are not lost.
     *
     * @param delim Delimiter (not empty).
     * @param matched Number of leading delimiter bytes matched so far, less than delimiter length.
     * @param nextByte Byte read from the stream, in range {@code 0..255}.
     * @return New number of matched leading bytes, {@code 0} if nothing matches.
     */
    private static int advanceMatch(byte[] delim, int matched, int nextByte) {
        // Candidates go from the longest (current match extended by one byte) to the shortest.
        for (int len = matched + 1; len > 0; len--) {
            // Stream bytes are unsigned, delimiter bytes are signed, hence the mask.
            if ((delim[len - 1] & 0xFF) != nextByte)
                continue;

            // First (len - 1) delimiter bytes must be equal to the last (len - 1) bytes matched so far.
            int off = matched - (len - 1);

            boolean fits = true;

            for (int i = 0; i < len - 1; i++) {
                if (delim[i] != delim[off + i]) {
                    fits = false;

                    break;
                }
            }

            if (fits)
                return len;
        }

        return 0;
    }

    /**
     * Adds newly found delimiter to the queue. A delimiter which starts inside a previously found one is
     * ignored. A delimiter which starts at the same position as a previously found one replaces that
     * delimiter together with all delimiters queued after it.
     *
     * @param delimQueue Queue of found delimiters.
     * @param newDelim Newly found delimiter.
     */
    private static void enqueueDelimiter(LinkedList<Delimiter> delimQueue, Delimiter newDelim) {
        int replaceIdx = -1;

        // Read queue from the end looking for the "inner" delimiters.
        for (int i = delimQueue.size() - 1; i >= 0; i--) {
            Delimiter prevDelim = delimQueue.get(i);

            if (prevDelim.start < newDelim.start) {
                if (prevDelim.end > newDelim.start)
                    // New delimiter starts inside the previous one, ignore it.
                    return;
            }
            else if (prevDelim.start == newDelim.start) {
                // Ok, we found matching delimiter.
                replaceIdx = i;

                break;
            }
        }

        if (replaceIdx >= 0)
            // Clearing the view removes the tail safely; removeAll() with a sub-list of the same list
            // fails with ConcurrentModificationException once more than one element has to go.
            delimQueue.subList(replaceIdx, delimQueue.size()).clear();

        delimQueue.add(newDelim);
    }

    /** {@inheritDoc} */
    @Override public String toString() {
        return S.toString(IgfsByteDelimiterRecordResolver.class, this);
    }

    /** {@inheritDoc} */
    @Override public void writeExternal(ObjectOutput out) throws IOException {
        if (delims != null) {
            out.writeBoolean(true);

            out.writeInt(delims.length);

            for (byte[] delim : delims)
                U.writeByteArray(out, delim);
        }
        else
            out.writeBoolean(false);
    }

    /** {@inheritDoc} */
    @Override public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
        if (in.readBoolean()) {
            int len = in.readInt();

            delims = new byte[len][];

            for (int i = 0; i < len; i++)
                delims[i] = U.readByteArray(in);

            // Maximum length is not written to the stream, so it is recalculated here.
            maxDelimLen = maxDelimiterLength(delims);
        }
    }

    /**
     * Delimiter descriptor.
     */
    private static class Delimiter {
        /** Delimiter start position. */
        private final long start;

        /** Delimiter end position. */
        private final long end;

        /**
         * Constructor.
         *
         * @param start Delimiter start position.
         * @param end Delimiter end position.
         */
        private Delimiter(long start, long end) {
            assert start >= 0 && end >= 0 && start <= end;

            this.start = start;
            this.end = end;
        }
    }

    /**
     * Current resolution state.
     */
    private static class State {
        /** Partially resolved delimiters: delimiter index mapped to the number of its bytes matched so far. */
        private final Map<Integer, Integer> parts;

        /** Resolved delimiters which could potentially be merged. */
        private final LinkedList<Delimiter> delims;

        /**
         * Constructor.
         */
        private State() {
            parts = new HashMap<>();

            delims = new LinkedList<>();
        }
    }
}