Joining.java example

Explorer
streamex-master
- src
  - main
    - java
      - one
        util
        streamex
        AbstractStreamEx.java
        BaseStreamEx.java
        CharSpliterator.java
        CollapseSpliterator.java
        ConstSpliterator.java
        CrossSpliterator.java
        DistinctSpliterator.java
        DoubleCollector.java
        DoubleStreamEx.java
        EmitterSpliterator.java
        EntryStream.java
        HeadTailSpliterator.java
        IntCollector.java
        IntStreamEx.java
        Java9Specific.java
        Joining.java
        Limiter.java
        LongCollector.java
        LongStreamEx.java
        MergingCollector.java
        MoreCollectors.java
        OrderedCancellableSpliterator.java
        PairPermutationSpliterator.java
        PairSpliterator.java
        PermutationSpliterator.java
        PrefixOps.java
        PrependSpliterator.java
        RangeBasedSpliterator.java
        StreamContext.java
        StreamEx.java
        StreamExInternals.java
        TailConcatSpliterator.java
        TakeDrop.java
        TreeSpliterator.java
        UnknownSizeSpliterator.java
        UnorderedCancellableSpliterator.java
        VersionSpecific.java
        WithFirstSpliterator.java
        ZipSpliterator.java
        package-info.java
  - test
    - java
      - one
        util
        streamex
        AverageLongTest.java
        BaseStreamExTest.java
        CharSpliteratorTest.java
        CollapseSpliteratorTest.java
        ConstSpliteratorTest.java
        CrossSpliteratorTest.java
        CustomPoolTest.java
        DistinctSpliteratorTest.java
        DoubleCollectorTest.java
        DoubleStreamExTest.java
        EmitterTest.java
        EntryStreamTest.java
        IntCollectorTest.java
        IntStreamExTest.java
        InternalsTest.java
        JoiningTest.java
        LimiterTest.java
        LongCollectorTest.java
        LongStreamExTest.java
        MoreCollectorsTest.java
        OrderedCancellableSpliteratorTest.java
        PairPermutationSpliteratorTest.java
        PairSpliteratorTest.java
        PermutationSpliteratorTest.java
        PrependSpliteratorTest.java
        RangeBasedSpliteratorTest.java
        StreamExHeadTailTest.java
        StreamExTest.java
        TailConcatSpliteratorTest.java
        TestHelpers.java
        TreeSpliteratorTest.java
        UnknownSizeSpliteratorTest.java
        UnorderedCancellableSpliteratorTest.java
        WithFirstSpliteratorTest.java
        ZipSpliteratorTest.java
        api
        StreamExApiTest.java
/*
 * Copyright 2015, 2016 Tagir Valeev
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package one.util.streamex;

import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.function.BiConsumer;
import java.util.function.BinaryOperator;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.function.Supplier;
import java.util.stream.Collector;
import java.util.stream.Collectors;

import static one.util.streamex.StreamExInternals.*;

/**
 * An advanced implementation of joining {@link Collector}. This collector is
 * capable of joining the input {@code CharSequence} elements with given delimiter
 * optionally wrapping into given prefix and suffix and optionally limiting the
 * length of the resulting string (in Unicode code units, code points or
 * grapheme clusters) adding the specified ellipsis sequence. This collector
 * supersedes the standard JDK
 * {@link Collectors#joining(CharSequence, CharSequence, CharSequence)}
 * collectors family.
 * 
 * <p>
 * This collector is <a
 * href="package-summary.html#ShortCircuitReduction">short-circuiting</a> when
 * the string length is limited in either of ways. Otherwise it's not
 * short-circuiting.
 * 
 * <p>
 * Every specific collector represented by this class is immutable, so you can
 * share it. A number of methods are provided to create a new collector based on
 * this one.
 * 
 * <p>
 * To create {@code Joining} collector use {@link #with(CharSequence)} static
 * method and specify the delimiter. For further setup use specific instance
 * methods which return new {@code Joining} objects like this:
 * 
 * <pre>{@code
 * StreamEx.of(source).collect(Joining.with(", ").wrap("[", "]")
 *         .maxCodePoints(100).cutAtWord());
 * }</pre>
 * 
 * <p>
 * The intermediate accumulation type of this collector is an implementation
 * detail and is not exposed in the API. If you want to cast it to
 * {@code Collector} type, use {@code ?} as the accumulator type argument:
 * 
 * <pre>{@code
 * Collector<CharSequence, ?, String> joining = Joining.with(", ");
 * }</pre>
 * 
 * @author Tagir Valeev
 * @since 0.4.1
 */
public class Joining extends CancellableCollector<CharSequence, Joining.Accumulator, String> {
    /**
     * Mutable container used while collecting.
     */
    static final class Accumulator {
        /** Elements accepted so far, in the order they were added. */
        List<CharSequence> data = new ArrayList<>();
        /** Running total of UTF-16 units of the accepted elements and the delimiters between them. */
        int chars;
        /** Running length measured with the collector's length strategy; maintained only when a limit is set. */
        int count;
    }

    // Cut strategies: they select where copyCut() may truncate a string that
    // does not fit into the remaining length.
    private static final int CUT_ANYWHERE = 0;
    private static final int CUT_CODEPOINT = 1;
    private static final int CUT_GRAPHEME = 2;
    private static final int CUT_WORD = 3;
    private static final int CUT_BEFORE_DELIMITER = 4;
    private static final int CUT_AFTER_DELIMITER = 5;

    // Length strategies: they select the unit used by length() and copyCut()
    // (UTF-16 code units, code points or grapheme clusters).
    private static final int LENGTH_CHARS = 0;
    private static final int LENGTH_CODEPOINTS = 1;
    private static final int LENGTH_GRAPHEMES = 2;

    private final String delimiter, ellipsis, prefix, suffix;
    // maxLength == -1 means that no length limit was requested.
    private final int cutStrategy, lenStrategy, maxLength;
    // Computed lazily by init(): limit is maxLength minus the measured prefix
    // and suffix lengths, delimCount is the measured delimiter length.
    // delimCount == -1 marks "not computed yet".
    private int limit, delimCount = -1;

    /**
     * Creates a collector with every setting given explicitly. Instances are
     * obtained through {@link #with(CharSequence)} and the instance methods
     * which derive a new collector from an existing one.
     *
     * @param delimiter the text placed between elements
     * @param ellipsis the text appended when the result is cut
     * @param prefix the text placed before the joined elements
     * @param suffix the text placed after the joined elements
     * @param cutStrategy one of the {@code CUT_*} constants
     * @param lenStrategy one of the {@code LENGTH_*} constants
     * @param maxLength the length limit, or -1 if the length is not limited
     */
    private Joining(String delimiter, String ellipsis, String prefix, String suffix, int cutStrategy, int lenStrategy,
            int maxLength) {
        // Text settings.
        this.prefix = prefix;
        this.suffix = suffix;
        this.delimiter = delimiter;
        this.ellipsis = ellipsis;
        // Limiting settings.
        this.maxLength = maxLength;
        this.lenStrategy = lenStrategy;
        this.cutStrategy = cutStrategy;
    }

    /**
     * Lazily computes {@code limit} (the room left for the joined elements
     * once prefix and suffix are accounted for) and {@code delimCount} (the
     * measured delimiter length). Does nothing if they were computed already.
     */
    private void init() {
        if (delimCount != -1) {
            return;
        }
        int prefixCount = length(prefix);
        int suffixCount = length(suffix);
        limit = maxLength - prefixCount - suffixCount;
        delimCount = length(delimiter);
    }

    /**
     * Measures the given sequence in the unit selected by the length strategy
     * of this collector.
     *
     * @param s the sequence to measure
     * @return the number of UTF-16 code units, code points or grapheme
     *         clusters in {@code s}, depending on {@code lenStrategy}
     */
    private int length(CharSequence s) {
        if (lenStrategy == LENGTH_CHARS) {
            return s.length();
        }
        if (lenStrategy == LENGTH_CODEPOINTS) {
            return s instanceof String ? ((String) s).codePointCount(0, s.length()) : (int) s.codePoints().count();
        }
        if (lenStrategy == LENGTH_GRAPHEMES) {
            BreakIterator clusters = BreakIterator.getCharacterInstance();
            clusters.setText(s.toString());
            int total = 0;
            // Every boundary after the start closes exactly one grapheme cluster.
            while (clusters.next() != BreakIterator.DONE) {
                total++;
            }
            return total;
        }
        throw new InternalError();
    }

    /**
     * Copies the whole string into the buffer starting at the given position.
     *
     * @param buf the destination buffer
     * @param pos the index in {@code buf} where the first character goes
     * @param str the string to copy
     * @return the buffer position right after the copied characters
     */
    private static int copy(char[] buf, int pos, String str) {
        int len = str.length();
        str.getChars(0, len, buf, pos);
        return pos + len;
    }

    /**
     * Copies into the buffer as much of the string start as fits into
     * {@code limit} units of the collector's length strategy, then moves the
     * cut position back as demanded by the given cut strategy.
     *
     * @param buf the destination buffer
     * @param pos the index in {@code buf} where the first character goes
     * @param str the string to copy (possibly partially)
     * @param limit the number of length units still available; nothing is
     *        copied if it is not positive
     * @param cutStrategy one of the {@code CUT_*} constants
     * @return the buffer position right after the copied characters
     */
    private int copyCut(char[] buf, int pos, String str, int limit, int cutStrategy) {
        if (limit <= 0) {
            return pos;
        }
        final int total = str.length();
        // Step 1: find the char index where the length limit is reached.
        int cut = total;
        if (lenStrategy == LENGTH_CHARS) {
            if (limit < total) {
                cut = limit;
            }
        } else if (lenStrategy == LENGTH_CODEPOINTS) {
            if (limit < str.codePointCount(0, total)) {
                cut = str.offsetByCodePoints(0, limit);
            }
        } else if (lenStrategy == LENGTH_GRAPHEMES) {
            BreakIterator clusters = BreakIterator.getCharacterInstance();
            clusters.setText(str);
            int remaining = limit;
            for (int boundary = clusters.next(); boundary != BreakIterator.DONE; boundary = clusters.next()) {
                if (--remaining == 0) {
                    cut = boundary;
                    break;
                }
            }
        } else {
            throw new InternalError();
        }
        // Step 2: if the string does not fit, adjust the cut to an allowed position.
        if (cut < total) {
            if (cutStrategy == CUT_BEFORE_DELIMITER || cutStrategy == CUT_AFTER_DELIMITER) {
                // These strategies never keep a part of the string.
                cut = 0;
            } else if (cutStrategy == CUT_WORD || cutStrategy == CUT_GRAPHEME) {
                BreakIterator boundaries = cutStrategy == CUT_WORD ? BreakIterator.getWordInstance()
                        : BreakIterator.getCharacterInstance();
                boundaries.setText(str);
                // Last boundary which is not greater than the current cut position.
                cut = boundaries.preceding(cut + 1);
            } else if (cutStrategy == CUT_CODEPOINT) {
                // Do not leave a lone high surrogate at the end.
                if (Character.isHighSurrogate(str.charAt(cut - 1)) && Character.isLowSurrogate(str.charAt(cut))) {
                    cut--;
                }
            } else if (cutStrategy != CUT_ANYWHERE) {
                throw new InternalError();
            }
        }
        str.getChars(0, cut, buf, pos);
        return pos + cut;
    }

    /**
     * Builds the result when nothing has to be cut: prefix, all accumulated
     * elements separated by the delimiter, then suffix.
     *
     * @param acc the accumulator holding the collected elements
     * @return the joined string
     */
    private String finisherNoOverflow(Accumulator acc) {
        char[] out = new char[acc.chars + prefix.length() + suffix.length()];
        int pos = copy(out, 0, prefix);
        boolean first = true;
        for (CharSequence element : acc.data) {
            if (first) {
                first = false;
            } else {
                pos = copy(out, pos, delimiter);
            }
            pos = copy(out, pos, element.toString());
        }
        copy(out, pos, suffix);
        return new String(out);
    }

    /**
     * Creates a copy of this collector with the given length strategy and
     * length limit.
     *
     * @param lenStrategy one of the {@code LENGTH_*} constants
     * @param maxLength the maximal length of the result; zero is allowed
     * @return a new collector with the limit applied
     * @throws IllegalArgumentException if {@code maxLength} is negative
     */
    private Joining withLimit(int lenStrategy, int maxLength) {
        // Zero is accepted, so the message must not claim that the value has to be positive.
        if (maxLength < 0)
            throw new IllegalArgumentException(maxLength + ": must not be negative");
        return new Joining(delimiter, ellipsis, prefix, suffix, cutStrategy, lenStrategy, maxLength);
    }

    /**
     * Creates a copy of this collector which differs only in the cut strategy.
     *
     * @param cutStrategy one of the {@code CUT_*} constants
     * @return a new collector using the given cut strategy
     */
    private Joining withCut(int cutStrategy) {
        final int strategy = cutStrategy;
        return new Joining(this.delimiter, this.ellipsis, this.prefix, this.suffix, strategy, this.lenStrategy,
                this.maxLength);
    }

    /**
     * Returns a {@code Collector} that concatenates the input elements,
     * separated by the specified delimiter, in encounter order.
     * 
     * <p>
     * This collector is similar to {@link Collectors#joining(CharSequence)},
     * but can be further set up in a flexible way, for example, specifying the
     * maximal allowed length of the resulting {@code String}.
     *
     * @param delimiter the delimiter to be used between each element
     * @return A {@code Collector} which concatenates CharSequence elements,
     *         separated by the specified delimiter, in encounter order
     * @see Collectors#joining(CharSequence)
     */
    public static Joining with(CharSequence delimiter) {
        // Defaults: "..." ellipsis, empty prefix and suffix, cut at grapheme boundary, no length limit (-1).
        String separator = delimiter.toString();
        return new Joining(separator, "...", "", "", CUT_GRAPHEME, LENGTH_CHARS, -1);
    }

    /**
     * Returns a {@code Collector} which behaves like this collector, but
     * additionally wraps the result with the specified prefix and suffix.
     * 
     * <p>
     * The collector returned by
     * {@code Joining.with(delimiter).wrap(prefix, suffix)} is equivalent to
     * {@link Collectors#joining(CharSequence, CharSequence, CharSequence)}, but
     * can be further set up in a flexible way, for example, specifying the
     * maximal allowed length of the resulting {@code String}.
     * 
     * <p>
     * If length limit is specified for the collector, the prefix length and the
     * suffix length are also counted towards this limit. If the length of the
     * prefix and the suffix exceed the limit, the resulting collector will not
     * accumulate any elements and produce the same output. For example,
     * {@code stream.collect(Joining.with(",").wrap("prefix", "suffix").maxChars(9))}
     * will produce {@code "prefixsuf"} string regardless of the input stream
     * content.
     * 
     * <p>
     * You may wrap several times:
     * {@code Joining.with(",").wrap("[", "]").wrap("(", ")")} is equivalent to
     * {@code Joining.with(",").wrap("([", "])")}.
     * 
     * @param prefix the sequence of characters to be used at the beginning of
     *        the joined result
     * @param suffix the sequence of characters to be used at the end of the
     *        joined result
     * @return a new {@code Collector} which wraps the result with the specified
     *         prefix and suffix.
     */
    public Joining wrap(CharSequence prefix, CharSequence suffix) {
        // The new prefix goes outside of the current one; likewise for the suffix.
        String outerPrefix = prefix.toString().concat(this.prefix);
        String outerSuffix = this.suffix.concat(suffix.toString());
        return new Joining(delimiter, ellipsis, outerPrefix, outerSuffix, cutStrategy, lenStrategy, maxLength);
    }

    /**
     * Returns a {@code Collector} which behaves like this collector, but uses
     * the specified ellipsis {@code CharSequence} instead of default
     * {@code "..."} when the string limit (if specified) is reached.
     * 
     * @param ellipsis the sequence of characters to be used at the end of the
     *        joined result to designate that not all of the input elements are
     *        joined due to the specified string length restriction.
     * @return a new {@code Collector} which will use the specified ellipsis
     *         instead of current setting.
     */
    public Joining ellipsis(CharSequence ellipsis) {
        // Only the ellipsis changes; every other setting is carried over.
        String marker = ellipsis.toString();
        return new Joining(delimiter, marker, prefix, suffix, cutStrategy, lenStrategy, maxLength);
    }

    /**
     * Returns a {@code Collector} which behaves like this collector, but sets
     * the maximal length of the resulting string to the specified number of
     * UTF-16 characters (or Unicode code units). This setting overwrites any
     * limit previously set by {@code maxChars(int)},
     * {@link #maxCodePoints(int)} or {@link #maxGraphemes(int)} call.
     * 
     * <p>
     * The {@code String} produced by the resulting collector is guaranteed to
     * have {@link String#length() length} which does not exceed the specified
     * limit. An ellipsis sequence (by default {@code "..."}) is used to
     * designate whether the limit was reached. Use
     * {@link #ellipsis(CharSequence)} to set custom ellipsis sequence.
     * 
     * <p>
     * The collector returned by this method is <a
     * href="package-summary.html#ShortCircuitReduction">short-circuiting</a>:
     * it may not process all the input elements if the limit is reached.
     * 
     * @param limit the maximal number of UTF-16 characters in the resulting
     *        String.
     * @return a new {@code Collector} which will produce String no longer than
     *         given limit.
     */
    public Joining maxChars(int limit) {
        // The limit is measured in UTF-16 code units.
        return this.withLimit(LENGTH_CHARS, limit);
    }

    /**
     * Returns a {@code Collector} which behaves like this collector, but sets
     * the maximal number of Unicode code points of the resulting string. This
     * setting overwrites any limit previously set by {@link #maxChars(int)},
     * {@code maxCodePoints(int)} or {@link #maxGraphemes(int)} call.
     * 
     * <p>
     * The {@code String} produced by the resulting collector is guaranteed to
     * have no more code points than the specified limit. An ellipsis sequence
     * (by default {@code "..."}) is used to designate whether the limit was
     * reached. Use {@link #ellipsis(CharSequence)} to set custom ellipsis
     * sequence.
     * 
     * <p>
     * The collector returned by this method is <a
     * href="package-summary.html#ShortCircuitReduction">short-circuiting</a>:
     * it may not process all the input elements if the limit is reached.
     * 
     * @param limit the maximal number of code points in the resulting String.
     * @return a new {@code Collector} which will produce String no longer than
     *         given limit.
     */
    public Joining maxCodePoints(int limit) {
        // The limit is measured in Unicode code points.
        return this.withLimit(LENGTH_CODEPOINTS, limit);
    }

    /**
     * Returns a {@code Collector} which behaves like this collector, but sets
     * the maximal number of grapheme clusters. This setting overwrites any
     * limit previously set by {@link #maxChars(int)},
     * {@link #maxCodePoints(int)} or {@code maxGraphemes(int)} call.
     * 
     * <p>
     * The grapheme cluster is defined in <a
     * href="http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries"
     * >Unicode Text Segmentation</a> technical report. Basically, it counts
     * base character and the following combining characters as single object.
     * The {@code String} produced by the resulting collector is guaranteed to
     * have no more grapheme clusters than the specified limit. An ellipsis
     * sequence (by default {@code "..."}) is used to designate whether the
     * limit was reached. Use {@link #ellipsis(CharSequence)} to set custom
     * ellipsis sequence.
     * 
     * <p>
     * The collector returned by this method is <a
     * href="package-summary.html#ShortCircuitReduction">short-circuiting</a>:
     * it may not process all the input elements if the limit is reached.
     * 
     * @param limit the maximal number of grapheme clusters in the resulting
     *        String.
     * @return a new {@code Collector} which will produce String no longer than
     *         given limit.
     */
    public Joining maxGraphemes(int limit) {
        // The limit is measured in grapheme clusters.
        return this.withLimit(LENGTH_GRAPHEMES, limit);
    }

    /**
     * Returns a {@code Collector} which behaves like this collector, but cuts
     * the resulting string at any point when limit is reached.
     * 
     * <p>
     * The resulting collector will produce {@code String} which length is
     * exactly equal to the specified limit if the limit is reached. If used
     * with {@link #maxChars(int)}, the resulting string may be cut in the
     * middle of surrogate pair.
     * 
     * @return a new {@code Collector} which cuts the resulting string at any
     *         point when limit is reached.
     */
    public Joining cutAnywhere() {
        // No adjustment of the cut position at all.
        return this.withCut(CUT_ANYWHERE);
    }

    /**
     * Returns a {@code Collector} which behaves like this collector, but cuts
     * the resulting string between any code points when limit is reached.
     * 
     * <p>
     * The resulting collector will not split the surrogate pair when used with
     * {@link #maxChars(int)} or {@link #maxCodePoints(int)}. However it may
     * remove the combining character which may result in incorrect rendering of
     * the last displayed grapheme.
     * 
     * @return a new {@code Collector} which cuts the resulting string between
     *         code points.
     */
    public Joining cutAtCodePoint() {
        // The cut position is moved so that a surrogate pair is not split.
        return this.withCut(CUT_CODEPOINT);
    }

    /**
     * Returns a {@code Collector} which behaves like this collector, but cuts
     * the resulting string at grapheme cluster boundary when limit is reached.
     * This is the default behavior.
     * 
     * <p>
     * The grapheme cluster is defined in <a
     * href="http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries"
     * >Unicode Text Segmentation</a> technical report. Thus the resulting
     * collector will not split the surrogate pair and will preserve any
     * combining characters or remove them with the base character.
     * 
     * @return a new {@code Collector} which cuts the resulting string at
     *         grapheme cluster boundary.
     */
    public Joining cutAtGrapheme() {
        // The cut position is moved back to a grapheme cluster boundary.
        return this.withCut(CUT_GRAPHEME);
    }

    /**
     * Returns a {@code Collector} which behaves like this collector, but cuts
     * the resulting string at word boundary when limit is reached.
     * 
     * <p>
     * The beginning and end of every input stream element or delimiter is
     * always considered as word boundary, so the stream of
     * {@code "one", "two three"} collected with
     * {@code Joining.with("").maxChars(n).ellipsis("").cutAtWord()} may produce
     * the following strings depending on {@code n}:
     * 
     * <pre>{@code
     * ""
     * "one"
     * "onetwo"
     * "onetwo "
     * "onetwo three"
     * }</pre>
     * 
     * @return a new {@code Collector} which cuts the resulting string at word
     *         boundary.
     */
    public Joining cutAtWord() {
        // The cut position is moved back to a word boundary.
        return this.withCut(CUT_WORD);
    }

    /**
     * Returns a {@code Collector} which behaves like this collector, but cuts
     * the resulting string before the delimiter when limit is reached.
     * 
     * @return a new {@code Collector} which cuts the resulting string before
     *         the delimiter.
     */
    public Joining cutBeforeDelimiter() {
        // Elements and delimiters are never kept partially with this strategy.
        return this.withCut(CUT_BEFORE_DELIMITER);
    }

    /**
     * Returns a {@code Collector} which behaves like this collector, but cuts
     * the resulting string after the delimiter when limit is reached.
     * 
     * @return a new {@code Collector} which cuts the resulting string after
     *         the delimiter.
     */
    public Joining cutAfterDelimiter() {
        // Elements and delimiters are never kept partially with this strategy.
        return this.withCut(CUT_AFTER_DELIMITER);
    }

    @Override
    public Supplier<Accumulator> supplier() {
        // Every container starts empty with zeroed counters.
        return () -> new Accumulator();
    }

    @Override
    public BiConsumer<Accumulator, CharSequence> accumulator() {
        if (maxLength != -1) {
            init();
            // Limited mode: elements are accepted until the measured length
            // exceeds the limit; everything after that is ignored.
            return (acc, str) -> {
                if (acc.count > limit) {
                    return;
                }
                boolean needsDelimiter = !acc.data.isEmpty();
                if (needsDelimiter) {
                    acc.chars += delimiter.length();
                    acc.count += delimCount;
                }
                acc.chars += str.length();
                acc.count += length(str);
                acc.data.add(str);
            };
        }
        // Unlimited mode: only the UTF-16 size of the future result is tracked.
        return (acc, str) -> {
            boolean needsDelimiter = !acc.data.isEmpty();
            if (needsDelimiter) {
                acc.chars += delimiter.length();
            }
            acc.chars += str.length();
            acc.data.add(str);
        };
    }

    @Override
    public BinaryOperator<Accumulator> combiner() {
        if (maxLength != -1) {
            init();
            BiConsumer<Accumulator, CharSequence> adder = accumulator();
            return (left, right) -> {
                if (left.data.isEmpty()) {
                    return right;
                }
                if (right.data.isEmpty()) {
                    return left;
                }
                int merged = left.count + right.count + delimCount;
                if (merged > limit) {
                    // Not everything fits: add the elements of the right part
                    // one by one until the limit is exceeded.
                    for (CharSequence element : right.data) {
                        if (left.count > limit) {
                            break;
                        }
                        adder.accept(left, element);
                    }
                    return left;
                }
                // Everything fits: append the right part as a whole.
                left.count = merged;
                left.chars += delimiter.length() + right.chars;
                left.data.addAll(right.data);
                return left;
            };
        }
        // Unlimited mode: append the right part, accounting for one extra delimiter.
        return (left, right) -> {
            if (left.data.isEmpty()) {
                return right;
            }
            if (right.data.isEmpty()) {
                return left;
            }
            left.chars += delimiter.length() + right.chars;
            left.data.addAll(right.data);
            return left;
        };
    }

    /**
     * Returns the function which converts the accumulator into the final
     * string.
     *
     * <p>
     * Without a length limit the result is prefix, the delimiter-separated
     * elements and suffix. With a limit which leaves no room for any element
     * the result is a constant string built from the (possibly cut) prefix
     * and suffix. Otherwise, if the accumulated length exceeds the limit, the
     * joined elements are cut according to the cut strategy and the ellipsis
     * is appended before the suffix.
     *
     * @return the finisher function
     */
    @Override
    public Function<Accumulator, String> finisher() {
        if (maxLength == -1) {
            return this::finisherNoOverflow;
        }
        init();
        if (limit <= 0) {
            // Prefix and suffix alone use up the whole limit: the result does
            // not depend on the input, so it is computed once.
            char[] buf = new char[prefix.length() + suffix.length()];
            int pos = copyCut(buf, 0, prefix, maxLength, cutStrategy);
            pos = copyCut(buf, pos, suffix, maxLength - length(prefix), cutStrategy);
            String result = new String(buf, 0, pos);
            return acc -> result;
        }
        return acc -> {
            if (acc.count <= limit)
                return finisherNoOverflow(acc);
            // The ellipsis is written in addition to the (cut) joined data, so
            // it needs its own room in the buffer. With code point or grapheme
            // limits the ellipsis may occupy more UTF-16 units than the data
            // it replaces, so acc.chars alone is not a safe upper bound.
            char[] buf = new char[acc.chars + prefix.length() + ellipsis.length() + suffix.length()];
            int size = acc.data.size();
            int pos = copy(buf, 0, prefix);
            int ellipsisCount = length(ellipsis);
            // Number of length units available for the elements and delimiters.
            int rest = limit - ellipsisCount;
            if (rest < 0) {
                // Even the ellipsis does not fit: emit as much of it as allowed.
                pos = copyCut(buf, pos, ellipsis, limit, CUT_ANYWHERE);
            } else {
                for (int i = 0; i < size; i++) {
                    String s = acc.data.get(i).toString();
                    int count = length(s);
                    if (i > 0) {
                        if (cutStrategy == CUT_BEFORE_DELIMITER && delimCount + count > rest) {
                            break;
                        }
                        if (delimCount > rest) {
                            pos = copyCut(buf, pos, delimiter, rest, cutStrategy);
                            break;
                        }
                        rest -= delimCount;
                        pos = copy(buf, pos, delimiter);
                    }
                    if (cutStrategy == CUT_AFTER_DELIMITER && delimCount + count > rest) {
                        break;
                    }
                    if (count > rest) {
                        pos = copyCut(buf, pos, s, rest, cutStrategy);
                        break;
                    }
                    pos = copy(buf, pos, s);
                    rest -= count;
                }
                pos = copy(buf, pos, ellipsis);
            }
            pos = copy(buf, pos, suffix);
            // Only the filled part of the buffer forms the result.
            return new String(buf, 0, pos);
        };
    }

    /**
     * Returns the characteristics of this collector.
     *
     * <p>
     * The collector is reported as {@code UNORDERED} only when a length limit
     * is set and the prefix and suffix already use it up, so the result does
     * not depend on the input elements. In every other case, including the
     * collector without a length limit, the empty set is returned.
     *
     * @return the set of collector characteristics
     */
    @Override
    public Set<Characteristics> characteristics() {
        // Without a limit init() would compute a negative "limit" from
        // maxLength == -1, which must not be mistaken for "no room for elements".
        if (maxLength == -1)
            return Collections.emptySet();
        init();
        if (limit <= 0)
            return Collections.singleton(Characteristics.UNORDERED);
        return Collections.emptySet();
    }

    @Override
    Predicate<Accumulator> finished() {
        // No limit: null is returned instead of a predicate.
        if (maxLength == -1) {
            return null;
        }
        init();
        // With no room for elements every accumulator is finished from the
        // start; otherwise it is finished once the limit is exceeded.
        return limit <= 0 ? acc -> true : acc -> acc.count > limit;
    }
}