/*license*\
XBN-Java: Copyright (C) 2014, Jeff Epstein (aliteralmind __DASH__ github __AT__ yahoo __DOT__ com)
This software is dual-licensed under the:
- Lesser General Public License (LGPL) version 3.0 or, at your option, any later version;
- Apache Software License (ASL) version 2.0.
Either license may be applied at your discretion. More information may be found at
- http://en.wikipedia.org/wiki/Multi-licensing.
The text of both licenses is available in the root directory of this project, under the names "LICENSE_lgpl-3.0.txt" and "LICENSE_asl-2.0.txt". The latest copies may be downloaded at:
- LGPL 3.0: https://www.gnu.org/licenses/lgpl-3.0.txt
- ASL 2.0: http://www.apache.org/licenses/LICENSE-2.0.txt
\*license*/
package com.github.xbn.regexutil;
import com.github.xbn.regexutil.z.RegexGroupExtractor_Fieldable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import com.github.xbn.io.IOUtil;
import com.github.xbn.io.RTIOException;
import com.github.xbn.lang.CrashIfObject;
import com.github.xbn.lang.Copyable;
import com.github.xbn.util.BitBinaryUtil;
import com.github.xbn.util.itr.AbstractIterator;
/**
<p>Iterates through a search string, placing all capture-groups for each match into a list. This is intended for capture groups that are mutually-exclusive (do not overlap).</p>
{@.codelet.and.out com.github.xbn.examples.regexutil.RegexGroupExtractorXmpl%eliminateCommentBlocksAndPackageDecl()}
<A NAME="cfg"></a><h3>Builder Configuration: {@link com.github.xbn.regexutil.z.RegexGroupExtractor_Cfg RegexGroupExtractor_Cfg}</h3>
<p><ul>
<li><b>Using:</b> <code>{@link com.github.xbn.regexutil.z.RegexGroupExtractor_CfgForNeeder#useMatches() useMatches}()</code>, <code>{@link com.github.xbn.regexutil.z.RegexGroupExtractor_CfgForNeeder#useFind() useFind}()</code>, <code>{@link com.github.xbn.regexutil.z.RegexGroupExtractor_CfgForNeeder#useLookingAt() useLookingAt}()</code></li>
<li><b>Pattern:</b> <code>{@link com.github.xbn.regexutil.z.RegexGroupExtractor_CfgForNeeder#pattern(String) pattern}(s)</code>, <code>{@link com.github.xbn.regexutil.z.RegexGroupExtractor_CfgForNeeder#pattern(String, int) pattern}(s,i)</code>, <code>{@link com.github.xbn.regexutil.z.RegexGroupExtractor_CfgForNeeder#pattern(Pattern) pattern}(p)</code></li>
<li><b>Other:</b> <code>{@link com.github.xbn.regexutil.z.RegexGroupExtractor_CfgForNeeder#requiredGroups(int) requiredGroups}(i)</code>, <code>{@link com.github.xbn.regexutil.z.RegexGroupExtractor_CfgForNeeder#chainID(boolean, Object) chainID}(b,o)</code></li>
</ul></p>
* @since 0.1.0
* @author Copyright (C) 2014, Jeff Epstein ({@code aliteralmind __DASH__ github __AT__ yahoo __DOT__ com}), dual-licensed under the LGPL (version 3.0 or later) or the ASL (version 2.0). See source code for details. <a href="http://xbnjava.aliteralmind.com">{@code http://xbnjava.aliteralmind.com}</a>, <a href="https://github.com/aliteralmind/xbnjava">{@code https://github.com/aliteralmind/xbnjava}</a>
**/
public class RegexGroupExtractor extends AbstractIterator<List<String>> implements Copyable, PatternHaser {
    //state: immutable
    /** Required group count: {@code -1} (at least one group) or an exact count greater than zero. */
    private final int iRqdGrps;
    /** Holds the pattern and the matcher-uses setting, and records match state. */
    private final SimplePatternHaser sph;
    /** Single reusable matcher. It is {@code reset} for every search and is never replaced. */
    private final Matcher m;
    //state: mutable
    /** Groups of the pending (not yet returned) match, or {@code null} when there is none. */
    private List<String> alGroups = null;

    //constructors...START
    /**
    <p>Create a new instance with a pattern <i>that contains groups</i>.</p>
     * @param fieldable May not be <code>null</code>.
     * @exception IllegalArgumentException If <code>fieldable.getRequiredGroups()</code> is neither {@code -1} nor greater than zero, or if the pattern's flags contain {@code Pattern.LITERAL}.
     * @see #RegexGroupExtractor(RegexGroupExtractor) this(rgx)
     * @see #RegexGroupExtractor(RegexGroupExtractor, String) this(rgx,s)
     * @see #RegexGroupExtractor(RegexGroupExtractor, String, int) this(rgx,s,i)
     * @see #RegexGroupExtractor(RegexGroupExtractor, Pattern) this(rgx,p)
     */
    public RegexGroupExtractor(RegexGroupExtractor_Fieldable fieldable) {
        iRqdGrps = fieldable.getRequiredGroups();
        if(iRqdGrps < 1 && iRqdGrps != -1) {
            throw new IllegalArgumentException("fieldable.getRequiredGroups() (" + iRqdGrps + ") must be -1 or greater than zero.");
        }

        sph = (new SimplePatternHaser()).pattern(fieldable.getPattern(), "fieldable.getPattern()").
            matcherUses(fieldable.getMatcherUses(), "fieldable.getMatcherUses()");
        getMatcherUses().crashIfForbiddenValue(MatcherUses.CUSTOM, "fieldable.getMatcherUses()", null);

        //Search the empty-string, so the matcher exists up front and can be
        //reused (reset) by every call to search(s).
        m = getPattern().matcher("");

        if(BitBinaryUtil.doesIntHaveBit(getPattern().flags(), Pattern.LITERAL)) {
            throw new IllegalArgumentException("fieldable.getPattern() contains Pattern.LITERAL.");
        }
    }
    /**
    <p>Create a new instance as a duplicate of another, with a new regex.</p>
     * <p>Equal to
    <br/> &nbsp; &nbsp; <code>{@link #RegexGroupExtractor(RegexGroupExtractor, Pattern) this}(to_copy, {@link NewPatternFor}.{@link NewPatternFor#regex(String, String) regex}(regex_withGroups))</code></p>
     * @param to_copy May not be <code>null</code>.
     * @param regex_withGroups May not be <code>null</code>.
     */
    public RegexGroupExtractor(RegexGroupExtractor to_copy, String regex_withGroups) {
        this(to_copy, NewPatternFor.regex(regex_withGroups, "regex_withGroups"));
    }
    /**
    <p>Create a new instance as a duplicate of another, with a new regex and flags.</p>
     * <p>Equal to
    <br/> &nbsp; &nbsp; <code>{@link #RegexGroupExtractor(RegexGroupExtractor, Pattern) this}(to_copy, {@link NewPatternFor}.regex(regex_withGroups, bit_flags, "regex_withGroups"))</code></p>
     * @param to_copy May not be <code>null</code>.
     * @param regex_withGroups May not be <code>null</code>.
     * @param bit_flags Passed through unchanged to {@code NewPatternFor.regex}.
     */
    public RegexGroupExtractor(RegexGroupExtractor to_copy, String regex_withGroups, int bit_flags) {
        this(to_copy, NewPatternFor.regex(regex_withGroups, bit_flags, "regex_withGroups"));
    }
    /**
    <p>Create a new instance as a duplicate of another.</p>
     * <p>Equal to
    <br/> &nbsp; &nbsp; <code>{@link #RegexGroupExtractor(RegexGroupExtractor, Pattern) this}(to_copy, {@link RegexUtil}.{@link RegexUtil#getPatternCopy(PatternHaser, String) getPatternCopy}(to_copy, ...))</code></p>
     * @param to_copy May not be <code>null</code>.
     */
    public RegexGroupExtractor(RegexGroupExtractor to_copy) {
        this(to_copy, RegexUtil.getPatternCopy(to_copy, "to_copy"));
    }
    /**
    <p>Create a new instance as a duplicate of another, with a new pattern. The new instance starts with no search in progress.</p>
     * @param to_copy May not be <code>null</code>.
     * @param ptrn_withGroups If <code>null</code>, the <a href="#getPattern()">pattern</a> in <code>to_copy</code> is duplicated. If non-{@code null}, this is expected to have at least one group. If <code>to_copy.{@link #getRequiredGroupCount() getRequiredGroupCount}()</code> is not {@code -1}, this must contain exactly that number of groups. Get with {@link #getPattern() getPattern}{@code ()}.
     * @see #RegexGroupExtractor(RegexGroupExtractor) this(rgx)
     */
    public RegexGroupExtractor(RegexGroupExtractor to_copy, Pattern ptrn_withGroups) {
        sph = new SimplePatternHaser(to_copy, ptrn_withGroups);
        iRqdGrps = to_copy.getRequiredGroupCount();
        m = getPattern().matcher("");
    }
    //constructors...END
    //getters...START
    /**
    <p>The pattern being searched with.</p>
     * @return The pattern held by the internal {@code SimplePatternHaser}.
     */
    public Pattern getPattern() {
        return sph.getPattern();
    }
    /**
     * @return The matched index, as reported by the internal {@code SimplePatternHaser}.
     */
    public int getMatchedIndex() {
        return sph.getMatchedIndex();
    }
    /**
     * @return The match count, as reported by the internal {@code SimplePatternHaser}.
     */
    public int getMatchCount() {
        return sph.getMatchCount();
    }
    /**
     * @return The just-matched flag, as reported by the internal {@code SimplePatternHaser}.
     */
    public boolean wasJustMatched() {
        return sph.wasJustMatched();
    }
    /**
    <p>Which <code>{@link java.util.regex.Matcher}</code> function ({@link java.util.regex.Matcher#matches() matches}{@code ()}, {@link java.util.regex.Matcher#find() find}{@code ()} or {@link java.util.regex.Matcher#lookingAt() lookingAt}{@code ()}) does {@link #search(String) search}{@code (s)} use for its match attempt?</p>
     * @return The {@link MatcherUses} held by the internal {@code SimplePatternHaser}.
     * @see com.github.xbn.regexutil.z.RegexGroupExtractor_CfgForNeeder#useMatches() Cfg.useMatches()
     * @see com.github.xbn.regexutil.z.RegexGroupExtractor_CfgForNeeder#useFind() Cfg.useFind()
     * @see com.github.xbn.regexutil.z.RegexGroupExtractor_CfgForNeeder#useLookingAt() Cfg.useLookingAt()
     */
    public MatcherUses getMatcherUses() {
        return sph.getMatcherUses();
    }
    /**
    <p>How many groups are required in the pattern?.</p>
     * @return <ul>
    <li>{@code -1}: At least one group is required.</li>
    <li>A number greater than zero: That exact number of groups is required.</li>
    </ul><b>Set by:</b> <code>{@link com.github.xbn.regexutil.z.RegexGroupExtractor_CfgForNeeder Cfg}.{@link com.github.xbn.regexutil.z.RegexGroupExtractor_CfgForNeeder#requiredGroups(int) requiredGroups}(i)</code>
     */
    public int getRequiredGroupCount() {
        return iRqdGrps;
    }
    //getters...END
    /**
    <p>Set the string to be searched and attempt the first match, using the function selected by {@link #getMatcherUses() getMatcherUses}{@code ()}. Groups from a previous search that were not yet retrieved are discarded. When the attempt succeeds, {@link #hasNext() hasNext}{@code ()} becomes {@code true} and the groups are retrieved with {@link #next() next}{@code ()}.</p>
     * @param to_search May not be <code>null</code>.
     * @return <i>{@code this}</i>
     * @exception IllegalArgumentException If the attempt succeeds, but the pattern has no capture groups or not the {@link #getRequiredGroupCount() required} number of them.
     * @exception IllegalStateException If {@code getMatcherUses()} is none of matches, find or looking-at.
     */
    public RegexGroupExtractor search(String to_search) {
        try {
            m.reset(to_search);
        } catch(RuntimeException rx) {
            throw CrashIfObject.nullOrReturnCause(to_search, "to_search", null, rx);
        }

        //Assume no match. Dropping any pending groups lets this object be
        //re-searched without first draining the previous results.
        sph.declareNotMatched();
        alGroups = null;

        MatcherUses uses = getMatcherUses();
        boolean isMatched;
        if(uses.isMatches()) {
            isMatched = m.matches();
        } else if(uses.isFind()) {
            isMatched = m.find();
        } else if(uses.isLookingAt()) {
            isMatched = m.lookingAt();
        } else {
            throw new IllegalStateException("getMatcherUses() (" + uses + ") is not matches, find, or looking-at.");
        }

        if(isMatched) {
            //Same bookkeeping for all three functions. The matcher itself is
            //kept: next() and later searches depend on it.
            sph.matchedIndex(m.start());
            addGroupsToNewList();
        }
        return this;
    }
    /**
    <p>Store every capture group of the matcher's current match as the pending result.</p>
    <p>The list is validated before it is stored, so on failure no partial state is left behind and the matcher is not advanced.</p>
     * @exception IllegalStateException If a pending result already exists.
     * @exception IllegalArgumentException If the pattern has no capture groups, or not the {@link #getRequiredGroupCount() required} number of them.
     */
    private void addGroupsToNewList() {
        if(alGroups != null) {
            throw new IllegalStateException("(alGroups != null)");
        }

        final int groupCount = m.groupCount();
        if(groupCount == 0) {
            throw new IllegalArgumentException("Pattern has no capture groups.");
        }

        List<String> groups = new ArrayList<String>(groupCount);
        for(int i = 1; i <= groupCount; i++) {
            groups.add(m.group(i));
        }

        final int requiredCount = getRequiredGroupCount();
        if(requiredCount != -1 && groupCount != requiredCount) {
            //Join the local list directly. Going through nextAsJoined(s) would
            //consume the result and advance the matcher while building the message.
            throw new IllegalArgumentException("Actual group count=" + groupCount + ", getRequiredGroupCount()=" + requiredCount + ", All-groups:[" + StringUtils.join(groups, " // ") + "]");
        }

        alGroups = groups;
    }
    /**
    <p>Is there a match whose groups have not yet been retrieved?.</p>
     * @return {@code true} If a call to {@link #next() next}{@code ()} will succeed.
     */
    public boolean hasNext() {
        return (alGroups != null);
    }
    /**
    <p>Returns all groups in the next iteration concatenated into a single string--for debugging and testing.</p>
     * @return <code>{@link org.apache.commons.lang3.StringUtils StringUtils}.{@link org.apache.commons.lang3.StringUtils#join(Iterable, String) join}({@link #next() next}(), separator)</code>
     */
    public String nextAsJoined(String separator) {
        return StringUtils.join(next(), separator);
    }
    /**
    <p>Append the next set of groups, joined into a single string, and followed by a new-line--for debugging and testing.</p>
     * @return <code>{@link #appendNextAsJoinedlns(int, Appendable, String) appendNextAsJoinedlns}(1, to_appendTo, separator)</code>
     */
    public Appendable appendNextAsJoinedln(Appendable to_appendTo, String separator) {
        return appendNextAsJoinedlns(1, to_appendTo, separator);
    }
    /**
    <p>Append the next set of groups, joined into a single string and followed by some new-lines--for debugging and testing.</p>
    <p>This<ol>
    <li>Calls
    <br/> &nbsp; &nbsp; <code>to_appendTo.append({@link #nextAsJoined(String) nextAsJoined}(separator))</code></li>
    <li><i><b>Returns</b></i>
    <br/> &nbsp; &nbsp; <code>{@link com.github.xbn.io.IOUtil IOUtil}.{@link com.github.xbn.io.IOUtil#appendNewLinesX(int, Appendable) appendNewLinesX}(new_lineCount, to_appendTo)</code></li>
    </ol></p>
     * @exception RTIOException If a {@code java.io.IOException} is thrown for any reason.
     */
    public Appendable appendNextAsJoinedlns(int new_lineCount, Appendable to_appendTo, String separator) {
        try {
            to_appendTo.append(nextAsJoined(separator));
            return IOUtil.appendNewLinesX(new_lineCount, to_appendTo);
        } catch(IOException iox) {
            throw new RTIOException("appendNextAsJoinedlns", iox);
        }
    }
    /**
    <p>Get the list containing all groups in the next iteration, and look ahead for the match after it.</p>
     * @return The groups of the pending match, in group order (group 1 first).
     * @see #nextAsJoined(String) nextAsJoined(s)
     * @exception NoSuchElementException If {@link #hasNext() hasNext}{@code ()} is {@code false}.
     */
    public List<String> next() {
        if(!hasNext()) {
            throw new NoSuchElementException("hasNext() is false. Must search(s).");
        }

        List<String> current = alGroups;
        alGroups = null;

        //A whole-string match is the only possible result in matches() mode.
        //Calling find() after it could still succeed on an empty match at the
        //end of the input and yield a bogus extra iteration.
        //NOTE(review): looking-at mode still continues with find(), as before;
        //confirm that is intended. Also, sph's match state is only updated by
        //search(s), not for the matches found here -- confirm.
        if(!getMatcherUses().isMatches() && m.find()) {
            addGroupsToNewList();
        }
        return current;
    }
    /**
    <p>Duplicate this <code>RegexGroupExtractor</code>, where all internal fields are duplicated, and the search is started anew.</p>
     * @return <code>(new {@link #RegexGroupExtractor(RegexGroupExtractor) RegexGroupExtractor}(this))</code>
     */
    public RegexGroupExtractor getObjectCopy() {
        return (new RegexGroupExtractor(this));
    }
}