package ecologylab.bigsemantics.metametadata.fieldparsers;
import java.util.regex.Pattern;
import ecologylab.serialization.ElementState;
import ecologylab.serialization.annotations.simpl_inherit;
import ecologylab.serialization.annotations.simpl_other_tags;
import ecologylab.serialization.annotations.simpl_scalar;
import ecologylab.serialization.annotations.simpl_tag;
/**
* The element allowing extracting information from a flat string.
*
* This is just the element used for holding information required by the extraction process.
*
* For the actual extraction functionality, see {@link FieldParser} and derivative classes.
*
* @author quyin
*
*/
@simpl_inherit
@simpl_tag("field_parser")
public class FieldParserElement extends ElementState
{
static int BAD_VALUE = Integer.MIN_VALUE;
/**
* The name of the parser, e.g. regex_find
*/
@simpl_scalar
private String name;
/**
* The regex used by the parser, if needed.
*
* For regex_find, this is the regex that will be matched (using find()). For regex_split and
* regex_split_and_find, this is the regex that delimits each element in the list.
*/
@simpl_scalar
@simpl_other_tags("regex_split")
private Pattern regex;
/**
* This field is only used with regex_split_and_find, in which case regex_split will be the
* delimiter and this will be the matching one.
*/
@simpl_scalar
private Pattern regexFind;
/**
* This should be set to true, if the field_parser is going to be applied to a collection of
* flat strings.
*
* When it is true, the xpath / tag name will be used to generate a set of input strings, which
* will then be used for each element in the collection respectively.
*
* When it is false, the xpath / tag name will be used to generate a single input string, which
* will then be used to create a set of values for each element in the collection.
*/
@simpl_scalar
private boolean forEachElement = false;
@simpl_scalar
private int beginIndex = BAD_VALUE;
@simpl_scalar
private int endIndex = BAD_VALUE;
@simpl_scalar
private boolean trim = true;
@simpl_scalar
private boolean normalize_text = true;
public FieldParserElement()
{
}
FieldParserElement(String name, Pattern regex)
{
this.name = name;
this.regex = regex;
}
public String getName()
{
return name;
}
public Pattern getRegex()
{
return regex;
}
public Pattern getRegexFind()
{
return regexFind;
}
public boolean isForEachElement()
{
return forEachElement;
}
public int getBeginIndex()
{
return beginIndex;
}
public int getEndIndex()
{
return endIndex;
}
public boolean isTrim()
{
return trim;
}
public boolean isNormalizeText()
{
return normalize_text;
}
public void setNormalizeText(boolean joinLines)
{
this.normalize_text = joinLines;
}
}