package name.abuchen.portfolio.datatransfer.pdf;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.BiConsumer;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import name.abuchen.portfolio.Messages;
import name.abuchen.portfolio.datatransfer.Extractor.Item;
/* package */final class PDFParser
{
/**
 * Describes one kind of document. A document is recognised by a text
 * fragment it must contain (and, optionally, one it must not contain) and
 * is processed by handing its lines to every registered {@link Block}.
 */
/* package */static class DocumentType
{
    private final String mustInclude;
    private String mustExclude;
    private final List<Block> blocks = new ArrayList<>();
    private final Map<String, String> context = new HashMap<>();
    private final BiConsumer<Map<String, String>, String[]> contextProvider;

    /**
     * Creates a document type without a context provider.
     *
     * @param mustInclude
     *            text that has to occur in a document of this type
     */
    public DocumentType(String mustInclude)
    {
        this(mustInclude, null);
    }

    /**
     * Creates a document type.
     *
     * @param mustInclude
     *            text that has to occur in a document of this type
     * @param contextProvider
     *            callback that receives the context map and the document
     *            lines on every parse run; may be {@code null}
     */
    public DocumentType(String mustInclude, BiConsumer<Map<String, String>, String[]> contextProvider)
    {
        this.mustInclude = mustInclude;
        this.contextProvider = contextProvider;
    }

    /**
     * Checks whether the given text belongs to this document type.
     *
     * @param text
     *            full text of the document
     * @return {@code true} if the text contains the required fragment and
     *         does not contain the excluded fragment (if one is set)
     */
    public boolean matches(String text)
    {
        return text.contains(mustInclude) && (mustExclude == null || !text.contains(mustExclude));
    }

    /**
     * Sets a text fragment that disqualifies a document from matching.
     *
     * @param mustExclude
     *            text that must not occur, or {@code null} for no exclusion
     */
    public void setMustExclude(String mustExclude)
    {
        this.mustExclude = mustExclude;
    }

    /**
     * Registers a further block to run when a document is parsed.
     *
     * @param block
     *            block to append
     */
    public void addBlock(Block block)
    {
        blocks.add(block);
    }

    /**
     * @return the list of registered blocks (the internal list itself, not
     *         a copy)
     */
    public List<Block> getBlocks()
    {
        return blocks;
    }

    /**
     * Gets the current context for this parse run.
     *
     * @return current context map (the internal map itself, not a copy)
     */
    public Map<String, String> getCurrentContext()
    {
        return context;
    }

    /**
     * Splits the text into lines, rebuilds the context from scratch and
     * then lets every registered block process the lines in registration
     * order.
     *
     * @param filename
     *            name of the file the text was taken from
     * @param items
     *            list receiving the extracted items
     * @param text
     *            full text of the document
     */
    public void parse(String filename, List<Item> items, String text)
    {
        String[] lines = text.split("\\r?\\n"); //$NON-NLS-1$

        // the context never carries over from a previously parsed file
        context.clear();
        parseContext(context, filename, lines);

        for (Block current : blocks)
            current.parse(filename, items, lines);
    }

    /**
     * Fills the context for the current parse run. The default
     * implementation delegates to the context provider passed to the
     * constructor and does nothing if there is none; subclasses may
     * override this method to fill the context themselves.
     *
     * @param context
     *            context map
     * @param filename
     *            current filename
     * @param lines
     *            content lines of the file
     */
    protected void parseContext(Map<String, String> context, String filename, String[] lines)
    {
        if (contextProvider == null)
            return;

        contextProvider.accept(context, lines);
    }
}
/**
 * Cuts a document into consecutive line ranges. Every line that is fully
 * matched by the marker pattern starts a new range; each range is handed
 * to the configured {@link Transaction}.
 */
/* package */static class Block
{
    private Pattern marker;
    private Transaction<?> transaction;

    /**
     * @param marker
     *            regular expression that a line has to match completely in
     *            order to start a new block
     */
    public Block(String marker)
    {
        this.marker = Pattern.compile(marker);
    }

    /**
     * Sets the transaction that processes every block found.
     *
     * @param transaction
     *            transaction to run per block
     */
    public void set(Transaction<?> transaction)
    {
        this.transaction = transaction;
    }

    /**
     * Locates all marker lines and runs the transaction once per block. A
     * block reaches from its marker line up to the line before the next
     * marker line, the last block up to the final line of the document.
     *
     * @param filename
     *            name of the file being parsed
     * @param items
     *            list receiving the extracted items
     * @param lines
     *            content lines of the file
     */
    public void parse(String filename, List<Item> items, String[] lines)
    {
        // first pass: remember every line on which a block begins
        List<Integer> starts = new ArrayList<>();
        for (int lineNo = 0; lineNo < lines.length; lineNo++)
        {
            if (marker.matcher(lines[lineNo]).matches())
                starts.add(lineNo);
        }

        // second pass: derive the inclusive end of each block and parse it
        int count = starts.size();
        for (int index = 0; index < count; index++)
        {
            int first = starts.get(index);

            int last;
            if (index == count - 1)
                last = lines.length - 1;
            else
                last = starts.get(index + 1) - 1;

            transaction.parse(filename, items, lines, first, last);
        }
    }
}
/**
 * Creates one target object of type {@code T} per block, lets all
 * configured {@link Section}s populate it from the block's lines and
 * finally converts it into an {@link Item}.
 *
 * @param <T>
 *            type of the object that is populated by the sections
 */
/* package */static class Transaction<T>
{
    private Supplier<T> supplier;
    private Function<T, Item> wrapper;
    private final List<Section<T>> sections = new ArrayList<>();

    /**
     * Sets the factory that creates a fresh target object for every block.
     *
     * @param supplier
     *            factory for the target object
     * @return this transaction, for chaining
     */
    public Transaction<T> subject(Supplier<T> supplier)
    {
        this.supplier = supplier;
        return this;
    }

    /**
     * Adds a new section to this transaction.
     *
     * @param attributes
     *            names of the values the section has to extract
     * @return the newly created section
     */
    public Section<T> section(String... attributes)
    {
        Section<T> section = new Section<>(this, attributes);
        sections.add(section);
        return section;
    }

    /**
     * Sets the function that converts the populated target object into an
     * item.
     *
     * @param wrapper
     *            conversion function; a {@code null} result means that no
     *            item is added for the block
     * @return this transaction, for chaining
     */
    public Transaction<T> wrap(Function<T, Item> wrapper)
    {
        this.wrapper = wrapper;
        return this;
    }

    /**
     * Parses one block of lines: creates the target object, runs every
     * section in the order it was added and appends the wrapped result to
     * {@code items} unless the wrapper returned {@code null}.
     *
     * @param filename
     *            name of the file being parsed
     * @param items
     *            list receiving the extracted item
     * @param lines
     *            content lines of the file
     * @param lineNoStart
     *            index of the first line of the block
     * @param lineNoEnd
     *            index of the last line of the block (inclusive)
     * @throws IllegalArgumentException
     *             if no supplier or no wrapping function has been
     *             configured, or if a section fails to parse
     */
    public void parse(String filename, List<Item> items, String[] lines, int lineNoStart, int lineNoEnd)
    {
        // validate the configuration before doing any work so that a
        // half-configured transaction is reported consistently and not as
        // a NullPointerException or only after all sections have run
        if (supplier == null)
            throw new IllegalArgumentException("Subject supplier missing"); //$NON-NLS-1$
        if (wrapper == null)
            throw new IllegalArgumentException("Wrapping function missing"); //$NON-NLS-1$

        T target = supplier.get();

        for (Section<T> section : sections)
            section.parse(filename, items, lines, lineNoStart, lineNoEnd, target);

        Item item = wrapper.apply(target);
        if (item != null)
            items.add(item);
    }
}
/**
 * Extracts a set of named values from the lines of a block. A section
 * holds an ordered list of patterns; the lines are scanned top to bottom
 * and each pattern has to fully match a line after the line matched by
 * its predecessor. Values of named capturing groups whose names are listed
 * as attributes are collected and passed to the assignment function.
 *
 * @param <T>
 *            type of the object the extracted values are assigned to
 */
/* package */static class Section<T>
{
    private boolean isOptional = false;
    private Transaction<T> transaction;
    private String[] attributes;
    private List<Pattern> pattern = new ArrayList<>();
    private BiConsumer<T, Map<String, String>> assignment;

    /**
     * @param transaction
     *            owning transaction, returned by {@link #assign} to
     *            continue the fluent configuration
     * @param attributes
     *            names of the capturing groups this section has to extract
     */
    public Section(Transaction<T> transaction, String[] attributes)
    {
        this.transaction = transaction;
        this.attributes = attributes;
    }

    /**
     * Marks this section as optional: if not all patterns match, the
     * section is skipped silently instead of raising an error.
     *
     * @return this section, for chaining
     */
    public Section<T> optional()
    {
        this.isOptional = true;
        return this;
    }

    /**
     * Adds a pattern that is wrapped into {@code ^} and {@code $} before
     * it is compiled.
     *
     * @param string
     *            regular expression without line anchors
     * @return this section, for chaining
     */
    public Section<T> find(String string)
    {
        pattern.add(Pattern.compile("^" + string + "$")); //$NON-NLS-1$ //$NON-NLS-2$
        return this;
    }

    /**
     * Adds a pattern that is compiled exactly as given.
     *
     * @param regex
     *            regular expression
     * @return this section, for chaining
     */
    public Section<T> match(String regex)
    {
        pattern.add(Pattern.compile(regex));
        return this;
    }

    /**
     * Sets the function that transfers the extracted values to the target
     * object.
     *
     * @param assignment
     *            receives the target object and the map of extracted
     *            values keyed by attribute name
     * @return the owning transaction, for chaining
     */
    public Transaction<T> assign(BiConsumer<T, Map<String, String>> assignment)
    {
        this.assignment = assignment;
        return transaction;
    }

    /**
     * Runs the patterns against the given line range and, if all of them
     * matched and every attribute was captured, calls the assignment
     * function.
     *
     * @param filename
     *            name of the file being parsed (used in error messages)
     * @param items
     *            list of extracted items (not used by this method)
     * @param lines
     *            content lines of the file
     * @param lineNo
     *            index of the first line to inspect
     * @param lineNoEnd
     *            index of the last line to inspect (inclusive)
     * @param target
     *            object the extracted values are assigned to
     * @throws IllegalArgumentException
     *             if a mandatory section does not match all patterns, if
     *             the number of captured values differs from the number of
     *             attributes, or if no assignment function is configured
     */
    public void parse(String filename, List<Item> items, String[] lines, int lineNo, int lineNoEnd, T target)
    {
        Map<String, String> values = new HashMap<>();

        // patterns must match in order; the loop ends as soon as the last
        // pattern has matched. Bounding patternNo in the loop condition
        // also keeps a section without any pattern from failing with an
        // IndexOutOfBoundsException on pattern.get(0).
        int patternNo = 0;
        for (int ii = lineNo; ii <= lineNoEnd && patternNo < pattern.size(); ii++)
        {
            Pattern p = pattern.get(patternNo);
            Matcher m = p.matcher(lines[ii]);
            if (m.matches())
            {
                extractAttributes(values, p, m);
                patternNo++;
            }
        }

        if (patternNo < pattern.size())
        {
            // if section is optional, ignore if patterns do not match
            if (isOptional)
                return;

            throw new IllegalArgumentException(MessageFormat.format(Messages.MsgErrorNotAllPatternMatched,
                            patternNo, pattern.size(), pattern.toString(), filename));
        }

        if (values.size() != attributes.length)
            throw new IllegalArgumentException(MessageFormat.format(Messages.MsgErrorMissingValueMatches,
                            values.keySet().toString(), Arrays.toString(attributes), filename));

        if (assignment == null)
            throw new IllegalArgumentException("Assignment function missing"); //$NON-NLS-1$

        assignment.accept(target, values);
    }

    /**
     * Copies the values of all attribute groups declared by the given
     * pattern from the matcher into the value map. Groups that did not
     * take part in the match are skipped.
     */
    private void extractAttributes(Map<String, String> values, Pattern p, Matcher m)
    {
        String regex = p.pattern();

        for (String attribute : attributes)
        {
            // look for the named-group opener "(?<name>" rather than just
            // "<name>": the latter also occurs in literal text or in a
            // \k<name> back-reference, and Matcher.group(name) throws if
            // the pattern does not actually declare a group of that name
            if (regex.contains("(?<" + attribute + ">")) //$NON-NLS-1$ //$NON-NLS-2$
            {
                String v = m.group(attribute);
                if (v != null)
                    values.put(attribute, v);
            }
        }
    }
}
/**
 * Not meant to be instantiated; this class only groups its nested types.
 */
private PDFParser()
{
    // intentionally empty
}
}