/*
* This file is part of ELKI:
* Environment for Developing KDD-Applications Supported by Index-Structures
*
* Copyright (C) 2017
* ELKI Development Team
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package de.lmu.ifi.dbs.elki.datasource.parser;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import de.lmu.ifi.dbs.elki.data.ExternalID;
import de.lmu.ifi.dbs.elki.data.LabelList;
import de.lmu.ifi.dbs.elki.data.spatial.Polygon;
import de.lmu.ifi.dbs.elki.data.spatial.PolygonsObject;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.utilities.io.ParseUtil;
/**
* Parser to load polygon data (2D and 3D only) from a simple format. One record
* per line, points separated by whitespace, the coordinates of a point
* separated by commas. Multiple polygon components can be separated using
* {@link #POLYGON_SEPARATOR}.
*
* Unparseable parts will be treated as labels.
*
* @author Erich Schubert
* @since 0.4.0
*
* @apiviz.has PolygonsObject
*/
public class SimplePolygonParser extends AbstractStreamingParser {
/**
* Class logger
*/
private static final Logging LOG = Logging.getLogger(SimplePolygonParser.class);
/**
* Pattern to catch coordinates
*/
public static final Pattern COORD = Pattern.compile("^(" + CSVReaderFormat.NUMBER_PATTERN + "),\\s*(" + CSVReaderFormat.NUMBER_PATTERN + ")(?:,\\s*(" + CSVReaderFormat.NUMBER_PATTERN + "))?$");
/**
* Polygon separator
*/
public static final String POLYGON_SEPARATOR = "--";
/**
* Event to report next.
*/
Event nextevent = null;
/**
* Constructor.
*
* @param format Input format
*/
public SimplePolygonParser(CSVReaderFormat format) {
super(format);
}
/**
* Metadata.
*/
protected BundleMeta meta = null;
/**
* Whether or not the data set has labels.
*/
protected boolean haslabels = false;
/**
* Current polygon.
*/
protected PolygonsObject curpoly = null;
/**
* Current labels.
*/
protected LabelList curlbl = null;
/**
* Current external id.
*/
protected ExternalID cureid = null;
/**
* (Reused) storage of coordinates.
*/
final private List<double[]> coords = new ArrayList<>();
/**
* (Reused) storage of polygons.
*/
final private List<Polygon> polys = new ArrayList<>();
/**
* (Reused) store for labels.
*/
final private ArrayList<String> labels = new ArrayList<>();
@Override
public Event nextEvent() {
if(nextevent != null) {
Event ret = nextevent;
nextevent = null;
return ret;
}
try {
while(reader.nextLineExceptComments()) {
if(parseLine()) {
if(meta == null || (curlbl != null && !haslabels)) {
haslabels = haslabels || curlbl != null;
buildMeta();
nextevent = Event.NEXT_OBJECT;
return Event.META_CHANGED;
}
return Event.META_CHANGED;
}
}
return Event.END_OF_STREAM;
}
catch(IOException e) {
throw new IllegalArgumentException("Error while parsing line " + reader.getLineNumber() + ".");
}
}
/**
* Update the meta element.
*/
protected void buildMeta() {
if(haslabels) {
meta = new BundleMeta(3);
meta.add(TypeUtil.POLYGON_TYPE);
meta.add(TypeUtil.EXTERNALID);
meta.add(TypeUtil.LABELLIST);
}
else {
meta = new BundleMeta(2);
meta.add(TypeUtil.POLYGON_TYPE);
meta.add(TypeUtil.EXTERNALID);
}
}
@Override
public BundleMeta getMeta() {
return meta;
}
@Override
public Object data(int rnum) {
if(rnum > (haslabels ? 2 : 1)) {
throw new ArrayIndexOutOfBoundsException();
}
return (rnum == 0) ? curpoly : (rnum == 1) ? cureid : curlbl;
}
/**
* Parse a single line.
*
* @return {@code true} if the line was read successful.
*/
private boolean parseLine() {
cureid = null;
curpoly = null;
curlbl = null;
polys.clear();
coords.clear();
labels.clear();
Matcher m = COORD.matcher(reader.getBuffer());
for(/* initialized by nextLineExceptComments */; tokenizer.valid(); tokenizer.advance()) {
m.region(tokenizer.getStart(), tokenizer.getEnd());
if(m.find()) {
try {
double c1 = ParseUtil.parseDouble(m.group(1));
double c2 = ParseUtil.parseDouble(m.group(2));
if(m.group(3) != null) {
double c3 = ParseUtil.parseDouble(m.group(3));
coords.add(new double[] { c1, c2, c3 });
}
else {
coords.add(new double[] { c1, c2 });
}
continue;
}
catch(NumberFormatException e) {
LOG.warning("Looked like a coordinate pair but didn't parse: " + tokenizer.getSubstring());
}
}
// Match polygon separator:
// FIXME: Avoid unnecessary subSequence call.
final int len = tokenizer.getEnd() - tokenizer.getStart();
if(POLYGON_SEPARATOR.length() == len && //
reader.getBuffer().subSequence(tokenizer.getStart(), tokenizer.getEnd()).equals(POLYGON_SEPARATOR)) {
if(!coords.isEmpty()) {
polys.add(new Polygon(new ArrayList<>(coords)));
}
continue;
}
String cur = tokenizer.getSubstring();
// First label will become the External ID
if(cureid == null) {
cureid = new ExternalID(cur);
}
else {
labels.add(cur);
}
}
// Complete polygon
if(!coords.isEmpty()) {
polys.add(new Polygon(coords));
}
curpoly = new PolygonsObject(polys);
curlbl = (haslabels || !labels.isEmpty()) ? LabelList.make(labels) : null;
return true;
}
@Override
protected Logging getLogger() {
return LOG;
}
/**
* Parameterization class.
*
* @author Erich Schubert
*
* @apiviz.exclude
*/
public static class Parameterizer extends AbstractStreamingParser.Parameterizer {
@Override
protected SimplePolygonParser makeInstance() {
return new SimplePolygonParser(format);
}
}
}