// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 3.2
// Copyright (C) 2004-2009 Martin Jericho
// http://jericho.htmlparser.net/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of either one of the following licences:
//
// 1. The Eclipse Public License (EPL) version 1.0,
// included in this distribution in the file licence-epl-1.0.html
// or available at http://www.eclipse.org/legal/epl-v10.html
//
// 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
// included in this distribution in the file licence-lgpl-2.1.txt
// or available at http://www.gnu.org/licenses/lgpl.txt
//
// This library is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the individual licence texts for more details.
package net.htmlparser.jericho;
import java.util.*;
/**
* Iterates over the "nodes" in a segment.
* <p>
* Every object returned is a Segment. All tags found with the Segment.getAllTags() method are included, as well as segments representing the plain text in between them,
* and character references within the plain text are also included as separate nodes.
*/
class NodeIterator implements Iterator<Segment> {
private final Segment segment;
private final Source source;
private int pos;
private Tag nextTag;
private CharacterReference characterReferenceAtCurrentPosition=null;
private final boolean legacyIteratorCompatabilityMode=Source.LegacyIteratorCompatabilityMode;
public NodeIterator(final Segment segment) {
this.segment=segment;
source=segment.source;
if (segment==source) source.fullSequentialParse();
pos=segment.begin;
nextTag=source.getNextTag(pos);
if (nextTag!=null && nextTag.begin>=segment.end) nextTag=null;
}
public boolean hasNext() {
return pos<segment.end || nextTag!=null;
}
public Segment next() {
final int oldPos=pos;
if (nextTag!=null) {
if (oldPos<nextTag.begin) return nextNonTagSegment(oldPos,nextTag.begin);
final Tag tag=nextTag;
nextTag=nextTag.getNextTag();
if (nextTag!=null && nextTag.begin>=segment.end) nextTag=null;
if (pos<tag.end) pos=tag.end;
return tag;
} else {
if (!hasNext()) throw new NoSuchElementException();
return nextNonTagSegment(oldPos,segment.end);
}
}
private Segment nextNonTagSegment(final int begin, final int end) {
if (!legacyIteratorCompatabilityMode) {
final CharacterReference characterReference=characterReferenceAtCurrentPosition;
if (characterReference!=null) {
characterReferenceAtCurrentPosition=null;
pos=characterReference.end;
return characterReference;
}
final ParseText parseText=source.getParseText();
int potentialCharacterReferenceBegin=parseText.indexOf('&',begin,end);
while (potentialCharacterReferenceBegin!=-1) {
final CharacterReference nextCharacterReference=CharacterReference.construct(source,potentialCharacterReferenceBegin,Config.UnterminatedCharacterReferenceSettings.ACCEPT_ALL);
if (nextCharacterReference!=null) {
if (potentialCharacterReferenceBegin==begin) {
pos=nextCharacterReference.end;
return nextCharacterReference;
} else {
pos=nextCharacterReference.begin;
characterReferenceAtCurrentPosition=nextCharacterReference;
return new Segment(source,begin,pos);
}
}
potentialCharacterReferenceBegin=parseText.indexOf('&',potentialCharacterReferenceBegin+1,end);
}
}
return new Segment(source,begin,pos=end);
}
public void skipToPos(final int pos) {
if (pos<this.pos) return; // can't go backwards
this.pos=pos;
nextTag=source.getNextTag(pos);
}
public void remove() {
throw new UnsupportedOperationException();
}
}