/*
* Copyright 2000-2013 Enonic AS
* http://www.enonic.com/license
*/
package com.enonic.cms.core.link;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Scans HTML text for link-carrying attributes and reports where each attribute value is located.
 * <p>
 * The following element/attribute pairs are recognised: {@code <a href>}, {@code <img src>} and
 * {@code <table background>}. Matching is regular-expression based and case-insensitive; it is a
 * best-effort scan, not a full HTML parser.
 * <p>
 * The compiled patterns are immutable and shared by all instances.
 */
public final class LinkScanner
{
    /**
     * Index of the capturing group that holds the attribute value (group 1 is the opening quote).
     */
    private static final int VALUE_GROUP = 2;

    /**
     * One pattern per supported element/attribute pair, compiled once for the whole class.
     */
    private static final Pattern[] PATTERNS = {
        buildElementPattern( "a", "href" ),
        buildElementPattern( "img", "src" ),
        buildElementPattern( "table", "background" )
    };

    /**
     * Creates a scanner for the built-in set of link attributes.
     */
    public LinkScanner()
    {
    }

    /**
     * Finds all link attribute values in the given text.
     *
     * @param input the text to scan; {@code null} is treated as text without links
     * @return a new, modifiable list with one {@link LinkMatch} per attribute value found, ordered by
     *         {@link Collections#sort(List)} (i.e. by the natural ordering of {@code LinkMatch})
     */
    public List<LinkMatch> scan( String input )
    {
        ArrayList<LinkMatch> matches = new ArrayList<LinkMatch>();
        if ( input == null )
        {
            return matches;
        }

        for ( Pattern pattern : PATTERNS )
        {
            scan( matches, input, pattern );
        }

        // Each pattern is applied separately, so the combined result has to be ordered afterwards.
        Collections.sort( matches );
        return matches;
    }

    /**
     * Appends a match for every occurrence of the pattern in the input.
     *
     * @param result  list receiving the matches
     * @param input   text to scan
     * @param pattern pattern produced by {@link #buildElementPattern(String, String)}
     */
    private void scan( List<LinkMatch> result, String input, Pattern pattern )
    {
        Matcher matcher = pattern.matcher( input );
        while ( matcher.find() )
        {
            // Offsets refer to the attribute value only, excluding the surrounding quotes.
            result.add( new LinkMatch( matcher.group( VALUE_GROUP ), matcher.start( VALUE_GROUP ),
                                       matcher.end( VALUE_GROUP ) ) );
        }
    }

    /**
     * Builds the pattern that locates a quoted, non-empty attribute value inside an element's start tag.
     * <p>
     * Group 1 captures the opening quote character, group {@link #VALUE_GROUP} captures the value.
     *
     * @param elem element name; inserted verbatim into the expression, so it must be a plain name
     * @param attr attribute name; inserted verbatim into the expression, so it must be a plain name
     * @return the compiled, case-insensitive pattern
     */
    private static Pattern buildElementPattern( String elem, String attr )
    {
        StringBuilder regex = new StringBuilder();

        // Start of tag: the element name must be followed by whitespace.
        regex.append( "<" ).append( elem ).append( "\\s+" );

        // Optionally skip preceding attributes. What is skipped must end in whitespace or a quote so
        // that the attribute name is matched as a whole and not as the tail of another name
        // (e.g. "data-href").
        regex.append( "(?:[^>]*[\\s\"'])?" ).append( attr );

        regex.append( "\\s*=\\s*" );

        // The value runs up to the next occurrence of the SAME quote character that opened it, so
        // the other quote character may appear inside the value.
        regex.append( "([\"'])((?:(?!\\1).)+)\\1" );

        // DOTALL lets '.' match line terminators, so values spanning several lines are still found.
        return Pattern.compile( regex.toString(), Pattern.CASE_INSENSITIVE | Pattern.DOTALL );
    }
}