/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ro.nextreports.server.report.util;
import java.util.List;
import java.util.ArrayList;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import java.io.File;
/**
 * Collects the distinct image file names referenced by {@code <img src="...">}
 * tags found in an HTML text.
 * <p>
 * A tag is recognized only when it is written as the literal {@code <img},
 * followed by whitespace, followed by {@code src="..."} with a double-quoted,
 * non-empty value. The match is case-sensitive and {@code src} must be the
 * first attribute. Each captured value is reduced to its last path element,
 * so {@code ./logo.png} is reported as {@code logo.png}.
 * <p>
 * User: mihai.panaitescu
 * Date: 10-Dec-2009
 * Time: 11:47:20
 */
public class HtmlParser {

    // The value of the src attribute is captured as group 1 (the part between
    // '(' and ')') and read with matcher.group(1).
    // See HTMLExporter in engine: the tag always has the form "<img src" and
    // ends with </img> (never with />).
    // Compiled once for the whole class instead of once per instance.
    private static final Pattern IMG_PATTERN = Pattern.compile("<img\\s+src=\"([^\"]+)\"");

    // Distinct image file names, in order of first appearance in the text.
    private final List<String> images = new ArrayList<String>();

    /**
     * Scans the given HTML text and records the file name of every image it
     * references, skipping names that were already recorded.
     *
     * @param text the HTML text to scan; {@code null} is treated like an
     *             empty text and yields no images
     */
    public HtmlParser(String text) {
        if (text == null) {
            // Nothing to scan; previously this failed with a NullPointerException.
            return;
        }
        Matcher matcher = IMG_PATTERN.matcher(text);
        while (matcher.find()) {
            // Keep only the last path element to get rid of a leading "./"
            // (or any other directory part) in the src value.
            String fileName = new File(matcher.group(1)).getName();
            if (!images.contains(fileName)) {
                images.add(fileName);
            }
        }
    }

    /**
     * Returns the image file names found in the parsed text.
     *
     * @return the distinct file names in order of first appearance; never
     *         {@code null}, empty when no image tag was found. This is the
     *         parser's own list, not a copy.
     */
    public List<String> getImages() {
        return images;
    }
}