package com.muzima.service;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
/**
 * Collects the values of the {@code data-provider} attribute found in HTML markup.
 */
public class HTMLProviderParser {
    /** Name of the HTML attribute whose values are collected by {@link #parse(String)}. */
    public static final String DATA_PROVIDER_TAG = "data-provider";

    /**
     * Parses the given HTML and returns the distinct values of the
     * {@code data-provider} attribute on every element that is not a {@code div}.
     *
     * @param html the HTML markup to scan; {@code null} is treated as markup
     *             without any matching element
     * @return a new, mutable list of the distinct attribute values, in the order
     *         in which the selector yields the matching elements; empty when
     *         nothing matches or {@code html} is {@code null}
     */
    public List<String> parse(String html) {
        // Jsoup.parse rejects null input, so answer "no providers" up front
        // instead of letting an unchecked exception escape to the caller.
        if (html == null) {
            return new ArrayList<String>();
        }

        // LinkedHashSet removes duplicates while keeping first-seen order, so the
        // returned list does not depend on hash iteration order.
        Set<String> providers = new LinkedHashSet<String>();
        Document htmlDoc = Jsoup.parse(html);

        // Select every element that carries the data-provider attribute and is not a div.
        Elements elements = htmlDoc.select("*:not(div)[" + DATA_PROVIDER_TAG + "]");
        for (Element element : elements) {
            providers.add(element.attr(DATA_PROVIDER_TAG));
        }
        return new ArrayList<String>(providers);
    }
}