package com.constellio.app.modules.es.connectors.http; import static java.util.Arrays.asList; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.List; import org.apache.commons.lang3.StringUtils; import org.jsoup.Connection.Response; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import com.constellio.app.modules.es.connectors.http.ConnectorHttpRuntimeException.ConnectorHttpRuntimeException_CannotGetAbsoluteHref; import com.constellio.data.io.services.facades.IOServices; public class ConnectorHttpUtils { private static final String READ_BINARY_CONTENT_RESOURCE = "ConnectorHttpUtils-ReadBinaryContent"; public static String toAbsoluteHRef(String currentUrl, String href) { try { if (StringUtils.isBlank(href) || StringUtils.isBlank(currentUrl) || href.startsWith("http")) { return href; } else if (href.startsWith("/")) { int firstSlashIndex = currentUrl.indexOf("/", 7); if (firstSlashIndex == -1) { return href; } else { return currentUrl.substring(0, firstSlashIndex) + href; } } else { List<String> parts = new ArrayList<>(asList(currentUrl.split("/"))); while (href.startsWith("../")) { parts.remove(parts.size() - 1); href = href.substring(3); } return StringUtils.join(parts, "/") + "/" + href; } } catch (Exception e) { throw new ConnectorHttpRuntimeException_CannotGetAbsoluteHref(currentUrl, href); } } public static FetchedDocumentContent fetch(String url) throws IOException { FetchedDocumentContent fetchedDocumentContent = new FetchedDocumentContent(); Response response = Jsoup.connect(url).execute(); //String contentType = response.contentType(); fetchedDocumentContent.document = response.parse(); fetchedDocumentContent.title = fetchedDocumentContent.document.title(); String[] urlParts = url.split("/"); fetchedDocumentContent.fileName = urlParts[urlParts.length - 1]; return fetchedDocumentContent; } public static class FetchedDocumentContent { private Document document; private String title; private String fileName; public InputStream newInputStream(IOServices ioServices) { return ioServices.newByteInputStream(document.text().getBytes(), READ_BINARY_CONTENT_RESOURCE); } public long getContentLength() { return document.text().length(); } public Document getDocument() { return document; } public String getTitle() { return title; } public String getFileName() { return fileName; } public String baseUri() { return document.baseUri(); } } }