/* * Copyright 2014 michael-simons.eu. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package ac.simons.autolinker; import java.util.ArrayList; import java.util.List; import java.util.Optional; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.jsoup.nodes.Element; import org.jsoup.nodes.Node; import org.jsoup.nodes.TextNode; import org.jsoup.parser.Tag; /** * Turns all kinds of plain text urls into anchor elements. If no protocol is * given, http is assumend. * * @author Michael J. Simons, 2014-12-27 */ public class UrlAutoLinker implements AutoLinker { /** * Maximum length of the anchor text until it gets truncated. */ private final int maxLabelLength; /** * Instantiate a new URL autolinker with a given maximum label length. * * @param maxLabelLength Maximum length of the anchor text until it gets * truncated */ public UrlAutoLinker(int maxLabelLength) { this.maxLabelLength = maxLabelLength; } @Override public List<Node> createLinks(final TextNode textNode) { final List<Node> rv = new ArrayList<>(); int start = 0; final String nodeText = textNode.getWholeText(); final String baseUri = textNode.baseUri(); final Matcher matcher = Regex.VALID_URL.matcher(nodeText); while (matcher.find()) { // Add a new textnode for everything before the url final String textBefore = String.format("%s%s", nodeText.substring(start, matcher.start()), matcher.group(Regex.VALID_URL_GROUP_BEFORE)); if (!textBefore.isEmpty()) { rv.add(new TextNode(textBefore, baseUri)); } final Optional<String> protocol = Optional.ofNullable(matcher.group(Regex.VALID_URL_GROUP_PROTOCOL)); final Element newAnchor = new Element(Tag.valueOf("a"), baseUri); final String url = String.format("%s%s", protocol.isPresent() ? "" : "http://", matcher.group(Regex.VALID_URL_GROUP_URL)); newAnchor.attr("href", url); newAnchor.attr("title", url); newAnchor.appendChild(new TextNode(Strings.truncate(matcher.group(Regex.VALID_URL_GROUP_URL).replaceFirst(Pattern.quote(protocol.orElse("http://")), ""), maxLabelLength), baseUri)); rv.add(newAnchor); start = matcher.end(); } // Add a new textnode for everything after final String textAfter = nodeText.substring(start); if (!textAfter.isEmpty()) { rv.add(new TextNode(textAfter, baseUri)); } return rv; } }