/*******************************************************************************
 * Copyright (c) 2015 David Green.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     David Green - initial API and implementation
 *******************************************************************************/

package org.eclipse.mylyn.wikitext.commonmark.internal.inlines;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.eclipse.mylyn.wikitext.commonmark.internal.Line;

import com.google.common.base.Optional;

/**
 * A {@link SourceSpan} that recognizes an HTML entity reference starting at the cursor.
 * <p>
 * Three forms are accepted, all case-insensitively and all terminated by {@code ';'}:
 * </p>
 * <ul>
 * <li>hexadecimal numeric references: {@code &#x} followed by 1 to 8 hex digits</li>
 * <li>decimal numeric references: {@code &#} followed by 1 to 8 decimal digits</li>
 * <li>named references: {@code &} followed by a letter and 1 to 31 further letters or digits</li>
 * </ul>
 */
public class HtmlEntitySpan extends SourceSpan {

	/**
	 * Matches an entity reference and captures its body (everything between {@code '&'} and {@code ';'}) in group 1.
	 * The trailing {@code .*} combined with {@link Pattern#DOTALL} lets {@link Matcher#matches()} succeed no matter
	 * what text, including line breaks, follows the entity. Compiled once and shared, since {@link Pattern} is
	 * immutable.
	 */
	private static final Pattern ENTITY_PATTERN = Pattern.compile(
			"&(#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});.*", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

	/** Number of delimiter characters around the entity body: the leading {@code '&'} and the trailing {@code ';'}. */
	private static final int DELIMITER_LENGTH = 2;

	/** Radix used to parse {@code #x...} references. */
	private static final int HEX_RADIX = 16;

	/** Largest code point that is passed through as an entity; anything above is replaced. */
	private static final int MAX_ACCEPTED_CODE_POINT = 0xffff;

	/** Text emitted in place of a numeric reference that is considered invalid (U+FFFD). */
	private static final String REPLACEMENT_CHARACTER = "\ufffd";

	/**
	 * Creates an inline for the entity reference at the cursor, if there is one.
	 *
	 * @param cursor
	 *            the cursor positioned at the candidate character
	 * @return an {@link HtmlEntity} carrying the entity body, a {@link Characters} inline holding U+FFFD when the
	 *         reference is numeric but {@link #isInvalidUnicodeCodepoint(String) invalid}, or
	 *         {@link Optional#absent()} when the cursor is not at {@code '&'} or the text does not form an entity
	 *         reference
	 */
	@Override
	public Optional<? extends Inline> createInline(Cursor cursor) {
		if (cursor.getChar() != '&') {
			return Optional.absent();
		}
		Matcher matcher = cursor.matcher(ENTITY_PATTERN);
		if (!matcher.matches()) {
			return Optional.absent();
		}

		String ent = matcher.group(1);
		int offset = cursor.getOffset();
		// the inline spans the entity body plus its '&' and ';' delimiters
		int length = ent.length() + DELIMITER_LENGTH;
		Line lineAtOffset = cursor.getLineAtOffset();

		if (isInvalidUnicodeCodepoint(ent)) {
			return Optional.of(new Characters(lineAtOffset, offset, length, REPLACEMENT_CHARACTER));
		}
		return Optional.of(new HtmlEntity(lineAtOffset, offset, length, ent));
	}

	/**
	 * Indicates whether a numeric entity body denotes a code point that must be replaced.
	 * <p>
	 * Named entities (bodies not starting with {@code '#'}) are never reported as invalid. A numeric body is invalid
	 * when it has no digits, cannot be parsed as an {@code int}, is zero, or exceeds {@code 0xffff}.
	 * </p>
	 *
	 * @param ent
	 *            the entity body without the surrounding {@code '&'} and {@code ';'}, for example {@code "#x41"},
	 *            {@code "#65"} or {@code "amp"}; must not be empty
	 * @return {@code true} if the body is a numeric reference that should be replaced, otherwise {@code false}
	 */
	protected boolean isInvalidUnicodeCodepoint(String ent) {
		if (ent.charAt(0) != '#') {
			return false;
		}
		if (ent.length() < 2) {
			// a bare "#" carries no digits; treat it like "#x", which fails parsing below
			return true;
		}
		try {
			char firstCharFollowingHash = ent.charAt(1);
			boolean hexadecimal = firstCharFollowingHash == 'x' || firstCharFollowingHash == 'X';
			int codePoint = hexadecimal
					? Integer.parseInt(ent.substring(2), HEX_RADIX)
					: Integer.parseInt(ent.substring(1));
			// NOTE(review): Unicode code points extend to 0x10FFFF, so this bound also rejects supplementary
			// characters, and it does not reject surrogates (0xD800-0xDFFF). Kept as-is because the downstream
			// handling of such values is not visible here - confirm before widening.
			return codePoint <= 0 || codePoint > MAX_ACCEPTED_CODE_POINT;
		} catch (NumberFormatException e) {
			// no digits, or a value too large for an int (e.g. eight hex digits above 7fffffff)
			return true;
		}
	}
}