/**
* Copyright (c) 2013-2016 Angelo ZERR.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Angelo Zerr <angelo.zerr@gmail.com> - initial API and implementation
*/
package tern.server.protocol.html;
/**
 * Static utility that blanks out the HTML markup of a document while keeping
 * the JS source found inside script regions, so that the returned text has
 * one output character for each input character it consumes.
 */
public class HtmlHelper {

    /** Not instantiable: the class only exposes static methods. */
    private HtmlHelper() {
    }

    /**
     * Extracts the JS content of the given HTML using
     * {@link ScriptTagRegion#SCRIPT_TAG} as the only script region.
     *
     * @param html
     *            the HTML content which contains JS content.
     * @return the HTML content where everything outside the script region is
     *         replaced with spaces and JS content is kept.
     */
    public static String extractJS(String html) {
        return extractJS(html, ScriptTagRegion.SCRIPT_TAG);
    }

    /**
     * Extracts the JS content of the given HTML for the given script
     * regions.
     * <p>
     * Newline, carriage return, tab and space characters are copied as they
     * are and are never submitted to the region state. Any other character
     * is submitted to the state; it is kept when the state is waiting for an
     * end script region (the character belongs to script content) and is
     * replaced with a space otherwise.
     *
     * @param html
     *            the HTML content which contains JS content.
     * @param tagRegions
     *            the HTML tag regions which delimit JS content.
     * @return the HTML content where everything outside the given regions is
     *         replaced with spaces and JS content is kept.
     */
    public static String extractJS(String html, ScriptTagRegion... tagRegions) {
        IState state = createState(tagRegions);
        StringBuilder out = new StringBuilder();
        char[] input = html.toCharArray();
        for (int pos = 0; pos < input.length; pos++) {
            char current = input[pos];

            if (isLayoutChar(current)) {
                // layout characters are preserved and do not feed the state.
                out.append(current);
                continue;
            }

            Region found = state.update(current);
            if (found != null) {
                // the current character completes a region.
                if (found.getType().equals(RegionType.END_SCRIPT)) {
                    // the beginning of the end script region was already
                    // copied as script content: overwrite the buffer range
                    // that ends at the current position with the region
                    // replacement text, then start searching again.
                    out.replace(pos - found.getLength(), pos, found.getSpaces());
                    state.reset();
                }
                // the character which closes a region is never kept.
                out.append(' ');
                continue;
            }

            // no region completed: keep the character only while the state
            // waits for an end script region (ex : </script>).
            out.append(state.isNextRegionToFindType(RegionType.END_SCRIPT) ? current : ' ');
        }
        return out.toString();
    }

    /**
     * Tells whether the given character is copied verbatim to the result.
     *
     * @param c
     *            the character to test.
     * @return true if the character is a newline, a carriage return, a tab
     *         or a space and false otherwise.
     */
    private static boolean isLayoutChar(char c) {
        return c == '\n' || c == '\r' || c == '\t' || c == ' ';
    }

    /**
     * Creates the state used to search the given tag regions.
     *
     * @param tagRegions
     *            the HTML tag regions which delimit JS content.
     * @return a {@link State} when exactly one region is given, a
     *         {@link MultiState} built from the whole array otherwise.
     */
    private static IState createState(ScriptTagRegion[] tagRegions) {
        if (tagRegions.length != 1) {
            return new MultiState(tagRegions);
        }
        return new State(tagRegions[0]);
    }
}