package com.akjava.gwt.markdowneditor.client;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import com.akjava.gwt.lib.client.LogUtils;
import com.akjava.gwt.markdowneditor.client.MarkdownPredicates.StartWithTitle1OrTitle2Predicate;
import com.akjava.lib.common.utils.CSVUtils;
import com.akjava.lib.common.utils.StringUtils;
import com.akjava.lib.common.utils.log.GWTLogger;
import com.google.common.base.Joiner;
public class ExtractTextFromMarkdown {
/** Optional logger; when non-null, extract logs every input line it visits to it. */
private GWTLogger logger;
/**
 * Returns the logger currently attached to this extractor.
 *
 * @return the logger, or {@code null} when none has been set
 */
public GWTLogger getLogger() {
    return this.logger;
}
/**
 * Attaches the logger that receives one message per processed line.
 *
 * @param logger the logger to use; {@code null} turns logging off
 */
public void setLogger(GWTLogger logger) {
    this.logger = logger;
}
/**
 * Tells whether {@code ch} is a space or a horizontal tab.
 *
 * @param ch the character to test
 * @return {@code true} for {@code ' '} and {@code '\t'}, {@code false} otherwise
 */
private boolean isWhitespace(char ch){
    switch(ch){
    case ' ':
    case '\t':
        return true;
    default:
        return false;
    }
}
/** When {@code true}, the inline parser prints trace messages through {@code System.out.println}. */
public static boolean debug;
/**
 * Extracts text from {@code markdown} with {@code parseAll} switched off.
 *
 * @param markdown the markdown source
 * @return the result of {@code extract(markdown, false)}
 */
public ExtractedResult extract(String markdown){
    final boolean parseAll=false;
    return extract(markdown,parseAll);
}
/**
 * Splits {@code markdown} into lines and replaces the plain text of each line
 * with {@code ${vN}} placeholders, collecting the replaced text in the result's
 * template map under the same keys ({@code v1}, {@code v2}, ... in order of
 * appearance).
 *
 * <p>The following lines are copied to the output unchanged: empty lines,
 * lines containing a code fence marker and every line between two fences,
 * lines that contain both {@code <} and {@code >} and do not start with
 * {@code >}, lines whose trimmed form starts with {@code ***}, title lines,
 * a line followed by a title underline (together with that underline), and
 * table lines. Every other line goes through the inline parser.
 *
 * @param markdown the markdown source
 * @param parseAll when {@code true}, the text inside emphasis, strike, inline
 *        code and link labels is templated too, and so are title texts;
 *        when {@code false}, those spans are copied verbatim
 * @return the templated markdown together with the key-to-text map
 */
public ExtractedResult extract(String markdown,boolean parseAll){
    String[] lines=CSVUtils.splitLinesWithGuava(markdown).toArray(new String[0]);
    ExtractedResult result=new ExtractedResult();
    result.setMarkdownTemplateMap(new LinkedHashMap<String, String>());
    Analyzer analyzer=new Analyzer();
    List<String> passStrings=new ArrayList<String>();
    for(int i=0;i<lines.length;i++){
        String line=lines[i];
        if(logger!=null){
            logger.log("extract:"+line);
        }
        String next=null;
        if(i<lines.length-1){
            next=lines[i+1];
        }
        if(line.isEmpty()){
            passStrings.add("");
            continue;
        }
        analyzer.setLineAt(i);
        // A fence marker toggles the code-block state; the marker line itself is never parsed.
        if(line.indexOf("```")!=-1){
            analyzer.linecode=!analyzer.linecode;
            passStrings.add(line);
            continue;
        }
        if(analyzer.linecode){
            passStrings.add(line);
            continue;
        }
        String trimed=line.trim();
        // Lines that look like they carry an HTML tag are left alone; block quotes are parsed below.
        if(!line.startsWith(">") && line.indexOf("<")!=-1 && line.indexOf(">")!=-1){
            passStrings.add(line);
            continue;
        }
        if(trimed.startsWith("***")){
            passStrings.add(line);
            continue;
        }
        // Title lines are kept as they are here; with parseAll they are templated in the second pass.
        if(MarkdownPredicates.getStartWithTitleLinePredicate().apply(line)){
            passStrings.add(line);
            continue;
        }
        // A line followed by a title underline: keep both and skip the underline.
        if(next!=null && MarkdownPredicates.getStartWithTitle1OrTitle2Predicate().apply(next)){
            passStrings.add(line);
            passStrings.add(next);
            i++;
            continue;
        }
        //TODO support table
        if(MarkdownPredicates.getTableLinePredicate().apply(trimed)){
            passStrings.add(line);
            continue;
        }
        passStrings.add(parseInline(line,trimed,analyzer,result,parseAll));
    }
    if(parseAll){
        for(int i=0;i<passStrings.size();i++){
            String line=passStrings.get(i);
            if(i!=0 && MarkdownPredicates.getStartWithTitle1OrTitle2Predicate().apply(line)){
                // This line is a title underline, so the previous line is the title text.
                String head=passStrings.get(i-1);
                String key=analyzer.getValueKey();
                passStrings.set(i-1, "${"+key+"}");
                result.addTemplate(key, head);
                analyzer.incrementIndex();
            }else if(MarkdownPredicates.getStartWithTitleLinePredicate().apply(line)){
                // Everything after the last '#' becomes the template value.
                int index=line.lastIndexOf("#");
                if(index!=-1){
                    String key=analyzer.getValueKey();
                    passStrings.set(i,line.substring(0,index+1)+"${"+key+"}");
                    result.addTemplate(key, line.substring(index+1));
                    analyzer.incrementIndex();
                }
            }
        }
    }
    result.setExtractedMarkdown(Joiner.on("\n").join(passStrings));
    return result;
}

/**
 * Runs the character-level parser over one line and returns the templated line.
 * Expects {@code analyzer.setLineAt} to have been called for this line already.
 */
private String parseInline(String line,String trimed,Analyzer analyzer,ExtractedResult result,boolean parseAll){
    if(debug)System.out.println("start-parse-character");
    StringBuilder out=new StringBuilder();
    for(int j=0;j<line.length();j++){
        if(j==0){
            // Leading block-quote markers are copied verbatim.
            int match=StringUtils.countStartWith(line,'>');
            if(match>0){
                j=match;
                out.append(line.substring(0,match));
                if(j>=line.length()){
                    // The line holds nothing but '>' markers; reading charAt(j) would run past the end.
                    break;
                }
            }
        }
        //skip-list
        if(j==0 && trimed.startsWith("-")){
            int cotinued=0;
            // Measure the whitespace that follows the '-' marker.
            for(int k=j+1;k<trimed.length();k++){
                if(!isWhitespace(trimed.charAt(k))){//because GWT not supported yet,@see https://code.google.com/p/google-web-toolkit/issues/detail?id=1935
                    break;
                }
                cotinued=k;
            }
            if(cotinued>0){
                // Copy "<indent>-<whitespace>" verbatim and continue behind it.
                int trimstart=line.indexOf("-");
                j=trimstart+cotinued+1;
                out.append(line.substring(0,j));
            }
        }
        char ch=line.charAt(j);
        if(analyzer.italic){
            if(ch=='*'){//close italic
                analyzer.italic=false;
                int last=lastIndexOfRun(line,j,'*');
                j=last;//skip here
                out.append(emitSpan(analyzer,result,line.substring(analyzer.textStart,last+1),"*",parseAll));
            }else{
                if(debug)System.out.println("italic:"+line.substring(analyzer.textStart, j+1));
            }
        }else if(analyzer.bold){
            int last=ch=='*'?lastIndexOfRun(line,j,'*'):j;
            if(last>j){
                // Only a run of two or more '*' closes bold.
                out.append(emitSpan(analyzer,result,line.substring(analyzer.textStart,last+1),"**",parseAll));
                j=last;
                analyzer.bold=false;
            }else{
                if(debug)System.out.println("bold:"+line.substring(analyzer.textStart, j+1));
            }
        }else if(analyzer.strike){
            int last=ch=='~'?lastIndexOfRun(line,j,'~'):j;
            if(last>j){
                // Only a run of two or more '~' closes strike.
                out.append(emitSpan(analyzer,result,line.substring(analyzer.textStart,last+1),"~~",parseAll));
                j=last;
                analyzer.strike=false;
            }else{
                if(debug)System.out.println("strike:"+line.substring(analyzer.textStart, j+1));
            }
        }else if(analyzer.textcode){
            if(ch=='`'){
                // Any run of backticks closes inline code.
                int last=lastIndexOfRun(line,j,'`');
                out.append(emitSpan(analyzer,result,line.substring(analyzer.textStart,last+1),"`",parseAll));
                j=last;
                analyzer.textcode=false;
            }else{
                if(debug)System.out.println("textcode:"+line.substring(analyzer.textStart, j+1));
            }
        }else{
            if(ch=='*'){
                out.append(flushSpacedText(analyzer,result));
                analyzer.textStart=j;
                int last=lastIndexOfRun(line,j,'*');
                if(last==j){
                    analyzer.italic=true;
                    if(debug)System.out.println("italic:"+line.substring(analyzer.textStart, last+1));
                }else{
                    analyzer.bold=true;
                    if(debug)System.out.println("bold:"+line.substring(analyzer.textStart, last+1));
                }
                j=last;//skip here
            }else if(ch=='~'){
                int last=lastIndexOfRun(line,j,'~');
                if(last>j){
                    out.append(flushRawText(analyzer,result));
                    analyzer.textStart=j;
                    analyzer.strike=true;
                    if(debug)System.out.println("strike:"+line.substring(analyzer.textStart, last+1));
                    j=last;//skip here
                }else{
                    // A single '~' is not a strike marker: keep it as ordinary text instead of dropping it.
                    analyzer.text+=ch;
                    if(debug)System.out.println("safe-text:"+analyzer.text);
                }
            }else if(ch=='`'){
                out.append(flushRawText(analyzer,result));
                analyzer.textStart=j;
                int last=lastIndexOfRun(line,j,'`');
                analyzer.textcode=true;
                if(debug)System.out.println("textcode:"+line.substring(analyzer.textStart, last+1));
                j=last;//skip here
            }else if(ch=='|'){
                out.append(flushRawText(analyzer,result));
                out.append('|');//just skip
            }else if(ch=='['){
                boolean findLink=false;
                int connection=line.indexOf("](",j+1);
                if(connection!=-1){
                    int end=line.indexOf(")",connection);
                    if(end!=-1){//find link
                        out.append(flushRawText(analyzer,result));
                        if(parseAll){
                            // Template only the label; "[" and "](target)" stay verbatim.
                            String key=analyzer.getValueKey();
                            analyzer.incrementIndex();
                            String text=line.substring(j+1,connection);
                            out.append(addNewLine("[",line.substring(connection,end+1),key,text));
                            result.addTemplate(key, fixSpaceText(text));
                        }else{
                            out.append(line.substring(j,end+1));
                        }
                        j=end;//auto increment
                        findLink=true;
                    }
                }
                if(!findLink){
                    analyzer.text+=ch;
                    if(debug)System.out.println("safe-text:"+analyzer.text);
                }
            }else if(ch=='!'){
                boolean findLink=false;
                if(j<line.length()-1 && line.charAt(j+1)=='['){//TODO support alt
                    int connection=line.indexOf("](",j+1);
                    if(connection!=-1){
                        // Look for the closing ')' behind the "](" connector. Searching from the start
                        // of the line picks up any earlier ')' and yields a wrong or invalid span.
                        int end=line.indexOf(")",connection);
                        if(end!=-1){//find link
                            out.append(flushRawText(analyzer,result));
                            // Images are always copied verbatim.
                            out.append(line.substring(j,end+1));
                            j=end;//auto increment
                            findLink=true;
                        }
                    }
                }
                if(!findLink){
                    analyzer.text+=ch;
                    if(debug)System.out.println("safe-text:"+analyzer.text);
                }
            }else{
                //safe text
                analyzer.text+=ch;
                if(debug)System.out.println("safe-text:"+analyzer.text);
            }
        }
    }
    if(analyzer.italic || analyzer.bold || analyzer.strike || analyzer.textcode){
        // The span was never closed: copy everything from its opening marker verbatim.
        out.append(line.substring(analyzer.textStart));
    }else{
        out.append(flushSpacedText(analyzer,result));
    }
    return out.toString();
}

/** Returns the index of the last character of the run of {@code marker} that begins at {@code from}. */
private int lastIndexOfRun(String line,int from,char marker){
    int last=from;
    for(int k=from+1;k<line.length();k++){
        if(line.charAt(k)!=marker){
            break;
        }
        last=k;
    }
    return last;
}

/**
 * Templates the pending plain text: the trimmed text is stored, and the returned placeholder
 * keeps one leading/trailing space when the text had one. Returns "" when nothing is pending.
 */
private String flushSpacedText(Analyzer analyzer,ExtractedResult result){
    String value=analyzer.text;
    if(value.isEmpty()){
        return "";
    }
    String key=analyzer.getValueKey();
    analyzer.incrementIndex();
    result.addTemplate(key, fixSpaceText(value));
    analyzer.text="";
    return addNewLine("","",key,value);
}

/** Templates the pending plain text exactly as collected (no trimming). Returns "" when nothing is pending. */
private String flushRawText(Analyzer analyzer,ExtractedResult result){
    String value=analyzer.text;
    if(value.isEmpty()){
        return "";
    }
    String key=analyzer.getValueKey();
    analyzer.incrementIndex();
    result.addTemplate(key, value);
    analyzer.text="";
    return "${"+key+"}";
}

/**
 * Produces the output for a closed span: verbatim when {@code parseAll} is off, otherwise the
 * span's text (marker characters removed) is templated and wrapped in {@code marker} again.
 */
private String emitSpan(Analyzer analyzer,ExtractedResult result,String span,String marker,boolean parseAll){
    if(!parseAll){
        return span;
    }
    String key=analyzer.getValueKey();
    analyzer.incrementIndex();
    String text=span.replace(marker.substring(0,1), "");
    result.addTemplate(key, fixSpaceText(text));
    return addNewLine(marker,marker,key,text);
}
/**
 * Builds the placeholder that replaces {@code text} in the output line.
 * One space is inserted between {@code before} and the placeholder when
 * {@code text} starts with a space, and between the placeholder and
 * {@code after} when {@code text} ends with a space.
 *
 * @param before text emitted in front of the placeholder
 * @param after text emitted behind the placeholder
 * @param keyName template key used inside {@code ${...}}
 * @param text the original text the placeholder stands for
 * @return the assembled replacement
 */
private String addNewLine(String before,String after,String keyName,String text){
    //TODO accurate space
    String prefix=text.startsWith(" ") ? before+" " : before;
    String suffix=text.endsWith(" ") ? " "+after : after;
    return prefix+"${"+keyName+"}"+suffix;
}
/**
 * Normalizes a template value by removing its leading and trailing whitespace.
 *
 * @param text the collected text
 * @return {@code text} trimmed
 */
private String fixSpaceText(String text){
    final String trimmed=text.trim();
    return trimmed;
}
private class Analyzer{
private String text="";
boolean italic;
boolean bold;
boolean strike;
int valueIndex=1;
//int lineAt;
int textStart;
//int textEnd;
boolean textcode;
boolean linecode;
private ExtractedResult extractedResult;
public void setLineAt(int at){
//lineAt=at;
textStart=0;
//textEnd=0;
bold=false;
italic=false;
strike=false;
textcode=false;
text="";
}
private int getValueIndex(){
return valueIndex;
}
private String getValueKey(){
return "v"+valueIndex;
}
private void incrementIndex(){
valueIndex++;
}
}
/**
 * Outcome of an extraction: the markdown with placeholders and the map
 * from placeholder key to the text it replaced.
 */
public static class ExtractedResult{
    /** Placeholder key to original text; stays {@code null} until a map is set. */
    private Map<String,String> markdownTemplateMap;
    /** Markdown in which extracted text has been replaced by placeholders. */
    private String extractedMarkdown;

    /** Stores {@code value} under {@code key} in the template map. */
    private void addTemplate(String key,String value){
        this.markdownTemplateMap.put(key,value);
    }

    /** @return the key-to-text map, or {@code null} when none has been set */
    public Map<String, String> getMarkdownTemplateMap() {
        return this.markdownTemplateMap;
    }

    /** @param markdownTemplateMap the map that receives the extracted text */
    public void setMarkdownTemplateMap(Map<String, String> markdownTemplateMap) {
        this.markdownTemplateMap = markdownTemplateMap;
    }

    /** @param extractedMarkdown the markdown containing placeholders */
    public void setExtractedMarkdown(String extractedMarkdown) {
        this.extractedMarkdown = extractedMarkdown;
    }

    /** @return the markdown containing placeholders, or {@code null} when none has been set */
    public String getExtractedMarkdownTemplateText() {
        return this.extractedMarkdown;
    }
}
}