/*
* MarkupStripper.java
* Copyright (C) 2007 David Milne, d.n.milnegmail.com
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
package org.wikipedia.miner.util ;
import java.util.*;
import java.util.regex.*;
/**
* This provides tools to strip out markup from wikipedia articles, or anything else that has been written
* in mediawiki's format. It's all pretty simple, so don't expect perfect parsing. It is particularly bad at
* dealing with templates (these are simply removed rather than resolved).
*/
public class MarkupStripper {

    /**
     * Matches one complete internal link. Group 1 is an optional prefix ending in a colon
     * (it may not run across a pipe, so a colon inside the anchor text is not mistaken for a
     * namespace), group 2 is the destination and group 3 is an optional "|anchor" part.
     */
    private static final Pattern LINK_PATTERN =
            Pattern.compile("\\[\\[((?:(?!\\|).)*?:)?(.*?)(\\|.*?)?\\]\\]");

    /** Matches text that is only whitespace, or that ends in a newline followed by only whitespace. */
    private static final Pattern ISOLATED_BEFORE =
            Pattern.compile("(\\s*|.*\\n(\\s*))", Pattern.DOTALL);

    /** Matches text that is only whitespace, or that starts with whitespace followed by a newline. */
    private static final Pattern ISOLATED_AFTER =
            Pattern.compile("(\\s*|(\\s*)\\n.*)", Pattern.DOTALL);

    /** Matches a section header that follows a newline; group 1 is the header itself. */
    private static final Pattern SECTION_HEADER =
            Pattern.compile("\\n\\s*((={2,})[^=].*?\\2)[^=]");

    /** Matches a line that starts with an italic tag and ends with its closing tag; group 2 is the content. */
    private static final Pattern ITALIC_LINE =
            Pattern.compile("(\\s*)\\<i\\>(.*?)\\<\\/i\\>\\.?(\\s*)");

    private static final Pattern BLANK_LINE = Pattern.compile("^(\\s*)$");
    private static final Pattern INDENTED_LINE = Pattern.compile("^(\\s*):.*");
    private static final Pattern IMAGE_LINE =
            Pattern.compile("^(\\s*)\\[\\[Image\\:(.*?)\\]\\](\\s*)");
    private static final Pattern BOLD_TAG = Pattern.compile("\\<\\/?b\\>");

    private final EmphasisResolver emphasisResolver = new EmphasisResolver();

    /**
     * Returns a copy of the given markup, where all markup has been removed except for
     * internal links to other wikipedia pages (e.g. to articles or categories), section
     * headers, list markers, and bold/italic markers.
     *
     * By default, unwanted markup is completely discarded. You can optionally specify
     * a character to replace the regions that are discarded, so that the length of the
     * string and the locations of unstripped characters is not modified.
     *
     * @param markup the markup to be stripped
     * @param replacement the fill character, or null to delete the stripped regions
     * @return the stripped markup
     */
    public String stripAllButInternalLinksAndEmphasis(String markup, Character replacement) {
        // Comments and math regions go first and on their own: comments often contain poorly
        // nested items, and math regions contain text that looks confusingly like templates.
        Vector<int[]> regions = gatherSimpleRegions(markup, "\\<\\!--(.*?)--\\>");
        regions = mergeRegionLists(regions,
                gatherComplexRegions(markup, "\\<math(\\s*?)([^>\\/]*?)\\>", "\\<\\/math(\\s*?)\\>"));
        String clearedMarkup = stripRegions(markup, regions, replacement);

        // Templates are also handled separately: they often end in |}} which would confuse
        // the gathering of tables.
        regions = gatherTemplates(clearedMarkup);
        clearedMarkup = stripRegions(clearedMarkup, regions, replacement);

        // Everything else we want to ignore can be gathered in one pass.
        regions = gatherTables(clearedMarkup);
        regions = mergeRegionLists(regions, gatherHTML(clearedMarkup));
        regions = mergeRegionLists(regions, gatherExternalLinks(clearedMarkup));
        regions = mergeRegionLists(regions, gatherMagicWords(clearedMarkup));
        clearedMarkup = stripRegions(clearedMarkup, regions, replacement);

        // Misformatted starts can only be recognised once the regions above are gone.
        regions = gatherMisformattedStarts(clearedMarkup);
        return stripRegions(clearedMarkup, regions, replacement);
    }

    /**
     * Returns a copy of the given markup, where all links to wikipedia pages
     * (categories, articles, etc) have been removed. Links to articles are
     * replaced with the appropriate anchor text. All other links are removed completely.
     *
     * By default, unwanted markup is completely discarded. You can optionally specify
     * a character to replace the regions that are discarded, so that the length of the
     * string and the locations of unstripped characters is not modified.
     *
     * @param markup the markup to be stripped
     * @param replacement the fill character, or null to delete the stripped regions
     * @return the stripped markup
     */
    public String stripInternalLinks(String markup, Character replacement) {
        return rewriteLinks(markup, replacement, true);
    }

    /**
     * Returns a copy of the given markup with bold and italic markers removed.
     *
     * The markup is first passed through the emphasis resolver, and the b and i tags found in
     * its output are then stripped (NOTE(review): this presumes the resolver turns wiki quote
     * runs into such tags - confirm against EmphasisResolver).
     *
     * If a replacement character is given, each bold tag is replaced by three fill characters
     * and each italic tag by two, i.e. the widths of the quote runs the original code used as
     * fill templates.
     *
     * @param markup the markup to be stripped
     * @param replacement the fill character, or null to delete the markers
     * @return the stripped markup
     */
    public String stripEmphasis(String markup, Character replacement) {
        String resolvedMarkup = emphasisResolver.resolveEmphasis(markup);
        Vector<int[]> regions = gatherSimpleRegions(resolvedMarkup, "\\<\\/?[bi]\\>");

        StringBuilder clearedMarkup = new StringBuilder();
        // Regions are walked back to front, so we start from the end of the text.
        int lastPos = resolvedMarkup.length();

        for (int i = regions.size() - 1; i >= 0; i--) {
            int[] region = regions.elementAt(i);
            if (region[0] >= lastPos) {
                continue; // lies within a region we have already dealt with
            }
            if (region[1] < lastPos) {
                clearedMarkup.insert(0, resolvedMarkup.substring(region[1], lastPos));
            }
            if (replacement != null) {
                String tag = resolvedMarkup.substring(region[0], region[1]);
                int width = BOLD_TAG.matcher(tag).matches() ? 3 : 2;
                clearedMarkup.insert(0, repeat(replacement, width));
            }
            lastPos = region[0];
        }

        clearedMarkup.insert(0, resolvedMarkup.substring(0, lastPos));
        return clearedMarkup.toString();
    }

    /**
     * Returns a copy of the given markup, where all links to wikipedia pages
     * that are not articles (categories, language links, etc) have been removed.
     *
     * By default, unwanted markup is completely discarded. You can optionally specify
     * a character to replace the regions that are discarded, so that the length of the
     * string and the locations of unstripped characters is not modified.
     *
     * @param markup the markup to be stripped
     * @param replacement the fill character, or null to delete the stripped regions
     * @return the stripped markup
     */
    public String stripNonArticleInternalLinks(String markup, Character replacement) {
        return rewriteLinks(markup, replacement, false);
    }

    /**
     * Removes the first section (header and content, including nested sections) matching
     * each of the given section names.
     *
     * @param markup the markup to be stripped
     * @param sectionNames the names of the sections (case insensitive) to remove
     * @param replacement the fill character, or null to delete the stripped regions
     * @return the stripped markup
     */
    public String stripSections(String markup, String[] sectionNames, Character replacement) {
        Vector<int[]> regions = new Vector<int[]>();
        for (String sectionName : sectionNames) {
            regions = mergeRegionLists(regions, gatherSection(markup, sectionName));
        }
        return stripRegions(markup, regions, replacement);
    }

    /**
     * Returns a copy of the given markup with all section headers removed.
     *
     * @param markup the markup to be stripped
     * @param replacement the fill character, or null to delete the headers
     * @return the stripped markup
     */
    public String stripSectionHeaders(String markup, Character replacement) {
        Vector<int[]> regions = this.gatherSectionHeaders(markup);
        return stripRegions(markup, regions, replacement);
    }

    /**
     * Convenience method which applies stripAllButInternalLinksAndEmphasis followed by
     * stripInternalLinks.
     *
     * By default, unwanted markup is completely discarded. You can optionally specify
     * a character to replace the regions that are discarded, so that the length of the
     * string and the locations of unstripped characters is not modified.
     *
     * @param markup the markup to be stripped
     * @param replacement the fill character, or null to delete the stripped regions
     * @return the stripped markup
     */
    public String stripToPlainText(String markup, Character replacement) {
        String clearedMarkup = stripAllButInternalLinksAndEmphasis(markup, replacement);
        return stripInternalLinks(clearedMarkup, replacement);
    }

    /**
     * Returns a copy of the given markup, where the given regions have been removed.
     * Regions are identified using one of the gather methods; each is a {start, end} pair
     * with an exclusive end, and the list must be sorted by end position.
     *
     * By default, unwanted markup is completely discarded. You can optionally specify
     * a character to replace the regions that are discarded, so that the length of the
     * string and the locations of unstripped characters is not modified. Line terminators
     * inside a region are kept as they are.
     *
     * @param markup the markup to be stripped
     * @param regions the regions to remove
     * @param replacement the fill character, or null to delete the regions
     * @return the stripped markup
     */
    public String stripRegions(String markup, Vector<int[]> regions, Character replacement) {
        StringBuilder clearedMarkup = new StringBuilder();
        int lastPos = markup.length();

        // Regions are sorted by end position, so we work backwards through them.
        for (int i = regions.size() - 1; i >= 0; i--) {
            int[] region = regions.elementAt(i);
            if (region[0] >= lastPos) {
                continue; // lies within a region we have already dealt with
            }
            // Never reach into text that has already been handled (or past the end of the
            // markup); otherwise the fill would be duplicated or substring would throw.
            int end = Math.min(region[1], lastPos);
            if (end < lastPos) {
                clearedMarkup.insert(0, markup.substring(end, lastPos));
            }
            if (replacement != null) {
                clearedMarkup.insert(0, blank(markup.substring(region[0], end), replacement));
            }
            lastPos = region[0];
        }

        clearedMarkup.insert(0, markup.substring(0, lastPos));
        return clearedMarkup.toString();
    }

    /**
     * Collapses runs of three or more newlines into two and trims the result.
     *
     * @param markup the markup to tidy
     * @return the tidied markup
     */
    public String stripExcessNewlines(String markup) {
        String strippedMarkup = markup.replaceAll("\n{3,}", "\n\n");
        return strippedMarkup.trim();
    }

    // ======================================================================================================

    /**
     * Gathers areas within the markup which correspond to links to other wikipedia pages
     * (as identified by [[ and ]] pairs). Note: these can be nested (e.g. for images)
     */
    public Vector<int[]> gatherInternalLinks(String markup) {
        return gatherComplexRegions(markup, "\\[\\[", "\\]\\]");
    }

    /**
     * Gathers areas within the markup which correspond to templates (as identified by
     * double opening and closing curly braces).
     */
    public Vector<int[]> gatherTemplates(String markup) {
        return gatherComplexRegions(markup, "\\{\\{", "\\}\\}");
    }

    /**
     * Returns those of the given regions that stand alone on their line(s), i.e. have only
     * whitespace between them and the nearest newline (or end of markup) on both sides.
     */
    public Vector<int[]> getIsolatedRegions(Vector<int[]> regions, String markup) {
        Vector<int[]> isolatedRegions = new Vector<int[]>();
        for (int[] region : regions) {
            if (isIsolated(region, markup)) {
                isolatedRegions.add(region);
            }
        }
        return isolatedRegions;
    }

    /**
     * Returns those of the given regions that do NOT stand alone on their line(s); the
     * complement of getIsolatedRegions.
     */
    public Vector<int[]> excludeIsolatedRegions(Vector<int[]> regions, String markup) {
        Vector<int[]> unisolatedRegions = new Vector<int[]>();
        for (int[] region : regions) {
            if (!isIsolated(region, markup)) {
                unisolatedRegions.add(region);
            }
        }
        return unisolatedRegions;
    }

    /** Tests whether only whitespace separates the region from the surrounding line breaks. */
    private boolean isIsolated(int[] region, String markup) {
        String before = markup.substring(0, region[0]);
        String after = markup.substring(region[1]);
        return ISOLATED_BEFORE.matcher(before).matches()
                && ISOLATED_AFTER.matcher(after).matches();
    }

    /**
     * Gathers areas within the markup which correspond to tables (as identified by an opening
     * curly brace plus pipe, and a pipe plus closing curly brace).
     */
    public Vector<int[]> gatherTables(String markup) {
        return gatherComplexRegions(markup, "\\{\\|", "\\|\\}");
    }

    /**
     * Gathers areas within the markup which correspond to html tags.
     *
     * DIV and REF regions will enclose beginning and ending tags, and everything in between,
     * since we assume this content is supposed to be discarded. All other regions will only include the
     * individual tag, since we assume the content between such pairs is supposed to be retained.
     */
    public Vector<int[]> gatherHTML(String markup) {
        // references first
        Vector<int[]> regions = gatherReferences(markup);
        // then div pairs, which may be nested
        regions = mergeRegionLists(regions,
                gatherComplexRegions(markup, "\\<div(\\s*?)([^>\\/]*?)\\>", "\\<\\/div(\\s*?)\\>"));
        // then every remaining individual tag
        regions = mergeRegionLists(regions, gatherSimpleRegions(markup, "\\<(.*?)\\>"));
        return regions;
    }

    /**
     * Gathers areas within the markup which correspond to references (markup to support claims or facts).
     * The regions will enclose beginning and ending tags, and everything in between,
     * since we assume this content is supposed to be discarded.
     */
    public Vector<int[]> gatherReferences(String markup) {
        // self-closing ref tags
        Vector<int[]> regions = gatherSimpleRegions(markup, "\\<ref(\\s*?)([^>]*?)\\/\\>");
        // opening/closing ref pairs (these should not be nested, but are tolerated if they are)
        regions = mergeRegionLists(regions,
                gatherComplexRegions(markup, "\\<ref(\\s*?)([^>\\/]*?)\\>", "\\<\\/ref(\\s*?)\\>"));
        return regions;
    }

    /**
     * Gathers items which MediaWiki documentation refers to as "magic words": e.g. __NOTOC__
     */
    public Vector<int[]> gatherMagicWords(String markup) {
        return gatherSimpleRegions(markup, "\\_\\_([A-Z]+)\\_\\_");
    }

    /**
     * Gathers all links to external web pages
     */
    public Vector<int[]> gatherExternalLinks(String markup) {
        return gatherSimpleRegions(markup, "\\[(http|www|ftp).*?\\]");
    }

    /**
     * Gathers bold and italic markup (runs of two or more single quotes)
     */
    public Vector<int[]> gatherEmphasis(String markup) {
        return gatherSimpleRegions(markup, "'{2,}");
    }

    /**
     * Gathers section headers. Only headers that follow a newline and are followed by at
     * least one more character are found.
     */
    public Vector<int[]> gatherSectionHeaders(String markup) {
        Vector<int[]> regions = new Vector<int[]>();
        Matcher m = SECTION_HEADER.matcher(markup);
        while (m.find()) {
            regions.add(new int[] {m.start(1), m.end(1)});
        }
        return regions;
    }

    /**
     * Gathers the first section with the given name: its header plus everything up to the
     * next header of the same or a higher level, or up to the end of the markup.
     *
     * @param markup the markup to search
     * @param sectionName the literal section name (case insensitive)
     * @return a list holding the section's region, or an empty list if there is no such section
     */
    public Vector<int[]> gatherSection(String markup, String sectionName) {
        Vector<int[]> regions = new Vector<int[]>();

        // The name is quoted so that characters such as parentheses are matched literally.
        Pattern startP = Pattern.compile(
                "\\n\\s*(={2,})\\s*" + Pattern.quote(sectionName) + "\\s*\\1",
                Pattern.CASE_INSENSITIVE);
        Matcher startM = startP.matcher(markup);

        if (startM.find()) {
            int start = startM.start(1);
            int level = startM.group(1).length();

            // look for the start of a section that is at the same level or higher
            Pattern endP = Pattern.compile("\\n\\s*(={2," + level + "})[^=].*\\1");
            Matcher endM = endP.matcher(markup);

            // Region ends are exclusive, so a section that runs to the end of the markup
            // ends at markup.length().
            int end = endM.find(startM.end()) ? endM.start() : markup.length();
            regions.add(new int[] {start, end});
        }
        return regions;
    }

    /**
     * Gathers markup which indicates indented items, or numbered and unnumbered list items
     * (runs of #, * and : at the start of a line, optionally preceded by spaces).
     */
    public Vector<int[]> gatherListAndIndentMarkers(String markup) {
        Vector<int[]> regions = gatherSimpleRegions(markup, "\n( *)([#*:]+)");

        // move each start forward by one so the region does not include the newline
        for (int[] region : regions) {
            region[0]++;
        }

        // add a marker on the very first line (if there is one)
        regions = mergeRegionLists(regions, gatherSimpleRegions(markup, "^( *)([#*:]+)"));
        return regions;
    }

    /**
     * Tests whether the given line, once its emphasis has been resolved, is a single italic
     * span (optionally followed by a full stop) and nothing else.
     */
    private boolean isEntirelyItalicised(String line) {
        String resolvedLine = emphasisResolver.resolveEmphasis(line);
        Matcher m = ITALIC_LINE.matcher(resolvedLine);
        if (!m.matches()) {
            return false;
        }
        // If the captured content holds a closing tag, the line is made of several italic
        // spans with plain text in between, so it is not entirely italicised.
        return !m.group(2).contains("</i>");
    }

    /**
     * Gathers paragraphs at the start of the given markup which either begin with an indent,
     * are entirely encased in italics, are blank, or consist of a single image link. These
     * correspond to quotes or disambiguation and navigation notes that the author should have
     * used templates to identify, but didn't.
     * This will only work after templates, and before list markers have been cleaned out.
     *
     * @param markup the markup to search
     * @return a list holding one region that starts at 0 (it is empty if nothing is to be ignored)
     */
    public Vector<int[]> gatherMisformattedStarts(String markup) {
        String[] lines = markup.split("\n");
        int ignoreUntil = 0;

        for (String line : lines) {
            boolean isWhitespace = BLANK_LINE.matcher(line).matches();
            boolean isIndented = INDENTED_LINE.matcher(line).matches();
            boolean isItalicised = isEntirelyItalicised(line);
            boolean isImage = IMAGE_LINE.matcher(line).matches();

            if (!(isWhitespace || isIndented || isItalicised || isImage)) {
                break; // first line worth keeping
            }
            // skip this line together with the newline that followed it
            ignoreUntil = ignoreUntil + line.length() + 1;
        }

        // The last line may have had no trailing newline, so do not point past the end.
        ignoreUntil = Math.min(ignoreUntil, markup.length());

        Vector<int[]> regions = new Vector<int[]>();
        regions.add(new int[] {0, ignoreUntil});
        return regions;
    }

    /**
     * Gathers simple regions: ones which cannot be nested within each other.
     *
     * The returned regions (an array of start and end positions) will be sorted
     * by end position (and also by start position, since they can't overlap)
     *
     * @param markup the markup to search
     * @param regex the expression identifying a region; it is compiled with DOTALL
     * @return the regions matched, in order of appearance
     */
    public Vector<int[]> gatherSimpleRegions(String markup, String regex) {
        Vector<int[]> regions = new Vector<int[]>();
        Matcher m = Pattern.compile(regex, Pattern.DOTALL).matcher(markup);
        while (m.find()) {
            regions.add(new int[] {m.start(), m.end()});
        }
        return regions;
    }

    /**
     * Gathers complex regions: ones which can potentially be nested within each other.
     *
     * The returned regions (an array of start and end positions) will be either
     * non-overlapping or cleanly nested, and sorted by end position. End markers without a
     * matching start, and start markers that are never closed, are silently ignored.
     *
     * @param markup the markup to search
     * @param startRegex the expression identifying the start of a region
     * @param endRegex the expression identifying the end of a region
     * @return the regions found
     */
    public Vector<int[]> gatherComplexRegions(String markup, String startRegex, String endRegex) {
        Vector<int[]> regions = new Vector<int[]>();
        // stack of the start positions of regions that are still open
        List<Integer> openStarts = new ArrayList<Integer>();

        Pattern p = Pattern.compile("((" + startRegex + ")|(" + endRegex + "))", Pattern.DOTALL);
        Matcher m = p.matcher(markup);

        while (m.find()) {
            if (m.group(2) != null) {
                // start of an item
                openStarts.add(m.start());
            } else if (!openStarts.isEmpty()) {
                // end of the most recently opened item
                int start = openStarts.remove(openStarts.size() - 1);
                regions.add(new int[] {start, m.end()});
            }
        }
        return regions;
    }

    /**
     * Shared implementation of stripInternalLinks and stripNonArticleInternalLinks: rewrites
     * every outermost internal link, leaving all other text untouched.
     */
    private String rewriteLinks(String markup, Character replacement, boolean unwrapArticleLinks) {
        Vector<int[]> regions = gatherComplexRegions(markup, "\\[\\[", "\\]\\]");

        StringBuilder strippedMarkup = new StringBuilder();
        int lastPos = markup.length();

        // Regions are sorted by end position, so we work backwards through them.
        for (int i = regions.size() - 1; i >= 0; i--) {
            int[] region = regions.elementAt(i);
            if (region[0] >= lastPos) {
                continue; // nested inside a link we have already dealt with
            }
            // copy everything between this link and the start of the last one we dealt with
            strippedMarkup.insert(0, markup.substring(region[1], lastPos));
            String linkMarkup = markup.substring(region[0], region[1]);
            strippedMarkup.insert(0, rewriteLink(linkMarkup, replacement, unwrapArticleLinks));
            lastPos = region[0];
        }

        if (lastPos > 0) {
            strippedMarkup.insert(0, markup.substring(0, lastPos));
        }
        return strippedMarkup.toString();
    }

    /**
     * Rewrites a single link. Links with a prefix are removed (or blanked). Article links
     * are reduced to their anchor text when unwrapArticleLinks is set and are otherwise kept.
     * Markup the link pattern cannot parse is returned unchanged.
     */
    private static String rewriteLink(String linkMarkup, Character replacement, boolean unwrapArticleLinks) {
        Matcher m = LINK_PATTERN.matcher(linkMarkup);
        if (!m.matches()) {
            return linkMarkup;
        }

        String prefix = m.group(1);
        String dest = m.group(2);
        String anchor = m.group(3);

        if (prefix != null) {
            // not a link to another article, so get rid of it entirely
            return (replacement == null) ? "" : blank(linkMarkup, replacement);
        }
        if (!unwrapArticleLinks) {
            return linkMarkup;
        }

        if (anchor != null) {
            String anchorText = anchor.substring(1); // drop the leading pipe
            if (replacement == null) {
                return anchorText;
            }
            // keep the anchor text in place and blank the brackets, destination and pipe
            return repeat(replacement, 2) + blank(dest, replacement) + replacement.toString()
                    + anchorText + repeat(replacement, 2);
        }

        // no anchor defined, so the destination doubles as the anchor text
        if (replacement == null) {
            return dest;
        }
        return repeat(replacement, 2) + dest + repeat(replacement, 2);
    }

    /**
     * Returns a string of the same length as the given text in which every character is the
     * fill character, except for line terminators, which are kept.
     */
    private static String blank(String text, char fill) {
        StringBuilder sb = new StringBuilder(text.length());
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            boolean lineTerminator = c == '\n' || c == '\r'
                    || c == (char) 0x85 || c == (char) 0x2028 || c == (char) 0x2029;
            sb.append(lineTerminator ? c : fill);
        }
        return sb.toString();
    }

    /** Returns the given character repeated count times. */
    private static String repeat(char c, int count) {
        StringBuilder sb = new StringBuilder(count);
        for (int i = 0; i < count; i++) {
            sb.append(c);
        }
        return sb.toString();
    }

    /**
     * Merges two lists of regions into one sorted list. Regions that are contained
     * within other regions are discarded, and a region that overlaps the start of a later
     * one is cut short there.
     *
     * Both lists must be sorted by end position. The resulting region list will be
     * non-overlapping and sorted by end positions.
     */
    private Vector<int[]> mergeRegionLists(Vector<int[]> regionsA, Vector<int[]> regionsB) {
        int indexA = regionsA.size() - 1;
        int indexB = regionsB.size() - 1;

        Vector<int[]> newRegions = new Vector<int[]>();
        // start of the region most recently added, or -1 if nothing has been added yet
        int lastPos = -1;

        // Both lists are walked back to front, always taking the region that ends last.
        while (indexA >= 0 && indexB >= 0) {
            int[] regionA = regionsA.elementAt(indexA);
            int[] regionB = regionsB.elementAt(indexB);

            boolean coveredA = lastPos >= 0 && regionA[0] >= lastPos;
            boolean coveredB = lastPos >= 0 && regionB[0] >= lastPos;

            if (coveredA && coveredB) {
                // both lie inside regions we have already dealt with, so discard them
                indexA--;
                indexB--;
            } else if (regionB[1] > regionA[1]) {
                lastPos = prependIfUncovered(newRegions, regionB, lastPos);
                indexB--;
            } else {
                boolean insideB = regionA[0] >= regionB[0] && regionA[1] <= regionB[1];
                if (!insideB) {
                    lastPos = prependIfUncovered(newRegions, regionA, lastPos);
                }
                // otherwise B completely contains A, so A is simply dropped
                indexA--;
            }
        }

        // deal with whatever is left in either list
        while (indexA >= 0) {
            lastPos = prependIfUncovered(newRegions, regionsA.elementAt(indexA), lastPos);
            indexA--;
        }
        while (indexB >= 0) {
            lastPos = prependIfUncovered(newRegions, regionsB.elementAt(indexB), lastPos);
            indexB--;
        }

        return newRegions;
    }

    /**
     * Adds the region to the front of the list, cut short at lastPos, unless it starts at or
     * after lastPos (in which case it is already covered). Returns the new value of lastPos.
     */
    private int prependIfUncovered(Vector<int[]> regions, int[] region, int lastPos) {
        if (lastPos >= 0 && region[0] >= lastPos) {
            return lastPos;
        }
        regions.add(0, new int[] {region[0], min(region[1], lastPos)});
        return region[0];
    }

    /**
     * Returns the smaller of two values, where a negative value means "not set": if only one
     * of them is non-negative that one is returned, and if neither is, b is returned.
     */
    private int min(int a, int b) {
        if (a >= 0 && b >= 0) {
            return Math.min(a, b);
        }
        return (a >= 0) ? a : b;
    }
}