built-in markdown crack down on locals; still very slow

This commit is contained in:
steve donovan 2013-04-17 12:08:11 +02:00
parent 68cd576bf4
commit 167a4595a5
1 changed files with 126 additions and 119 deletions

View File

@ -5,7 +5,7 @@
<http://www.frykholm.se/files/markdown.lua> <http://www.frykholm.se/files/markdown.lua>
**Author:** Niklas Frykholm, <niklas@frykholm.se> **Author:** Niklas Frykholm, <niklas@frykholm.se>
**Date:** 31 May 2008 **Date:** 31 May 2008
This is an implementation of the popular text markup language Markdown in pure Lua. This is an implementation of the popular text markup language Markdown in pure Lua.
@ -47,10 +47,10 @@ Permission is hereby granted, free of charge, to any person obtaining a copy of
software and associated documentation files (the "Software"), to deal in the Software software and associated documentation files (the "Software"), to deal in the Software
without restriction, including without limitation the rights to use, copy, modify, merge, without restriction, including without limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
to whom the Software is furnished to do so, subject to the following conditions: to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies The above copyright notice and this permission notice shall be included in all copies
or substantial portions of the Software. or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
@ -130,31 +130,33 @@ setfenv(1, M)
-- Locks table t from changes, writes an error if someone attempts to change the table. -- Locks table t from changes, writes an error if someone attempts to change the table.
-- This is useful for detecting variables that have "accidently" been made global. Something -- This is useful for detecting variables that have "accidently" been made global. Something
-- I tend to do all too much. -- I tend to do all too much.
function lock(t) function M.lock(t)
function lock_new_index(t, k, v) local function lock_new_index(t, k, v)
error("module has been locked -- " .. k .. " must be declared local", 2) error("module has been locked -- " .. k .. " must be declared local", 2)
end end
local mt = {__newindex = lock_new_index} local mt = {__newindex = lock_new_index}
if getmetatable(t) then mt.__index = getmetatable(t).__index end if getmetatable(t) then
mt.__index = getmetatable(t).__index
end
setmetatable(t, mt) setmetatable(t, mt)
end end
-- Returns the result of mapping the values in table t through the function f -- Returns the result of mapping the values in table t through the function f
function map(t, f) local function map(t, f)
local out = {} local out = {}
for k,v in pairs(t) do out[k] = f(v,k) end for k,v in pairs(t) do out[k] = f(v,k) end
return out return out
end end
-- The identity function, useful as a placeholder. -- The identity function, useful as a placeholder.
function identity(text) return text end local function identity(text) return text end
-- Functional style if statement. (NOTE: no short circuit evaluation) -- Functional style if statement. (NOTE: no short circuit evaluation)
function iff(t, a, b) if t then return a else return b end end local function iff(t, a, b) if t then return a else return b end end
-- Splits the text into an array of separate lines. -- Splits the text into an array of separate lines.
function split(text, sep) local function split(text, sep)
sep = sep or "\n" sep = sep or "\n"
local lines = {} local lines = {}
local pos = 1 local pos = 1
@ -168,7 +170,7 @@ function split(text, sep)
end end
-- Converts tabs to spaces -- Converts tabs to spaces
function detab(text) local function detab(text)
local tab_width = 4 local tab_width = 4
local function rep(match) local function rep(match)
local spaces = -match:len() local spaces = -match:len()
@ -180,7 +182,7 @@ function detab(text)
end end
-- Applies string.find for every pattern in the list and returns the first match -- Applies string.find for every pattern in the list and returns the first match
function find_first(s, patterns, index) local function find_first(s, patterns, index)
local res = {} local res = {}
for _,p in ipairs(patterns) do for _,p in ipairs(patterns) do
local match = {s:find(p, index)} local match = {s:find(p, index)}
@ -192,7 +194,7 @@ end
-- If a replacement array is specified, the range [start, stop] in the array is replaced -- If a replacement array is specified, the range [start, stop] in the array is replaced
-- with the replacement array and the resulting array is returned. Without a replacement -- with the replacement array and the resulting array is returned. Without a replacement
-- array the section of the array between start and stop is returned. -- array the section of the array between start and stop is returned.
function splice(array, start, stop, replacement) local function splice(array, start, stop, replacement)
if replacement then if replacement then
local n = stop - start + 1 local n = stop - start + 1
while n > 0 do while n > 0 do
@ -213,7 +215,7 @@ function splice(array, start, stop, replacement)
end end
-- Outdents the text one step. -- Outdents the text one step.
function outdent(text) local function outdent(text)
text = "\n" .. text text = "\n" .. text
text = text:gsub("\n ? ? ?", "\n") text = text:gsub("\n ? ? ?", "\n")
text = text:sub(2) text = text:sub(2)
@ -221,15 +223,15 @@ function outdent(text)
end end
-- Indents the text one step. -- Indents the text one step.
function indent(text) local function indent(text)
text = text:gsub("\n", "\n ") text = text:gsub("\n", "\n ")
return text return text
end end
-- Does a simple tokenization of html data. Returns the data as a list of tokens. -- Does a simple tokenization of html data. Returns the data as a list of tokens.
-- Each token is a table with a type field (which is either "tag" or "text") and -- Each token is a table with a type field (which is either "tag" or "text") and
-- a text field (which contains the original token data). -- a text field (which contains the original token data).
function tokenize_html(html) local function tokenize_html(html)
local tokens = {} local tokens = {}
local pos = 1 local pos = 1
while true do while true do
@ -239,7 +241,7 @@ function tokenize_html(html)
break break
end end
if start ~= pos then table.insert(tokens, {type="text", text = html:sub(pos, start-1)}) end if start ~= pos then table.insert(tokens, {type="text", text = html:sub(pos, start-1)}) end
local _, stop local _, stop
if html:match("^<!%-%-", start) then if html:match("^<!%-%-", start) then
_,stop = html:find("%-%->", start) _,stop = html:find("%-%->", start)
@ -249,7 +251,7 @@ function tokenize_html(html)
_,stop = html:find("%b<>", start) _,stop = html:find("%b<>", start)
end end
if not stop then if not stop then
-- error("Could not match html tag " .. html:sub(start,start+30)) -- error("Could not match html tag " .. html:sub(start,start+30))
table.insert(tokens, {type="text", text=html:sub(start, start)}) table.insert(tokens, {type="text", text=html:sub(start, start)})
pos = start + 1 pos = start + 1
else else
@ -272,27 +274,27 @@ end
local HASH = { local HASH = {
-- Has the hash been inited. -- Has the hash been inited.
inited = false, inited = false,
-- The unique string prepended to all hash values. This is to ensure -- The unique string prepended to all hash values. This is to ensure
-- that hash values do not accidently coincide with an actual existing -- that hash values do not accidently coincide with an actual existing
-- string in the document. -- string in the document.
identifier = "", identifier = "",
-- Counter that counts up for each new hash instance. -- Counter that counts up for each new hash instance.
counter = 0, counter = 0,
-- Hash table. -- Hash table.
table = {} table = {}
} }
-- Inits hashing. Creates a hash_identifier that doesn't occur anywhere -- Inits hashing. Creates a hash_identifier that doesn't occur anywhere
-- in the text. -- in the text.
function init_hash(text) local function init_hash(text)
HASH.inited = true HASH.inited = true
HASH.identifier = "" HASH.identifier = ""
HASH.counter = 0 HASH.counter = 0
HASH.table = {} HASH.table = {}
local s = "HASH" local s = "HASH"
local counter = 0 local counter = 0
local id local id
@ -305,7 +307,7 @@ function init_hash(text)
end end
-- Returns the hashed value for s. -- Returns the hashed value for s.
function hash(s) local function hash(s)
assert(HASH.inited) assert(HASH.inited)
if not HASH.table[s] then if not HASH.table[s] then
HASH.counter = HASH.counter + 1 HASH.counter = HASH.counter + 1
@ -320,7 +322,7 @@ end
---------------------------------------------------------------------- ----------------------------------------------------------------------
-- The protection module is used to "protect" parts of a document -- The protection module is used to "protect" parts of a document
-- so that they are not modified by subsequent processing steps. -- so that they are not modified by subsequent processing steps.
-- Protected parts are saved in a table for later unprotection -- Protected parts are saved in a table for later unprotection
-- Protection data -- Protection data
@ -342,18 +344,18 @@ local PD = {
-- Nested data. -- Nested data.
-- </div> -- </div>
-- </div> -- </div>
function block_pattern(tag) local function block_pattern(tag)
return "\n<" .. tag .. ".-\n</" .. tag .. ">[ \t]*\n" return "\n<" .. tag .. ".-\n</" .. tag .. ">[ \t]*\n"
end end
-- Pattern for matching a block tag that begins and ends with a newline -- Pattern for matching a block tag that begins and ends with a newline
function line_pattern(tag) local function line_pattern(tag)
return "\n<" .. tag .. ".-</" .. tag .. ">[ \t]*\n" return "\n<" .. tag .. ".-</" .. tag .. ">[ \t]*\n"
end end
-- Protects the range of characters from start to stop in the text and -- Protects the range of characters from start to stop in the text and
-- returns the protected string. -- returns the protected string.
function protect_range(text, start, stop) local function protect_range(text, start, stop)
local s = text:sub(start, stop) local s = text:sub(start, stop)
local h = hash(s) local h = hash(s)
PD.blocks[h] = s PD.blocks[h] = s
@ -363,7 +365,7 @@ end
-- Protect every part of the text that matches any of the patterns. The first -- Protect every part of the text that matches any of the patterns. The first
-- matching pattern is protected first, etc. -- matching pattern is protected first, etc.
function protect_matches(text, patterns) local function protect_matches(text, patterns)
while true do while true do
local start, stop = find_first(text, patterns) local start, stop = find_first(text, patterns)
if not start then break end if not start then break end
@ -373,7 +375,7 @@ function protect_matches(text, patterns)
end end
-- Protects blocklevel tags in the specified text -- Protects blocklevel tags in the specified text
function protect(text) local function protect(text)
-- First protect potentially nested block tags -- First protect potentially nested block tags
text = protect_matches(text, map(PD.tags, block_pattern)) text = protect_matches(text, map(PD.tags, block_pattern))
-- Then protect block tags at the line level. -- Then protect block tags at the line level.
@ -385,12 +387,12 @@ function protect(text)
end end
-- Returns true if the string s is a hash resulting from protection -- Returns true if the string s is a hash resulting from protection
function is_protected(s) local function is_protected(s)
return PD.blocks[s] return PD.blocks[s]
end end
-- Unprotects the specified text by expanding all the nonces -- Unprotects the specified text by expanding all the nonces
function unprotect(text) local function unprotect(text)
for k,v in pairs(PD.blocks) do for k,v in pairs(PD.blocks) do
v = v:gsub("%%", "%%%%") v = v:gsub("%%", "%%%%")
text = text:gsub(k, v) text = text:gsub(k, v)
@ -410,22 +412,22 @@ end
-- Returns true if the line is a ruler of (char) characters. -- Returns true if the line is a ruler of (char) characters.
-- The line must contain at least three char characters and contain only spaces and -- The line must contain at least three char characters and contain only spaces and
-- char characters. -- char characters.
function is_ruler_of(line, char) local function is_ruler_of(line, char)
if not line:match("^[ %" .. char .. "]*$") then return false end if not line:match("^[ %" .. char .. "]*$") then return false end
if not line:match("%" .. char .. ".*%" .. char .. ".*%" .. char) then return false end if not line:match("%" .. char .. ".*%" .. char .. ".*%" .. char) then return false end
return true return true
end end
-- Identifies the block level formatting present in the line -- Identifies the block level formatting present in the line
function classify(line) local function classify(line)
local info = {line = line, text = line} local info = {line = line, text = line}
if line:match("^ ") then if line:match("^ ") then
info.type = "indented" info.type = "indented"
info.outdented = line:sub(5) info.outdented = line:sub(5)
return info return info
end end
for _,c in ipairs({'*', '-', '_', '='}) do for _,c in ipairs({'*', '-', '_', '='}) do
if is_ruler_of(line, c) then if is_ruler_of(line, c) then
info.type = "ruler" info.type = "ruler"
@ -433,12 +435,12 @@ function classify(line)
return info return info
end end
end end
if line == "" then if line == "" then
info.type = "blank" info.type = "blank"
return info return info
end end
if line:match("^(#+)[ \t]*(.-)[ \t]*#*[ \t]*$") then if line:match("^(#+)[ \t]*(.-)[ \t]*#*[ \t]*$") then
local m1, m2 = line:match("^(#+)[ \t]*(.-)[ \t]*#*[ \t]*$") local m1, m2 = line:match("^(#+)[ \t]*(.-)[ \t]*#*[ \t]*$")
info.type = "header" info.type = "header"
@ -446,7 +448,7 @@ function classify(line)
info.text = m2 info.text = m2
return info return info
end end
if line:match("^ ? ? ?(%d+)%.[ \t]+(.+)") then if line:match("^ ? ? ?(%d+)%.[ \t]+(.+)") then
local number, text = line:match("^ ? ? ?(%d+)%.[ \t]+(.+)") local number, text = line:match("^ ? ? ?(%d+)%.[ \t]+(.+)")
info.type = "list_item" info.type = "list_item"
@ -455,7 +457,7 @@ function classify(line)
info.text = text info.text = text
return info return info
end end
if line:match("^ ? ? ?([%*%+%-])[ \t]+(.+)") then if line:match("^ ? ? ?([%*%+%-])[ \t]+(.+)") then
local bullet, text = line:match("^ ? ? ?([%*%+%-])[ \t]+(.+)") local bullet, text = line:match("^ ? ? ?([%*%+%-])[ \t]+(.+)")
info.type = "list_item" info.type = "list_item"
@ -464,29 +466,29 @@ function classify(line)
info.text= text info.text= text
return info return info
end end
if line:match("^>[ \t]?(.*)") then if line:match("^>[ \t]?(.*)") then
info.type = "blockquote" info.type = "blockquote"
info.text = line:match("^>[ \t]?(.*)") info.text = line:match("^>[ \t]?(.*)")
return info return info
end end
if is_protected(line) then if is_protected(line) then
info.type = "raw" info.type = "raw"
info.html = unprotect(line) info.html = unprotect(line)
return info return info
end end
info.type = "normal" info.type = "normal"
return info return info
end end
-- Find headers constisting of a normal line followed by a ruler and converts them to -- Find headers constisting of a normal line followed by a ruler and converts them to
-- header entries. -- header entries.
function headers(array) local function headers(array)
local i = 1 local i = 1
while i <= #array - 1 do while i <= #array - 1 do
if array[i].type == "normal" and array[i+1].type == "ruler" and if array[i].type == "normal" and array[i+1].type == "ruler" and
(array[i+1].ruler_char == "-" or array[i+1].ruler_char == "=") then (array[i+1].ruler_char == "-" or array[i+1].ruler_char == "=") then
local info = {line = array[i].line} local info = {line = array[i].line}
info.text = info.line info.text = info.line
@ -500,8 +502,10 @@ function headers(array)
return array return array
end end
local block_transform, blocks_to_html, encode_code, span_transform, encode_backslash_escapes
-- Find list blocks and convert them to protected data blocks -- Find list blocks and convert them to protected data blocks
function lists(array, sublist) local function lists(array, sublist)
local function process_list(arr) local function process_list(arr)
local function any_blanks(arr) local function any_blanks(arr)
for i = 1, #arr do for i = 1, #arr do
@ -509,7 +513,7 @@ function lists(array, sublist)
end end
return false return false
end end
local function split_list_items(arr) local function split_list_items(arr)
local acc = {arr[1]} local acc = {arr[1]}
local res = {} local res = {}
@ -524,12 +528,12 @@ function lists(array, sublist)
table.insert(res, acc) table.insert(res, acc)
return res return res
end end
local function process_list_item(lines, block) local function process_list_item(lines, block)
while lines[#lines].type == "blank" do while lines[#lines].type == "blank" do
table.remove(lines) table.remove(lines)
end end
local itemtext = lines[1].text local itemtext = lines[1].text
for i=2,#lines do for i=2,#lines do
itemtext = itemtext .. "\n" .. outdent(lines[i].line) itemtext = itemtext .. "\n" .. outdent(lines[i].line)
@ -548,7 +552,7 @@ function lists(array, sublist)
return " <li>" .. itemtext .. "</li>" return " <li>" .. itemtext .. "</li>"
end end
end end
local block_list = any_blanks(arr) local block_list = any_blanks(arr)
local items = split_list_items(arr) local items = split_list_items(arr)
local out = "" local out = ""
@ -561,7 +565,7 @@ function lists(array, sublist)
return "<ul>\n" .. out .. "</ul>" return "<ul>\n" .. out .. "</ul>"
end end
end end
-- Finds the range of lines composing the first list in the array. A list -- Finds the range of lines composing the first list in the array. A list
-- starts with (^ list_item) or (blank list_item) and ends with -- starts with (^ list_item) or (blank list_item) and ends with
-- (blank* $) or (blank normal). -- (blank* $) or (blank normal).
@ -586,7 +590,7 @@ function lists(array, sublist)
local function find_list_end(array, start) local function find_list_end(array, start)
local pos = #array local pos = #array
for i = start, #array-1 do for i = start, #array-1 do
if array[i].type == "blank" and array[i+1].type ~= "list_item" if array[i].type == "blank" and array[i+1].type ~= "list_item"
and array[i+1].type ~= "indented" and array[i+1].type ~= "blank" then and array[i+1].type ~= "indented" and array[i+1].type ~= "blank" then
pos = i-1 pos = i-1
break break
@ -597,12 +601,12 @@ function lists(array, sublist)
end end
return pos return pos
end end
local start = find_list_start(array, sublist) local start = find_list_start(array, sublist)
if not start then return nil end if not start then return nil end
return start, find_list_end(array, start) return start, find_list_end(array, start)
end end
while true do while true do
local start, stop = find_list(array, sublist) local start, stop = find_list(array, sublist)
if not start then break end if not start then break end
@ -614,17 +618,17 @@ function lists(array, sublist)
} }
array = splice(array, start, stop, {info}) array = splice(array, start, stop, {info})
end end
-- Convert any remaining list items to normal -- Convert any remaining list items to normal
for _,line in ipairs(array) do for _,line in ipairs(array) do
if line.type == "list_item" then line.type = "normal" end if line.type == "list_item" then line.type = "normal" end
end end
return array return array
end end
-- Find and convert blockquote markers. -- Find and convert blockquote markers.
function blockquotes(lines) local function blockquotes(lines)
local function find_blockquote(lines) local function find_blockquote(lines)
local start local start
for i,line in ipairs(lines) do for i,line in ipairs(lines) do
@ -634,7 +638,7 @@ function blockquotes(lines)
end end
end end
if not start then return nil end if not start then return nil end
local stop = #lines local stop = #lines
for i = start+1, #lines do for i = start+1, #lines do
if lines[i].type == "blank" or lines[i].type == "blockquote" then if lines[i].type == "blank" or lines[i].type == "blockquote" then
@ -647,7 +651,7 @@ function blockquotes(lines)
while lines[stop].type == "blank" do stop = stop - 1 end while lines[stop].type == "blank" do stop = stop - 1 end
return start, stop return start, stop
end end
local function process_blockquote(lines) local function process_blockquote(lines)
local raw = lines[1].text local raw = lines[1].text
for i = 2,#lines do for i = 2,#lines do
@ -658,7 +662,7 @@ function blockquotes(lines)
return "<blockquote>\n " .. bt .. return "<blockquote>\n " .. bt ..
"\n</blockquote>" "\n</blockquote>"
end end
while true do while true do
local start, stop = find_blockquote(lines) local start, stop = find_blockquote(lines)
if not start then break end if not start then break end
@ -674,14 +678,14 @@ function blockquotes(lines)
end end
-- Find and convert codeblocks. -- Find and convert codeblocks.
function codeblocks(lines) local function codeblocks(lines)
local function find_codeblock(lines) local function find_codeblock(lines)
local start local start
for i,line in ipairs(lines) do for i,line in ipairs(lines) do
if line.type == "indented" then start = i break end if line.type == "indented" then start = i break end
end end
if not start then return nil end if not start then return nil end
local stop = #lines local stop = #lines
for i = start+1, #lines do for i = start+1, #lines do
if lines[i].type ~= "indented" and lines[i].type ~= "blank" then if lines[i].type ~= "indented" and lines[i].type ~= "blank" then
@ -692,7 +696,7 @@ function codeblocks(lines)
while lines[stop].type == "blank" do stop = stop - 1 end while lines[stop].type == "blank" do stop = stop - 1 end
return start, stop return start, stop
end end
local function process_codeblock(lines) local function process_codeblock(lines)
local raw = detab(encode_code(outdent(lines[1].line))) local raw = detab(encode_code(outdent(lines[1].line)))
for i = 2,#lines do for i = 2,#lines do
@ -700,7 +704,7 @@ function codeblocks(lines)
end end
return "<pre><code>" .. raw .. "\n</code></pre>" return "<pre><code>" .. raw .. "\n</code></pre>"
end end
while true do while true do
local start, stop = find_codeblock(lines) local start, stop = find_codeblock(lines)
if not start then break end if not start then break end
@ -727,12 +731,12 @@ function blocks_to_html(lines, no_paragraphs)
table.insert(out, line.html) table.insert(out, line.html)
elseif line.type == "normal" then elseif line.type == "normal" then
local s = line.line local s = line.line
while i+1 <= #lines and lines[i+1].type == "normal" do while i+1 <= #lines and lines[i+1].type == "normal" do
i = i + 1 i = i + 1
s = s .. "\n" .. lines[i].line s = s .. "\n" .. lines[i].line
end end
if no_paragraphs then if no_paragraphs then
table.insert(out, span_transform(s)) table.insert(out, span_transform(s))
else else
@ -764,7 +768,7 @@ end
-- Debug function for printing a line array to see the result -- Debug function for printing a line array to see the result
-- of partial transforms. -- of partial transforms.
function print_lines(lines) local function print_lines(lines)
for i, line in ipairs(lines) do for i, line in ipairs(lines) do
print(i, line.type, line.text or line.line) print(i, line.type, line.text or line.line)
end end
@ -778,10 +782,10 @@ end
-- These characters may need to be escaped because they have a special -- These characters may need to be escaped because they have a special
-- meaning in markdown. -- meaning in markdown.
escape_chars = "'\\`*_{}[]()>#+-.!'" local escape_chars = "'\\`*_{}[]()>#+-.!'"
escape_table = {} local escape_table = {}
function init_escape_table() local function init_escape_table()
escape_table = {} escape_table = {}
for i = 1,#escape_chars do for i = 1,#escape_chars do
local c = escape_chars:sub(i,i) local c = escape_chars:sub(i,i)
@ -790,17 +794,17 @@ function init_escape_table()
end end
-- Adds a new escape to the escape table. -- Adds a new escape to the escape table.
function add_escape(text) local function add_escape(text)
if not escape_table[text] then if not escape_table[text] then
escape_table[text] = hash(text) escape_table[text] = hash(text)
end end
return escape_table[text] return escape_table[text]
end end
-- Escape characters that should not be disturbed by markdown. -- Escape characters that should not be disturbed by markdown.
function escape_special_chars(text) local function escape_special_chars(text)
local tokens = tokenize_html(text) local tokens = tokenize_html(text)
local out = "" local out = ""
for _, token in ipairs(tokens) do for _, token in ipairs(tokens) do
local t = token.text local t = token.text
@ -826,7 +830,7 @@ function encode_backslash_escapes(t)
end end
-- Unescape characters that have been encoded. -- Unescape characters that have been encoded.
function unescape_special_chars(t) local function unescape_special_chars(t)
local tin = t local tin = t
for k,v in pairs(escape_table) do for k,v in pairs(escape_table) do
k = k:gsub("%%", "%%%%") k = k:gsub("%%", "%%%%")
@ -850,7 +854,7 @@ function encode_code(s)
end end
-- Handle backtick blocks. -- Handle backtick blocks.
function code_spans(s) local function code_spans(s)
s = s:gsub("\\\\", escape_table["\\"]) s = s:gsub("\\\\", escape_table["\\"])
s = s:gsub("\\`", escape_table["`"]) s = s:gsub("\\`", escape_table["`"])
@ -880,7 +884,7 @@ function code_spans(s)
end end
-- Encode alt text... enodes &, and ". -- Encode alt text... enodes &, and ".
function encode_alt(s) local function encode_alt(s)
if not s then return s end if not s then return s end
s = s:gsub('&', '&amp;') s = s:gsub('&', '&amp;')
s = s:gsub('"', '&quot;') s = s:gsub('"', '&quot;')
@ -888,8 +892,10 @@ function encode_alt(s)
return s return s
end end
local link_database
-- Handle image references -- Handle image references
function images(text) local function images(text)
local function reference_link(alt, id) local function reference_link(alt, id)
alt = encode_alt(alt:match("%b[]"):sub(2,-2)) alt = encode_alt(alt:match("%b[]"):sub(2,-2))
id = id:match("%[(.*)%]"):lower() id = id:match("%[(.*)%]"):lower()
@ -902,7 +908,7 @@ function images(text)
if title then title = " title=\"" .. title .. "\"" else title = "" end if title then title = " title=\"" .. title .. "\"" else title = "" end
return add_escape ('<img src="' .. url .. '" alt="' .. alt .. '"' .. title .. "/>") return add_escape ('<img src="' .. url .. '" alt="' .. alt .. '"' .. title .. "/>")
end end
local function inline_link(alt, link) local function inline_link(alt, link)
alt = encode_alt(alt:match("%b[]"):sub(2,-2)) alt = encode_alt(alt:match("%b[]"):sub(2,-2))
local url, title = link:match("%(<?(.-)>?[ \t]*['\"](.+)['\"]") local url, title = link:match("%(<?(.-)>?[ \t]*['\"](.+)['\"]")
@ -915,14 +921,14 @@ function images(text)
return add_escape('<img src="' .. url .. '" alt="' .. alt .. '"/>') return add_escape('<img src="' .. url .. '" alt="' .. alt .. '"/>')
end end
end end
text = text:gsub("!(%b[])[ \t]*\n?[ \t]*(%b[])", reference_link) text = text:gsub("!(%b[])[ \t]*\n?[ \t]*(%b[])", reference_link)
text = text:gsub("!(%b[])(%b())", inline_link) text = text:gsub("!(%b[])(%b())", inline_link)
return text return text
end end
-- Handle anchor references -- Handle anchor references
function anchors(text) local function anchors(text)
local function reference_link(text, id) local function reference_link(text, id)
text = text:match("%b[]"):sub(2,-2) text = text:match("%b[]"):sub(2,-2)
id = id:match("%b[]"):sub(2,-2):lower() id = id:match("%b[]"):sub(2,-2):lower()
@ -935,7 +941,7 @@ function anchors(text)
if title then title = " title=\"" .. title .. "\"" else title = "" end if title then title = " title=\"" .. title .. "\"" else title = "" end
return add_escape("<a href=\"" .. url .. "\"" .. title .. ">") .. text .. add_escape("</a>") return add_escape("<a href=\"" .. url .. "\"" .. title .. ">") .. text .. add_escape("</a>")
end end
local function inline_link(text, link) local function inline_link(text, link)
text = text:match("%b[]"):sub(2,-2) text = text:match("%b[]"):sub(2,-2)
local url, title = link:match("%(<?(.-)>?[ \t]*['\"](.+)['\"]") local url, title = link:match("%(<?(.-)>?[ \t]*['\"](.+)['\"]")
@ -948,14 +954,14 @@ function anchors(text)
return add_escape("<a href=\"" .. url .. "\">") .. text .. add_escape("</a>") return add_escape("<a href=\"" .. url .. "\">") .. text .. add_escape("</a>")
end end
end end
text = text:gsub("(%b[])[ \t]*\n?[ \t]*(%b[])", reference_link) text = text:gsub("(%b[])[ \t]*\n?[ \t]*(%b[])", reference_link)
text = text:gsub("(%b[])(%b())", inline_link) text = text:gsub("(%b[])(%b())", inline_link)
return text return text
end end
-- Handle auto links, i.e. <http://www.google.com/>. -- Handle auto links, i.e. <http://www.google.com/>.
function auto_links(text) local function auto_links(text)
local function link(s) local function link(s)
return add_escape("<a href=\"" .. s .. "\">") .. s .. "</a>" return add_escape("<a href=\"" .. s .. "\">") .. s .. "</a>"
end end
@ -969,21 +975,21 @@ function auto_links(text)
local plain = {code = function(c) return c end, count = 0, rate = 0.1} local plain = {code = function(c) return c end, count = 0, rate = 0.1}
local codes = {hex, dec, plain} local codes = {hex, dec, plain}
local function swap(t,k1,k2) local temp = t[k2] t[k2] = t[k1] t[k1] = temp end local function swap(t,k1,k2) local temp = t[k2] t[k2] = t[k1] t[k1] = temp end
local out = "" local out = ""
for i = 1,s:len() do for i = 1,s:len() do
for _,code in ipairs(codes) do code.count = code.count + code.rate end for _,code in ipairs(codes) do code.count = code.count + code.rate end
if codes[1].count < codes[2].count then swap(codes,1,2) end if codes[1].count < codes[2].count then swap(codes,1,2) end
if codes[2].count < codes[3].count then swap(codes,2,3) end if codes[2].count < codes[3].count then swap(codes,2,3) end
if codes[1].count < codes[2].count then swap(codes,1,2) end if codes[1].count < codes[2].count then swap(codes,1,2) end
local code = codes[1] local code = codes[1]
local c = s:sub(i,i) local c = s:sub(i,i)
-- Force encoding of "@" to make email address more invisible. -- Force encoding of "@" to make email address more invisible.
if c == "@" and code == plain then code = codes[2] end if c == "@" and code == plain then code = codes[2] end
out = out .. code.code(c) out = out .. code.code(c)
code.count = code.count - 1 code.count = code.count - 1
end end
return out return out
end end
local function mail(s) local function mail(s)
@ -995,7 +1001,7 @@ function auto_links(text)
-- links -- links
text = text:gsub("<(https?:[^'\">%s]+)>", link) text = text:gsub("<(https?:[^'\">%s]+)>", link)
text = text:gsub("<(ftp:[^'\">%s]+)>", link) text = text:gsub("<(ftp:[^'\">%s]+)>", link)
-- mail -- mail
text = text:gsub("<mailto:([^'\">%s]+)>", mail) text = text:gsub("<mailto:([^'\">%s]+)>", mail)
text = text:gsub("<([-.%w]+%@[-.%w]+)>", mail) text = text:gsub("<([-.%w]+%@[-.%w]+)>", mail)
@ -1004,7 +1010,7 @@ end
-- Encode free standing amps (&) and angles (<)... note that this does not -- Encode free standing amps (&) and angles (<)... note that this does not
-- encode free >. -- encode free >.
function amps_and_angles(s) local function amps_and_angles(s)
-- encode amps not part of &..; expression -- encode amps not part of &..; expression
local pos = 1 local pos = 1
while true do while true do
@ -1019,17 +1025,17 @@ function amps_and_angles(s)
pos = amp+1 pos = amp+1
end end
end end
-- encode naked <'s -- encode naked <'s
s = s:gsub("<([^a-zA-Z/?$!])", "&lt;%1") s = s:gsub("<([^a-zA-Z/?$!])", "&lt;%1")
s = s:gsub("<$", "&lt;") s = s:gsub("<$", "&lt;")
-- what about >, nothing done in the original markdown source to handle them -- what about >, nothing done in the original markdown source to handle them
return s return s
end end
-- Handles emphasis markers (* and _) in the text. -- Handles emphasis markers (* and _) in the text.
function emphasis(text) local function emphasis(text)
for _, s in ipairs {"%*%*", "%_%_"} do for _, s in ipairs {"%*%*", "%_%_"} do
text = text:gsub(s .. "([^%s][%*%_]?)" .. s, "<strong>%1</strong>") text = text:gsub(s .. "([^%s][%*%_]?)" .. s, "<strong>%1</strong>")
text = text:gsub(s .. "([^%s][^<>]-[^%s][%*%_]?)" .. s, "<strong>%1</strong>") text = text:gsub(s .. "([^%s][^<>]-[^%s][%*%_]?)" .. s, "<strong>%1</strong>")
@ -1044,7 +1050,7 @@ function emphasis(text)
end end
-- Handles line break markers in the text.
-- A line that ends in two or more spaces becomes a hard break: the trailing
-- spaces are replaced by " <br/>" and the newline is kept.
--   text: string to transform
-- Returns the transformed string (only the string; gsub's substitution
-- count is dropped by the surrounding parentheses).
local function line_breaks(text)
    -- Two literal spaces followed by "+" means "two or more"; a single
    -- trailing space must not create a break.
    return (text:gsub("  +\n", " <br/>\n"))
end
@ -1067,28 +1073,28 @@ end
-- Cleanup the text by normalizing some possible variations to make further
-- processing easier.
--   text: raw input string
-- Returns the normalized string: line endings converted to "\n", passed
-- through detab(), lines consisting only of spaces/tabs emptied, and one
-- "\n" added at the start and at the end.
local function cleanup(text)
    -- Standardize line endings
    text = text:gsub("\r\n", "\n")  -- DOS to UNIX
    text = text:gsub("\r", "\n")    -- Mac to UNIX

    -- Convert all tabs to spaces
    text = detab(text)

    -- Strip lines with only spaces and tabs. Each match consumes both
    -- surrounding newlines, so two adjacent whitespace-only lines cannot
    -- both match in one pass; repeat until a pass substitutes nothing.
    while true do
        local subs
        text, subs = text:gsub("\n[ \t]+\n", "\n\n")
        if subs == 0 then break end
    end

    return "\n" .. text .. "\n"
end
-- Strips link definitions from the text and stores the data in a lookup table. -- Strips link definitions from the text and stores the data in a lookup table.
function strip_link_definitions(text) local function strip_link_definitions(text)
local linkdb = {} local linkdb = {}
local function link_def(id, url, title) local function link_def(id, url, title)
id = id:match("%[(.+)%]"):lower() id = id:match("%[(.+)%]"):lower()
linkdb[id] = linkdb[id] or {} linkdb[id] = linkdb[id] or {}
@ -1101,7 +1107,7 @@ function strip_link_definitions(text)
local def_title1 = def_no_title .. "[ \t]+\n?[ \t]*[\"'(]([^\n]+)[\"')][ \t]*" local def_title1 = def_no_title .. "[ \t]+\n?[ \t]*[\"'(]([^\n]+)[\"')][ \t]*"
local def_title2 = def_no_title .. "[ \t]*\n[ \t]*[\"'(]([^\n]+)[\"')][ \t]*" local def_title2 = def_no_title .. "[ \t]*\n[ \t]*[\"'(]([^\n]+)[\"')][ \t]*"
local def_title3 = def_no_title .. "[ \t]*\n?[ \t]+[\"'(]([^\n]+)[\"')][ \t]*" local def_title3 = def_no_title .. "[ \t]*\n?[ \t]+[\"'(]([^\n]+)[\"')][ \t]*"
text = text:gsub(def_title1, link_def) text = text:gsub(def_title1, link_def)
text = text:gsub(def_title2, link_def) text = text:gsub(def_title2, link_def)
text = text:gsub(def_title3, link_def) text = text:gsub(def_title3, link_def)
@ -1112,10 +1118,10 @@ end
link_database = {} link_database = {}
-- Main markdown processing function -- Main markdown processing function
function markdown(text) local function markdown(text)
init_hash(text) init_hash(text)
init_escape_table() init_escape_table()
text = cleanup(text) text = cleanup(text)
text = protect(text) text = protect(text)
text, link_database = strip_link_definitions(text) text, link_database = strip_link_definitions(text)
@ -1132,7 +1138,7 @@ setfenv(1, _G)
-- NOTE(review): M.lock is defined outside this view; it is called once on
-- the module table before the public entry point is exported.
M.lock(M)

-- Expose markdown function to the world
_G.markdown = M.markdown
-- Class for parsing command-line options -- Class for parsing command-line options
local OptionParser = {} local OptionParser = {}
@ -1168,6 +1174,7 @@ end
-- where successfully parsed and false otherwise. -- where successfully parsed and false otherwise.
function OptionParser:run(args) function OptionParser:run(args)
local pos = 1 local pos = 1
local param
while pos <= #args do while pos <= #args do
local arg = args[pos] local arg = args[pos]
if arg == "--" then if arg == "--" then
@ -1225,7 +1232,7 @@ local function run_command_line(arg)
if not options.wrap_header then return s end if not options.wrap_header then return s end
local header = "" local header = ""
if options.header then if options.header then
local f = io.open(options.header) or error("Could not open file: " .. options.header) local f = io.open(options.header) or error("Could not open file: " .. options.header)
header = f:read("*a") header = f:read("*a")
f:close() f:close()
else else
@ -1239,14 +1246,14 @@ local function run_command_line(arg)
</head> </head>
<body> <body>
]] ]]
local title = options.title or s:match("<h1>(.-)</h1>") or s:match("<h2>(.-)</h2>") or local title = options.title or s:match("<h1>(.-)</h1>") or s:match("<h2>(.-)</h2>") or
s:match("<h3>(.-)</h3>") or "Untitled" s:match("<h3>(.-)</h3>") or "Untitled"
header = header:gsub("TITLE", title) header = header:gsub("TITLE", title)
if options.inline_style then if options.inline_style then
local style = "" local style = ""
local f = io.open(options.stylesheet) local f = io.open(options.stylesheet)
if f then if f then
style = f:read("*a") f:close() style = f:read("*a") f:close()
else else
error("Could not include style sheet " .. options.stylesheet .. ": File not found") error("Could not include style sheet " .. options.stylesheet .. ": File not found")
end end
@ -1265,15 +1272,15 @@ local function run_command_line(arg)
end end
return header .. s .. footer return header .. s .. footer
end end
-- Generate output path name from input path name given options.
--   path:    input file path (string)
--   options: table; only options.append is read here
-- Returns the output path (string):
--   * options.append set           -> path .. ".html"
--   * "name.html<suffix>"          -> "name.html" (suffix has no path separator)
--   * "name.<ext>"                 -> "name.html", unless that equals the
--                                     input path itself
--   * anything else                -> path .. ".html"
local function outpath(path, options)
    if options.append then return path .. ".html" end

    -- Something already ending in ".html" plus extra characters in the
    -- final path component: cut back to the ".html" part.
    local m = path:match("^(.+%.html)[^/\\]+$")
    if m then return m end

    -- Replace the last extension of the final path component with "html",
    -- but never produce a name identical to the input.
    m = path:match("^(.+%.)[^/\\]*$")
    if m and path ~= m .. "html" then return m .. "html" end

    return path .. ".html"
end
-- Default commandline options -- Default commandline options
local options = { local options = {
wrap_header = true, wrap_header = true,
@ -1316,18 +1323,18 @@ Other options:
op:param("s", "style", function(x) options.stylesheet = x end) op:param("s", "style", function(x) options.stylesheet = x end)
op:flag("l", "inline-style", function(x) options.inline_style = true end) op:flag("l", "inline-style", function(x) options.inline_style = true end)
op:flag("a", "append", function() options.append = true end) op:flag("a", "append", function() options.append = true end)
op:flag("t", "test", function() op:flag("t", "test", function()
local n = arg[0]:gsub("markdown.lua", "markdown-tests.lua") local n = arg[0]:gsub("markdown.lua", "markdown-tests.lua")
local f = io.open(n) local f = io.open(n)
if f then if f then
f:close() dofile(n) f:close() dofile(n)
else else
error("Cannot find markdown-tests.lua") error("Cannot find markdown-tests.lua")
end end
run_stdin = false run_stdin = false
end) end)
op:flag("h", "help", function() print(help) run_stdin = false end) op:flag("h", "help", function() print(help) run_stdin = false end)
op:arg(function(path) op:arg(function(path)
local file = io.open(path) or error("Could not open file: " .. path) local file = io.open(path) or error("Could not open file: " .. path)
local s = file:read("*a") local s = file:read("*a")
file:close() file:close()
@ -1338,7 +1345,7 @@ Other options:
run_stdin = false run_stdin = false
end end
) )
if not op:run(arg) then if not op:run(arg) then
print(help) print(help)
run_stdin = false run_stdin = false
@ -1350,10 +1357,10 @@ Other options:
io.write(s) io.write(s)
end end
end end
-- If we are being run from the command-line, act accordingly.
-- The script counts as "run from the command line" when the global `arg`
-- table exists and arg[0] (the script name) ends in "markdown.lua".
-- The type checks keep this from raising an error in hosts that define
-- `arg` without a string at index 0; in that case the function is returned
-- to the caller instead.
if type(arg) == "table" and type(arg[0]) == "string" and arg[0]:find("markdown%.lua$") then
    run_command_line(arg)
else
    return markdown
end