built-in markdown: crack down on locals; still very slow

This commit is contained in:
steve donovan 2013-04-17 12:08:11 +02:00
parent 68cd576bf4
commit 167a4595a5
1 changed file with 126 additions and 119 deletions

View File

@ -130,31 +130,33 @@ setfenv(1, M)
-- Locks table t from changes, raises an error if someone attempts to change the table. -- Locks table t from changes, raises an error if someone attempts to change the table.
-- This is useful for detecting variables that have "accidentally" been made global. Something -- This is useful for detecting variables that have "accidentally" been made global. Something
-- I tend to do all too much. -- I tend to do all too much.
function lock(t) function M.lock(t)
function lock_new_index(t, k, v) local function lock_new_index(t, k, v)
error("module has been locked -- " .. k .. " must be declared local", 2) error("module has been locked -- " .. k .. " must be declared local", 2)
end end
local mt = {__newindex = lock_new_index} local mt = {__newindex = lock_new_index}
if getmetatable(t) then mt.__index = getmetatable(t).__index end if getmetatable(t) then
mt.__index = getmetatable(t).__index
end
setmetatable(t, mt) setmetatable(t, mt)
end end
-- Returns the result of mapping the values in table t through the function f -- Returns the result of mapping the values in table t through the function f
function map(t, f) local function map(t, f)
local out = {} local out = {}
for k,v in pairs(t) do out[k] = f(v,k) end for k,v in pairs(t) do out[k] = f(v,k) end
return out return out
end end
-- The identity function, useful as a placeholder. -- The identity function, useful as a placeholder.
function identity(text) return text end local function identity(text) return text end
-- Functional style if statement. (NOTE: no short circuit evaluation) -- Functional style if statement. (NOTE: no short circuit evaluation)
function iff(t, a, b) if t then return a else return b end end local function iff(t, a, b) if t then return a else return b end end
-- Splits the text into an array of separate lines. -- Splits the text into an array of separate lines.
function split(text, sep) local function split(text, sep)
sep = sep or "\n" sep = sep or "\n"
local lines = {} local lines = {}
local pos = 1 local pos = 1
@ -168,7 +170,7 @@ function split(text, sep)
end end
-- Converts tabs to spaces -- Converts tabs to spaces
function detab(text) local function detab(text)
local tab_width = 4 local tab_width = 4
local function rep(match) local function rep(match)
local spaces = -match:len() local spaces = -match:len()
@ -180,7 +182,7 @@ function detab(text)
end end
-- Applies string.find for every pattern in the list and returns the first match -- Applies string.find for every pattern in the list and returns the first match
function find_first(s, patterns, index) local function find_first(s, patterns, index)
local res = {} local res = {}
for _,p in ipairs(patterns) do for _,p in ipairs(patterns) do
local match = {s:find(p, index)} local match = {s:find(p, index)}
@ -192,7 +194,7 @@ end
-- If a replacement array is specified, the range [start, stop] in the array is replaced -- If a replacement array is specified, the range [start, stop] in the array is replaced
-- with the replacement array and the resulting array is returned. Without a replacement -- with the replacement array and the resulting array is returned. Without a replacement
-- array the section of the array between start and stop is returned. -- array the section of the array between start and stop is returned.
function splice(array, start, stop, replacement) local function splice(array, start, stop, replacement)
if replacement then if replacement then
local n = stop - start + 1 local n = stop - start + 1
while n > 0 do while n > 0 do
@ -213,7 +215,7 @@ function splice(array, start, stop, replacement)
end end
-- Outdents the text one step. -- Outdents the text one step.
function outdent(text) local function outdent(text)
text = "\n" .. text text = "\n" .. text
text = text:gsub("\n ? ? ?", "\n") text = text:gsub("\n ? ? ?", "\n")
text = text:sub(2) text = text:sub(2)
@ -221,7 +223,7 @@ function outdent(text)
end end
-- Indents the text one step. -- Indents the text one step.
function indent(text) local function indent(text)
text = text:gsub("\n", "\n ") text = text:gsub("\n", "\n ")
return text return text
end end
@ -229,7 +231,7 @@ end
-- Does a simple tokenization of html data. Returns the data as a list of tokens. -- Does a simple tokenization of html data. Returns the data as a list of tokens.
-- Each token is a table with a type field (which is either "tag" or "text") and -- Each token is a table with a type field (which is either "tag" or "text") and
-- a text field (which contains the original token data). -- a text field (which contains the original token data).
function tokenize_html(html) local function tokenize_html(html)
local tokens = {} local tokens = {}
local pos = 1 local pos = 1
while true do while true do
@ -287,7 +289,7 @@ local HASH = {
-- Inits hashing. Creates a hash_identifier that doesn't occur anywhere -- Inits hashing. Creates a hash_identifier that doesn't occur anywhere
-- in the text. -- in the text.
function init_hash(text) local function init_hash(text)
HASH.inited = true HASH.inited = true
HASH.identifier = "" HASH.identifier = ""
HASH.counter = 0 HASH.counter = 0
@ -305,7 +307,7 @@ function init_hash(text)
end end
-- Returns the hashed value for s. -- Returns the hashed value for s.
function hash(s) local function hash(s)
assert(HASH.inited) assert(HASH.inited)
if not HASH.table[s] then if not HASH.table[s] then
HASH.counter = HASH.counter + 1 HASH.counter = HASH.counter + 1
@ -342,18 +344,18 @@ local PD = {
-- Nested data. -- Nested data.
-- </div> -- </div>
-- </div> -- </div>
function block_pattern(tag) local function block_pattern(tag)
return "\n<" .. tag .. ".-\n</" .. tag .. ">[ \t]*\n" return "\n<" .. tag .. ".-\n</" .. tag .. ">[ \t]*\n"
end end
-- Pattern for matching a block tag that begins and ends with a newline -- Pattern for matching a block tag that begins and ends with a newline
function line_pattern(tag) local function line_pattern(tag)
return "\n<" .. tag .. ".-</" .. tag .. ">[ \t]*\n" return "\n<" .. tag .. ".-</" .. tag .. ">[ \t]*\n"
end end
-- Protects the range of characters from start to stop in the text and -- Protects the range of characters from start to stop in the text and
-- returns the protected string. -- returns the protected string.
function protect_range(text, start, stop) local function protect_range(text, start, stop)
local s = text:sub(start, stop) local s = text:sub(start, stop)
local h = hash(s) local h = hash(s)
PD.blocks[h] = s PD.blocks[h] = s
@ -363,7 +365,7 @@ end
-- Protect every part of the text that matches any of the patterns. The first -- Protect every part of the text that matches any of the patterns. The first
-- matching pattern is protected first, etc. -- matching pattern is protected first, etc.
function protect_matches(text, patterns) local function protect_matches(text, patterns)
while true do while true do
local start, stop = find_first(text, patterns) local start, stop = find_first(text, patterns)
if not start then break end if not start then break end
@ -373,7 +375,7 @@ function protect_matches(text, patterns)
end end
-- Protects blocklevel tags in the specified text -- Protects blocklevel tags in the specified text
function protect(text) local function protect(text)
-- First protect potentially nested block tags -- First protect potentially nested block tags
text = protect_matches(text, map(PD.tags, block_pattern)) text = protect_matches(text, map(PD.tags, block_pattern))
-- Then protect block tags at the line level. -- Then protect block tags at the line level.
@ -385,12 +387,12 @@ function protect(text)
end end
-- Returns true if the string s is a hash resulting from protection -- Returns true if the string s is a hash resulting from protection
function is_protected(s) local function is_protected(s)
return PD.blocks[s] return PD.blocks[s]
end end
-- Unprotects the specified text by expanding all the nonces -- Unprotects the specified text by expanding all the nonces
function unprotect(text) local function unprotect(text)
for k,v in pairs(PD.blocks) do for k,v in pairs(PD.blocks) do
v = v:gsub("%%", "%%%%") v = v:gsub("%%", "%%%%")
text = text:gsub(k, v) text = text:gsub(k, v)
@ -410,14 +412,14 @@ end
-- Returns true if the line is a ruler of (char) characters. -- Returns true if the line is a ruler of (char) characters.
-- The line must contain at least three char characters and contain only spaces and -- The line must contain at least three char characters and contain only spaces and
-- char characters. -- char characters.
function is_ruler_of(line, char) local function is_ruler_of(line, char)
if not line:match("^[ %" .. char .. "]*$") then return false end if not line:match("^[ %" .. char .. "]*$") then return false end
if not line:match("%" .. char .. ".*%" .. char .. ".*%" .. char) then return false end if not line:match("%" .. char .. ".*%" .. char .. ".*%" .. char) then return false end
return true return true
end end
-- Identifies the block level formatting present in the line -- Identifies the block level formatting present in the line
function classify(line) local function classify(line)
local info = {line = line, text = line} local info = {line = line, text = line}
if line:match("^ ") then if line:match("^ ") then
@ -483,7 +485,7 @@ end
-- Finds headers consisting of a normal line followed by a ruler and converts them to -- Finds headers consisting of a normal line followed by a ruler and converts them to
-- header entries. -- header entries.
function headers(array) local function headers(array)
local i = 1 local i = 1
while i <= #array - 1 do while i <= #array - 1 do
if array[i].type == "normal" and array[i+1].type == "ruler" and if array[i].type == "normal" and array[i+1].type == "ruler" and
@ -500,8 +502,10 @@ function headers(array)
return array return array
end end
local block_transform, blocks_to_html, encode_code, span_transform, encode_backslash_escapes
-- Find list blocks and convert them to protected data blocks -- Find list blocks and convert them to protected data blocks
function lists(array, sublist) local function lists(array, sublist)
local function process_list(arr) local function process_list(arr)
local function any_blanks(arr) local function any_blanks(arr)
for i = 1, #arr do for i = 1, #arr do
@ -624,7 +628,7 @@ function lists(array, sublist)
end end
-- Find and convert blockquote markers. -- Find and convert blockquote markers.
function blockquotes(lines) local function blockquotes(lines)
local function find_blockquote(lines) local function find_blockquote(lines)
local start local start
for i,line in ipairs(lines) do for i,line in ipairs(lines) do
@ -674,7 +678,7 @@ function blockquotes(lines)
end end
-- Find and convert codeblocks. -- Find and convert codeblocks.
function codeblocks(lines) local function codeblocks(lines)
local function find_codeblock(lines) local function find_codeblock(lines)
local start local start
for i,line in ipairs(lines) do for i,line in ipairs(lines) do
@ -764,7 +768,7 @@ end
-- Debug function for printing a line array to see the result -- Debug function for printing a line array to see the result
-- of partial transforms. -- of partial transforms.
function print_lines(lines) local function print_lines(lines)
for i, line in ipairs(lines) do for i, line in ipairs(lines) do
print(i, line.type, line.text or line.line) print(i, line.type, line.text or line.line)
end end
@ -778,10 +782,10 @@ end
-- These characters may need to be escaped because they have a special -- These characters may need to be escaped because they have a special
-- meaning in markdown. -- meaning in markdown.
escape_chars = "'\\`*_{}[]()>#+-.!'" local escape_chars = "'\\`*_{}[]()>#+-.!'"
escape_table = {} local escape_table = {}
function init_escape_table() local function init_escape_table()
escape_table = {} escape_table = {}
for i = 1,#escape_chars do for i = 1,#escape_chars do
local c = escape_chars:sub(i,i) local c = escape_chars:sub(i,i)
@ -790,7 +794,7 @@ function init_escape_table()
end end
-- Adds a new escape to the escape table. -- Adds a new escape to the escape table.
function add_escape(text) local function add_escape(text)
if not escape_table[text] then if not escape_table[text] then
escape_table[text] = hash(text) escape_table[text] = hash(text)
end end
@ -798,7 +802,7 @@ function add_escape(text)
end end
-- Escape characters that should not be disturbed by markdown. -- Escape characters that should not be disturbed by markdown.
function escape_special_chars(text) local function escape_special_chars(text)
local tokens = tokenize_html(text) local tokens = tokenize_html(text)
local out = "" local out = ""
@ -826,7 +830,7 @@ function encode_backslash_escapes(t)
end end
-- Unescape characters that have been encoded. -- Unescape characters that have been encoded.
function unescape_special_chars(t) local function unescape_special_chars(t)
local tin = t local tin = t
for k,v in pairs(escape_table) do for k,v in pairs(escape_table) do
k = k:gsub("%%", "%%%%") k = k:gsub("%%", "%%%%")
@ -850,7 +854,7 @@ function encode_code(s)
end end
-- Handle backtick blocks. -- Handle backtick blocks.
function code_spans(s) local function code_spans(s)
s = s:gsub("\\\\", escape_table["\\"]) s = s:gsub("\\\\", escape_table["\\"])
s = s:gsub("\\`", escape_table["`"]) s = s:gsub("\\`", escape_table["`"])
@ -880,7 +884,7 @@ function code_spans(s)
end end
-- Encode alt text... encodes & and ". -- Encode alt text... encodes & and ".
function encode_alt(s) local function encode_alt(s)
if not s then return s end if not s then return s end
s = s:gsub('&', '&amp;') s = s:gsub('&', '&amp;')
s = s:gsub('"', '&quot;') s = s:gsub('"', '&quot;')
@ -888,8 +892,10 @@ function encode_alt(s)
return s return s
end end
local link_database
-- Handle image references -- Handle image references
function images(text) local function images(text)
local function reference_link(alt, id) local function reference_link(alt, id)
alt = encode_alt(alt:match("%b[]"):sub(2,-2)) alt = encode_alt(alt:match("%b[]"):sub(2,-2))
id = id:match("%[(.*)%]"):lower() id = id:match("%[(.*)%]"):lower()
@ -922,7 +928,7 @@ function images(text)
end end
-- Handle anchor references -- Handle anchor references
function anchors(text) local function anchors(text)
local function reference_link(text, id) local function reference_link(text, id)
text = text:match("%b[]"):sub(2,-2) text = text:match("%b[]"):sub(2,-2)
id = id:match("%b[]"):sub(2,-2):lower() id = id:match("%b[]"):sub(2,-2):lower()
@ -955,7 +961,7 @@ function anchors(text)
end end
-- Handle auto links, e.g. <http://www.google.com/>. -- Handle auto links, e.g. <http://www.google.com/>.
function auto_links(text) local function auto_links(text)
local function link(s) local function link(s)
return add_escape("<a href=\"" .. s .. "\">") .. s .. "</a>" return add_escape("<a href=\"" .. s .. "\">") .. s .. "</a>"
end end
@ -1004,7 +1010,7 @@ end
-- Encode free standing amps (&) and angles (<)... note that this does not -- Encode free standing amps (&) and angles (<)... note that this does not
-- encode free >. -- encode free >.
function amps_and_angles(s) local function amps_and_angles(s)
-- encode amps not part of &..; expression -- encode amps not part of &..; expression
local pos = 1 local pos = 1
while true do while true do
@ -1029,7 +1035,7 @@ function amps_and_angles(s)
end end
-- Handles emphasis markers (* and _) in the text. -- Handles emphasis markers (* and _) in the text.
function emphasis(text) local function emphasis(text)
for _, s in ipairs {"%*%*", "%_%_"} do for _, s in ipairs {"%*%*", "%_%_"} do
text = text:gsub(s .. "([^%s][%*%_]?)" .. s, "<strong>%1</strong>") text = text:gsub(s .. "([^%s][%*%_]?)" .. s, "<strong>%1</strong>")
text = text:gsub(s .. "([^%s][^<>]-[^%s][%*%_]?)" .. s, "<strong>%1</strong>") text = text:gsub(s .. "([^%s][^<>]-[^%s][%*%_]?)" .. s, "<strong>%1</strong>")
@ -1044,7 +1050,7 @@ function emphasis(text)
end end
-- Handles line break markers in the text. -- Handles line break markers in the text.
function line_breaks(text) local function line_breaks(text)
return text:gsub(" +\n", " <br/>\n") return text:gsub(" +\n", " <br/>\n")
end end
@ -1067,7 +1073,7 @@ end
-- Cleanup the text by normalizing some possible variations to make further -- Cleanup the text by normalizing some possible variations to make further
-- processing easier. -- processing easier.
function cleanup(text) local function cleanup(text)
-- Standardize line endings -- Standardize line endings
text = text:gsub("\r\n", "\n") -- DOS to UNIX text = text:gsub("\r\n", "\n") -- DOS to UNIX
text = text:gsub("\r", "\n") -- Mac to UNIX text = text:gsub("\r", "\n") -- Mac to UNIX
@ -1086,7 +1092,7 @@ function cleanup(text)
end end
-- Strips link definitions from the text and stores the data in a lookup table. -- Strips link definitions from the text and stores the data in a lookup table.
function strip_link_definitions(text) local function strip_link_definitions(text)
local linkdb = {} local linkdb = {}
local function link_def(id, url, title) local function link_def(id, url, title)
@ -1112,7 +1118,7 @@ end
link_database = {} link_database = {}
-- Main markdown processing function -- Main markdown processing function
function markdown(text) local function markdown(text)
init_hash(text) init_hash(text)
init_escape_table() init_escape_table()
@ -1132,7 +1138,7 @@ setfenv(1, _G)
M.lock(M) M.lock(M)
-- Expose markdown function to the world -- Expose markdown function to the world
markdown = M.markdown _G.markdown = M.markdown
-- Class for parsing command-line options -- Class for parsing command-line options
local OptionParser = {} local OptionParser = {}
@ -1168,6 +1174,7 @@ end
-- were successfully parsed and false otherwise. -- were successfully parsed and false otherwise.
function OptionParser:run(args) function OptionParser:run(args)
local pos = 1 local pos = 1
local param
while pos <= #args do while pos <= #args do
local arg = args[pos] local arg = args[pos]
if arg == "--" then if arg == "--" then