From 167a4595a5ca6d4e8560d68a38cc28a1b7faa873 Mon Sep 17 00:00:00 2001 From: steve donovan Date: Wed, 17 Apr 2013 12:08:11 +0200 Subject: [PATCH] built-in markdown crack down on locals; still very slow --- ldoc/markdown.lua | 245 ++++++++++++++++++++++++---------------------- 1 file changed, 126 insertions(+), 119 deletions(-) diff --git a/ldoc/markdown.lua b/ldoc/markdown.lua index 5181dd8..bfe3a1b 100644 --- a/ldoc/markdown.lua +++ b/ldoc/markdown.lua @@ -5,7 +5,7 @@ -**Author:** Niklas Frykholm, +**Author:** Niklas Frykholm, **Date:** 31 May 2008 This is an implementation of the popular text markup language Markdown in pure Lua. @@ -47,10 +47,10 @@ Permission is hereby granted, free of charge, to any person obtaining a copy of software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -to whom the Software is furnished to do so, subject to the following conditions: +to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies -or substantial portions of the Software. +or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -130,31 +130,33 @@ setfenv(1, M) -- Locks table t from changes, writes an error if someone attempts to change the table. -- This is useful for detecting variables that have "accidently" been made global. Something -- I tend to do all too much. -function lock(t) - function lock_new_index(t, k, v) +function M.lock(t) + local function lock_new_index(t, k, v) error("module has been locked -- " .. k .. " must be declared local", 2) end local mt = {__newindex = lock_new_index} - if getmetatable(t) then mt.__index = getmetatable(t).__index end + if getmetatable(t) then + mt.__index = getmetatable(t).__index + end setmetatable(t, mt) end -- Returns the result of mapping the values in table t through the function f -function map(t, f) +local function map(t, f) local out = {} for k,v in pairs(t) do out[k] = f(v,k) end return out end -- The identity function, useful as a placeholder. -function identity(text) return text end +local function identity(text) return text end -- Functional style if statement. (NOTE: no short circuit evaluation) -function iff(t, a, b) if t then return a else return b end end +local function iff(t, a, b) if t then return a else return b end end -- Splits the text into an array of separate lines. -function split(text, sep) +local function split(text, sep) sep = sep or "\n" local lines = {} local pos = 1 @@ -168,7 +170,7 @@ function split(text, sep) end -- Converts tabs to spaces -function detab(text) +local function detab(text) local tab_width = 4 local function rep(match) local spaces = -match:len() @@ -180,7 +182,7 @@ function detab(text) end -- Applies string.find for every pattern in the list and returns the first match -function find_first(s, patterns, index) +local function find_first(s, patterns, index) local res = {} for _,p in ipairs(patterns) do local match = {s:find(p, index)} @@ -192,7 +194,7 @@ end -- If a replacement array is specified, the range [start, stop] in the array is replaced -- with the replacement array and the resulting array is returned. Without a replacement -- array the section of the array between start and stop is returned. -function splice(array, start, stop, replacement) +local function splice(array, start, stop, replacement) if replacement then local n = stop - start + 1 while n > 0 do @@ -213,7 +215,7 @@ function splice(array, start, stop, replacement) end -- Outdents the text one step. -function outdent(text) +local function outdent(text) text = "\n" .. text text = text:gsub("\n ? ? ?", "\n") text = text:sub(2) @@ -221,15 +223,15 @@ function outdent(text) end -- Indents the text one step. -function indent(text) +local function indent(text) text = text:gsub("\n", "\n ") return text end --- Does a simple tokenization of html data. Returns the data as a list of tokens. +-- Does a simple tokenization of html data. Returns the data as a list of tokens. -- Each token is a table with a type field (which is either "tag" or "text") and -- a text field (which contains the original token data). -function tokenize_html(html) +local function tokenize_html(html) local tokens = {} local pos = 1 while true do @@ -239,7 +241,7 @@ function tokenize_html(html) break end if start ~= pos then table.insert(tokens, {type="text", text = html:sub(pos, start-1)}) end - + local _, stop if html:match("^", start) @@ -249,7 +251,7 @@ function tokenize_html(html) _,stop = html:find("%b<>", start) end if not stop then - -- error("Could not match html tag " .. html:sub(start,start+30)) + -- error("Could not match html tag " .. html:sub(start,start+30)) table.insert(tokens, {type="text", text=html:sub(start, start)}) pos = start + 1 else @@ -272,27 +274,27 @@ end local HASH = { -- Has the hash been inited. inited = false, - + -- The unique string prepended to all hash values. This is to ensure -- that hash values do not accidently coincide with an actual existing -- string in the document. identifier = "", - + -- Counter that counts up for each new hash instance. counter = 0, - + -- Hash table. table = {} } -- Inits hashing. Creates a hash_identifier that doesn't occur anywhere -- in the text. -function init_hash(text) +local function init_hash(text) HASH.inited = true HASH.identifier = "" HASH.counter = 0 HASH.table = {} - + local s = "HASH" local counter = 0 local id @@ -305,7 +307,7 @@ function init_hash(text) end -- Returns the hashed value for s. -function hash(s) +local function hash(s) assert(HASH.inited) if not HASH.table[s] then HASH.counter = HASH.counter + 1 @@ -320,7 +322,7 @@ end ---------------------------------------------------------------------- -- The protection module is used to "protect" parts of a document --- so that they are not modified by subsequent processing steps. +-- so that they are not modified by subsequent processing steps. -- Protected parts are saved in a table for later unprotection -- Protection data @@ -342,18 +344,18 @@ local PD = { -- Nested data. -- -- -function block_pattern(tag) +local function block_pattern(tag) return "\n<" .. tag .. ".-\n[ \t]*\n" end -- Pattern for matching a block tag that begins and ends with a newline -function line_pattern(tag) +local function line_pattern(tag) return "\n<" .. tag .. ".-[ \t]*\n" end -- Protects the range of characters from start to stop in the text and -- returns the protected string. -function protect_range(text, start, stop) +local function protect_range(text, start, stop) local s = text:sub(start, stop) local h = hash(s) PD.blocks[h] = s @@ -363,7 +365,7 @@ end -- Protect every part of the text that matches any of the patterns. The first -- matching pattern is protected first, etc. -function protect_matches(text, patterns) +local function protect_matches(text, patterns) while true do local start, stop = find_first(text, patterns) if not start then break end @@ -373,7 +375,7 @@ function protect_matches(text, patterns) end -- Protects blocklevel tags in the specified text -function protect(text) +local function protect(text) -- First protect potentially nested block tags text = protect_matches(text, map(PD.tags, block_pattern)) -- Then protect block tags at the line level. @@ -385,12 +387,12 @@ function protect(text) end -- Returns true if the string s is a hash resulting from protection -function is_protected(s) +local function is_protected(s) return PD.blocks[s] end -- Unprotects the specified text by expanding all the nonces -function unprotect(text) +local function unprotect(text) for k,v in pairs(PD.blocks) do v = v:gsub("%%", "%%%%") text = text:gsub(k, v) @@ -410,22 +412,22 @@ end -- Returns true if the line is a ruler of (char) characters. -- The line must contain at least three char characters and contain only spaces and -- char characters. -function is_ruler_of(line, char) +local function is_ruler_of(line, char) if not line:match("^[ %" .. char .. "]*$") then return false end if not line:match("%" .. char .. ".*%" .. char .. ".*%" .. char) then return false end return true end -- Identifies the block level formatting present in the line -function classify(line) +local function classify(line) local info = {line = line, text = line} - + if line:match("^ ") then info.type = "indented" info.outdented = line:sub(5) return info end - + for _,c in ipairs({'*', '-', '_', '='}) do if is_ruler_of(line, c) then info.type = "ruler" @@ -433,12 +435,12 @@ function classify(line) return info end end - + if line == "" then info.type = "blank" return info end - + if line:match("^(#+)[ \t]*(.-)[ \t]*#*[ \t]*$") then local m1, m2 = line:match("^(#+)[ \t]*(.-)[ \t]*#*[ \t]*$") info.type = "header" @@ -446,7 +448,7 @@ function classify(line) info.text = m2 return info end - + if line:match("^ ? ? ?(%d+)%.[ \t]+(.+)") then local number, text = line:match("^ ? ? ?(%d+)%.[ \t]+(.+)") info.type = "list_item" @@ -455,7 +457,7 @@ function classify(line) info.text = text return info end - + if line:match("^ ? ? ?([%*%+%-])[ \t]+(.+)") then local bullet, text = line:match("^ ? ? ?([%*%+%-])[ \t]+(.+)") info.type = "list_item" @@ -464,29 +466,29 @@ function classify(line) info.text= text return info end - + if line:match("^>[ \t]?(.*)") then info.type = "blockquote" info.text = line:match("^>[ \t]?(.*)") return info end - + if is_protected(line) then info.type = "raw" info.html = unprotect(line) return info end - + info.type = "normal" return info end -- Find headers constisting of a normal line followed by a ruler and converts them to -- header entries. -function headers(array) +local function headers(array) local i = 1 while i <= #array - 1 do - if array[i].type == "normal" and array[i+1].type == "ruler" and + if array[i].type == "normal" and array[i+1].type == "ruler" and (array[i+1].ruler_char == "-" or array[i+1].ruler_char == "=") then local info = {line = array[i].line} info.text = info.line @@ -500,8 +502,10 @@ function headers(array) return array end +local block_transform, blocks_to_html, encode_code, span_transform, encode_backslash_escapes + -- Find list blocks and convert them to protected data blocks -function lists(array, sublist) +local function lists(array, sublist) local function process_list(arr) local function any_blanks(arr) for i = 1, #arr do @@ -509,7 +513,7 @@ function lists(array, sublist) end return false end - + local function split_list_items(arr) local acc = {arr[1]} local res = {} @@ -524,12 +528,12 @@ function lists(array, sublist) table.insert(res, acc) return res end - + local function process_list_item(lines, block) while lines[#lines].type == "blank" do table.remove(lines) end - + local itemtext = lines[1].text for i=2,#lines do itemtext = itemtext .. "\n" .. outdent(lines[i].line) @@ -548,7 +552,7 @@ function lists(array, sublist) return "
  • " .. itemtext .. "
  • " end end - + local block_list = any_blanks(arr) local items = split_list_items(arr) local out = "" @@ -561,7 +565,7 @@ function lists(array, sublist) return "
      \n" .. out .. "
    " end end - + -- Finds the range of lines composing the first list in the array. A list -- starts with (^ list_item) or (blank list_item) and ends with -- (blank* $) or (blank normal). @@ -586,7 +590,7 @@ function lists(array, sublist) local function find_list_end(array, start) local pos = #array for i = start, #array-1 do - if array[i].type == "blank" and array[i+1].type ~= "list_item" + if array[i].type == "blank" and array[i+1].type ~= "list_item" and array[i+1].type ~= "indented" and array[i+1].type ~= "blank" then pos = i-1 break @@ -597,12 +601,12 @@ function lists(array, sublist) end return pos end - + local start = find_list_start(array, sublist) if not start then return nil end return start, find_list_end(array, start) end - + while true do local start, stop = find_list(array, sublist) if not start then break end @@ -614,17 +618,17 @@ function lists(array, sublist) } array = splice(array, start, stop, {info}) end - + -- Convert any remaining list items to normal for _,line in ipairs(array) do if line.type == "list_item" then line.type = "normal" end end - + return array end -- Find and convert blockquote markers. -function blockquotes(lines) +local function blockquotes(lines) local function find_blockquote(lines) local start for i,line in ipairs(lines) do @@ -634,7 +638,7 @@ function blockquotes(lines) end end if not start then return nil end - + local stop = #lines for i = start+1, #lines do if lines[i].type == "blank" or lines[i].type == "blockquote" then @@ -647,7 +651,7 @@ function blockquotes(lines) while lines[stop].type == "blank" do stop = stop - 1 end return start, stop end - + local function process_blockquote(lines) local raw = lines[1].text for i = 2,#lines do @@ -658,7 +662,7 @@ function blockquotes(lines) return "
    \n " .. bt .. "\n
    " end - + while true do local start, stop = find_blockquote(lines) if not start then break end @@ -674,14 +678,14 @@ function blockquotes(lines) end -- Find and convert codeblocks. -function codeblocks(lines) +local function codeblocks(lines) local function find_codeblock(lines) local start for i,line in ipairs(lines) do if line.type == "indented" then start = i break end end if not start then return nil end - + local stop = #lines for i = start+1, #lines do if lines[i].type ~= "indented" and lines[i].type ~= "blank" then @@ -692,7 +696,7 @@ function codeblocks(lines) while lines[stop].type == "blank" do stop = stop - 1 end return start, stop end - + local function process_codeblock(lines) local raw = detab(encode_code(outdent(lines[1].line))) for i = 2,#lines do @@ -700,7 +704,7 @@ function codeblocks(lines) end return "
    " .. raw .. "\n
    " end - + while true do local start, stop = find_codeblock(lines) if not start then break end @@ -727,12 +731,12 @@ function blocks_to_html(lines, no_paragraphs) table.insert(out, line.html) elseif line.type == "normal" then local s = line.line - + while i+1 <= #lines and lines[i+1].type == "normal" do i = i + 1 s = s .. "\n" .. lines[i].line end - + if no_paragraphs then table.insert(out, span_transform(s)) else @@ -764,7 +768,7 @@ end -- Debug function for printing a line array to see the result -- of partial transforms. -function print_lines(lines) +local function print_lines(lines) for i, line in ipairs(lines) do print(i, line.type, line.text or line.line) end @@ -778,10 +782,10 @@ end -- These characters may need to be escaped because they have a special -- meaning in markdown. -escape_chars = "'\\`*_{}[]()>#+-.!'" -escape_table = {} +local escape_chars = "'\\`*_{}[]()>#+-.!'" +local escape_table = {} -function init_escape_table() +local function init_escape_table() escape_table = {} for i = 1,#escape_chars do local c = escape_chars:sub(i,i) @@ -790,17 +794,17 @@ function init_escape_table() end -- Adds a new escape to the escape table. -function add_escape(text) +local function add_escape(text) if not escape_table[text] then escape_table[text] = hash(text) end return escape_table[text] -end +end -- Escape characters that should not be disturbed by markdown. -function escape_special_chars(text) +local function escape_special_chars(text) local tokens = tokenize_html(text) - + local out = "" for _, token in ipairs(tokens) do local t = token.text @@ -826,7 +830,7 @@ function encode_backslash_escapes(t) end -- Unescape characters that have been encoded. -function unescape_special_chars(t) +local function unescape_special_chars(t) local tin = t for k,v in pairs(escape_table) do k = k:gsub("%%", "%%%%") @@ -850,7 +854,7 @@ function encode_code(s) end -- Handle backtick blocks. -function code_spans(s) +local function code_spans(s) s = s:gsub("\\\\", escape_table["\\"]) s = s:gsub("\\`", escape_table["`"]) @@ -880,7 +884,7 @@ function code_spans(s) end -- Encode alt text... enodes &, and ". -function encode_alt(s) +local function encode_alt(s) if not s then return s end s = s:gsub('&', '&') s = s:gsub('"', '"') @@ -888,8 +892,10 @@ function encode_alt(s) return s end +local link_database + -- Handle image references -function images(text) +local function images(text) local function reference_link(alt, id) alt = encode_alt(alt:match("%b[]"):sub(2,-2)) id = id:match("%[(.*)%]"):lower() @@ -902,7 +908,7 @@ function images(text) if title then title = " title=\"" .. title .. "\"" else title = "" end return add_escape ('' .. alt .. '") end - + local function inline_link(alt, link) alt = encode_alt(alt:match("%b[]"):sub(2,-2)) local url, title = link:match("%(?[ \t]*['\"](.+)['\"]") @@ -915,14 +921,14 @@ function images(text) return add_escape('' .. alt .. '') end end - + text = text:gsub("!(%b[])[ \t]*\n?[ \t]*(%b[])", reference_link) text = text:gsub("!(%b[])(%b())", inline_link) return text end -- Handle anchor references -function anchors(text) +local function anchors(text) local function reference_link(text, id) text = text:match("%b[]"):sub(2,-2) id = id:match("%b[]"):sub(2,-2):lower() @@ -935,7 +941,7 @@ function anchors(text) if title then title = " title=\"" .. title .. "\"" else title = "" end return add_escape("") .. text .. add_escape("") end - + local function inline_link(text, link) text = text:match("%b[]"):sub(2,-2) local url, title = link:match("%(?[ \t]*['\"](.+)['\"]") @@ -948,14 +954,14 @@ function anchors(text) return add_escape("") .. text .. add_escape("") end end - + text = text:gsub("(%b[])[ \t]*\n?[ \t]*(%b[])", reference_link) text = text:gsub("(%b[])(%b())", inline_link) return text end -- Handle auto links, i.e. . -function auto_links(text) +local function auto_links(text) local function link(s) return add_escape("") .. s .. "" end @@ -969,21 +975,21 @@ function auto_links(text) local plain = {code = function(c) return c end, count = 0, rate = 0.1} local codes = {hex, dec, plain} local function swap(t,k1,k2) local temp = t[k2] t[k2] = t[k1] t[k1] = temp end - + local out = "" for i = 1,s:len() do for _,code in ipairs(codes) do code.count = code.count + code.rate end if codes[1].count < codes[2].count then swap(codes,1,2) end if codes[2].count < codes[3].count then swap(codes,2,3) end if codes[1].count < codes[2].count then swap(codes,1,2) end - + local code = codes[1] local c = s:sub(i,i) -- Force encoding of "@" to make email address more invisible. if c == "@" and code == plain then code = codes[2] end out = out .. code.code(c) code.count = code.count - 1 - end + end return out end local function mail(s) @@ -995,7 +1001,7 @@ function auto_links(text) -- links text = text:gsub("<(https?:[^'\">%s]+)>", link) text = text:gsub("<(ftp:[^'\">%s]+)>", link) - + -- mail text = text:gsub("%s]+)>", mail) text = text:gsub("<([-.%w]+%@[-.%w]+)>", mail) @@ -1004,7 +1010,7 @@ end -- Encode free standing amps (&) and angles (<)... note that this does not -- encode free >. -function amps_and_angles(s) +local function amps_and_angles(s) -- encode amps not part of &..; expression local pos = 1 while true do @@ -1019,17 +1025,17 @@ function amps_and_angles(s) pos = amp+1 end end - + -- encode naked <'s s = s:gsub("<([^a-zA-Z/?$!])", "<%1") s = s:gsub("<$", "<") - + -- what about >, nothing done in the original markdown source to handle them return s end -- Handles emphasis markers (* and _) in the text. -function emphasis(text) +local function emphasis(text) for _, s in ipairs {"%*%*", "%_%_"} do text = text:gsub(s .. "([^%s][%*%_]?)" .. s, "%1") text = text:gsub(s .. "([^%s][^<>]-[^%s][%*%_]?)" .. s, "%1") @@ -1044,7 +1050,7 @@ function emphasis(text) end -- Handles line break markers in the text. -function line_breaks(text) +local function line_breaks(text) return text:gsub(" +\n", "
    \n") end @@ -1067,28 +1073,28 @@ end -- Cleanup the text by normalizing some possible variations to make further -- processing easier. -function cleanup(text) +local function cleanup(text) -- Standardize line endings text = text:gsub("\r\n", "\n") -- DOS to UNIX text = text:gsub("\r", "\n") -- Mac to UNIX - + -- Convert all tabs to spaces text = detab(text) - + -- Strip lines with only spaces and tabs while true do local subs text, subs = text:gsub("\n[ \t]+\n", "\n\n") if subs == 0 then break end end - + return "\n" .. text .. "\n" end -- Strips link definitions from the text and stores the data in a lookup table. -function strip_link_definitions(text) +local function strip_link_definitions(text) local linkdb = {} - + local function link_def(id, url, title) id = id:match("%[(.+)%]"):lower() linkdb[id] = linkdb[id] or {} @@ -1101,7 +1107,7 @@ function strip_link_definitions(text) local def_title1 = def_no_title .. "[ \t]+\n?[ \t]*[\"'(]([^\n]+)[\"')][ \t]*" local def_title2 = def_no_title .. "[ \t]*\n[ \t]*[\"'(]([^\n]+)[\"')][ \t]*" local def_title3 = def_no_title .. "[ \t]*\n?[ \t]+[\"'(]([^\n]+)[\"')][ \t]*" - + text = text:gsub(def_title1, link_def) text = text:gsub(def_title2, link_def) text = text:gsub(def_title3, link_def) @@ -1112,10 +1118,10 @@ end link_database = {} -- Main markdown processing function -function markdown(text) +local function markdown(text) init_hash(text) init_escape_table() - + text = cleanup(text) text = protect(text) text, link_database = strip_link_definitions(text) @@ -1132,7 +1138,7 @@ setfenv(1, _G) M.lock(M) -- Expose markdown function to the world -markdown = M.markdown +_G.markdown = M.markdown -- Class for parsing command-line options local OptionParser = {} @@ -1168,6 +1174,7 @@ end -- where successfully parsed and false otherwise. function OptionParser:run(args) local pos = 1 + local param while pos <= #args do local arg = args[pos] if arg == "--" then @@ -1225,7 +1232,7 @@ local function run_command_line(arg) if not options.wrap_header then return s end local header = "" if options.header then - local f = io.open(options.header) or error("Could not open file: " .. options.header) + local f = io.open(options.header) or error("Could not open file: " .. options.header) header = f:read("*a") f:close() else @@ -1239,14 +1246,14 @@ local function run_command_line(arg) ]] - local title = options.title or s:match("

    (.-)

    ") or s:match("

    (.-)

    ") or + local title = options.title or s:match("

    (.-)

    ") or s:match("

    (.-)

    ") or s:match("

    (.-)

    ") or "Untitled" header = header:gsub("TITLE", title) if options.inline_style then local style = "" local f = io.open(options.stylesheet) - if f then - style = f:read("*a") f:close() + if f then + style = f:read("*a") f:close() else error("Could not include style sheet " .. options.stylesheet .. ": File not found") end @@ -1265,15 +1272,15 @@ local function run_command_line(arg) end return header .. s .. footer end - - -- Generate output path name from input path name given options. + + -- Generate output path name from input path name given options. local function outpath(path, options) if options.append then return path .. ".html" end local m = path:match("^(.+%.html)[^/\\]+$") if m then return m end m = path:match("^(.+%.)[^/\\]*$") if m and path ~= m .. "html" then return m .. "html" end return path .. ".html" end - + -- Default commandline options local options = { wrap_header = true, @@ -1316,18 +1323,18 @@ Other options: op:param("s", "style", function(x) options.stylesheet = x end) op:flag("l", "inline-style", function(x) options.inline_style = true end) op:flag("a", "append", function() options.append = true end) - op:flag("t", "test", function() + op:flag("t", "test", function() local n = arg[0]:gsub("markdown.lua", "markdown-tests.lua") local f = io.open(n) - if f then - f:close() dofile(n) + if f then + f:close() dofile(n) else error("Cannot find markdown-tests.lua") end - run_stdin = false + run_stdin = false end) op:flag("h", "help", function() print(help) run_stdin = false end) - op:arg(function(path) + op:arg(function(path) local file = io.open(path) or error("Could not open file: " .. path) local s = file:read("*a") file:close() @@ -1338,7 +1345,7 @@ Other options: run_stdin = false end ) - + if not op:run(arg) then print(help) run_stdin = false @@ -1350,10 +1357,10 @@ Other options: io.write(s) end end - + -- If we are being run from the command-line, act accordingly if arg and arg[0]:find("markdown%.lua$") then run_command_line(arg) else return markdown -end \ No newline at end of file +end