lexer rewritten to return an object rather than a coroutine. Methods like getline() and lineno() now work much better; this sorts out the problems encountered with Lua block comments
parent 779ce4bb51
commit e265370400

138 ldoc/lexer.lua
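From the caller's side the change looks like this. A minimal sketch, not part of the commit, assuming the module keeps pl.lexer's lexer.lua convenience wrapper:

    local lexer = require 'ldoc.lexer'

    local tok = lexer.lua('local x = 1\n')

    -- the stream is still callable, exactly as before
    local t,v = tok()           --> 'keyword'  'local'

    -- but out-of-band queries are now ordinary methods rather than
    -- magic arguments smuggled through the coroutine (the old tok(0))
    print(tok:lineno())         -- current line number
    print(tok:getline())        -- rest of the current line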
@@ -21,7 +21,6 @@
 -- @class module
 -- @name pl.lexer
 
-local yield,wrap = coroutine.yield,coroutine.wrap
 local strfind = string.find
 local strsub = string.sub
 local append = table.insert
@@ -52,14 +51,14 @@ local PREPRO = '^#.-[^\\]\n'
 local plain_matches,lua_matches,cpp_matches,lua_keyword,cpp_keyword
 
 local function tdump(tok)
-    return yield(tok,tok)
+    return tok,tok
 end
 
 local function ndump(tok,options)
     if options and options.number then
         tok = tonumber(tok)
     end
-    return yield("number",tok)
+    return "number",tok
 end
 
 -- regular strings, single or double quotes; usually we want them
@@ -68,7 +67,7 @@ local function sdump(tok,options)
     if options and options.string then
         tok = tok:sub(2,-2)
     end
-    return yield("string",tok)
+    return "string",tok
 end
 
 -- long Lua strings need extra work to get rid of the quotes
@@ -76,45 +75,45 @@ local function sdump_l(tok,options)
     if options and options.string then
         tok = tok:sub(3,-3)
     end
-    return yield("string",tok)
+    return "string",tok
 end
 
 local function chdump(tok,options)
     if options and options.string then
         tok = tok:sub(2,-2)
     end
-    return yield("char",tok)
+    return "char",tok
 end
 
 local function cdump(tok)
-    return yield('comment',tok)
+    return 'comment',tok
 end
 
 local function wsdump (tok)
-    return yield("space",tok)
+    return "space",tok
 end
 
 local function pdump (tok)
-    return yield('prepro',tok)
+    return 'prepro',tok
 end
 
 local function plain_vdump(tok)
-    return yield("iden",tok)
+    return "iden",tok
 end
 
 local function lua_vdump(tok)
     if lua_keyword[tok] then
-        return yield("keyword",tok)
+        return "keyword",tok
     else
-        return yield("iden",tok)
+        return "iden",tok
     end
 end
 
 local function cpp_vdump(tok)
     if cpp_keyword[tok] then
-        return yield("keyword",tok)
+        return "keyword",tok
     else
-        return yield("iden",tok)
+        return "iden",tok
     end
 end
 
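All the token-dump actions above follow the same mechanical change: they used to yield their (type, value) pair into the coroutine wrapper, now they simply return it and the driver passes it straight through. An illustrative before/after, not taken from the diff:

    -- before: only meaningful when resumed inside coroutine.wrap
    local function tdump_old (tok)
        return coroutine.yield(tok,tok)
    end

    -- after: a plain function, callable and testable anywhere
    local function tdump_new (tok)
        return tok,tok
    end

    print(tdump_new('=='))    --> ==    ==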
@@ -151,7 +150,6 @@ function lexer.scan (s,matches,filter,options)
         end
         matches = plain_matches
     end
-    function lex ()
     local i1,i2,idx,res1,res2,tok,pat,fun,capt
     local line = 1
     if file then
@@ -162,95 +160,71 @@ function lexer.scan (s,matches,filter,options)
     local sz = #s
     local idx = 1
     if sz == 0 then return nil end -- empty file
-    --print('sz',sz)
+    local res = {}
+    local mt = {}
+    mt.__index = mt
+    setmetatable(res,mt)
+
+    function mt.lineno() return line end
+
+    function mt.getline()
+        if idx < sz then
+            tok = strsub(s,idx,-2)
+            idx = sz + 1
+            line = line + 1
+            return tok
+        else
+            idx = sz + 1
+            line = line + 1
+            return file:read()
+        end
+    end
+
+    function mt.next (tok)
+        local t,v = tok()
+        while t == 'space' do
+            t,v = tok()
+        end
+        return t,v
+    end
+
+    function mt.__call ()
     while true do
         for _,m in ipairs(matches) do
-            pat = m[1]
-            fun = m[2]
-            if fun == nil then print(pat); os.exit() end
+            pat,fun = m[1],m[2]
+            if fun == nil then error("no match for "..pat) end
             i1,i2 = strfind(s,pat,idx)
             if i1 then
                 tok = strsub(s,i1,i2)
                 idx = i2 + 1
                 if not (filter and filter[fun]) then
                     lexer.finished = idx > sz
-                    res1,res2 = fun(tok,options)
+                    return fun(tok,options)
                 end
-                if res1 then
-                    local tp = type(res1)
-                    -- insert a token list
-                    if tp=='table' then
-                        yield('','')
-                        for _,t in ipairs(res1) do
-                            yield(t[1],t[2])
-                        end
-                    elseif tp == 'string' then -- or search up to some special pattern
-                        i1,i2 = strfind(s,res1,idx)
-                        if i1 then
-                            tok = strsub(s,i1,i2)
-                            idx = i2 + 1
-                            yield('',tok)
-                        else
-                            yield('','')
-                            idx = sz + 1
-                        end
-                        --if idx > sz then return end
-                    else
-                        yield(line,idx)
-                    end
-                end
             end
             if idx > sz then
                 if file then
-                    --repeat -- next non-empty line
                     line = line + 1
                     s = file:read()
                     if not s then return end
-                    --until not s:match '^%s*$'
                     s = s .. '\n'
                     idx ,sz = 1,#s
-                    break
                 else
                     return
                 end
-            else break end
             end
         end
     end
-    end
-    return wrap(lex)
-end
+    return res
 
-local function isstring (s)
-    return type(s) == 'string'
-end
-
---- insert tokens into a stream.
--- @param tok a token stream
--- @param a1 a string is the type, a table is a token list and
-- a function is assumed to be a token-like iterator (returns type & value)
--- @param a2 a string is the value
-function lexer.insert (tok,a1,a2)
-    if not a1 then return end
-    local ts
-    if isstring(a1) and isstring(a2) then
-        ts = {{a1,a2}}
-    elseif type(a1) == 'function' then
-        ts = {}
-        for t,v in a1() do
-            append(ts,{t,v})
-        end
-    else
-        ts = a1
-    end
-    tok(ts)
 end
 
 --- get everything in a stream upto a newline.
 -- @param tok a token stream
 -- @return a string
 function lexer.getline (tok)
-    local t,v = tok('.-\n')
-    return v
+    return tok:getline()
 end
 
 --- get current line number. <br>
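The rewritten scan builds its stream with a compact Lua idiom: one table serves both as the metatable's __index (for named methods like lineno and getline) and as its __call handler (so the stream can still be iterated as a function). A self-contained sketch of that pattern under hypothetical names, not the commit's code:

    local function make_counter ()
        local n = 0
        local mt = {}
        mt.__index = mt

        function mt.value () return n end   -- method-style query, like tok:lineno()

        function mt.__call ()               -- call-style iteration, like tok()
            n = n + 1
            return n
        end

        return setmetatable({}, mt)
    end

    local c = make_counter()
    print(c(), c())      --> 1    2
    print(c:value())     --> 2

One consequence visible in the hunks that follow: helpers such as lexer.lineno and lexer.skipws shrink to one-line delegations onto these methods.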
@@ -258,15 +232,7 @@ end
 -- @param tok a token stream
 -- @return the line number and current column
 function lexer.lineno (tok)
-    return tok(0)
-end
-
---- get the rest of the stream.
--- @param tok a token stream
--- @return a string
-function lexer.getrest (tok)
-    local t,v = tok('.+')
-    return v
+    return tok:lineno()
 end
 
 --- get the Lua keywords as a set-like table.
@@ -445,11 +411,7 @@ end
 --- get the next non-space token from the stream.
 -- @param tok the token stream.
 function lexer.skipws (tok)
-    local t,v = tok()
-    while t == 'space' do
-        t,v = tok()
-    end
-    return t,v
+    return tok:next()
 end
 
 local skipws = lexer.skipws
@@ -76,18 +76,13 @@ local function parse_file(fname,lang, package)
 local tok,f = lang.lexer(fname)
 
 function lineno ()
-    while true do
-        local res = lexer.lineno(tok)
-        if type(res) == 'number' then return res end
-        if res == nil then return nil end
-    end
+    return tok:lineno()
 end
 
 function filename () return fname end
 
 function F:warning (msg,kind)
     kind = kind or 'warning'
-    lineno() -- why is this necessary?
-    lineno()
     io.stderr:write(kind..' '..fname..':'..lineno()..' '..msg,'\n')
 end
 
@@ -123,9 +118,7 @@ local function parse_file(fname,lang, package)
 while t do
     if t == 'comment' then
         local comment = {}
-
         local ldoc_comment,block = lang:start_comment(v)
-
         if ldoc_comment and v:match '%-+$' then
             ldoc_comment = false
         end
@@ -324,30 +324,23 @@ function M.quote (s)
     return "'"..s.."'"
 end
 
--- an embarassing function. The PL Lua lexer does not do block comments
--- when used in line-grabbing mode, and in fact (0.9.4) does not even
--- do them properly in full-text mode, due to a ordering mistake.
--- So, we do what we can ;)
+-- The PL Lua lexer does not do block comments
+-- when used in line-grabbing mode, so this function grabs each line
+-- until we meet the end of the comment
 function M.grab_block_comment (v,tok,end1,end2)
     local res = {v}
-    local t,last_v
-    local t12 = end1..end2
-    k = 1
     repeat
-        last_v = v
-        t,v = tok()
-        if t=='comment' and v:find(t12,1,true) then t12 = nil; break end
-        if t=='string' then v = "'"..v.."'" end
+        v = lexer.getline(tok)
+        if v:match '%]%]' then break end
         append(res,v)
-    until last_v == end1 and v == end2
-    if t12 then
-        table.remove(res)
-        table.remove(res)
-    end
+        append(res,'\n')
+    until false
     res = table.concat(res)
+    --print(res)
     return 'comment',res
 end
 
 
 function M.abspath (f)
     return path.normcase(path.abspath(f))
 end
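With getline() now dependable, grabbing a block comment reduces to a plain line-reading loop. A hypothetical standalone version of the same idea (grab_until and its terminator handling are illustrative, not the commit's exact logic):

    -- collect lines from a token stream until one contains the terminator
    local function grab_until (tok, terminator)
        local res = {}
        repeat
            local line = tok:getline()    -- rest of the current line
            if line == nil or line:find(terminator, 1, true) then break end
            res[#res+1] = line
            res[#res+1] = '\n'
        until false
        return table.concat(res)
    end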