lexer rewritten to return an object instead of a coroutine. Methods like getline() and lineno() now work much better; this sorts out the problems encountered with Lua block comments

steve donovan 2011-08-14 11:54:40 +02:00
parent 779ce4bb51
commit e265370400
3 changed files with 90 additions and 142 deletions
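In effect, the commit replaces coroutine.wrap(lex) with a table whose metatable supplies __call plus lineno/getline/next methods, so a token stream is still called like a function but can also answer questions about its own state. A minimal sketch of the difference, using the pl.lexer-style API seen in this diff (token output illustrative):

    local lexer = require 'pl.lexer'    -- the copy of pl.lexer patched here
    local tok = lexer.lua 'answer = 42'
    print(tok())          --> iden   answer   (unchanged: tok is callable)
    -- before this commit, state queries were smuggled through the same
    -- call, e.g. tok(0) for the line number or tok('.-\n') for a line;
    -- after it, they are ordinary methods on the stream object:
    print(tok:lineno())   --> 1
    print(tok:getline())  --> the rest of the current line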


@@ -21,7 +21,6 @@
 -- @class module
 -- @name pl.lexer
-local yield,wrap = coroutine.yield,coroutine.wrap
 local strfind = string.find
 local strsub = string.sub
 local append = table.insert
@@ -52,14 +51,14 @@ local PREPRO = '^#.-[^\\]\n'
 local plain_matches,lua_matches,cpp_matches,lua_keyword,cpp_keyword

 local function tdump(tok)
-    return yield(tok,tok)
+    return tok,tok
 end

 local function ndump(tok,options)
     if options and options.number then
         tok = tonumber(tok)
     end
-    return yield("number",tok)
+    return "number",tok
 end

 -- regular strings, single or double quotes; usually we want them
@@ -68,7 +67,7 @@ local function sdump(tok,options)
     if options and options.string then
         tok = tok:sub(2,-2)
     end
-    return yield("string",tok)
+    return "string",tok
 end

 -- long Lua strings need extra work to get rid of the quotes
@@ -76,45 +75,45 @@ local function sdump_l(tok,options)
     if options and options.string then
         tok = tok:sub(3,-3)
     end
-    return yield("string",tok)
+    return "string",tok
 end

 local function chdump(tok,options)
     if options and options.string then
         tok = tok:sub(2,-2)
     end
-    return yield("char",tok)
+    return "char",tok
 end

 local function cdump(tok)
-    return yield('comment',tok)
+    return 'comment',tok
 end

 local function wsdump (tok)
-    return yield("space",tok)
+    return "space",tok
 end

 local function pdump (tok)
-    return yield('prepro',tok)
+    return 'prepro',tok
 end

 local function plain_vdump(tok)
-    return yield("iden",tok)
+    return "iden",tok
 end

 local function lua_vdump(tok)
     if lua_keyword[tok] then
-        return yield("keyword",tok)
+        return "keyword",tok
     else
-        return yield("iden",tok)
+        return "iden",tok
     end
 end

 local function cpp_vdump(tok)
     if cpp_keyword[tok] then
-        return yield("keyword",tok)
+        return "keyword",tok
     else
-        return yield("iden",tok)
+        return "iden",tok
     end
 end
@@ -151,7 +150,6 @@ function lexer.scan (s,matches,filter,options)
         end
         matches = plain_matches
     end
-    function lex ()
     local i1,i2,idx,res1,res2,tok,pat,fun,capt
     local line = 1
     if file then
@@ -162,95 +160,71 @@ function lexer.scan (s,matches,filter,options)
     local sz = #s
     local idx = 1
     if sz == 0 then return nil end -- empty file
-    --print('sz',sz)
+
+    local res = {}
+    local mt = {}
+    mt.__index = mt
+    setmetatable(res,mt)
+
+    function mt.lineno() return line end
+
+    function mt.getline()
+        if idx < sz then
+            tok = strsub(s,idx,-2)
+            idx = sz + 1
+            line = line + 1
+            return tok
+        else
+            idx = sz + 1
+            line = line + 1
+            return file:read()
+        end
+    end
+
+    function mt.next (tok)
+        local t,v = tok()
+        while t == 'space' do
+            t,v = tok()
+        end
+        return t,v
+    end
+
+    function mt.__call ()
     while true do
         for _,m in ipairs(matches) do
-            pat = m[1]
-            fun = m[2]
-            if fun == nil then print(pat); os.exit() end
+            pat,fun = m[1],m[2]
+            if fun == nil then error("no match for "..pat) end
             i1,i2 = strfind(s,pat,idx)
             if i1 then
                 tok = strsub(s,i1,i2)
                 idx = i2 + 1
                 if not (filter and filter[fun]) then
                     lexer.finished = idx > sz
-                    res1,res2 = fun(tok,options)
+                    return fun(tok,options)
                 end
-                if res1 then
-                    local tp = type(res1)
-                    -- insert a token list
-                    if tp=='table' then
-                        yield('','')
-                        for _,t in ipairs(res1) do
-                            yield(t[1],t[2])
-                        end
-                    elseif tp == 'string' then -- or search up to some special pattern
-                        i1,i2 = strfind(s,res1,idx)
-                        if i1 then
-                            tok = strsub(s,i1,i2)
-                            idx = i2 + 1
-                            yield('',tok)
-                        else
-                            yield('','')
-                            idx = sz + 1
-                        end
-                        --if idx > sz then return end
-                    else
-                        yield(line,idx)
-                    end
-                end
             end
         end
         if idx > sz then
             if file then
-                --repeat -- next non-empty line
+                -- next non-empty line
                 line = line + 1
                 s = file:read()
                 if not s then return end
-                --until not s:match '^%s*$'
                 s = s .. '\n'
                 idx ,sz = 1,#s
-                break
             else
                 return
             end
-        else break end
         end
     end
     end
-    end
-    return wrap(lex)
+    return res
 end
-
-local function isstring (s)
-    return type(s) == 'string'
-end
-
---- insert tokens into a stream.
--- @param tok a token stream
--- @param a1 a string is the type, a table is a token list and
--- a function is assumed to be a token-like iterator (returns type & value)
--- @param a2 a string is the value
-function lexer.insert (tok,a1,a2)
-    if not a1 then return end
-    local ts
-    if isstring(a1) and isstring(a2) then
-        ts = {{a1,a2}}
-    elseif type(a1) == 'function' then
-        ts = {}
-        for t,v in a1() do
-            append(ts,{t,v})
-        end
-    else
-        ts = a1
-    end
-    tok(ts)
-end

 --- get everything in a stream upto a newline.
 -- @param tok a token stream
 -- @return a string
 function lexer.getline (tok)
-    local t,v = tok('.-\n')
-    return v
+    return tok:getline()
 end

 --- get current line number. <br>
@@ -258,15 +232,7 @@ end
 -- @param tok a token stream
 -- @return the line number and current column
 function lexer.lineno (tok)
-    return tok(0)
+    return tok:lineno()
 end

---- get the rest of the stream.
--- @param tok a token stream
--- @return a string
-function lexer.getrest (tok)
-    local t,v = tok('.+')
-    return v
-end
-
 --- get the Lua keywords as a set-like table.
@@ -445,11 +411,7 @@ end
 --- get the next non-space token from the stream.
 -- @param tok the token stream.
 function lexer.skipws (tok)
-    local t,v = tok()
-    while t == 'space' do
-        t,v = tok()
-    end
-    return t,v
+    return tok:next()
 end

 local skipws = lexer.skipws
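Because every dump function now returns its (type, value) pair directly instead of yielding it, __call can simply return fun(tok,options), and helpers such as skipws reduce to method calls. A hedged usage sketch of the rewritten stream:

    -- drain a stream, skipping 'space' tokens via the new next() method
    local t,v = lexer.skipws(tok)    -- equivalent to tok:next()
    while t do
        print(t, v, tok:lineno())    -- lineno() no longer resumes a coroutine
        t,v = lexer.skipws(tok)
    end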


@@ -76,18 +76,13 @@ local function parse_file(fname,lang, package)
     local tok,f = lang.lexer(fname)

     function lineno ()
-        while true do
-            local res = lexer.lineno(tok)
-            if type(res) == 'number' then return res end
-            if res == nil then return nil end
-        end
+        return tok:lineno()
     end
     function filename () return fname end

     function F:warning (msg,kind)
         kind = kind or 'warning'
-        lineno() -- why is this necessary?
-        lineno()
         io.stderr:write(kind..' '..fname..':'..lineno()..' '..msg,'\n')
     end
@@ -123,9 +118,7 @@ local function parse_file(fname,lang, package)
     while t do
         if t == 'comment' then
             local comment = {}
             local ldoc_comment,block = lang:start_comment(v)
             if ldoc_comment and v:match '%-+$' then
                 ldoc_comment = false
             end
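The retry loop that lineno() needed above is a direct casualty of the coroutine protocol: lexer.lineno(tok) called tok(0), which resumed the lexer and could come back with a pending token (or nil at end of stream) rather than a number, hence the polling and the duplicated warm-up calls in F:warning. A before/after sketch, condensed from this diff:

    -- before: poll until the yield protocol hands back a number
    local function lineno ()
        while true do
            local res = lexer.lineno(tok)
            if type(res) == 'number' then return res end
            if res == nil then return nil end
        end
    end
    -- after: the stream object just reads its own counter
    local function lineno () return tok:lineno() end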


@@ -324,30 +324,23 @@ function M.quote (s)
     return "'"..s.."'"
 end

--- an embarassing function. The PL Lua lexer does not do block comments
--- when used in line-grabbing mode, and in fact (0.9.4) does not even
--- do them properly in full-text mode, due to a ordering mistake.
--- So, we do what we can ;)
+-- The PL Lua lexer does not do block comments
+-- when used in line-grabbing mode, so this function grabs each line
+-- until we meet the end of the comment
 function M.grab_block_comment (v,tok,end1,end2)
     local res = {v}
-    local t,last_v
-    local t12 = end1..end2
-    k = 1
     repeat
-        last_v = v
-        t,v = tok()
-        if t=='comment' and v:find(t12,1,true) then t12 = nil; break end
-        if t=='string' then v = "'"..v.."'" end
+        v = lexer.getline(tok)
+        if v:match '%]%]' then break end
         append(res,v)
-    until last_v == end1 and v == end2
-    if t12 then
-        table.remove(res)
-        table.remove(res)
-    end
+        append(res,'\n')
+    until false
     res = table.concat(res)
-    --print(res)
     return 'comment',res
 end

 function M.abspath (f)
     return path.normcase(path.abspath(f))
 end
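For illustration, a hedged sketch of the rewritten grab_block_comment in action; note that the new loop keys off a hard-coded '%]%]' pattern, so the end1/end2 parameters are effectively unused (the call below passes them only to satisfy the signature):

    -- tok is a line-grabbing stream positioned inside a comment such as:
    --   --[[--
    --    Reads a configuration file.
    --    @param fname the file name
    --   ]]
    -- v holds the text of the opening comment token
    local t, text = M.grab_block_comment(v, tok, ']', ']')
    assert(t == 'comment')   -- text accumulates one grabbed line per '\n',
                             -- stopping at the line that contains ']]'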