lexer rewritten to return an object rather than a coroutine. Methods like getline() and lineno() now work much better; this sorts out the problems encountered with Lua block comments

steve donovan 2011-08-14 11:54:40 +02:00
parent 779ce4bb51
commit e265370400
3 changed files with 90 additions and 142 deletions
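For callers, the upshot of this commit: lexer.scan now hands back a callable table instead of a coroutine-wrapped function, so the stream can still be called for the next token while also carrying methods. A minimal usage sketch, assuming the module loads as pl.lexer (the input string is invented):

    local lexer = require 'pl.lexer'

    local tok = lexer.lua 'print(42)'  -- build a token stream
    print(tok:lineno())                -- method calls now work directly --> 1
    local t,v = tok()                  -- calling the object returns the next token
    while t do
        print(t,v)
        t,v = tok()
    end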

Changed file 1 of 3:

@@ -21,7 +21,6 @@
 -- @class module
 -- @name pl.lexer
 
-local yield,wrap = coroutine.yield,coroutine.wrap
 local strfind = string.find
 local strsub = string.sub
 local append = table.insert
@@ -52,14 +51,14 @@ local PREPRO = '^#.-[^\\]\n'
 local plain_matches,lua_matches,cpp_matches,lua_keyword,cpp_keyword
 
 local function tdump(tok)
-    return yield(tok,tok)
+    return tok,tok
 end
 
 local function ndump(tok,options)
     if options and options.number then
         tok = tonumber(tok)
     end
-    return yield("number",tok)
+    return "number",tok
 end
 
 -- regular strings, single or double quotes; usually we want them
@@ -68,7 +67,7 @@ local function sdump(tok,options)
     if options and options.string then
         tok = tok:sub(2,-2)
     end
-    return yield("string",tok)
+    return "string",tok
 end
 
 -- long Lua strings need extra work to get rid of the quotes
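The options table still applies to these dump functions after the rewrite: number=true makes ndump convert numeric tokens with tonumber, and string=true makes sdump strip the quotes. A small sketch (invented input):

    local lexer = require 'pl.lexer'
    local tok = lexer.lua('x = 42', nil, {number=true})
    local t,v
    repeat t,v = tok() until t == 'number'
    print(type(v), v)   --> number  42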
@@ -76,45 +75,45 @@ local function sdump_l(tok,options)
     if options and options.string then
         tok = tok:sub(3,-3)
     end
-    return yield("string",tok)
+    return "string",tok
 end
 
 local function chdump(tok,options)
     if options and options.string then
         tok = tok:sub(2,-2)
     end
-    return yield("char",tok)
+    return "char",tok
 end
 
 local function cdump(tok)
-    return yield('comment',tok)
+    return 'comment',tok
 end
 
 local function wsdump (tok)
-    return yield("space",tok)
+    return "space",tok
 end
 
 local function pdump (tok)
-    return yield('prepro',tok)
+    return 'prepro',tok
 end
 
 local function plain_vdump(tok)
-    return yield("iden",tok)
+    return "iden",tok
 end
 
 local function lua_vdump(tok)
     if lua_keyword[tok] then
-        return yield("keyword",tok)
+        return "keyword",tok
     else
-        return yield("iden",tok)
+        return "iden",tok
     end
 end
 
 local function cpp_vdump(tok)
     if cpp_keyword[tok] then
-        return yield("keyword",tok)
+        return "keyword",tok
     else
-        return yield("iden",tok)
+        return "iden",tok
     end
 end
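The change in this hunk is uniform: each token "dump" function used to push its type/value pair out through coroutine.yield, and now simply returns the pair to whoever called it. A standalone sketch of the difference, with an invented token value:

    -- old style: values surface through a coroutine
    local co = coroutine.wrap(function ()
        coroutine.yield('number', 42)
    end)
    print(co())               --> number  42

    -- new style: a plain return, no coroutine machinery
    local function ndump(tok) return 'number', tok end
    print(ndump(42))          --> number  42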
@@ -151,106 +150,81 @@ function lexer.scan (s,matches,filter,options)
         end
         matches = plain_matches
     end
-    function lex ()
-        local i1,i2,idx,res1,res2,tok,pat,fun,capt
-        local line = 1
-        if file then
-            s = file:read()
-            if not s then return nil end -- empty file
-            s = s ..'\n'
-        end
-        local sz = #s
-        local idx = 1
-        if sz == 0 then return nil end -- empty file
-        --print('sz',sz)
-        while true do
-            for _,m in ipairs(matches) do
-                pat = m[1]
-                fun = m[2]
-                if fun == nil then print(pat); os.exit() end
-                i1,i2 = strfind(s,pat,idx)
-                if i1 then
-                    tok = strsub(s,i1,i2)
-                    idx = i2 + 1
-                    if not (filter and filter[fun]) then
-                        lexer.finished = idx > sz
-                        res1,res2 = fun(tok,options)
-                    end
-                    if res1 then
-                        local tp = type(res1)
-                        -- insert a token list
-                        if tp=='table' then
-                            yield('','')
-                            for _,t in ipairs(res1) do
-                                yield(t[1],t[2])
-                            end
-                        elseif tp == 'string' then -- or search up to some special pattern
-                            i1,i2 = strfind(s,res1,idx)
-                            if i1 then
-                                tok = strsub(s,i1,i2)
-                                idx = i2 + 1
-                                yield('',tok)
-                            else
-                                yield('','')
-                                idx = sz + 1
-                            end
-                            --if idx > sz then return end
-                        else
-                            yield(line,idx)
-                        end
-                    end
-                    if idx > sz then
-                        if file then
-                            --repeat -- next non-empty line
-                                line = line + 1
-                                s = file:read()
-                                if not s then return end
-                            --until not s:match '^%s*$'
-                            s = s .. '\n'
-                            idx ,sz = 1,#s
-                            break
-                        else
-                            return
-                        end
-                    else break end
-                end
-            end
-        end
-    end
-    return wrap(lex)
-end
-
-local function isstring (s)
-    return type(s) == 'string'
-end
-
---- insert tokens into a stream.
--- @param tok a token stream
--- @param a1 a string is the type, a table is a token list and
--- a function is assumed to be a token-like iterator (returns type & value)
--- @param a2 a string is the value
-function lexer.insert (tok,a1,a2)
-    if not a1 then return end
-    local ts
-    if isstring(a1) and isstring(a2) then
-        ts = {{a1,a2}}
-    elseif type(a1) == 'function' then
-        ts = {}
-        for t,v in a1() do
-            append(ts,{t,v})
-        end
-    else
-        ts = a1
-    end
-    tok(ts)
-end
+    local i1,i2,idx,res1,res2,tok,pat,fun,capt
+    local line = 1
+    if file then
+        s = file:read()
+        if not s then return nil end -- empty file
+        s = s ..'\n'
+    end
+    local sz = #s
+    local idx = 1
+    if sz == 0 then return nil end -- empty file
+
+    local res = {}
+    local mt = {}
+    mt.__index = mt
+    setmetatable(res,mt)
+
+    function mt.lineno() return line end
+
+    function mt.getline()
+        if idx < sz then
+            tok = strsub(s,idx,-2)
+            idx = sz + 1
+            line = line + 1
+            return tok
+        else
+            idx = sz + 1
+            line = line + 1
+            return file:read()
+        end
+    end
+
+    function mt.next (tok)
+        local t,v = tok()
+        while t == 'space' do
+            t,v = tok()
+        end
+        return t,v
+    end
+
+    function mt.__call ()
+        while true do
+            for _,m in ipairs(matches) do
+                pat,fun = m[1],m[2]
+                if fun == nil then error("no match for "..pat) end
+                i1,i2 = strfind(s,pat,idx)
+                if i1 then
+                    tok = strsub(s,i1,i2)
+                    idx = i2 + 1
+                    if not (filter and filter[fun]) then
+                        lexer.finished = idx > sz
+                        return fun(tok,options)
+                    end
+                end
+            end
+            if idx > sz then
+                if file then
+                    line = line + 1
+                    s = file:read()
+                    if not s then return end
+                    s = s .. '\n'
+                    idx ,sz = 1,#s
+                else
+                    return
+                end
+            end
+        end
+    end
+    return res
+end
 
 --- get everything in a stream upto a newline.
 -- @param tok a token stream
 -- @return a string
 function lexer.getline (tok)
-    local t,v = tok('.-\n')
-    return v
+    return tok:getline()
 end
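The returned object leans on one metatable doing double duty: __call stands in for the old coroutine function, while __index = mt exposes lineno, getline and next as methods on the same value. A standalone sketch of the pattern, detached from the lexer:

    local function counter (limit)
        local obj, mt = {}, {}
        mt.__index = mt           -- obj:method() finds functions on mt
        setmetatable(obj, mt)
        local n = 0
        function mt.count () return n end   -- a method, like tok:lineno()
        function mt.__call ()               -- a call, like tok()
            if n < limit then
                n = n + 1
                return n
            end
        end
        return obj
    end

    local c = counter(3)
    print(c())         --> 1
    print(c:count())   --> 1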
@@ -258,15 +232,7 @@
 --- get current line number. <br>
 -- @param tok a token stream
 -- @return the line number and current column
 function lexer.lineno (tok)
-    return tok(0)
+    return tok:lineno()
 end
 
---- get the rest of the stream.
--- @param tok a token stream
--- @return a string
-function lexer.getrest (tok)
-    local t,v = tok('.+')
-    return v
-end
-
 --- get the Lua keywords as a set-like table.
@@ -445,11 +411,7 @@
 --- get the next non-space token from the stream.
 -- @param tok the token stream.
 function lexer.skipws (tok)
-    local t,v = tok()
-    while t == 'space' do
-        t,v = tok()
-    end
-    return t,v
+    return tok:next()
 end
 
 local skipws = lexer.skipws
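skipws reduces to tok:next(), which loops past space tokens. A sketch with an empty filter so that spaces stay in the stream (input invented):

    local lexer = require 'pl.lexer'
    local tok = lexer.lua('local  x', {})   -- {} keeps space tokens
    print(lexer.skipws(tok))   --> keyword  local
    print(lexer.skipws(tok))   --> iden     x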

Changed file 2 of 3:

@@ -76,18 +76,13 @@ local function parse_file(fname,lang, package)
    local tok,f = lang.lexer(fname)
 
    function lineno ()
-      while true do
-         local res = lexer.lineno(tok)
-         if type(res) == 'number' then return res end
-         if res == nil then return nil end
-      end
+      return tok:lineno()
    end
    function filename () return fname end
 
    function F:warning (msg,kind)
       kind = kind or 'warning'
-      lineno() -- why is this necessary?
-      lineno()
       io.stderr:write(kind..' '..fname..':'..lineno()..' '..msg,'\n')
    end
@@ -123,9 +118,7 @@ local function parse_file(fname,lang, package)
    while t do
       if t == 'comment' then
-         local comment = {}
          local ldoc_comment,block = lang:start_comment(v)
         if ldoc_comment and v:match '%-+$' then
             ldoc_comment = false
          end

Changed file 3 of 3:

@@ -324,30 +324,23 @@ function M.quote (s)
    return "'"..s.."'"
 end
 
--- an embarassing function. The PL Lua lexer does not do block comments
--- when used in line-grabbing mode, and in fact (0.9.4) does not even
--- do them properly in full-text mode, due to a ordering mistake.
--- So, we do what we can ;)
+-- The PL Lua lexer does not do block comments
+-- when used in line-grabbing mode, so this function grabs each line
+-- until we meet the end of the comment
 function M.grab_block_comment (v,tok,end1,end2)
    local res = {v}
-   local t,last_v
-   local t12 = end1..end2
-   k = 1
    repeat
-      last_v = v
-      t,v = tok()
-      if t=='comment' and v:find(t12,1,true) then t12 = nil; break end
-      if t=='string' then v = "'"..v.."'" end
+      v = lexer.getline(tok)
+      if v:match '%]%]' then break end
       append(res,v)
-   until last_v == end1 and v == end2
-   if t12 then
-      table.remove(res)
-      table.remove(res)
-   end
+      append(res,'\n')
+   until false
    res = table.concat(res)
-   --print(res)
    return 'comment',res
 end
 
 function M.abspath (f)
    return path.normcase(path.abspath(f))
 end