lexer rewritten to return an object rather than a coroutine. Methods like getline() and lineno() now work much better; this sorts out the problems encountered with Lua block comments

This commit is contained in:
steve donovan 2011-08-14 11:54:40 +02:00
parent 779ce4bb51
commit e265370400
3 changed files with 90 additions and 142 deletions
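
For illustration, the new stream object can be used roughly like this (a minimal sketch, assuming the module still loads as pl.lexer and that lexer.lua routes through the rewritten lexer.scan):

local lexer = require 'pl.lexer'
local tok = lexer.lua 'x = 1 -- one\ny = 2\n'
local t,v = tok()     -- the stream is a callable object, e.g. 'iden','x'
t,v = tok:next()      -- next() skips any 'space' tokens
print(tok:lineno())   -- an ordinary method call; no coroutine involved
print(tok:getline())  -- rest of the buffer (the rest of the current line for file streams)

Since the stream is a plain table with a metatable rather than a coroutine, getline() and lineno() can read and update the scanner state directly instead of smuggling requests through yield.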

View File

@@ -21,7 +21,6 @@
-- @class module
-- @name pl.lexer
local yield,wrap = coroutine.yield,coroutine.wrap
local strfind = string.find
local strsub = string.sub
local append = table.insert
@@ -52,14 +51,14 @@ local PREPRO = '^#.-[^\\]\n'
local plain_matches,lua_matches,cpp_matches,lua_keyword,cpp_keyword
local function tdump(tok)
return yield(tok,tok)
return tok,tok
end
local function ndump(tok,options)
if options and options.number then
tok = tonumber(tok)
end
return yield("number",tok)
return "number",tok
end
-- regular strings, single or double quotes; usually we want them
@@ -68,7 +67,7 @@ local function sdump(tok,options)
if options and options.string then
tok = tok:sub(2,-2)
end
return yield("string",tok)
return "string",tok
end
-- long Lua strings need extra work to get rid of the quotes
@@ -76,45 +75,45 @@ local function sdump_l(tok,options)
if options and options.string then
tok = tok:sub(3,-3)
end
return yield("string",tok)
return "string",tok
end
local function chdump(tok,options)
if options and options.string then
tok = tok:sub(2,-2)
end
return yield("char",tok)
return "char",tok
end
local function cdump(tok)
return yield('comment',tok)
return 'comment',tok
end
local function wsdump (tok)
return yield("space",tok)
return "space",tok
end
local function pdump (tok)
return yield('prepro',tok)
return 'prepro',tok
end
local function plain_vdump(tok)
return yield("iden",tok)
return "iden",tok
end
local function lua_vdump(tok)
if lua_keyword[tok] then
return yield("keyword",tok)
return "keyword",tok
else
return yield("iden",tok)
return "iden",tok
end
end
local function cpp_vdump(tok)
if cpp_keyword[tok] then
return yield("keyword",tok)
return "keyword",tok
else
return yield("iden",tok)
return "iden",tok
end
end
@@ -151,7 +150,6 @@ function lexer.scan (s,matches,filter,options)
end
matches = plain_matches
end
function lex ()
local i1,i2,idx,res1,res2,tok,pat,fun,capt
local line = 1
if file then
@@ -162,95 +160,71 @@ function lexer.scan (s,matches,filter,options)
local sz = #s
local idx = 1
if sz == 0 then return nil end -- empty file
--print('sz',sz)
local res = {}
local mt = {}
mt.__index = mt
setmetatable(res,mt)
function mt.lineno() return line end
function mt.getline()
if idx < sz then
tok = strsub(s,idx,-2)
idx = sz + 1
line = line + 1
return tok
else
idx = sz + 1
line = line + 1
return file:read()
end
end
function mt.next (tok)
local t,v = tok()
while t == 'space' do
t,v = tok()
end
return t,v
end
function mt.__call ()
while true do
for _,m in ipairs(matches) do
pat = m[1]
fun = m[2]
if fun == nil then print(pat); os.exit() end
pat,fun = m[1],m[2]
if fun == nil then error("no match for "..pat) end
i1,i2 = strfind(s,pat,idx)
if i1 then
tok = strsub(s,i1,i2)
idx = i2 + 1
if not (filter and filter[fun]) then
lexer.finished = idx > sz
res1,res2 = fun(tok,options)
return fun(tok,options)
end
if res1 then
local tp = type(res1)
-- insert a token list
if tp=='table' then
yield('','')
for _,t in ipairs(res1) do
yield(t[1],t[2])
end
elseif tp == 'string' then -- or search up to some special pattern
i1,i2 = strfind(s,res1,idx)
if i1 then
tok = strsub(s,i1,i2)
idx = i2 + 1
yield('',tok)
else
yield('','')
idx = sz + 1
end
--if idx > sz then return end
else
yield(line,idx)
end
end
if idx > sz then
if file then
--repeat -- next non-empty line
line = line + 1
s = file:read()
if not s then return end
--until not s:match '^%s*$'
s = s .. '\n'
idx ,sz = 1,#s
break
else
return
end
else break end
end
end
end
end
return wrap(lex)
end
local function isstring (s)
return type(s) == 'string'
end
--- insert tokens into a stream.
-- @param tok a token stream
-- @param a1 a string is the type, a table is a token list and
-- a function is assumed to be a token-like iterator (returns type & value)
-- @param a2 a string is the value
function lexer.insert (tok,a1,a2)
if not a1 then return end
local ts
if isstring(a1) and isstring(a2) then
ts = {{a1,a2}}
elseif type(a1) == 'function' then
ts = {}
for t,v in a1() do
append(ts,{t,v})
end
else
ts = a1
end
tok(ts)
return res
end
--- get everything in a stream up to a newline.
-- @param tok a token stream
-- @return a string
function lexer.getline (tok)
local t,v = tok('.-\n')
return v
return tok:getline()
end
--- get current line number. <br>
@@ -258,15 +232,7 @@ end
-- @param tok a token stream
-- @return the line number and current column
function lexer.lineno (tok)
return tok(0)
end
--- get the rest of the stream.
-- @param tok a token stream
-- @return a string
function lexer.getrest (tok)
local t,v = tok('.+')
return v
return tok:lineno()
end
--- get the Lua keywords as a set-like table.
@@ -445,11 +411,7 @@ end
--- get the next non-space token from the stream.
-- @param tok the token stream.
function lexer.skipws (tok)
local t,v = tok()
while t == 'space' do
t,v = tok()
end
return t,v
return tok:next()
end
local skipws = lexer.skipws
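
The module-level helpers are now thin wrappers that delegate to the stream object, so existing call sites keep working; roughly (same assumptions as the sketch above):

local t,v = lexer.skipws(tok)  -- equivalent to tok:next()
print(lexer.lineno(tok))       -- equivalent to tok:lineno()
print(lexer.getline(tok))      -- equivalent to tok:getline()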

View File

@@ -76,18 +76,13 @@ local function parse_file(fname,lang, package)
local tok,f = lang.lexer(fname)
function lineno ()
while true do
local res = lexer.lineno(tok)
if type(res) == 'number' then return res end
if res == nil then return nil end
end
return tok:lineno()
end
function filename () return fname end
function F:warning (msg,kind)
kind = kind or 'warning'
lineno() -- why is this necessary?
lineno()
io.stderr:write(kind..' '..fname..':'..lineno()..' '..msg,'\n')
end
@@ -123,9 +118,7 @@ local function parse_file(fname,lang, package)
while t do
if t == 'comment' then
local comment = {}
local ldoc_comment,block = lang:start_comment(v)
if ldoc_comment and v:match '%-+$' then
ldoc_comment = false
end

View File

@@ -324,30 +324,23 @@ function M.quote (s)
return "'"..s.."'"
end
-- an embarrassing function. The PL Lua lexer does not do block comments
-- when used in line-grabbing mode, and in fact (0.9.4) does not even
-- do them properly in full-text mode, due to an ordering mistake.
-- So, we do what we can ;)
-- The PL Lua lexer does not do block comments
-- when used in line-grabbing mode, so this function grabs each line
-- until we meet the end of the comment.
function M.grab_block_comment (v,tok,end1,end2)
local res = {v}
local t,last_v
local t12 = end1..end2
k = 1
repeat
last_v = v
t,v = tok()
if t=='comment' and v:find(t12,1,true) then t12 = nil; break end
if t=='string' then v = "'"..v.."'" end
v = lexer.getline(tok)
if v:match '%]%]' then break end
append(res,v)
until last_v == end1 and v == end2
if t12 then
table.remove(res)
table.remove(res)
end
append(res,'\n')
until false
res = table.concat(res)
--print(res)
return 'comment',res
end
function M.abspath (f)
return path.normcase(path.abspath(f))
end
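
For illustration, the rewritten M.grab_block_comment can be exercised like this (a hypothetical sketch: the input and token values are invented, and note that the new body ignores end1/end2, simply calling lexer.getline() until a line contains ']]'):

-- source being scanned (hypothetical):
--   --[[ summary line
--   more detail
--   ]]
local t,v = tok()  -- suppose this yields 'comment','--[[ summary line'
t,v = M.grab_block_comment(v,tok,']',']')
-- t == 'comment'; v accumulates the opening line plus each grabbed line,
-- stopping at (and discarding) the line that contains ']]'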