------------ -- Language-dependent parsing of code. -- This encapsulates the different strategies needed for parsing C and Lua -- source code. local class = require 'pl.class' local utils = require 'pl.utils' local tools = require 'ldoc.tools' local lexer = require 'ldoc.lexer' local tnext = lexer.skipws class.Lang() function Lang:trim_comment (s) return s:gsub(self.line_comment,'') end function Lang:start_comment (v) local line = v:match (self.start_comment_) local block = v:match(self.block_comment) return line or block, block end function Lang:empty_comment (v) return v:match(self.empty_comment_) end function Lang:grab_block_comment(v,tok) v = v:gsub(self.block_comment,'') return tools.grab_block_comment(v,tok,self.end_comment) end function Lang:find_module(tok,t,v) return '...',t,v end function Lang:item_follows(t,v) return false end function Lang:finalize() self.empty_comment_ = self.start_comment_..'%s*$' end function Lang:search_for_token (tok,type,value,t,v) while t and not (t == type and v == value) do if t == 'comment' and self:start_comment(v) then return nil,t,v end t,v = tnext(tok) end return t ~= nil,t,v end function Lang:parse_extra (tags,tok) end function Lang:is_module_modifier () return false end function Lang:parse_module_modifier (tags, tok) return nil, "@usage or @exports deduction not implemented for this language" end class.Lua(Lang) function Lua:_init() self.line_comment = '^%-%-+' -- used for stripping self.start_comment_ = '^%-%-%-+' -- used for doc comment line start self.block_comment = '^%-%-%[=*%[%-+' -- used for block doc comments self:finalize() end function Lua.lexer(fname) local f,e = io.open(fname) if not f then quit(e) end return lexer.lua(f,{}),f end function Lua:grab_block_comment(v,tok) local equals = v:match('^%-%-%[(=*)%[') v = v:gsub(self.block_comment,'') return tools.grab_block_comment(v,tok,'%]'..equals..'%]') end function Lua:parse_module_call(tok,t,v) t,v = tnext(tok) if t == '(' then t,v = tnext(tok) end if t == 'string' then -- explicit name, cool return v,t,v elseif t == '...' then -- we have to guess! return '...',t,v end end -- If a module name was not provided, then we look for an explicit module() -- call. However, we should not try too hard; if we hit a doc comment then -- we should go back and process it. Likewise, module(...) also means -- that we must infer the module name. function Lua:find_module(tok,t,v) local res res,t,v = self:search_for_token(tok,'iden','module',t,v) if not res then return nil,t,v end return self:parse_module_call(tok,t,v) end local function parse_lua_parameters (tags,tok) tags.formal_args = tools.get_parameters(tok) tags.class = 'function' end local function parse_lua_function_header (tags,tok) tags.name = tools.get_fun_name(tok) if not tags.name then return 'function has no name' end parse_lua_parameters(tags,tok) end local function parse_lua_table (tags,tok) tags.formal_args = tools.get_parameters(tok,'}',function(s) return s == ',' or s == ';' end) end --------------- function and variable inferrence ----------- -- After a doc comment, there may be a local followed by: -- [1] (l)function: function NAME -- [2] (l)function: NAME = function -- [3] table: NAME = { -- [4] field: NAME = (this is a module-level field) -- -- Depending on the case successfully detected, returns a function which -- will be called later to fill in inferred item tags function Lua:item_follows(t,v,tok) local parser, case local is_local = t == 'keyword' and v == 'local' if is_local then t,v = tnext(tok) end if t == 'keyword' and v == 'function' then -- case [1] case = 1 parser = parse_lua_function_header elseif t == 'iden' then local name,t,v = tools.get_fun_name(tok,v) if t ~= '=' then return nil end -- probably invalid code... t,v = tnext(tok) if t == 'keyword' and v == 'function' then -- case [2] tnext(tok) -- skip '(' case = 2 parser = function(tags,tok) tags.name = name parse_lua_parameters(tags,tok) end elseif t == '{' then -- case [3] case = 3 parser = function(tags,tok) tags.class = 'table' tags.name = name parse_lua_table (tags,tok) end else -- case [4] case = 4 parser = function(tags) tags.class = 'field' tags.name = name end end elseif t == 'keyword' and v == 'return' then -- case [5] case = 5 if tnext(tok) ~= '{' then return nil end parser = function(tags,tok) tags.class = 'table' parse_lua_table(tags,tok) end end return parser, is_local, case end -- we only call the function returned by the item_follows above if there -- is not already a name and a type. -- Otherwise, this is called. Currrently only tries to fill in the fields -- of a table from a table definition as identified above function Lua:parse_extra (tags,tok,case) if tags.class == 'table' and not tags.field and case == 3 then parse_lua_table(tags,tok) end end -- For Lua, a --- @usage comment means that a long -- string containing the usage follows, which we -- use to update the module usage tag. Likewise, the @export -- tag alone in a doc comment refers to the following returned -- Lua table of functions function Lua:is_module_modifier (tags) return tags.summary == '' and (tags.usage or tags.export) end function Lua:parse_module_modifier (tags, tok, F) if tags.usage then if tags.class ~= 'field' then return nil,"cannot deduce @usage" end local t1= tnext(tok) --local t2 = tok() if t1 ~= '[' then return nil, t1..' '..': not a long string' end t, v = tools.grab_block_comment('',tok,'%]%]') return true, v, 'usage' elseif tags.export then if tags.class ~= 'table' then return nil, "cannot deduce @export" end for f in tags.formal_args:iter() do F:export_item(f) end return true end end -- note a difference here: we scan C/C++ code in full-text mode, not line by line. -- This is because we can't detect multiline comments in line mode class.CC(Lang) function CC:_init() self.line_comment = '^//+' self.start_comment_ = '^///+' self.block_comment = '^/%*%*+' self:finalize() end function CC.lexer(f) f,err = utils.readfile(f) if not f then quit(err) end return lexer.cpp(f,{}) end function CC:grab_block_comment(v,tok) v = v:gsub(self.block_comment,'') return 'comment',v:sub(1,-3) end return { lua = Lua(), cc = CC() }