package Perl::LanguageServer::Parser ; use Moose::Role ; use Coro ; use Coro::AIO ; use JSON ; use File::Basename ; use v5.16; no warnings 'uninitialized' ; use Compiler::Lexer; use Data::Dump qw{dump} ; use constant SymbolKindFile => 1; use constant SymbolKindModule => 2; use constant SymbolKindNamespace => 3; use constant SymbolKindPackage => 4; use constant SymbolKindClass => 5; use constant SymbolKindMethod => 6; use constant SymbolKindProperty => 7; use constant SymbolKindField => 8; use constant SymbolKindConstructor => 9; use constant SymbolKindEnum => 10; use constant SymbolKindInterface => 11; use constant SymbolKindFunction => 12; use constant SymbolKindVariable => 13; use constant SymbolKindConstant => 14; use constant SymbolKindString => 15; use constant SymbolKindNumber => 16; use constant SymbolKindBoolean => 17; use constant SymbolKindArray => 18; use constant SymbolKindObject => 19; use constant SymbolKindKey => 20; use constant SymbolKindNull => 21; use constant SymbolKindEnumMember => 22; use constant SymbolKindStruct => 23; use constant SymbolKindEvent => 24; use constant SymbolKindOperator => 25; use constant SymbolKindTypeParameter => 26; use constant CacheVersion => 5 ; # --------------------------------------------------------------------------- sub _get_docu { my ($self, $source, $line) = @_ ; my @docu ; my $in_pod ; while ($line-- >= 0) { my $src = $source -> [$line] ; if ($src =~ /^=cut/) { $in_pod = 1 ; next ; } if ($in_pod) { last if ($src =~ /^=pod/) ; next if ($src =~ /^=\w+\s*$/) ; $src =~ s/^=item /* / ; unshift @docu, $src ; } else { next if ($src =~ /^\s*$/) ; next if ($src =~ /^\s*#[-#+~= \t]+$/) ; last if ($src !~ /^\s*#(.*?)\s*$/) ; unshift @docu, $1 ; } } shift @docu while (@docu && ($docu[0] =~ /^\s*$/)) ; pop @docu while (@docu && ($docu[-1] =~ /^\s*$/)) ; return join ("\n", @docu) ; } # --------------------------------------------------------------------------- sub parse_perl_source { my ($self, $uri, $source, $server) = @_ ; $source =~ s/\r//g ; # Compiler::Lexer computes wrong line numbers with \r my @source = split /\n/, $source ; my $lexer = Compiler::Lexer->new(); my $tokens = $lexer->tokenize($source); cede () ; #$server -> logger (dump ($tokens) . "\n") ; #my $modules = $lexer->get_used_modules($script); my @vars ; my $package = 'main::' ; my %state ; my $decl ; my $declline ; my $func ; my $parent ; my $top ; my $add ; my $func_param ; my $token_ndx = -1 ; my $brace_level = 0 ; my @stack ; my $beginchar = 0 ; my $endchar = 0 ; foreach my $token (@$tokens) { $token_ndx++ ; $token -> {data} =~ s/\r$// ; $server -> logger ("token=", dump ($token), "\n") if ($Perl::LanguageServer::debug3) ; if (exists $state{method_mod} && $token -> {name} eq 'RawString') { $token -> {name} = 'Function' ; delete $state{method_mod} ; } my $name = $token -> {name} ; if ($name =~ /^(?:VarDecl|OurDecl|FunctionDecl)$/) { $decl = $token -> {data}, $declline = $token -> {line} ; } elsif ($name =~ /Var$/) { $top = $decl eq 'our' || !$parent?\@vars:$parent ; push @$top, { name => $token -> {data}, kind => SymbolKindVariable, containerName => $decl eq 'our'?$package:$func, ($decl?(definition => $decl):()), ($decl eq 'my'?(localvar => $decl):()), } ; $add = $top -> [-1] ; $token -> {line} = $declline if ($decl) ; $decl = undef ; } elsif ($name eq 'LeftBrace') { $brace_level++ ; $decl = undef ; if (@vars && $vars[-1]{kind} == SymbolKindVariable) { $vars[-1]{name} =~ s/^\$/%/ ; } } elsif ($name =~ /^(?:RightBrace|SemiColon)$/) { $brace_level-- if ($name eq 'RightBrace') ; if (@stack > 0 && $brace_level == $stack[-1]{brace_level}) { my $stacktop = pop @stack ; $parent = $stacktop -> {parent} ; $func = $stacktop -> {func} ; my $symbol = $stacktop -> {symbol} ; my $start_line = $symbol -> {range}{start}{line} // $symbol -> {line} ; $symbol -> {range} = { start => { line => $start_line, character => 0 }, end => { line => $token -> {line}-1, character => 9999 }} if (defined ($start_line)) ; } if ($name eq 'SemiColon') { $decl = undef ; # continue does only work in switch statement, which is deprecated and was removed # unclear, if this is still necessray? #continue ; } } elsif ($name eq 'LeftBracket') { if (@vars && $vars[-1]{kind} == SymbolKindVariable) { $vars[-1]{name} =~ s/^\$/@/ ; } } elsif ($name =~ /^(?:Function|Method)$/) { if ($token -> {data} =~ /^\w/) { $top = !$parent?\@vars:$parent ; push @$top, { name => $token -> {data}, kind => SymbolKindFunction, containerName => @stack?$func:$package, ($decl?(definition => $decl):()), } ; $func_param = $add = $top -> [-1] ; if ($decl) { push @stack, { brace_level => $brace_level, parent => $parent, func => $func, 'package' => $package, symbol => $add, } ; $token -> {line} = $declline ; $func = $token -> {data} ; $parent = $top -> [-1]{children} ||= [] ; } my $src = $source[$token -> {line}-1] ; my $i ; if ($src && ($i = index($src, $func) >= 0)) { $beginchar = $i + 1 ; $endchar = $i + 1 + length ($func) ; } } $decl = undef ; } elsif ($name eq 'ArgumentArray') { if ($func_param) { my @params ; if ($tokens -> [$token_ndx - 1]{name} eq 'Assign' && $tokens -> [$token_ndx - 2]{name} eq 'RightParenthesis') { for (my $i = $token_ndx - 3; $i >= 0; $i--) { next if ($tokens -> [$i]{name} eq 'Comma') ; last if ($tokens -> [$i]{name} !~ /Var$/) ; push @params, $tokens -> [$i]{data} ; } my $func_doc = $self -> _get_docu (\@source, $func_param -> {range}{start}{line} // $func_param -> {line}) ; my @parameters ; foreach my $p (reverse @params) { push @parameters, { label => $p, } ; } $func_param -> {detail} = '(' . join (',', reverse @params) . ')' ; $func_param -> {signature} = { label => $func_param -> {name} . $func_param -> {detail}, documentation => $func_doc, parameters => \@parameters } ; } $func_param = undef ; } } elsif ($name eq 'Prototype') { if ($func_param) { my @params = split /\s*,\s*/, $token -> {data} ; my $func_doc = $self -> _get_docu (\@source, $func_param -> {range}{start}{line} // $func_param -> {line}) ; my @parameters ; foreach my $p (@params) { push @parameters, { label => $p, } ; } $func_param -> {detail} = '(' . join (',', @params) . ')' ; $func_param -> {signature} = { label => $func_param -> {name} . $func_param -> {detail}, documentation => $func_doc, parameters => \@parameters } ; $func_param = undef ; } } elsif ($name =~ /^(?:Package|UseDecl)$/) { $state{is} = $token -> {data} ; $state{module} = 1 ; } elsif ($name =~ /^(?:ShortHashDereference|ShortArrayDereference)$/) { $state{scalar} = '$' ; } elsif ($name eq 'Key') { if (exists ($state{constant})) { $top = \@vars ; push @$top, { name => $token -> {data}, kind => SymbolKindConstant, containerName => $package, definition => 1, } ; $add = $top -> [-1] ; } elsif (exists ($state{scalar})) { $top = $decl eq 'our' || !$parent?\@vars:$parent ; push @$top, { name => $state{scalar} . $token -> {data}, kind => SymbolKindVariable, containerName => $decl eq 'our'?$package:$func, } ; $add = $top -> [-1] ; } elsif ($token -> {data} =~ /^(?:has|class_has)$/) { $state{has} = 1 ; } elsif ($token -> {data} =~ /^(?:around|before|after)$/) { $state{method_mod} = 1 ; $decl = $token -> {data}, $declline = $token -> {line} ; } elsif ($token -> {data} =~ /^[a-z_][a-z0-9_]+$/i) { $top = \@vars ; push @$top, { name => $token -> {data}, kind => SymbolKindFunction, } ; $add = $top -> [-1] ; } } elsif ($name eq 'RawString') { if (exists ($state{has})) { $top = \@vars ; push @$top, { name => $token -> {data}, kind => SymbolKindProperty, containerName => $package, definition => 1, } ; $add = $top -> [-1] ; } } elsif ($name eq 'UsedName') { if ($token -> {data} eq 'constant') { delete $state{module} ; $state{constant} = 1 ; } else { $state{ns} = [$token->{data}] ; } } elsif($name eq 'Namespace') { $state{ns} ||= [] ; push @{$state{ns}}, $token -> {data} ; } elsif ($name eq 'NamespaceResolver') { # make sure it is not matched below } elsif ($name eq 'Assign' or $token -> {data} =~ /^\W/) { if ($name eq 'Assign') { $decl = undef ; } if (exists ($state{ns})) { if ($state{module}) { my $def ; if ($state{is} eq 'package') { $def = 1 ; $package = join ('::', @{$state{ns}}) ; $top = \@vars ; push @$top, { name => $package, kind => SymbolKindModule, #containerName => join ('::', @{$state{ns}}), #($def?(definition => $def):()), definition => 1, } ; $add = $top -> [-1] ; } else { my $name = pop @{$state{ns}} ; $top = \@vars ; push @$top, { name => $name, kind => SymbolKindModule, containerName => join ('::', @{$state{ns}}), ($def?(definition => $def):()), } ; $add = $top -> [-1] ; } } else { my $name = shift @{$state{ns}} ; $top = \@vars ; push @$top, { name => $name, kind => SymbolKindFunction, containerName => join ('::', @{$state{ns}}), } ; $add = $top -> [-1] ; } } %state = () ; } if ($add) { if (!$uri) { $add -> {line} = $token -> {line}-1 ; } else { #$add -> {location} = { uri => $uri, range => { start => { line => $token -> {line}-1, character => 0 }, end => { line => $token -> {line}-1, character => 0 }}} ; $add -> {range} = { start => { line => $token -> {line}-1, character => 0 }, end => { line => $token -> {line}-1, character => ($endchar?9999:0) }} ; $add -> {selectionRange} = { start => { line => $token -> {line}-1, character => $beginchar }, end => { line => $token -> {line}-1, character => $endchar }} ; $beginchar = $endchar = 0 ; } $server -> logger ("var=", dump ($add), "\n") if ($Perl::LanguageServer::debug3) ; $add = undef ; } } $server -> logger (dump (\@vars), "\n") if ($Perl::LanguageServer::debug3) ; return wantarray?(\@vars, $tokens):\@vars ; } # ---------------------------------------------------------------------------- sub _parse_perl_source_cached { my ($self, $uri, $source, $path, $stats, $server) = @_ ; my $cachepath ; if (!$self -> disable_cache) { my $escpath = $path ; $escpath =~ s/:/%3A/ ; $cachepath = $self -> state_dir .'/' . $escpath ; $self -> mkpath (dirname ($cachepath)) ; #$server -> logger ("$path -> cachepath=$cachepath\n") ; aio_stat ($cachepath) ; if (-e _) { my $mtime_cache = -M _ ; aio_stat ($path) ; my $mtime_src = -M _ ; #$server -> logger ("cache = $mtime_cache src = $mtime_src\n") ; if ($mtime_src > $mtime_cache) { #$server -> logger ("load from cache\n") ; my $cache ; aio_load ($cachepath, $cache) ; my $cache_data = eval { $Perl::LanguageServer::json -> decode ($cache) ; } ; if ($@) { $self -> logger ("Loading of $cachepath failed, reparse file ($@)\n") ; } elsif (ref ($cache_data) eq 'HASH') { if ($cache_data -> {version} == CacheVersion) { $stats -> {loaded}++ ; return $cache_data -> {vars} ; } } } } } my $vars = $self -> parse_perl_source ($uri, $source, $server) ; if ($cachepath) { my $ifh = aio_open ($cachepath, IO::AIO::O_WRONLY | IO::AIO::O_TRUNC | IO::AIO::O_CREAT, 0664) or die "open $cachepath failed ($!)" ; aio_write ($ifh, undef, undef, $Perl::LanguageServer::json -> encode ({ version => CacheVersion, vars => $vars}), 0) ; aio_close ($ifh) ; } $stats -> {parsed}++ ; return $vars ; } # ---------------------------------------------------------------------------- sub _parse_dir { my ($self, $server, $dir, $vars, $stats) = @_ ; my $text ; my $fn ; my $uri ; my $file_vars ; my $filefilter = $self -> file_filter_regex ; my $ignore_dir = $self -> ignore_dir ; my ($dirs, $files) = aio_scandir ($dir, 4) ; if ($dirs) { foreach my $d (sort @$dirs) { next if (exists $ignore_dir -> {$d}) ; $self -> _parse_dir ($server, $dir . '/' . $d, $vars, $stats) ; } } if ($files) { foreach my $f (sort @$files) { next if ($f !~ /$filefilter/) ; $fn = $dir . '/' . $f ; aio_load ($fn, $text) ; $uri = $self -> uri_server2client ('file://' . $fn) ; #$server -> logger ("parse $fn -> $uri\n") ; $file_vars = $self -> _parse_perl_source_cached (undef, $text, $fn, $stats, $server) ; $vars -> {$uri} = $file_vars ; #$server -> logger ("done $fn\n") ; my $cnt = keys %$vars ; $server -> logger ("loaded $stats->{loaded} files, parsed $stats->{parsed} files, $cnt files\n") if ($cnt % 100 == 0) ; } } } # ---------------------------------------------------------------------------- sub background_parser { my ($self, $server) = @_ ; my $channel = $self -> parser_channel ; $channel -> shutdown ; # end other parser cede ; $channel = $self -> parser_channel (Coro::Channel -> new) ; my $folders = $self -> folders ; $server -> logger ("background_parser folders = ", dump ($folders), "\n") ; %{$self -> symbols} = () ; my $stats = {} ; foreach my $dir (values %$folders) { $self -> _parse_dir ($server, $dir, $self -> symbols, $stats) ; cede ; } my $cnt = keys %{$self -> symbols} ; $server -> logger ("initial parsing done, loaded $stats->{loaded} files, parsed $stats->{parsed} files, $cnt files\n") ; my $filefilter = $self -> file_filter_regex ; while (my $item = $channel -> get) { my ($cmd, $uri) = @$item ; my $fn = substr ($self -> uri_client2server ($uri), 7) ; next if (basename ($fn) !~ /$filefilter/) ; my $text ; aio_load ($fn, $text) ; $server -> logger ("parse $fn -> $uri\n") ; my $file_vars = $self -> _parse_perl_source_cached (undef, $text, $fn, {}, $server) ; $self -> symbols -> {$uri} = $file_vars ; } $server -> logger ("background_parser quit\n") ; } 1 ;