Skip to content
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Switch to using sets for RISC-V assembly & improve organisation
Sets are the preferred method. I also reorganised the states a bit to make things work slightly more nicely (e.g. it highlights registers in preprocessor definitions).
  • Loading branch information
Tim Hutt committed Jul 28, 2025
commit 2fa4ab37a5f5f4a93f88b4833496cf334dd547c2
86 changes: 66 additions & 20 deletions lib/rouge/lexers/riscvasm.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,16 @@ class RiscvAsm < RegexLexer
filenames '*.s', '*.S'

# C preprocessor directives. These are only processed for .S files - not .s - however
# the parsing is mostly the same in both cases.
#
# The Set is built on first use and cached in a class-level instance variable, so the
# membership checks performed while lexing do not rebuild it on every call.
#
# @return [Set<String>] directive names, without the leading `#`
def self.preproc_directive
  @preproc_directive ||= Set.new(%w(
    define elif else endif error if ifdef ifndef include line pragma undef warning
  ))
end

# Standard register name, including ABI names.
def self.register
@register ||= %w(
@register = Set.new %w(
x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 x13 x14 x15 x16 x17 x18 x19 x20 x21 x22 x23 x24 x25 x26 x27 x28 x29 x30 x31
f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 f10 f11 f12 f13 f14 f15 f16 f17 f18 f19 f20 f21 f22 f23 f24 f25 f26 f27 f28 f29 f30 f31
v0 v1 v2 v3 v4 v5 v6 v7 v8 v9 v10 v11 v12 v13 v14 v15 v16 v17 v18 v19 v20 v21 v22 v23 v24 v25 v26 v27 v28 v29 v30 v31
Expand All @@ -34,14 +34,14 @@ def self.register

# These keywords are used for some vector instructions (vsetvli etc.).
#
# The Set is built on first use and cached in a class-level instance variable, so the
# membership checks performed while lexing do not rebuild it on every call.
#
# @return [Set<String>] the recognised keyword spellings
def self.other_keyword
  @other_keyword ||= Set.new(%w(
    e8 e16 e32 e64 mf8 mf4 mf2 m1 m2 m4 m8 ta tu ma mu v0.t
  ))
end

# For %pcrel_hi(...) relocations etc.
def self.relocation_function
@relocation_function ||= %w(
@relocation_function = Set.new %w(
hi lo
pcrel_hi pcrel_lo
tprel_hi tprel_lo
Expand All @@ -59,6 +59,11 @@ def self.relocation_function
rule %r(/\*.*?\*/)m, Comment::Multiline
end

# Helper state for preprocessor-directive handling: whatever is left on the
# current line is emitted as a single-line comment, then the state is popped.
state(:in_single_line_comment) do
  rule(%r/.*/, Comment::Single, :pop!)
end

state :literals do
# 1f, 2b forward/backward label references.
rule %r/[0-9]+[fb]\b/, Name::Label
Expand All @@ -82,6 +87,36 @@ def self.relocation_function
rule %r/'(\\\\|\\'|[^'])*'/, Str::Single
end

# `%name` relocation operators (e.g. %pcrel_hi). A name found in
# relocation_function is highlighted as a builtin; any other `%word`
# is emitted as plain text.
state(:relocations) do
  rule(%r/%(\w+)\b/) do |match|
    known = self.class.relocation_function.include?(match[1])
    token(known ? Name::Builtin : Text)
  end
end

# Registers, keywords, variables and operators.
state(:words_and_operators) do
  # A run of word characters and dots: register names and the extra vector
  # keywords become constants, everything else is treated as a variable.
  rule(%r/([\w.]+)\b/) do |match|
    word = match[1]
    klass = self.class
    if klass.register.include?(word) || klass.other_keyword.include?(word)
      token Name::Constant
    else
      token Name::Variable
    end
  end

  # Variables.
  # NOTE(review): the rule above already matches plain words, so this one looks
  # like it only fires for names starting with `\` or `$` - confirm.
  rule %r/\\?[\$\w]+/, Name::Variable

  # Operators
  rule %r/[-~*\/%<>|&\^!+(),]/, Operator
end

state :root do
# Preprocessor directive. Awkwardly these are the same as single line comments.
# It seems like GCC will silently ignore unknown directives so that comments
Expand All @@ -91,7 +126,17 @@ def self.relocation_function
#
# Then it will silently ignore it!
#
rule %r/^[ \t]*#[ \t]*(:?#{RiscvAsm.preproc_directive.join('|')})\b/, Comment::Preproc, :preprocessor_directive
# [ \t] is used here to avoid matching `#\nfoo`.
rule %r/^\s*#[ \t]*(\w+)\b/ do |m|
if self.class.preproc_directive.include?(m[1])
token Comment::Preproc
push :preprocessor_directive
else
token Comment::Single
# Match the rest of the line as a comment too.
push :in_single_line_comment
end
end

mixin :comments_and_whitespace

Expand All @@ -110,44 +155,45 @@ def self.relocation_function

# Body of a recognised preprocessor directive; popped at the end of the line.
state :preprocessor_directive do
  mixin :comments_and_whitespace

  # Escaped newline. This is one case where you can't parse
  # .S and .s the same - if you try to escape a newline in a
  # preprocessor directive in .S it will work but in .s it
  # will be ignored. Here we assume .S.
  rule %r/\\\n/, Text

  # An unescaped newline ends the directive.
  rule %r/\n/, Text, :pop!

  mixin :literals
  mixin :relocations
  mixin :words_and_operators

  # Catch-all. It must stay last: placed any earlier it would swallow every
  # character before the mixins above get a chance to match.
  rule %r/./, Text
end

# Arguments of an assembler directive; popped at the end of the line.
state :directive do
  mixin :comments_and_whitespace

  # A newline ends the directive.
  rule %r/\n/, Text, :pop!

  mixin :literals
  mixin :relocations
  mixin :words_and_operators

  # Catch-all for any character not handled above.
  rule %r/./, Text
end

# Instruction operands; popped at the end of the instruction.
state :args do
  mixin :comments_and_whitespace

  # End of instruction.
  rule %r/[;\n]/, Text::Whitespace, :pop!

  # Registers, keywords, relocations, variables and operators all come from the
  # shared states below, which test membership in the Sets directly instead of
  # interpolating `join('|')` into a regex.
  mixin :literals
  mixin :relocations
  mixin :words_and_operators

  # Catch-all for any character not handled above.
  rule %r/./, Text
end
end
end
Expand Down