|
1 |
module CodeRay
  module Scanners

    # A simple scanner for Java source code.
    #
    # The scanner walks the input with the StringScanner-style primitives
    # inherited from Scanner (scan, check, scan_until, getch, eos?) and
    # appends tokens to the given token list. Ordinary tokens are
    # [text, kind] pairs; string literals are additionally bracketed by
    # [:open, :string] and [:close, :string] marker tokens.
    class Java < Scanner

      register_for :java

      RESERVED_WORDS = %w(abstract assert break case catch class
        const continue default do else enum extends final finally for
        goto if implements import instanceof interface native new
        package private protected public return static strictfp super switch
        synchronized this throw throws transient try void volatile while)

      PREDEFINED_TYPES = %w(boolean byte char double float int long short)

      PREDEFINED_CONSTANTS = %w(true false null)

      # Maps an identifier to its token kind; anything not listed is :ident.
      IDENT_KIND = WordList.new(:ident).
        add(RESERVED_WORDS, :reserved).
        add(PREDEFINED_TYPES, :pre_type).
        add(PREDEFINED_CONSTANTS, :pre_constant)

      # What may follow a backslash inside a string or character literal:
      # a single escape character, a hex escape, or an octal escape.
      ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
      # Unicode escapes with four or eight hex digits.
      UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x

      # Scans the whole input and appends the resulting tokens to +tokens+.
      #
      # tokens::  the token list to append to (anything responding to <<).
      # options:: accepted for interface compatibility; not used here.
      #
      # Returns +tokens+.
      #
      # Calls raise_inspect when an internal invariant is violated (unknown
      # state, nil match, or - under $DEBUG - a token without a kind).
      def scan_tokens tokens, options
        state = :initial

        until eos?
          kind = nil
          match = nil

          case state
          when :initial

            if scan(/ \s+ | \\\n /x)
              kind = :space

            elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
              # Line comment, or block comment (possibly unterminated).
              kind = :comment

            elsif match = scan(/ \# \s* if \s* 0 /x)
              # "#if 0" up to the next "#elif", "#else" or "#endif" line (or
              # end of input) is treated as one comment.
              # NOTE(review): this is preprocessor syntax, not Java;
              # presumably carried over from a C scanner - confirm it is wanted.
              match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
              kind = :comment

            elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x)
              # A dot followed by a digit is left for the float branch below.
              kind = :operator

            elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
              kind = IDENT_KIND[match]
              # A plain identifier directly followed by a single colon is
              # tokenized, together with the colon, as a label.
              if kind == :ident and check(/:(?!:)/)
                match << scan(/:/)
                kind = :label
              end

            elsif match = scan(/L?"/)
              # NOTE(review): the identifier branch above already consumes a
              # leading "L", so the L-prefixed form cannot be reached from
              # here; kept unchanged to preserve behavior.
              tokens << [:open, :string]
              if match[0] == ?L
                tokens << ['L', :modifier]
                match = '"'
              end
              state = :string
              kind = :delimiter

            elsif scan(%r! \@ .* !x)
              # "@" and the rest of the line.
              kind = :preprocessor

            elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
              kind = :char

            elsif scan(/0[xX][0-9A-Fa-f]+/)
              kind = :hex

            elsif scan(/ \d+ \. \d* (?: [eE] [+-]? \d+ )? [fF]?
                       | \. \d+ (?: [eE] [+-]? \d+ )? [fF]?
                       | \d+ [eE] [+-]? \d+ [fF]?
                       | \d+ [fF] /x)
              # Floats are tried before octal and integer literals, and every
              # alternative requires a '.', an exponent or an f/F suffix.
              # This way a literal such as "12.5" or "55f" is consumed whole
              # instead of being split into several tokens by backtracking.
              kind = :float

            elsif scan(/0[0-7]+(?!\d)/)
              # The lookahead keeps e.g. "0178" from yielding a partial octal
              # token; such input falls through to the integer branch.
              kind = :oct

            elsif scan(/\d+/)
              kind = :integer

            else
              # Unrecognized character: consume it and flag it.
              getch
              kind = :error

            end

          when :string
            if scan(/[^\\\n"]+/)
              kind = :content
            elsif scan(/"/)
              # The closing delimiter is pushed here directly, so the generic
              # token push at the bottom of the loop is skipped via next.
              tokens << ['"', :delimiter]
              tokens << [:close, :string]
              state = :initial
              next
            elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
              kind = :char
            elsif scan(/ \\ | $ /x)
              # Stray backslash or end of line inside the string: close the
              # string and emit an error token.
              tokens << [:close, :string]
              kind = :error
              state = :initial
            else
              raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
            end

          else
            raise_inspect 'Unknown state', tokens

          end

          # Branches that did not capture the text themselves fall back to
          # the scanner's last match.
          match ||= matched
          if $DEBUG and not kind
            raise_inspect 'Error token %p in line %d' %
              [[match, kind], line], tokens
          end
          raise_inspect 'Empty token', tokens unless match

          tokens << [match, kind]

        end

        # Input ended inside a string literal: balance the :open marker.
        if state == :string
          tokens << [:close, :string]
        end

        tokens
      end

    end

  end
end
|
0 |
131 |
|
Add a simple Java scanner.