@@ -228,18 +228,73 @@ public static TreeSet<String> getAllKeywordsUsingRegex(File file) throws IOExcep
228
228
Matcher tokenBlockmatcher = tokenBlockPattern .matcher (content );
229
229
while (tokenBlockmatcher .find ()) {
230
230
String tokenBlock = tokenBlockmatcher .group (0 );
231
- Matcher tokenStringValueMatcher = tokenStringValuePattern .matcher (tokenBlock );
232
- while (tokenStringValueMatcher .find ()) {
233
- String tokenValue = tokenStringValueMatcher .group (1 );
234
- // test if pure US-ASCII
235
- if (CHARSET_ENCODER .canEncode (tokenValue ) && tokenValue .matches ("[A-Za-z]+" )) {
236
- allKeywords .add (tokenValue );
231
+ // remove single and multiline comments
232
+ tokenBlock = tokenBlock .replaceAll ("(?sm)((\\ /\\ *.*?\\ *\\ /)|(\\ /\\ /.*?$))" , "" );
233
+ for (String tokenDefinition : getTokenDefinitions (tokenBlock )) {
234
+ // check if token definition is private
235
+ if (tokenDefinition .matches ("(?sm)^<\\ s*[^#].*" )) {
236
+ Matcher tokenStringValueMatcher = tokenStringValuePattern .matcher (tokenDefinition );
237
+ while (tokenStringValueMatcher .find ()) {
238
+ String tokenValue = tokenStringValueMatcher .group (1 );
239
+ // test if pure US-ASCII
240
+ if (CHARSET_ENCODER .canEncode (tokenValue ) && tokenValue .matches ("\\ w+" )) {
241
+ allKeywords .add (tokenValue );
242
+ }
243
+ }
237
244
}
238
245
}
239
246
}
240
247
return allKeywords ;
241
248
}
242
249
250
+ @ SuppressWarnings ({"PMD.EmptyWhileStmt" })
251
+ private static List <String > getTokenDefinitions (String tokenBlock ) {
252
+ List <String > tokenDefinitions = new ArrayList <>();
253
+ int level = 0 ;
254
+ char openChar = '<' ;
255
+ char closeChar = '>' ;
256
+ char [] tokenBlockChars = tokenBlock .toCharArray ();
257
+ int tokenDefinitionStart = -1 ;
258
+ for (int i = 0 ; i < tokenBlockChars .length ; ++i ) {
259
+ if (isQuotationMark (i , tokenBlockChars )) {
260
+ // skip everything inside quotation marks
261
+ while (!isQuotationMark (++i , tokenBlockChars )) {
262
+ // skip until quotation ends
263
+ }
264
+ }
265
+
266
+ char character = tokenBlockChars [i ];
267
+ if (character == openChar ) {
268
+ if (level == 0 ) {
269
+ tokenDefinitionStart = i ;
270
+ }
271
+
272
+ ++level ;
273
+ } else if (character == closeChar ) {
274
+ --level ;
275
+
276
+ if (level == 0 && tokenDefinitionStart >= 0 ) {
277
+ tokenDefinitions .add (tokenBlock .substring (tokenDefinitionStart , i + 1 ));
278
+ tokenDefinitionStart = -1 ;
279
+ }
280
+ }
281
+ }
282
+
283
+ return tokenDefinitions ;
284
+ }
285
+
286
+ private static boolean isQuotationMark (int index , char [] str ) {
287
+ if (str [index ] == '\"' ) {
288
+ // check if quotation is escaped
289
+ if (index > 0 && str [index - 1 ] == '\\' ) {
290
+ return index > 1 && str [index - 2 ] == '\\' ;
291
+ }
292
+
293
+ return true ;
294
+ }
295
+
296
+ return false ;
297
+ }
243
298
244
299
public static void buildGrammarForRelObjectNameWithoutValue (File file ) throws Exception {
245
300
Pattern methodBlockPattern = Pattern .compile (
0 commit comments