最新消息:20210816 当前crifan.com域名已被污染,为防止失联,请关注(页面右下角的)公众号

【未解决】antlr中setText()在递归嵌套的token中无效

ANTLR crifan 2644浏览 0评论

【问题】

目标是:

希望实现,对于包含宏依赖的内容,也可以正确替换。

即:

#define A B

#define B C

A

可以替换为C,而不是B。

其中的一种变体是,对于:

#define PV_UPPER_RANGE_VALUE    position_upper_range_value

#define GET_DEV_VAR_VALUE(a,b)        get_dev_var_value(a,0,b)

GET_DEV_VAR_VALUE(" Upper travel range: \n ", PV_UPPER_RANGE_VALUE);

希望可以替换为:

get_dev_var_value(" Upper travel range: \n ", 0, position_upper_range_value);

但是现在只能实现替换为:

get_dev_var_value(" Upper travel range: \n ",0, PV_UPPER_RANGE_VALUE);

 

目前语法代码为:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
// Combined grammar named "preprocess". The options below select the Java
// target and AST output.
grammar preprocess;
//lexer grammar preprocess;
 
options{
    language=Java;
    output = AST;
}
 
......
 
// Matches either a '//' comment up to and including its line end, or a
// non-greedy '/* ... */' comment. Both alternatives call skip().
COMMENT
    :   ('//' ~('\n'|'\r')* '\r'? '\n') {skip();}
    |   ('/*' ( options {greedy=false;} : . )* '*/') {skip();}
    ;
 
// and lexer rule
// Lexer rule for '#include' followed by a STRING. Pushes the current input
// onto the 'includes' stack and continues lexing from the named file.
// Throws java.lang.Error (with the original exception as its cause) when the
// switch to the new file fails.
INCLUDE    :    '#include' (WS)? f=STRING
{
    // drop the first and last character of the STRING text
    // (presumably the surrounding quotes - STRING is defined elsewhere)
    String name = f.getText();
    name = name.substring(1,name.length()-1);
    try {
        // save current lexer's state
        SaveStruct ss = new SaveStruct(input);
        //SaveStruct ss = new SaveStruct(input, globalDefineMap, globalDefineArgsMap);
        includes.push(ss);
  
        // switch on new input stream
        setCharStream(new ANTLRFileStream(name));
        reset();
 
    } catch(Exception fnf) {
        // chain the caught exception so the real failure reason is not lost
        throw new Error("Cannot open file " + name, fnf);
    }
};
/*
fragment
NON_CR_LF   :   ~('\r'|'\n');
 
fragment
TAB_SPACE
    :   (' ' | '\t');
*/
 
//DIRECTIVE     :   ('#define' WS* defineMacro=ID WS* defineText=STRING)
//DIRECTIVE     :   ('#define' WS* defineMacro=ID WS* defineText=( NON_CR_LF+ | (NON_CR_LF* (TAB_SPACE+ '\\' '\r'? '\n' NON_CR_LF+)*) ) )
 
// Fragment holding the body text of a #define. It repeats either a backslash
// followed by a line end ('\r'? '\n') or any single character other than
// '\r' and '\n'. Listing the backslash alternative first lets a definition
// continue across lines. The commented-out lines are disabled alternative
// definitions of the same rule.
fragment
//MACRO_TEXT :    ( (('\\'){skip();System.out.println("skip line tail back slash");} '\r'? '\n')
//MACRO_TEXT :    ( ('\\'{$channel=HIDDEN;System.out.println("set back slash to hidden");} '\r'? '\n')
//MACRO_TEXT :    ( (('\\'){setText("");System.out.println("set back slash to empty");} '\r'? '\n')
MACRO_TEXT :    (('\\' '\r'? '\n') | (~('\r'|'\n')))*;
//MACRO_TEXT :    (('\\' '\r'? '\n') | (~('\n')))*;
//MACRO_TEXT :    (('\\' '\n') | (~('\n')))*;
//MACRO_TEXT :    (('\\' '\n') | (~('\n' | '\r')))*;
//MACRO_TEXT :    ( ('\\' '\r'? '\n') | (~('\r'|'\n')))* -> ( ('\r'? '\n') | (~('\r'|'\n')))*;
//MACRO_TEXT :    (('\\'{setText("");} '\r'? '\n') | (~('\r'|'\n')))*;
/*
MACRO_TEXT :    ((('\\' '\r'? '\n') | (~('\r'|'\n')))*)
    {
        String origMultiLineStr = getText();
        String newMultiLineStr = origMultiLineStr.replace("\\", "");
        setText(newMultiLineStr);
    };
*/
//MACRO_TEXT :    ( (('\\' '\r'? '\n')=>('\r' '\n')) | (~('\r'|'\n')))*;
 
 
// Lexer rule for '#define NAME', '#define NAME text' and
// '#define NAME(p0, p1, ...) text'.
// The definition is stored in globalDefineMap under NAME as a List whose
// element 0 is the replacement text and whose following elements are the
// parameter names. The directive itself is skip()ped, so it yields no token.
DIRECTIVE
@init{
    List args = new ArrayList();
     
    String arg0Text = "";
    String arg1Text = "";
    String definedContent = "";
    String defineId = "";
}
@after{
//SETTEXT(GETTEXT()->substring(GETTEXT(),1,GETTEXT()->len-1))
//String processedTokenStr = GETTEXT();
//String processedTokenStr = state.text;
//String processedTokenStr = getText();
//String processedTokenStr = this.getText();
//System.out.println("after process, whole token string is" + processedTokenStr);
}
:   ('#define' WS* defineMacro=RAW_IDENTIFIER_CONTAIN_POINT
    {
        args.add(""); // first element will hold the macro text
    }
        (
            ( '(' // get arguments if you find them (no spaces before left paren)
                (WS)? (defineArg0=RAW_IDENTIFIER_CONTAIN_POINT (WS)? {arg0Text = defineArg0.getText(); args.add(arg0Text);})?
                ( ',' (WS)? defineArg1=RAW_IDENTIFIER_CONTAIN_POINT (WS)? {arg1Text = defineArg1.getText(); args.add(arg1Text);} )*
              ')'
            | ' '|'\t'|'\f'
            )
            ( options{greedy=true;}: ' '|'\t'|'\f' )*
            // store the text verbatim - tokenize when called
            macroText=MACRO_TEXT
            {
                definedContent = macroText.getText();
                // Remove only the backslash of a line continuation, i.e. a backslash
                // directly followed by the line end. Any other backslash (for example
                // an escape sequence inside a string literal) belongs to the macro
                // text and is kept.
                definedContent = definedContent.replaceAll("\\\\(?=\\r?\\n)", "");
                args.set(0, definedContent);
            }
        )? '\r'? '\n'
    {
        defineId = defineMacro.getText();
        globalDefineMap.put(defineId, args );
        skip();
         
        // //process the define content, to check whether it contain the previous define
        // //if yes, then process it
 
        // // save current lexer's state
        // SaveStruct ss = new SaveStruct(input);
        // //SaveStruct ss = new SaveStruct(input, globalDefineMap, globalDefineArgsMap);
        // includes.push(ss);
 
        // // switch on new input stream
        // setCharStream(new ANTLRStringStream(definedContent));
        // reset();
         
        // // isReplacingDefineContent = true;
    }
    )
    /*
    {
        //process the define content, to check whether it contain the previous define
        //if yes, then process it
 
        // save current lexer's state
        SaveStruct ss = new SaveStruct(input);
        includes.push(ss);
        // switch on new input stream
        setCharStream(new ANTLRStringStream(definedContent));
        reset();
         
        //after replacement
        //update the define map -> replace to the replaced text
        String processedDefineContent = macroText.getText();
        args.set(0, processedDefineContent);
        globalDefineMap.put(defineId, args );
        skip();
    }
    */;
 
// Lexer rule for an identifier, with macro substitution.
// The identifier text is looked up first in globalDefineArgsMap (bound macro
// arguments) and then in globalDefineMap (#define entries). The list that is
// found has the replacement text at index 0 and, for macros with parameters,
// the parameter names after it.
//  - list of size 1: the token text is replaced through setText().
//  - list of size > 1: the call arguments are parsed, bound to the parameter
//    names in globalDefineArgsMap, and the lexer is switched to a string
//    stream over the replacement text (current input saved on 'includes').
//  - nothing found: the identifier is left unchanged.
IDENTIFIER
@init{
    List define = new ArrayList();
    List foundArgs = new ArrayList();
     
    String callArg0Text = "";
    String callArg1Text = "";
}
@after{
//String curCallParaText = getText();
//  if(foundArgs.size() == 0)
//  {
//      //remove () if no para
//      setText("");
//  }
} :
    identifier=RAW_IDENTIFIER_CONTAIN_POINT
    {
        String IdText = (String)identifier.getText();
     
        // see if this is a macro argument
        define = (List)globalDefineArgsMap.get(IdText);
        // while(define != null)
        // {
            // //if define not null, then find recursively to get para's real value
            // String firstParaValue = (String)define.get(0);
            // if(globalDefineArgsMap.containsKey(firstParaValue))
            // {
                // define = (List)globalDefineArgsMap.get(firstParaValue); 
            // }
            // else
            // {
                // break;
            // }
        // }
         
        if (define==null) {
            // see if this is a macro call
            define = (List)globalDefineMap.get(IdText);
             
            //System.out.println("normal define call=" + IdText);
        }
        else
        {
            //is define args replacement
            //isReplacingDefineContent = true;
 
            //System.out.println("normal define args call=" + (String)define.get(0));
        }
    }
    // Three gated alternatives follow:
    //   1. macro with parameters (define.size()>1): parse the argument list
    //   2. define.size()==1: consume an empty pair of parentheses
    //   3. everything except case 1: match nothing more
    ( {(define!=null) && (define.size()>1)}?=> (WS|COMMENT)?
        // take in arguments if macro call requires them
        '('
        callArg0=EXPR
        {
            callArg0Text = callArg0.getText();
            //foundArgs.add(callArg0Text);
            //maybe whitespace, so need trim here
            if(!callArg0Text.isEmpty() && !callArg0Text.trim().isEmpty())
            {
                foundArgs.add(callArg0Text);
            }
        }
        ( COMMA callArg1=EXPR
        {
            callArg1Text = callArg1.getText();
            //foundArgs.add(callArg1Text);
            //maybe whitespace, so need trim here
            if(!callArg1Text.isEmpty() && !callArg1Text.trim().isEmpty())
            {
                foundArgs.add(callArg1Text);
            }
        }
        )*
        { foundArgs.size()==define.size()-1 }? // better have right amount
        //must add WS support here, otherwise not support none-para but use whitespace within (), such as: (   )
         
        ')'
    | {(define!=null) && (define.size()==1)}?=> '(' WS* ')'
    | {!((define!=null) && (define.size()>1))}?=>
    )
{
if (define!=null) {
    // NOTE: curText is assigned but never read in this action
    String curText = getText();
    String defineText = (String)define.get(0);
 
    if (define.size()==1) {
        //only have one value in list
        //-> the defineText is the define para content, or is no-para define replacement
        //-> just need replace directly
        // NOTE(review): when this rule is entered through EXPR, the caller does not
        // see this replacement. The generated lexer builds the callArg0/callArg1
        // token from the input character range, so their getText() appears to
        // return the original characters - this is the open problem of this post.
        setText(defineText);
    } else {
        //add new dict pair: (para, call value)
        for (int i=0;i<foundArgs.size();++i) {
            // treat macro arguments similar to local defines
            List arg = new ArrayList();
            arg.add((String)foundArgs.get(i));
            globalDefineArgsMap.put( (String)define.get(1+i), arg );
        }
 
        // save current lexer's state
        SaveStruct ss = new SaveStruct(input);
        //SaveStruct ss = new SaveStruct(input, globalDefineMap, globalDefineArgsMap);
        includes.push(ss);
 
        // switch on new input stream
        setCharStream(new ANTLRStringStream(defineText));
        reset();
    }
}
};
 
// Fragment: a RAW_IDENTIFIER, optionally followed by POINT and a second RAW_IDENTIFIER.
fragment RAW_IDENTIFIER_CONTAIN_POINT : RAW_IDENTIFIER (POINT RAW_IDENTIFIER)?;
 
// Fragment: an ASCII letter or underscore, followed by any number of ASCII letters, underscores or digits.
fragment RAW_IDENTIFIER : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'_'|'0'..'9')* ;
 
......
 
// Fragment used by IDENTIFIER for one macro-call argument.
// Shape: optional WS, an optional NUMBER or IDENTIFIER, then optionally one of
// a parenthesised comma-separated EXPR list, a STRING or an OPERATOR, followed
// by a further EXPR. The rule is recursive and may match the empty string.
fragment EXPR // allow just about anything without being ambiguous
    : (WS)? (NUMBER|IDENTIFIER)?
            (
                        ( LEFT EXPR ( COMMA EXPR )* RIGHT
            | STRING
            | OPERATOR // quotes, COMMA, LEFT, and RIGHT not in here
            )
            EXPR
        )?
    ;
 
......

生成lexer代码为:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
    // $ANTLR start "IDENTIFIER"
    public final void mIDENTIFIER() throws RecognitionException {
        try {
            int _type = IDENTIFIER;
            int _channel = DEFAULT_TOKEN_CHANNEL;
            CommonToken identifier=null;
            CommonToken callArg0=null;
            CommonToken callArg1=null;
 
 
                List define = new ArrayList();
                List foundArgs = new ArrayList();
                 
                String callArg0Text = "";
                String callArg1Text = "";
 
            // D:\\DevRoot\\IndustrialMobileAutomation\\HandheldDataSetter\\ANTLR\\projects\\v1.5\\HartEddlParser_local_TFS\\preprocess\\remove_comment\\preprocess.g:231:3: (identifier= RAW_IDENTIFIER_CONTAIN_POINT ({...}? => ( WS | COMMENT )? '(' callArg0= EXPR ( COMMA callArg1= EXPR )* {...}? ')' |{...}? => '(' ( WS )* ')' |{...}? =>) )
            // D:\\DevRoot\\IndustrialMobileAutomation\\HandheldDataSetter\\ANTLR\\projects\\v1.5\\HartEddlParser_local_TFS\\preprocess\\remove_comment\\preprocess.g:232:5: identifier= RAW_IDENTIFIER_CONTAIN_POINT ({...}? => ( WS | COMMENT )? '(' callArg0= EXPR ( COMMA callArg1= EXPR )* {...}? ')' |{...}? => '(' ( WS )* ')' |{...}? =>)
            {
            int identifierStart467 = getCharIndex();
            int identifierStartLine467 = getLine();
            int identifierStartCharPos467 = getCharPositionInLine();
            mRAW_IDENTIFIER_CONTAIN_POINT();
            identifier = new CommonToken(input, Token.INVALID_TOKEN_TYPE, Token.DEFAULT_CHANNEL, identifierStart467, getCharIndex()-1);
            identifier.setLine(identifierStartLine467);
            identifier.setCharPositionInLine(identifierStartCharPos467);
 
 
                    String IdText = (String)identifier.getText();
                 
                    // see if this is a macro argument
                    define = (List)globalDefineArgsMap.get(IdText);
                    // while(define != null)
                    // {
                        // //if define not null, then find recursively to get para's real value
                        // String firstParaValue = (String)define.get(0);
                        // if(globalDefineArgsMap.containsKey(firstParaValue))
                        // {
                            // define = (List)globalDefineArgsMap.get(firstParaValue); 
                        // }
                        // else
                        // {
                            // break;
                        // }
                    // }
                     
                    if (define==null) {
                        // see if this is a macro call
                        define = (List)globalDefineMap.get(IdText);
                         
                        //System.out.println("normal define call=" + IdText);
                    }
                    else
                    {
                        //is define args replacement
                        //isReplacingDefineContent = true;
 
                        //System.out.println("normal define args call=" + (String)define.get(0));
                    }
                 
......
 
                    match('(');
                    int callArg0Start520 = getCharIndex();
                    int callArg0StartLine520 = getLine();
                    int callArg0StartCharPos520 = getCharPositionInLine();
                    mEXPR();
                    callArg0 = new CommonToken(input, Token.INVALID_TOKEN_TYPE, Token.DEFAULT_CHANNEL, callArg0Start520, getCharIndex()-1);
                    callArg0.setLine(callArg0StartLine520);
                    callArg0.setCharPositionInLine(callArg0StartCharPos520);
 
 
                                callArg0Text = callArg0.getText();
                                //foundArgs.add(callArg0Text);
                                //maybe whitespace, so need trim here
                                if(!callArg0Text.isEmpty() && !callArg0Text.trim().isEmpty())
                                {
                                    foundArgs.add(callArg0Text);
                                }
                    

然后经过调试发现:在处理GET_DEV_VAR_VALUE的参数PV_UPPER_RANGE_VALUE时,确实已经去调用了对应的

1
mEXPR();

然后可以将PV_UPPER_RANGE_VALUE替换为想要的position_upper_range_value了,但是后面的:

1
callArg0Text = callArg0.getText();

依然得到的是PV_UPPER_RANGE_VALUE

即,对于:

1
2
3
4
5
6
7
8
9
10
11
12
13
// $ANTLR start "EXPR"
public final void mEXPR() throws RecognitionException {
    try {
        ......
            case 2 :
                // D:\\DevRoot\\IndustrialMobileAutomation\\HandheldDataSetter\\ANTLR\\projects\\v1.5\\HartEddlParser_local_TFS\\preprocess\\remove_comment\\preprocess.g:345:21: IDENTIFIER
                {
                mIDENTIFIER();
 
                }
                break;
 
        }

上面的mEXPR()会调用mIDENTIFIER(),而mIDENTIFIER()中的setText()对外层拿到的callArg0根本就没生效。

无法返回被setText替换后的内容。

即问题转化为:

在被嵌套(递归)调用的token规则中调用setText(),替换后的文本无法传递给最外层的规则:从上面生成的代码可以看到,callArg0是按输入流的起止位置新建的CommonToken,其getText()取到的仍是输入流中的原始字符,所以内层的setText()对它无效。

 

【解决过程】

1.参考:

ANTLR replace tokens in a recursive manner

说是将token改为rule,就可以了。

但是,我此处,不希望用到rule,还是希望用token本身。

2.待续。。。

 

【总结】

转载请注明:在路上 » 【未解决】antlr中setText()在递归嵌套的token中无效

发表我的评论
取消评论

表情

Hi,您需要填写昵称和邮箱!

  • 昵称 (必填)
  • 邮箱 (必填)
  • 网址
82 queries in 0.689 seconds, using 22.29MB memory