最新消息:20210816 当前crifan.com域名已被污染,为防止失联,请关注(页面右下角的)公众号

【未解决】antlr中setText()在递归嵌套的token中无效

ANTLR crifan 2560浏览 0评论

【问题】

目标是:

希望实现,对于包含宏依赖的内容,也可以正确替换。

即:

#define A B

#define B C

A

可以替换为C,而不是B。

其中的一种变体是,对于:

#define PV_UPPER_RANGE_VALUE    position_upper_range_value

#define GET_DEV_VAR_VALUE(a,b)        get_dev_var_value(a,0,b)

GET_DEV_VAR_VALUE(" Upper travel range: \n ", PV_UPPER_RANGE_VALUE);

希望可以替换为:

get_dev_var_value(" Upper travel range: \n ", 0, position_upper_range_value);

但是现在只能实现替换为:

get_dev_var_value(" Upper travel range: \n ",0, PV_UPPER_RANGE_VALUE);

 

目前语法代码为:

grammar preprocess;
//lexer grammar preprocess;

options{
	language=Java;
	output = AST;
}

......

COMMENT
    :   ('//' ~('\n'|'\r')* '\r'? '\n') {skip();}
    |   ('/*' ( options {greedy=false;} : . )* '*/') {skip();}
    ;

// and lexer rule
INCLUDE    :    '#include' (WS)? f=STRING 
{
    String name = f.getText();
    name = name.substring(1,name.length()-1);
    try {
        // save current lexer's state
        SaveStruct ss = new SaveStruct(input);
        //SaveStruct ss = new SaveStruct(input, globalDefineMap, globalDefineArgsMap);
        includes.push(ss);
 
        // switch on new input stream
        setCharStream(new ANTLRFileStream(name));
        reset();

    } catch(Exception fnf) { throw new Error("Cannot open file " + name); }
};
/*
fragment
NON_CR_LF	:	~('\r'|'\n');

fragment
TAB_SPACE
	:	(' ' | '\t');
*/

//DIRECTIVE 	:	('#define' WS* defineMacro=ID WS* defineText=STRING)
//DIRECTIVE 	:	('#define' WS* defineMacro=ID WS* defineText=( NON_CR_LF+ | (NON_CR_LF* (TAB_SPACE+ '\\' '\r'? '\n' NON_CR_LF+)*) ) )

fragment
//MACRO_TEXT :    ( (('\\'){skip();System.out.println("skip line tail back slash");} '\r'? '\n')
//MACRO_TEXT :    ( ('\\'{$channel=HIDDEN;System.out.println("set back slash to hidden");} '\r'? '\n')
//MACRO_TEXT :    ( (('\\'){setText("");System.out.println("set back slash to empty");} '\r'? '\n')
MACRO_TEXT :    (('\\' '\r'? '\n') | (~('\r'|'\n')))*;
//MACRO_TEXT :    (('\\' '\r'? '\n') | (~('\n')))*;
//MACRO_TEXT :    (('\\' '\n') | (~('\n')))*;
//MACRO_TEXT :    (('\\' '\n') | (~('\n' | '\r')))*;
//MACRO_TEXT :    ( ('\\' '\r'? '\n') | (~('\r'|'\n')))* -> ( ('\r'? '\n') | (~('\r'|'\n')))*;
//MACRO_TEXT :    (('\\'{setText("");} '\r'? '\n') | (~('\r'|'\n')))*;
/*
MACRO_TEXT :    ((('\\' '\r'? '\n') | (~('\r'|'\n')))*)
	{
		String origMultiLineStr = getText();
		String newMultiLineStr = origMultiLineStr.replace("\\", "");
		setText(newMultiLineStr);
	};
*/
//MACRO_TEXT :    ( (('\\' '\r'? '\n')=>('\r' '\n')) | (~('\r'|'\n')))*;


DIRECTIVE
@init{
    List args = new ArrayList();
    boolean condition = true;
    
    String arg0Text = "";
    String arg1Text = "";
    String definedContent = "";
    String defineId = "";
}
@after{
//SETTEXT(GETTEXT()->substring(GETTEXT(),1,GETTEXT()->len-1))
//String processedTokenStr = GETTEXT();
//String processedTokenStr = state.text;
//String processedTokenStr = getText();
//String processedTokenStr = this.getText();
//System.out.println("after process, whole token string is" + processedTokenStr);
}
:	('#define' WS* defineMacro=RAW_IDENTIFIER_CONTAIN_POINT
    {
        args.add(""); // first element will hold the macro text
    }
        (
            ( '(' // get arguments if you find them (no spaces before left paren)
                (WS)? (defineArg0=RAW_IDENTIFIER_CONTAIN_POINT (WS)? {arg0Text = defineArg0.getText(); args.add(arg0Text);})?
                ( ',' (WS)? defineArg1=RAW_IDENTIFIER_CONTAIN_POINT (WS)? {arg1Text = defineArg1.getText(); args.add(arg1Text);} )*
              ')'
            | ' '|'\t'|'\f'
            )
            ( options{greedy=true;}: ' '|'\t'|'\f' )*
            // store the text verbatim - tokenize when called
            macroText=MACRO_TEXT 
            {
                definedContent = macroText.getText();
                definedContent = definedContent.replace("\\", ""); // remove mutile line define last's '\'
                args.set(0, definedContent);
            }
        )? '\r'? '\n'
    {
        defineId = defineMacro.getText();
        globalDefineMap.put(defineId, args );
        skip();
        
        // //process the define content, to check whether it contain the previous define
        // //if yes, then process it

        // // save current lexer's state
        // SaveStruct ss = new SaveStruct(input);
        // //SaveStruct ss = new SaveStruct(input, globalDefineMap, globalDefineArgsMap);
        // includes.push(ss);

        // // switch on new input stream
        // setCharStream(new ANTLRStringStream(definedContent));
        // reset();
        
        // // isReplacingDefineContent = true;
    }
    )
    /*
    {
        //process the define content, to check whether it contain the previous define
        //if yes, then process it

        // save current lexer's state
        SaveStruct ss = new SaveStruct(input);
        includes.push(ss);
        // switch on new input stream
        setCharStream(new ANTLRStringStream(definedContent));
        reset();
        
        //after replacement
        //update the define map -> replace to the replaced text
        String processedDefineContent = macroText.getText();
        args.set(0, processedDefineContent);
        globalDefineMap.put(defineId, args );
        skip();
    }
    */;

IDENTIFIER
@init{
    List define = new ArrayList();
    List foundArgs = new ArrayList();
    
    String callArg0Text = "";
    String callArg1Text = "";
}
@after{
//String curCallParaText = getText();
//	if(foundArgs.size() == 0)
//	{
//		//remove () if no para
//		setText("");
//	}
} :
    identifier=RAW_IDENTIFIER_CONTAIN_POINT
    {
        String IdText = (String)identifier.getText();
    
        // see if this is a macro argument
        define = (List)globalDefineArgsMap.get(IdText);
        // while(define != null)
        // {
            // //if define not null, then find recursively to get para's real value
            // String firstParaValue = (String)define.get(0);
            // if(globalDefineArgsMap.containsKey(firstParaValue))
            // {
                // define = (List)globalDefineArgsMap.get(firstParaValue);	
            // }
            // else
            // {
                // break;
            // }
        // }
        
        if (define==null) {
            // see if this is a macro call
            define = (List)globalDefineMap.get(IdText);
            
            //System.out.println("normal define call=" + IdText);
        }
        else
        {
            //is define args replacement
            //isReplacingDefineContent = true;

            //System.out.println("normal define args call=" + (String)define.get(0));
        }
    }
    ( {(define!=null) && (define.size()>1)}?=> (WS|COMMENT)?
        // take in arguments if macro call requires them
        '('
        callArg0=EXPR
        {
            callArg0Text = callArg0.getText(); 
            //foundArgs.add(callArg0Text);
            //maybe whitespace, so need trim here
            if(!callArg0Text.isEmpty() && !callArg0Text.trim().isEmpty())
            {
            	foundArgs.add(callArg0Text);
            }
        }
        ( COMMA callArg1=EXPR 
        {
            callArg1Text = callArg1.getText();
            //foundArgs.add(callArg1Text);
            //maybe whitespace, so need trim here
            if(!callArg1Text.isEmpty() && !callArg1Text.trim().isEmpty())
            {
            	foundArgs.add(callArg1Text);
            }
        }
        )*
        { foundArgs.size()==define.size()-1 }? // better have right amount
        //must add WS support here, otherwise not support none-para but use whitespace within (), such as: (   )
        
        ')'
    | {(define!=null) && (define.size()==1)}?=> '(' WS* ')'
    | {!((define!=null) && (define.size()>1))}?=>
    )
{
if (define!=null) {
    String curText = getText();
    String defineText = (String)define.get(0);

    if (define.size()==1) {
        //only have one value in list 
        //-> the defineText is the define para content, or is no-para define replacement
        //-> just need replace directly
        setText(defineText);
    } else {
        //add new dict pair: (para, call value)
        for (int i=0;i<foundArgs.size();++i) {
            // treat macro arguments similar to local defines
            List arg = new ArrayList();
            arg.add((String)foundArgs.get(i));
            globalDefineArgsMap.put( (String)define.get(1+i), arg );
        }

        // save current lexer's state
        SaveStruct ss = new SaveStruct(input);
        //SaveStruct ss = new SaveStruct(input, globalDefineMap, globalDefineArgsMap);
        includes.push(ss);

        // switch on new input stream
        setCharStream(new ANTLRStringStream(defineText));
        reset();
    }
}
};

fragment RAW_IDENTIFIER_CONTAIN_POINT : RAW_IDENTIFIER (POINT RAW_IDENTIFIER)?;

fragment RAW_IDENTIFIER : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'_'|'0'..'9')* ;

......

fragment EXPR // allow just about anything without being ambiguous
    : (WS)? (NUMBER|IDENTIFIER)?
            (
                        ( LEFT EXPR ( COMMA EXPR )* RIGHT
            | STRING
            | OPERATOR // quotes, COMMA, LEFT, and RIGHT not in here
            )
            EXPR
        )?
    ;

......

生成lexer代码为:

	// $ANTLR start "IDENTIFIER"
	public final void mIDENTIFIER() throws RecognitionException {
		try {
			int _type = IDENTIFIER;
			int _channel = DEFAULT_TOKEN_CHANNEL;
			CommonToken identifier=null;
			CommonToken callArg0=null;
			CommonToken callArg1=null;


			    List define = new ArrayList();
			    List foundArgs = new ArrayList();
			    
			    String callArg0Text = "";
			    String callArg1Text = "";

			// D:\\DevRoot\\IndustrialMobileAutomation\\HandheldDataSetter\\ANTLR\\projects\\v1.5\\HartEddlParser_local_TFS\\preprocess\\remove_comment\\preprocess.g:231:3: (identifier= RAW_IDENTIFIER_CONTAIN_POINT ({...}? => ( WS | COMMENT )? '(' callArg0= EXPR ( COMMA callArg1= EXPR )* {...}? ')' |{...}? => '(' ( WS )* ')' |{...}? =>) )
			// D:\\DevRoot\\IndustrialMobileAutomation\\HandheldDataSetter\\ANTLR\\projects\\v1.5\\HartEddlParser_local_TFS\\preprocess\\remove_comment\\preprocess.g:232:5: identifier= RAW_IDENTIFIER_CONTAIN_POINT ({...}? => ( WS | COMMENT )? '(' callArg0= EXPR ( COMMA callArg1= EXPR )* {...}? ')' |{...}? => '(' ( WS )* ')' |{...}? =>)
			{
			int identifierStart467 = getCharIndex();
			int identifierStartLine467 = getLine();
			int identifierStartCharPos467 = getCharPositionInLine();
			mRAW_IDENTIFIER_CONTAIN_POINT(); 
			identifier = new CommonToken(input, Token.INVALID_TOKEN_TYPE, Token.DEFAULT_CHANNEL, identifierStart467, getCharIndex()-1);
			identifier.setLine(identifierStartLine467);
			identifier.setCharPositionInLine(identifierStartCharPos467);


			        String IdText = (String)identifier.getText();
			    
			        // see if this is a macro argument
			        define = (List)globalDefineArgsMap.get(IdText);
			        // while(define != null)
			        // {
			            // //if define not null, then find recursively to get para's real value
			            // String firstParaValue = (String)define.get(0);
			            // if(globalDefineArgsMap.containsKey(firstParaValue))
			            // {
			                // define = (List)globalDefineArgsMap.get(firstParaValue);	
			            // }
			            // else
			            // {
			                // break;
			            // }
			        // }
			        
			        if (define==null) {
			            // see if this is a macro call
			            define = (List)globalDefineMap.get(IdText);
			            
			            //System.out.println("normal define call=" + IdText);
			        }
			        else
			        {
			            //is define args replacement
			            //isReplacingDefineContent = true;

			            //System.out.println("normal define args call=" + (String)define.get(0));
			        }
			    
......

					match('('); 
					int callArg0Start520 = getCharIndex();
					int callArg0StartLine520 = getLine();
					int callArg0StartCharPos520 = getCharPositionInLine();
					mEXPR(); 
					callArg0 = new CommonToken(input, Token.INVALID_TOKEN_TYPE, Token.DEFAULT_CHANNEL, callArg0Start520, getCharIndex()-1);
					callArg0.setLine(callArg0StartLine520);
					callArg0.setCharPositionInLine(callArg0StartCharPos520);


					            callArg0Text = callArg0.getText(); 
					            //foundArgs.add(callArg0Text);
					            //maybe whitespace, so need trim here
					            if(!callArg0Text.isEmpty() && !callArg0Text.trim().isEmpty())
					            {
					            	foundArgs.add(callArg0Text);
					            }
					        

然后经过调试发现,对于:

拿到了GET_DEV_VAR_VALUE中的PV_UPPER_RANGE_VALUE,已经去调用了对应的

mEXPR();

然后可以将PV_UPPER_RANGE_VALUE替换为想要的position_upper_range_value了,但是后面的:

callArg0Text = callArg0.getText(); 

依然得到的是PV_UPPER_RANGE_VALUE

即,对于:

	// $ANTLR start "EXPR"
	public final void mEXPR() throws RecognitionException {
		try {
			......
				case 2 :
					// D:\\DevRoot\\IndustrialMobileAutomation\\HandheldDataSetter\\ANTLR\\projects\\v1.5\\HartEddlParser_local_TFS\\preprocess\\remove_comment\\preprocess.g:345:21: IDENTIFIER
					{
					mIDENTIFIER(); 

					}
					break;

			}

中的mEXPR,其中调用的mIDENTIFIER();中的setText,根本就没生效。

无法返回被setText替换后的内容。

即问题转化为:

在递归嵌套调用的token中的setText,无法将替换后的值,传递给最上层,setText不工作,无效。

 

【解决过程】

1.参考:

ANTLR replace tokens in a recursive manner

说是将token改为rule,就可以了。

但是,我此处,不希望用到rule,还是希望用token本身。

2.待续。。。

 

【总结】

转载请注明:在路上 » 【未解决】antlr中setText()在递归嵌套的token中无效

发表我的评论
取消评论

表情

Hi,您需要填写昵称和邮箱!

  • 昵称 (必填)
  • 邮箱 (必填)
  • 网址
82 queries in 0.187 seconds, using 22.21MB memory