最新消息:20210816 当前crifan.com域名已被污染,为防止失联,请关注(页面右下角的)公众号

【未解决】antlr实现#define的宏的多层(递归)替换后,去不掉宏的内容

ANTLR crifan 2531浏览 0评论

【问题】

antlr v3,用代码:

grammar preprocess;
//lexer grammar preprocess;

options{
	language=Java;
	output = AST;
}

@lexer::header {
//package com.mm.antlrv3demo;

import java.io.*;
import java.util.*;
}

@parser::header {
//package com.mm.antlrv3demo;
}

// Java members injected into the generated lexer: the macro tables, a stack of
// suspended input streams, and a nextToken() override that resumes a suspended
// stream when the current one reaches EOF.
@lexer::members {
    //public static TokenStreamSelector selector; // must be assigned externally
    // ifState / ifStates are not referenced by any rule shown in this grammar.
    protected static Integer ifState = 1; // -1: no-else false, 0:false, 1: true
    protected static List ifStates = new ArrayList(); // holds nested if conditions
    // Filled by DIRECTIVE: macro name -> List whose element 0 is the macro text
    // and whose remaining elements are the parameter names.
    // Declared static, so the table is shared by every lexer instance.
    protected static Map defines = new Hashtable(); // holds the defines
    // Filled by IDENTIFIER on a macro call: parameter name -> one-element List
    // holding the argument text found at the call site.
    protected Map defineArgs = new Hashtable(); // holds the args for a macro call
    /*
    public void uponEOF() throws TokenStreamException, CharStreamException {
        try {
            selector.pop(); // return to old lexer/stream
            selector.retry();
        } catch (NoSuchElementException e) {
            // return a real EOF if nothing in stack
        }
    }
    */
	
	// Remembers a character stream together with a mark() taken on it, so that
	// nextToken() can switch back to that stream and rewind to the marked position.
	class SaveStruct {
      SaveStruct(CharStream input){
        this.input = input;
        this.marker = input.mark();
      }
      public CharStream input;
      public int marker;
     }
     // Suspended streams, innermost last. Entries are pushed by the INCLUDE,
     // DIRECTIVE and IDENTIFIER actions and popped by nextToken().
     Stack<SaveStruct> includes = new Stack<SaveStruct>();
     
	// class SaveStruct_defines {
      // SaveStruct(CharStream input){
        // this.input = input;
        // this.marker = input.mark();
      // }
      // public CharStream input;
      // public int marker;
     // }
     // Stack<SaveStruct_defines> definesSaveStruct = new Stack<SaveStruct_defines>();
 
    // We should override this method for handling EOF of included file
    // Returns the next token; when the current stream is exhausted and a
    // suspended stream exists, lexing continues from that stream instead of
    // reporting EOF.
     public Token nextToken(){
       Token token = super.nextToken();
 
       if(token.getType() == Token.EOF && !includes.empty()){
        // We've got EOF and have non empty stack.
         SaveStruct ss = includes.pop();
         setCharStream(ss.input);
         input.rewind(ss.marker);
         //this should be used instead of super [like below] to handle exits from nested includes
         //it matters, when the 'include' token is the last in previous stream (using super, lexer 'crashes' returning EOF token)
         token = this.nextToken();
       }
 
      // Skip first token after switching on another input.
      // You need to use this rather than super as there may be nested include files
      // NOTE(review): a negative start index is presumably what the token carries
      // right after an action called reset() - confirm against the ANTLR 3 runtime.
       if(((CommonToken)token).getStartIndex() < 0)
         token = this.nextToken();
 
       return token;
     }
}

// Comments never reach the parser: both alternatives call skip().
// Alternative 1 is a '//' comment up to and including its line break;
// alternative 2 is a '/* ... */' comment, matched non-greedily so it ends at
// the first '*/'.
COMMENT
    :   ('//' ~('\n'|'\r')* '\r'? '\n') {skip();}
    |   ('/*' ( options {greedy=false;} : . )* '*/') {skip();}
    ;

// and lexer rule
// Handles '#include "file"': suspends the current input stream and continues
// lexing from the named file. The suspended stream is pushed on `includes`
// and is resumed by nextToken() once the included file reaches EOF.
// Throws Error (with the IOException as its cause) when the file cannot be opened.
INCLUDE    :    '#include' (WS)? f=STRING 
{
    // f matched STRING, so its text is wrapped in double quotes; strip them.
    String name = f.getText();
    name = name.substring(1,name.length()-1);

    // Open the file before touching any lexer state, so that a failed open
    // does not leave a stale entry on the includes stack.
    CharStream included;
    try {
        included = new ANTLRFileStream(name);
    } catch(IOException ioe) {
        // keep the underlying exception as the cause instead of discarding it
        throw new Error("Cannot open file " + name, ioe);
    }

    // save current lexer's state
    includes.push(new SaveStruct(input));

    // switch on new input stream
    setCharStream(included);
    reset();
};
/*
fragment
NON_CR_LF	:	~('\r'|'\n');

fragment
TAB_SPACE
	:	(' ' | '\t');
*/

//DIRECTIVE 	:	('#define' WS* defineMacro=ID WS* defineText=STRING)
//DIRECTIVE 	:	('#define' WS* defineMacro=ID WS* defineText=( NON_CR_LF+ | (NON_CR_LF* (TAB_SPACE+ '\\' '\r'? '\n' NON_CR_LF+)*) ) )


// Body of a #define: zero or more characters up to (not including) the first
// line break that is not preceded by a backslash. A backslash directly followed
// by a line break is consumed as part of the text, which lets a macro body
// continue on the next line. The backslash itself stays in the matched text.
fragment
//MACRO_TEXT :    ( (('\\'){skip();System.out.println("skip line tail back slash");} '\r'? '\n')
//MACRO_TEXT :    ( ('\\'{$channel=HIDDEN;System.out.println("set back slash to hidden");} '\r'? '\n')
//MACRO_TEXT :    ( (('\\'){setText("");System.out.println("set back slash to empty");} '\r'? '\n')
MACRO_TEXT :    (('\\' '\r'? '\n') | (~('\r'|'\n')))*;
//MACRO_TEXT :    (('\\' '\r'? '\n') | (~('\n')))*;
//MACRO_TEXT :    (('\\' '\n') | (~('\n')))*;
//MACRO_TEXT :    (('\\' '\n') | (~('\n' | '\r')))*;
//MACRO_TEXT :    ( ('\\' '\r'? '\n') | (~('\r'|'\n')))* -> ( ('\r'? '\n') | (~('\r'|'\n')))*;
//MACRO_TEXT :    (('\\'{setText("");} '\r'? '\n') | (~('\r'|'\n')))*;
/*
MACRO_TEXT :    ((('\\' '\r'? '\n') | (~('\r'|'\n')))*)
	{
		String origMultiLineStr = getText();
		String newMultiLineStr = origMultiLineStr.replace("\\", "");
		setText(newMultiLineStr);
	};
*/
//MACRO_TEXT :    ( (('\\' '\r'? '\n')=>('\r' '\n')) | (~('\r'|'\n')))*;


// Handles '#define NAME', '#define NAME text' and '#define NAME(p1, p2, ...) text'.
// Stores a List in `defines` under NAME: element 0 is the macro text, the
// following elements are the parameter names. The directive itself is skipped.
// This version additionally switches the lexer onto the macro text as soon as
// that text has been read (see the action after MACRO_TEXT).
DIRECTIVE
@init{
    List args = new ArrayList();
    // not used anywhere in this rule
    boolean condition = true;
    
    String arg0Text = "";
    String arg1Text = "";
    // stays "" when the #define has no parameter list and no text
    String definedContent = "";
    String defineId = "";
}
:	('#define' WS* defineMacro=RAW_IDENTIFIER
    {
        args.add(""); // first element will hold the macro text
    }
        (
            ( '(' // get arguments if you find them (no spaces before left paren)
                (WS)? defineArg0=RAW_IDENTIFIER (WS)? {arg0Text = defineArg0.getText(); args.add(arg0Text);}
                ( ',' (WS)? defineArg1=RAW_IDENTIFIER (WS)? {arg1Text = defineArg1.getText(); args.add(arg1Text);} )*
              ')'
            | ' '|'\t'|'\f'
            )
            ( options{greedy=true;}: ' '|'\t'|'\f' )*
            // store the text verbatim - tokenize when called
            macroText=MACRO_TEXT 
            {
                definedContent = macroText.getText();
                // meant to drop the trailing '\' of multi-line defines; note that
                // replace() removes every backslash in the text, not only those
                definedContent = definedContent.replace("\\", "");
                args.set(0, definedContent);
                
        
	        //process the define content, to check whether it contain the previous define
	        //if yes, then process it
	        
	        // NOTE(review): the stream is switched here, before the rule's trailing
	        // '\r'? '\n' has been matched, so that match is presumably attempted on
	        // the new stream - verify against the reported "expecting '\n'" errors.
	        // save current lexer's state
	        SaveStruct ss = new SaveStruct(input);
	        includes.push(ss);
	        // switch on new input stream
	        setCharStream(new ANTLRStringStream(definedContent));
	        reset();
            }
        )? '\r'? '\n'
    {
        // register the macro and drop the directive from the token stream
        defineId = defineMacro.getText();
        defines.put(defineId, args );
        skip();
    }
    );

// Matches an identifier and expands it when it names a macro argument or a macro.
//  - Found in defineArgs, or a parameterless entry of defines: the token text is
//    replaced through setText(); the replacement text is not lexed again.
//  - Macro with parameters: the call's arguments are read, recorded in
//    defineArgs, the current stream is pushed on `includes`, and lexing
//    continues inside the macro text.
// Identifiers that match neither table are emitted unchanged.
IDENTIFIER @init{
    List define = new ArrayList();
    List foundArgs = new ArrayList();
    
    String callArg0Text = "";
    String callArg1Text = "";
} :
    identifier=RAW_IDENTIFIER
    {
        // see if this is a macro argument
        define = (List)defineArgs.get(identifier.getText());
        if (define==null) {
            // see if this is a macro call
            define = (List)defines.get(identifier.getText());
        }
    }
    // gated predicate: this alternative is only viable for a macro that has parameters
    ( {(define!=null) && (define.size()>1)}?=> (WS|COMMENT)?
        // take in arguments if macro call requires them
        '('
        callArg0=EXPR
        {
            callArg0Text = callArg0.getText(); 
            foundArgs.add(callArg0Text);
        }
        ( COMMA callArg1=EXPR 
        {
            callArg1Text = callArg1.getText();
            foundArgs.add(callArg1Text);
        }
        )*
        { foundArgs.size()==define.size()-1 }? // better have right amount
        ')'
    // otherwise (not a macro, or a macro without parameters): match nothing more
    | {!((define!=null) && (define.size()>1))}?=>
    )
{
if (define!=null) {
    String defineText = (String)define.get(0);

    if (define.size()==1) {
        //only have one value in list -> the defineText is the define para content -> just need replace directly
        setText(defineText);
    } else {
        //add new dict pair: (para, call value)
        // entries put into defineArgs are never removed in the code shown here
        for (int i=0;i<foundArgs.size();++i) {
            // treat macro arguments similar to local defines
            List arg = new ArrayList();
            arg.add((String)foundArgs.get(i));
            defineArgs.put( (String)define.get(1+i), arg );
        }

        // save current lexer's state
        SaveStruct ss = new SaveStruct(input);
        includes.push(ss);

        // switch on new input stream
        setCharStream(new ANTLRStringStream(defineText));
        reset();
    }
}
};

// Plain identifier shape (letter or '_' followed by letters, digits or '_').
// A fragment: it is only used from DIRECTIVE and IDENTIFIER and never emitted
// as a token of its own.
fragment RAW_IDENTIFIER : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'_'|'0'..'9')* ;

NUMBER : ('0'..'9') ('0'..'9'|'a'..'z'|'A'..'Z'|'_')* ; // allow alpha suffixes on numbers (e.g. L for long)

// group symbols into categories to parse EXPR
LEFT  : '(' | '[' | '{' ;
RIGHT : ')' | ']' | '}' ;
COMMA : ',' ;
OPERATOR : '!' | '#' | '$' | '%' | '&' | '*' | '+' | '-' | '.' | '/' | ':' | ';' | '<' | '=' | '>' | '?' | '@' | '\\' | '^' | '`' | '|' | '~' ;


// Text of one macro-call argument, used by IDENTIFIER. An EXPR is an optional
// NUMBER or IDENTIFIER, optionally followed by a bracketed group (which may
// contain comma-separated EXPRs), a STRING or an OPERATOR, and then another
// EXPR. A bare COMMA or RIGHT is not part of an EXPR, so those end the argument.
fragment EXPR // allow just about anything without being ambiguous
    : (WS)? (NUMBER|IDENTIFIER)?
            (
                        ( LEFT EXPR ( COMMA EXPR )* RIGHT
            | STRING
            | OPERATOR // quotes, COMMA, LEFT, and RIGHT not in here
            )
            EXPR
        )?
    ;

//INT :	'0'..'9'+    ;

// Floating-point literal: digits with a '.', a leading '.', or digits with an exponent.
FLOAT
    :   ('0'..'9')+ '.' ('0'..'9')* EXPONENT?
    |   '.' ('0'..'9')+ EXPONENT?
    |   ('0'..'9')+ EXPONENT
    ;

// A single whitespace character; sent to the hidden channel rather than skipped.
WS  :   ( ' '
        | '\t'
        | '\r'
        | '\n'
        ) {$channel=HIDDEN;}
    ;

//RestSymbo	:	'{' | '}' | '&' | ';' | ',' | '+' | '-' | ')' | '(' | '~' | '/' | '`' | '$' | '@' | '%' | '^' | '#' | '\\' ;

// Double-quoted string; a backslash is only accepted as the start of an ESC_SEQ.
STRING
    :  '"' ( ESC_SEQ | ~('\\'|'"') )* '"'
    ;

// Single-quoted character literal holding exactly one character or escape.
CHAR:  '\'' ( ESC_SEQ | ~('\''|'\\') ) '\''
    ;

// Exponent part of a FLOAT, e.g. e+10.
fragment
EXPONENT : ('e'|'E') ('+'|'-')? ('0'..'9')+ ;

fragment
HEX_DIGIT : ('0'..'9'|'a'..'f'|'A'..'F') ;

// Escape sequences accepted inside STRING and CHAR.
fragment
ESC_SEQ
    :   '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
    |   UNICODE_ESC
    |   OCTAL_ESC
    ;

// Octal escape of one to three digits; the three-digit form starts with 0..3.
fragment
OCTAL_ESC
    :   '\\' ('0'..'3') ('0'..'7') ('0'..'7')
    |   '\\' ('0'..'7') ('0'..'7')
    |   '\\' ('0'..'7')
    ;

// Backslash-u escape followed by exactly four hex digits.
fragment
UNICODE_ESC
    :   '\\' 'u' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
    ;
    
// Parser rules. These are the only parser rules in the grammar as shown:
// a header is zero or more include rules, each of which is one INCLUDE token.
header
	:	include*;
include	:	INCLUDE;//'#include ' '<' ID ('.h' | '.ddl') '>';

 

其中,这部分的代码:

	        //process the define content, to check whether it contain the previous define
	        //if yes, then process it
	        
	        // save current lexer's state
	        SaveStruct ss = new SaveStruct(input);
	        includes.push(ss);
	        // switch on new input stream
	        setCharStream(new ANTLRStringStream(definedContent));
	        reset();

是后来加进去,为了处理多层的宏定义的,即,形如:

#define  B C

#define A B

的内容的。

 

上面的完整的代码,用于处理内容:

……

#define get_dev_var_value(a,b,c)    _get_dev_var_value((a),(b),METHODID(c))

#define GET_DEV_VAR_VALUE(a,b)        get_dev_var_value(a,0,b)

//#define GET_DEV_VAR_VALUE(a,b)        _get_dev_var_value((a),0,METHODID(b))


METHOD message_methode

{

    LABEL "Message";

    HELP "TEST";

    DEFINITION

    {

        ……

       
        GET_DEV_VAR_VALUE("Choose between \n 3, 5, 9, 17 or 33 points \n No of points:", count);

       
        GET_DEV_VAR_VALUE("Choose between \n 3, 5, 9, 17 or 33 points \n No of points:", transfer_function);  // enumerated parameter

        ……

    }

}

 

结果变成了:

line 1:0 mismatched character ‘_’ expecting ‘\n’

line 1:0 mismatched character ‘g’ expecting ‘\n’

line 1:0 mismatched character ‘"’ expecting ‘\n’

line 1:19 mismatched character ‘<EOF>’ expecting ‘"’

line 1:0 mismatched character ‘R’ expecting ‘\n’

line 1:0 mismatched character ‘0’ expecting ‘\n’

line 1:0 mismatched character ‘"’ expecting ‘\n’

line 1:26 mismatched character ‘<EOF>’ expecting ‘"’

 

 

get_dev_var_value((a),(b),METHODID(c))

et_dev_var_value(a,0,b)


METHOD message_methode

{

    LABEL "Message";

    HELP "TEST";

    DEFINITION

    {       
        ……

       
        GET_DEV_VAR_VALUE("Choose between \n 3, 5, 9, 17 or 33 points \n No of points:", count);

       
        GET_DEV_VAR_VALUE("Choose between \n 3, 5, 9, 17 or 33 points \n No of points:", transfer_function);         
        ……

    }

}

 

即,多层(嵌套)的宏调用不仅没有被替换,而且#define中宏的内容(即MACRO_TEXT匹配到的文本)也被原样输出,没有被去除。

【解决过程】

1.antlr语法换成:

// Attempt 1: same rule as the original DIRECTIVE, except that the switch onto
// the macro text now happens in the final action, i.e. after the trailing line
// break has been matched and after the macro has been stored in `defines`.
DIRECTIVE
@init{
    List args = new ArrayList();
    // not used anywhere in this rule
    boolean condition = true;
    
    String arg0Text = "";
    String arg1Text = "";
    // stays "" when the #define has no parameter list and no text
    String definedContent = "";
    String defineId = "";
}
:	('#define' WS* defineMacro=RAW_IDENTIFIER
    {
        args.add(""); // first element will hold the macro text
    }
        (
            ( '(' // get arguments if you find them (no spaces before left paren)
                (WS)? defineArg0=RAW_IDENTIFIER (WS)? {arg0Text = defineArg0.getText(); args.add(arg0Text);}
                ( ',' (WS)? defineArg1=RAW_IDENTIFIER (WS)? {arg1Text = defineArg1.getText(); args.add(arg1Text);} )*
              ')'
            | ' '|'\t'|'\f'
            )
            ( options{greedy=true;}: ' '|'\t'|'\f' )*
            // store the text verbatim - tokenize when called
            macroText=MACRO_TEXT 
            {
                definedContent = macroText.getText();
                // meant to drop the trailing '\' of multi-line defines; note that
                // replace() removes every backslash in the text, not only those
                definedContent = definedContent.replace("\\", "");
                args.set(0, definedContent);
            }
        )? '\r'? '\n'
    {
        defineId = defineMacro.getText();
        defines.put(defineId, args );
        skip();
        
        //process the define content, to check whether it contain the previous define
        //if yes, then process it
        
        // The macro text is pushed as a new input stream, so it is lexed and
        // its tokens are emitted right after this (skipped) directive.
        // save current lexer's state
        SaveStruct ss = new SaveStruct(input);
        includes.push(ss);
        // switch on new input stream
        setCharStream(new ANTLRStringStream(definedContent));
        reset();
    }
    );

结果是:

_get_dev_var_value((a),(b),METHODID(c))_get_dev_var_value((a),(0),METHODID(b))

METHOD message_methode

{

    LABEL "Message";

    HELP "TEST";

    DEFINITION

    {       
        ……

       
        _get_dev_var_value((a),(0),METHODID(b));

       
        _get_dev_var_value((a),(0),METHODID(b));         
           
        ……

       
    }

}

2.再换成:

// Attempt 2: the switch onto the macro text is moved into the rule's @after
// action; the rule body itself only records the macro and calls skip().
DIRECTIVE
@init{
    List args = new ArrayList();
    // not used anywhere in this rule
    boolean condition = true;
    
    String arg0Text = "";
    String arg1Text = "";
    // stays "" when the #define has no parameter list and no text
    String definedContent = "";
    String defineId = "";
}
@after{
	//process the define content, to check whether it contain the previous define
	//if yes, then process it
	
// save current lexer's state
	SaveStruct ss = new SaveStruct(input);
	includes.push(ss);
	// switch on new input stream
	setCharStream(new ANTLRStringStream(definedContent));
	reset();
}
:	('#define' WS* defineMacro=RAW_IDENTIFIER
    {
        args.add(""); // first element will hold the macro text
    }
        (
            ( '(' // get arguments if you find them (no spaces before left paren)
                (WS)? defineArg0=RAW_IDENTIFIER (WS)? {arg0Text = defineArg0.getText(); args.add(arg0Text);}
                ( ',' (WS)? defineArg1=RAW_IDENTIFIER (WS)? {arg1Text = defineArg1.getText(); args.add(arg1Text);} )*
              ')'
            | ' '|'\t'|'\f'
            )
            ( options{greedy=true;}: ' '|'\t'|'\f' )*
            // store the text verbatim - tokenize when called
            macroText=MACRO_TEXT 
            {
                definedContent = macroText.getText();
                // meant to drop the trailing '\' of multi-line defines; note that
                // replace() removes every backslash in the text, not only those
                definedContent = definedContent.replace("\\", "");
                args.set(0, definedContent);
            }
        )? '\r'? '\n'
    {
        defineId = defineMacro.getText();
        defines.put(defineId, args );
        skip();
    }
    );

结果是:


_get_dev_var_value((a),(b),METHODID(c))_get_dev_var_value((a),(0),METHODID(b))

METHOD message_methode

{

    ……

       
        _get_dev_var_value((a),(0),METHODID(b));

       
        _get_dev_var_value((a),(0),METHODID(b));         
           
        ……

    }

}

3.又换成:

// Attempt 3: @after is left empty and the switch onto the macro text is placed
// in a separate action that follows the parenthesised rule body.
DIRECTIVE
@init{
    List args = new ArrayList();
    // not used anywhere in this rule
    boolean condition = true;
    
    String arg0Text = "";
    String arg1Text = "";
    // stays "" when the #define has no parameter list and no text
    String definedContent = "";
    String defineId = "";
}
@after{

}
:	('#define' WS* defineMacro=RAW_IDENTIFIER
    {
        args.add(""); // first element will hold the macro text
    }
        (
            ( '(' // get arguments if you find them (no spaces before left paren)
                (WS)? defineArg0=RAW_IDENTIFIER (WS)? {arg0Text = defineArg0.getText(); args.add(arg0Text);}
                ( ',' (WS)? defineArg1=RAW_IDENTIFIER (WS)? {arg1Text = defineArg1.getText(); args.add(arg1Text);} )*
              ')'
            | ' '|'\t'|'\f'
            )
            ( options{greedy=true;}: ' '|'\t'|'\f' )*
            // store the text verbatim - tokenize when called
            macroText=MACRO_TEXT 
            {
                definedContent = macroText.getText();
                // meant to drop the trailing '\' of multi-line defines; note that
                // replace() removes every backslash in the text, not only those
                definedContent = definedContent.replace("\\", "");
                args.set(0, definedContent);
            }
        )? '\r'? '\n'
    {
        defineId = defineMacro.getText();
        defines.put(defineId, args );
        skip();
    }
    )
    {
	//process the define content, to check whether it contain the previous define
	//if yes, then process it
	
	// save current lexer's state
	SaveStruct ss = new SaveStruct(input);
	includes.push(ss);
	// switch on new input stream
	setCharStream(new ANTLRStringStream(definedContent));
	reset();
    };

结果是:


_get_dev_var_value((a),(b),METHODID(c))_get_dev_var_value((a),(0),METHODID(b))

METHOD message_methode

{

    ……

        _get_dev_var_value((a),(0),METHODID(b));

       
        _get_dev_var_value((a),(0),METHODID(b));         
           
        ……

    }

}

4.再去试试,用:

// Attempt 4: the IDENTIFIER rule with one addition - a debug print in the
// lookup action. Everything else matches the original IDENTIFIER rule:
// identifiers found in defineArgs or as parameterless macros are replaced via
// setText(); macros with parameters record their arguments in defineArgs and
// switch the lexer onto the macro text.
IDENTIFIER @init{
    List define = new ArrayList();
    List foundArgs = new ArrayList();
    
    String callArg0Text = "";
    String callArg1Text = "";
} :
    identifier=RAW_IDENTIFIER
    {
        // see if this is a macro argument
        define = (List)defineArgs.get(identifier.getText());
        if (define==null) {
            // see if this is a macro call
            define = (List)defines.get(identifier.getText());
        }
        else
        {
        	// Reached when the identifier was found in defineArgs, i.e. it is a
        	// macro argument; the value printed is the stored argument text.
        	//is normal macro replacement
        	System.out.println("normal define call=" + (String)define.get(0));
        }
    }
    // gated predicate: this alternative is only viable for a macro that has parameters
    ( {(define!=null) && (define.size()>1)}?=> (WS|COMMENT)?
        // take in arguments if macro call requires them
        '('
        callArg0=EXPR
        {
            callArg0Text = callArg0.getText(); 
            foundArgs.add(callArg0Text);
        }
        ( COMMA callArg1=EXPR 
        {
            callArg1Text = callArg1.getText();
            foundArgs.add(callArg1Text);
        }
        )*
        { foundArgs.size()==define.size()-1 }? // better have right amount
        ')'
    // otherwise (not a macro, or a macro without parameters): match nothing more
    | {!((define!=null) && (define.size()>1))}?=>
    )
{
if (define!=null) {
    String defineText = (String)define.get(0);

    if (define.size()==1) {
        //only have one value in list -> the defineText is the define para content -> just need replace directly
        setText(defineText);
    } else {
        //add new dict pair: (para, call value)
        // entries put into defineArgs are never removed in the code shown here
        for (int i=0;i<foundArgs.size();++i) {
            // treat macro arguments similar to local defines
            List arg = new ArrayList();
            arg.add((String)foundArgs.get(i));
            defineArgs.put( (String)define.get(1+i), arg );
        }

        // save current lexer's state
        SaveStruct ss = new SaveStruct(input);
        includes.push(ss);

        // switch on new input stream
        setCharStream(new ANTLRStringStream(defineText));
        reset();
    }
}
};

处理:

……

//#define get_dev_var_value(a,b,c)    _get_dev_var_value((a),(b),METHODID(c))

#define get_dev_var_value(d,e,f)    _get_dev_var_value((d),(e),METHODID(f))

#define GET_DEV_VAR_VALUE(a,b)        get_dev_var_value(a,0,b)

//#define GET_DEV_VAR_VALUE(a,b)        _get_dev_var_value((a),0,METHODID(b))


METHOD message_methode

{

    ……

        GET_DEV_VAR_VALUE("Choose between \n 3, 5, 9, 17 or 33 points \n No of points:", count);

       
        GET_DEV_VAR_VALUE("Choose between \n 3, 5, 9, 17 or 33 points \n No of points:", transfer_function);  // enumerated parameter

        ……

}

得到:

normal define call=a

normal define call=0

normal define call=b

normal define call="Choose between \n 3, 5, 9, 17 or 33 points \n No of points:"

normal define call= count

normal define call=a

normal define call=0

normal define call=b

normal define call="Choose between \n 3, 5, 9, 17 or 33 points \n No of points:"

normal define call= transfer_function

normal define call=a

normal define call=0

normal define call=b

 

 

_get_dev_var_value((d),(e),METHODID(f))_get_dev_var_value((a),(0),METHODID(b))

METHOD message_methode

{

    ……

       
        _get_dev_var_value((a),(0),METHODID(b));

       
        _get_dev_var_value((a),(0),METHODID(b));         
        ……

}

 

 

【总结】

截至目前还是没有搞定。

转载请注明:在路上 » 【未解决】antlr实现#define的宏的多层(递归)替换后,去不掉宏的内容

发表我的评论
取消评论

表情

Hi,您需要填写昵称和邮箱!

  • 昵称 (必填)
  • 邮箱 (必填)
  • 网址
82 queries in 0.186 seconds, using 22.16MB memory