1.真正去实现的时候,可以参考:
Tips on designing a preprocessor for C++ using Antlr
中的例子,去添加对应的action code。
2.参考:
How to do preprocessing in antlr v4?
->
A list of all available downloads at Soft Gems
->
Windows Resource File Parser + Converter
下载到 312KB的rc-converter.zip。
3.又从:
[antlr-interest] completed an ANTLR-based cpp preprocessor (#include, #if, #define, etc)
拷贝了一份代码,供后续参考:
// Copied from:
//   http://www.antlr3.org/pipermail/antlr-interest/2004-July/008778.html
// Save as:
//   cppLexer.g
//
// Author: Eric Mahurin
// License: just give me credit
//
// ANTLR v2 lexer grammar implementing a small C-preprocessor-like filter:
// #include, #define (with optional arguments), #undef, #ifdef/#ifndef,
// #else/#elsif and #endif. Nested inputs (included files and macro bodies)
// are handled by pushing sub-lexers onto a shared TokenStreamSelector.

options {
    language="Java";
}

{
import java.io.*;
import java.util.*;
import antlr.*;

// Command-line driver: lexes System.in and prints the text of every token
// the selector yields until EOF.
class cpp implements cppLexerTokenTypes {
    public static TokenStreamSelector selector = new TokenStreamSelector();

    public static void main(String[] args) {
        try {
            // will need a stack of lexers for #include and macro calls
            cppLexer mainLexer = new cppLexer(new DataInputStream(System.in));
            mainLexer.selector = selector;
            selector.select(mainLexer);
            for (;;) {
                Token t = selector.nextToken();
                if (t.getType()==Token.EOF_TYPE) break;
                System.out.print(t.getText());
            }
        } catch(Exception e) {
            System.err.println("exception: "+e);
        }
    }
}
}

class cppLexer extends Lexer;

options {
    testLiterals = false;
    k = 4;
}

tokens {
    ENDIF ;
}

{
    public static TokenStreamSelector selector; // must be assigned externally
    protected static Integer ifState = 1; // -1: no-else false, 0:false, 1: true
    protected static List ifStates = new ArrayList(); // holds nested if conditions
    protected static Map defines = new Hashtable(); // holds the defines
    protected Map defineArgs = new Hashtable(); // holds the args for a macro call

    // At the end of a sub-lexer's input, pop back to the lexer that pushed it
    // and ask the selector to retry there.
    public void uponEOF() throws TokenStreamException, CharStreamException {
        try {
            selector.pop(); // return to old lexer/stream
            selector.retry();
        } catch (NoSuchElementException e) {
            // return a real EOF if nothing in stack
        }
    }
}

// A preprocessor directive introduced by '#'.
DIRECTIVE {
    List args = new ArrayList();
    boolean condition = true;
} : '#'
    ( "include" (WS)? includeFile:STRING {
        if (ifState==1) {
            // found this in examples/java/includeFile
            String name = includeFile.getText();
            name = name.substring(1,name.length()-1); // strip the surrounding quotes
            try {
                cppLexer sublexer = new cppLexer(new DataInputStream(new FileInputStream(name)));
                sublexer.defines = defines; // want defines to be persistent
                sublexer.setFilename(name);
                selector.push(sublexer);
                selector.retry();
            } catch (FileNotFoundException fnf) {
                System.err.println("cannot find file "+name);
            }
        }
    }
    | "define" WS defineMacro:RAW_IDENTIFIER
    {
        args.add(""); // first element will hold the macro text
    }
        (
            ( '(' // get arguments if you find them (no spaces before left paren)
                (WS)? defineArg0:RAW_IDENTIFIER (WS)? {args.add(defineArg0.getText());}
                ( COMMA (WS)? defineArg1:RAW_IDENTIFIER (WS)? {args.add(defineArg1.getText());} )*
              ')'
            | ' '|'\t'|'\f'
            )
            ( options{greedy=true;}: ' '|'\t'|'\f' )*
            // store the text verbatim - tokenize when called
            defineText:MACRO_TEXT {args.set(0,defineText.getText());}
        )? '\n' {newline();}
    { if (ifState==1) {
        defines.put( defineMacro.getText(), args );
        $setType(Token.SKIP);
    }}
    | "undef" WS undefMacro:RAW_IDENTIFIER { if (ifState==1) {
        defines.remove(undefMacro.getText());
        $setType(Token.SKIP);
    }}
    | ("ifdef"|"ifndef"{condition=false;})
        WS ifMacro:RAW_IDENTIFIER
    {
        ifStates.add(ifState); // remember the enclosing state for the matching endif
        if (ifState==1) {
            condition = (defines.containsKey(ifMacro.getText())==condition);
            ifState = condition?1:0;
        } else {
            ifState = -1;
        }
        if (ifState==1) {
            $setType(Token.SKIP);
        } else {
            // gobble up tokens until ENDIF (could be caused by else)
            for (;;) {
                try {
                    if (selector.nextToken().getType()==ENDIF) break;
                } catch (TokenStreamRetryException r) {
                    // just continue if someone tried retry
                }
            }
            // retry in case we switched lexers
            selector.retry();
        }
    }
    |
        ( "else" // treat like elsif (true)
        | "elsif" WS elsifMacro:RAW_IDENTIFIER {
            condition=defines.containsKey(elsifMacro.getText());
        }
        )
    {
        if (ifState==1) {
            // previous if/elsif was taken - discard rest
            ifState = -1;
            for (;;) {
                try {
                    if (selector.nextToken().getType()==ENDIF) break;
                } catch (TokenStreamRetryException r) {
                    // just continue if someone tried retry
                }
            }
            // retry in case we switched lexers
            selector.retry();
        } else if (ifState==0 && condition) {
            // "elsif" (true) or "else"
            $setType(ENDIF);
            ifState = 1;
        }
    }
    | "endif" {
        condition = (ifState==1);
        try {
            // return to previous if state
            ifState = (Integer)ifStates.remove(ifStates.size()-1);
            if (condition) {
                $setType(Token.SKIP);
            } else {
                // tell if/else/elsif to stop discarding tokens
                $setType(ENDIF);
            }
        } catch (IndexOutOfBoundsException e) {
            // endif with no if.
            // Catch the general IndexOutOfBoundsException: depending on the
            // JDK, ArrayList.remove(-1) throws either it or its subclass
            // ArrayIndexOutOfBoundsException.
        }
    }
    );

// An identifier; expands it when it names a macro argument or a defined macro.
IDENTIFIER options {testLiterals=true;} {
    List define = new ArrayList();
    List args = new ArrayList();
} :
    identifier:RAW_IDENTIFIER
    {
        // see if this is a macro argument
        define = (List)defineArgs.get(identifier.getText());
        if (_createToken && define==null) {
            // see if this is a macro call
            define = (List)defines.get(identifier.getText());
        }
    }
    ( { (define!=null) && (define.size()>1) }? (WS|COMMENT)?
        // take in arguments if macro call requires them
        '('
        callArg0:EXPR {args.add(callArg0.getText());}
        ( COMMA callArg1:EXPR {args.add(callArg1.getText());} )*
        { args.size()==define.size()-1 }? // better have right amount
        ')'
    | { !((define!=null) && (define.size()>1)) }?
    )
    { if (define!=null) {
        String defineText = (String)define.get(0);
        if (!_createToken) {
            // just substitute text if called from EXPR - no token created
            $setText(defineText);
        } else {
            // create a new lexer to handle the macro text
            cppLexer sublexer = new cppLexer(new DataInputStream(new StringBufferInputStream(defineText)));
            for (int i=0;i<args.size();++i) {
                // treat macro arguments similar to local defines
                List arg = new ArrayList();
                arg.add((String)args.get(i));
                sublexer.defineArgs.put( (String)define.get(1+i), arg );
            }
            selector.push(sublexer);
            // retry in new lexer
            selector.retry();
        }
    }};

STRING
    : '"' ( '\\' . | ~('\\'|'"') )* '"' // double quoted string
    | '\'' ( '\\' . | ~('\\'|'\'') )* '\'' // single quoted string
    ;

protected MACRO_TEXT :
    ( '\\'! '\n' {newline();} // escaped newline
    | ~'\n'
    )*;

WS :
    ( ' '
    | '\t'
    | '\f'
    | '\n' {newline();}
    ) { /*$setType(Token.SKIP);*/ };

COMMENT :
    ( "//" (~'\n')* '\n' {newline();} // single line comment
    | "/*" ( options{greedy=false;} : '\n' {newline();} | ~('\n') )* "*/" // multi-line comment
    ) { /*$setType(Token.SKIP);*/ };

protected RAW_IDENTIFIER :
    ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'_'|'0'..'9')* ;

NUMBER : ('0'..'9') ('0'..'9'|'a'..'z'|'A'..'Z'|'_')* ; // allow alpha suffixes on numbers (i.e. L:long)

// group symbols into categories to parse EXPR
LEFT : '(' | '[' | '{' ;
RIGHT : ')' | ']' | '}' ;
COMMA : ',' ;
OPERATOR : '!' | '#' | '$' | '%' | '&' | '*' | '+' | '-' | '.' | '/' | ':' | ';' | '<' | '=' | '>' | '?' | '@' | '\\' | '^' | '`' | '|' | '~' ;

protected EXPR // allow just about anything without being ambiguous
    : (WS)? (NUMBER|IDENTIFIER)?
        (
            ( LEFT EXPR ( COMMA EXPR )* RIGHT
            | STRING
            | OPERATOR // quotes, COMMA, LEFT, and RIGHT not in here
            )
            EXPR
        )?
    ;
4.参考:
How to do preprocessing in antlr v4?
看提到了:
TokenStreamRewriter
不过后来没继续弄。
5.后来是别的同事,搞定了对#include的处理,包括递归(嵌套)include的功能,完整代码如下:
(此外,后来又在此基础上加了对#define的简单文本替换)
// ANTLR v3 combined grammar that handles (possibly nested) #include
// directives inside the lexer, plus a simple "#define NAME "text""
// substitution applied to identifiers.
grammar preprocess;
//lexer grammar preprocess;

options{
    language=Java;
}

@lexer::header {
//package com.mm.antlrv3demo;

import java.io.*;
import java.util.*;
}

@parser::header {
//package com.mm.antlrv3demo;
}

@lexer::members {
    // Saved position of an including stream: the stream itself and the
    // marker obtained from it when the include was encountered.
    static class SaveStruct {
        SaveStruct(CharStream input){
            this.input = input;
            this.marker = input.mark();
        }
        public CharStream input;
        public int marker;
    }

    static Map<String, String> defines = new Hashtable<String, String>(); // holds the defines
    // Streams suspended by #include, innermost include on top.
    Stack<SaveStruct> includes = new Stack<SaveStruct>();

    // We should override this method for handling EOF of included file
    public Token nextToken(){
        Token token = super.nextToken();

        if(token.getType() == Token.EOF && !includes.empty()){
            // We've got EOF and have non empty stack.
            SaveStruct ss = includes.pop();
            setCharStream(ss.input);
            input.rewind(ss.marker);
            //this should be used instead of super [like below] to handle exits from nested includes
            //it matters, when the 'include' token is the last in previous stream (using super, lexer 'crashes' returning EOF token)
            token = this.nextToken();
        }

        // Skip first token after switching on another input.
        // You need to use this rather than super as there may be nested include files
        if(((CommonToken)token).getStartIndex() < 0)
            token = this.nextToken();

        return token;
    }
}

COMMENT
    : ('//' ~('\n'|'\r')* '\r'? '\n') {skip();}
    | ('/*' ( options {greedy=false;} : . )* '*/') {skip();}
    ;

// and lexer rule
INCLUDE : '#include' (WS)? f=STRING
{
    String name = f.getText();
    name = name.substring(1,name.length()-1); // strip the surrounding quotes
    try {
        // Open the included file first, so that a failure leaves neither a
        // stale entry on the include stack nor a dangling mark on the
        // current stream.
        CharStream included = new ANTLRFileStream(name);

        // save current lexer's state
        includes.push(new SaveStruct(input));

        // switch on new input stream
        setCharStream(included);
        reset();
    } catch(Exception fnf) {
        // keep the original failure as the cause for diagnosis
        throw new Error("Cannot open file " + name, fnf);
    }
};

// Records "#define NAME "text"" in the defines table and emits no token.
DIRECTIVE : ('#define' WS* defineMacro=ID WS* defineText=STRING)
{
    String macroKey = defineMacro.getText();
    String macroValue = defineText.getText();
    System.out.println("Found macro: " + macroKey + "=" + macroValue);
    defines.put(macroKey, macroValue);
    skip();
};

ID : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_')*
{
    // see if this is a macro call
    String idStr = getText();
    //define = (List)defines.get(identifier.getText());
    //if(define!=null) && (define.size()>1)
    if(defines.containsKey(idStr))
    {
        String macroValue = defines.get(idStr);
        System.out.println("Found macro reference, so replce " + idStr + " to " + macroValue);
        setText(macroValue);
    }
};

INT : '0'..'9'+
    ;

FLOAT
    : ('0'..'9')+ '.' ('0'..'9')* EXPONENT?
    | '.' ('0'..'9')+ EXPONENT?
    | ('0'..'9')+ EXPONENT
    ;

WS : ( ' '
     | '\t'
     | '\r'
     | '\n'
     ) {$channel=HIDDEN;}
    ;

RestSymbo
    : '{' | '}' | '&' | ';' | ',' | '+' | '-' | ')' | '(' | '~' | '/' | '`' | '$' | '@' | '%' | '^' | '#' | '\\'
    ;

STRING
    : '"' ( ESC_SEQ | ~('\\'|'"') )* '"'
    ;

CHAR: '\'' ( ESC_SEQ | ~('\''|'\\') ) '\''
    ;

fragment
EXPONENT : ('e'|'E') ('+'|'-')? ('0'..'9')+ ;

fragment
HEX_DIGIT : ('0'..'9'|'a'..'f'|'A'..'F') ;

fragment
ESC_SEQ
    : '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
    | UNICODE_ESC
    | OCTAL_ESC
    ;

fragment
OCTAL_ESC
    : '\\' ('0'..'3') ('0'..'7') ('0'..'7')
    | '\\' ('0'..'7') ('0'..'7')
    | '\\' ('0'..'7')
    ;

fragment
UNICODE_ESC
    : '\\' 'u' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
    ;

header : include*;

include : INCLUDE;//'#include ' '<' ID ('.h' | '.ddl') '>';
【总结】
最终还是通过antlr的语法文件,加上手动添加的action code(Java代码),来实现递归处理#include的逻辑。
转载请注明:在路上 » 【记录】尝试用antlr处理C代码中的#include