LinuxSir.cn,穿越时空的Linuxsir!

 找回密码
 注册
搜索
热搜: shell linux mysql
查看: 6258|回复: 9

ANSI C grammar 最新的 Lex 和 Yacc 描述

[复制链接]
发表于 2005-5-23 10:11:25 | 显示全部楼层 |阅读模式
http://www.quut.com/c/ANSI-C-grammar-l-1998.html
http://www.quut.com/c/ANSI-C-grammar-y-1998.html

可以说这是比较权威和准确的词法及语法描述了,是比较可贵的材料。
原文引述如下
-------------------------------
ANSI C Yacc grammar

  1. ANSI C Yacc grammar
  2. In 1985, Jeff Lee published his Yacc grammar (which is accompanied by a matching Lex specification) for the April 30, 1985 draft version of the ANSI C standard.  Tom Stockfisch reposted it to net.sources in 1987; that original, as mentioned in the answer to question 17.25 of the comp.lang.c FAQ, can be ftp'ed from ftp.uu.net, file usenet/net.sources/ansi.c.grammar.Z.

  3. The version you see here has been updated based on a 1998 draft of the standards document. It allows for restricted pointers, variable-length arrays, "inline", and designated initializers. The previous version's lex and yacc files (ANSI C as of ca. 1995) are still around as archived copies.

  4. I intend to keep this version as close to the current C Standard grammar as possible; please let me know if you discover discrepancies.
  5. (If you feel like it, read the FAQ first.)

  6. Jutta Degener, April 2005

  7. %token IDENTIFIER CONSTANT STRING_LITERAL SIZEOF
  8. %token PTR_OP INC_OP DEC_OP LEFT_OP RIGHT_OP LE_OP GE_OP EQ_OP NE_OP
  9. %token AND_OP OR_OP MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN
  10. %token SUB_ASSIGN LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN
  11. %token XOR_ASSIGN OR_ASSIGN TYPE_NAME

  12. %token TYPEDEF EXTERN STATIC AUTO REGISTER INLINE RESTRICT
  13. %token CHAR SHORT INT LONG SIGNED UNSIGNED FLOAT DOUBLE CONST VOLATILE VOID
  14. %token BOOL COMPLEX IMAGINARY STRUCT UNION ENUM ELLIPSIS

  15. %token CASE DEFAULT IF ELSE SWITCH WHILE DO FOR GOTO CONTINUE BREAK RETURN

  16. %start translation_unit
  17. %%

  18. primary_expression
  19.         : IDENTIFIER
  20.         | CONSTANT
  21.         | STRING_LITERAL
  22.         | '(' expression ')'
  23.         ;

  24. postfix_expression
  25.         : primary_expression
  26.         | postfix_expression '[' expression ']'
  27.         | postfix_expression '(' ')'
  28.         | postfix_expression '(' argument_expression_list ')'
  29.         | postfix_expression '.' IDENTIFIER
  30.         | postfix_expression PTR_OP IDENTIFIER
  31.         | postfix_expression INC_OP
  32.         | postfix_expression DEC_OP
  33.         | '(' type_name ')' '{' initializer_list '}'
  34.         | '(' type_name ')' '{' initializer_list ',' '}'
  35.         ;

  36. argument_expression_list
  37.         : assignment_expression
  38.         | argument_expression_list ',' assignment_expression
  39.         ;

  40. unary_expression
  41.         : postfix_expression
  42.         | INC_OP unary_expression
  43.         | DEC_OP unary_expression
  44.         | unary_operator cast_expression
  45.         | SIZEOF unary_expression
  46.         | SIZEOF '(' type_name ')'
  47.         ;

  48. unary_operator
  49.         : '&'
  50.         | '*'
  51.         | '+'
  52.         | '-'
  53.         | '~'
  54.         | '!'
  55.         ;

  56. cast_expression
  57.         : unary_expression
  58.         | '(' type_name ')' cast_expression
  59.         ;

  60. multiplicative_expression
  61.         : cast_expression
  62.         | multiplicative_expression '*' cast_expression
  63.         | multiplicative_expression '/' cast_expression
  64.         | multiplicative_expression '%' cast_expression
  65.         ;

  66. additive_expression
  67.         : multiplicative_expression
  68.         | additive_expression '+' multiplicative_expression
  69.         | additive_expression '-' multiplicative_expression
  70.         ;

  71. shift_expression
  72.         : additive_expression
  73.         | shift_expression LEFT_OP additive_expression
  74.         | shift_expression RIGHT_OP additive_expression
  75.         ;

  76. relational_expression
  77.         : shift_expression
  78.         | relational_expression '<' shift_expression
  79.         | relational_expression '>' shift_expression
  80.         | relational_expression LE_OP shift_expression
  81.         | relational_expression GE_OP shift_expression
  82.         ;

  83. equality_expression
  84.         : relational_expression
  85.         | equality_expression EQ_OP relational_expression
  86.         | equality_expression NE_OP relational_expression
  87.         ;

  88. and_expression
  89.         : equality_expression
  90.         | and_expression '&' equality_expression
  91.         ;

  92. exclusive_or_expression
  93.         : and_expression
  94.         | exclusive_or_expression '^' and_expression
  95.         ;

  96. inclusive_or_expression
  97.         : exclusive_or_expression
  98.         | inclusive_or_expression '|' exclusive_or_expression
  99.         ;

  100. logical_and_expression
  101.         : inclusive_or_expression
  102.         | logical_and_expression AND_OP inclusive_or_expression
  103.         ;

  104. logical_or_expression
  105.         : logical_and_expression
  106.         | logical_or_expression OR_OP logical_and_expression
  107.         ;

  108. conditional_expression
  109.         : logical_or_expression
  110.         | logical_or_expression '?' expression ':' conditional_expression
  111.         ;

  112. assignment_expression
  113.         : conditional_expression
  114.         | unary_expression assignment_operator assignment_expression
  115.         ;

  116. assignment_operator
  117.         : '='
  118.         | MUL_ASSIGN
  119.         | DIV_ASSIGN
  120.         | MOD_ASSIGN
  121.         | ADD_ASSIGN
  122.         | SUB_ASSIGN
  123.         | LEFT_ASSIGN
  124.         | RIGHT_ASSIGN
  125.         | AND_ASSIGN
  126.         | XOR_ASSIGN
  127.         | OR_ASSIGN
  128.         ;

  129. expression
  130.         : assignment_expression
  131.         | expression ',' assignment_expression
  132.         ;

  133. constant_expression
  134.         : conditional_expression
  135.         ;

  136. declaration
  137.         : declaration_specifiers ';'
  138.         | declaration_specifiers init_declarator_list ';'
  139.         ;

  140. declaration_specifiers
  141.         : storage_class_specifier
  142.         | storage_class_specifier declaration_specifiers
  143.         | type_specifier
  144.         | type_specifier declaration_specifiers
  145.         | type_qualifier
  146.         | type_qualifier declaration_specifiers
  147.         | function_specifier
  148.         | function_specifier declaration_specifiers
  149.         ;

  150. init_declarator_list
  151.         : init_declarator
  152.         | init_declarator_list ',' init_declarator
  153.         ;

  154. init_declarator
  155.         : declarator
  156.         | declarator '=' initializer
  157.         ;

  158. storage_class_specifier
  159.         : TYPEDEF
  160.         | EXTERN
  161.         | STATIC
  162.         | AUTO
  163.         | REGISTER
  164.         ;

  165. type_specifier
  166.         : VOID
  167.         | CHAR
  168.         | SHORT
  169.         | INT
  170.         | LONG
  171.         | FLOAT
  172.         | DOUBLE
  173.         | SIGNED
  174.         | UNSIGNED
  175.         | BOOL
  176.         | COMPLEX
  177.         | IMAGINARY
  178.         | struct_or_union_specifier
  179.         | enum_specifier
  180.         | TYPE_NAME
  181.         ;

  182. struct_or_union_specifier
  183.         : struct_or_union IDENTIFIER '{' struct_declaration_list '}'
  184.         | struct_or_union '{' struct_declaration_list '}'
  185.         | struct_or_union IDENTIFIER
  186.         ;

  187. struct_or_union
  188.         : STRUCT
  189.         | UNION
  190.         ;

  191. struct_declaration_list
  192.         : struct_declaration
  193.         | struct_declaration_list struct_declaration
  194.         ;

  195. struct_declaration
  196.         : specifier_qualifier_list struct_declarator_list ';'
  197.         ;

  198. specifier_qualifier_list
  199.         : type_specifier specifier_qualifier_list
  200.         | type_specifier
  201.         | type_qualifier specifier_qualifier_list
  202.         | type_qualifier
  203.         ;

  204. struct_declarator_list
  205.         : struct_declarator
  206.         | struct_declarator_list ',' struct_declarator
  207.         ;

  208. struct_declarator
  209.         : declarator
  210.         | ':' constant_expression
  211.         | declarator ':' constant_expression
  212.         ;

  213. enum_specifier
  214.         : ENUM '{' enumerator_list '}'
  215.         | ENUM IDENTIFIER '{' enumerator_list '}'
  216.         | ENUM '{' enumerator_list ',' '}'
  217.         | ENUM IDENTIFIER '{' enumerator_list ',' '}'
  218.         | ENUM IDENTIFIER
  219.         ;

  220. enumerator_list
  221.         : enumerator
  222.         | enumerator_list ',' enumerator
  223.         ;

  224. enumerator
  225.         : IDENTIFIER
  226.         | IDENTIFIER '=' constant_expression
  227.         ;

  228. type_qualifier
  229.         : CONST
  230.         | RESTRICT
  231.         | VOLATILE
  232.         ;

  233. function_specifier
  234.         : INLINE
  235.         ;

  236. declarator
  237.         : pointer direct_declarator
  238.         | direct_declarator
  239.         ;


  240. direct_declarator
  241.         : IDENTIFIER
  242.         | '(' declarator ')'
  243.         | direct_declarator '[' type_qualifier_list assignment_expression ']'
  244.         | direct_declarator '[' type_qualifier_list ']'
  245.         | direct_declarator '[' assignment_expression ']'
  246.         | direct_declarator '[' STATIC type_qualifier_list assignment_expression ']'
  247.         | direct_declarator '[' type_qualifier_list STATIC assignment_expression ']'
  248.         | direct_declarator '[' type_qualifier_list '*' ']'
  249.         | direct_declarator '[' '*' ']'
  250.         | direct_declarator '[' ']'
  251.         | direct_declarator '(' parameter_type_list ')'
  252.         | direct_declarator '(' identifier_list ')'
  253.         | direct_declarator '(' ')'
  254.         ;

  255. pointer
  256.         : '*'
  257.         | '*' type_qualifier_list
  258.         | '*' pointer
  259.         | '*' type_qualifier_list pointer
  260.         ;

  261. type_qualifier_list
  262.         : type_qualifier
  263.         | type_qualifier_list type_qualifier
  264.         ;


  265. parameter_type_list
  266.         : parameter_list
  267.         | parameter_list ',' ELLIPSIS
  268.         ;

  269. parameter_list
  270.         : parameter_declaration
  271.         | parameter_list ',' parameter_declaration
  272.         ;

  273. parameter_declaration
  274.         : declaration_specifiers declarator
  275.         | declaration_specifiers abstract_declarator
  276.         | declaration_specifiers
  277.         ;

  278. identifier_list
  279.         : IDENTIFIER
  280.         | identifier_list ',' IDENTIFIER
  281.         ;

  282. type_name
  283.         : specifier_qualifier_list
  284.         | specifier_qualifier_list abstract_declarator
  285.         ;

  286. abstract_declarator
  287.         : pointer
  288.         | direct_abstract_declarator
  289.         | pointer direct_abstract_declarator
  290.         ;

  291. direct_abstract_declarator
  292.         : '(' abstract_declarator ')'
  293.         | '[' ']'
  294.         | '[' assignment_expression ']'
  295.         | direct_abstract_declarator '[' ']'
  296.         | direct_abstract_declarator '[' assignment_expression ']'
  297.         | '[' '*' ']'
  298.         | direct_abstract_declarator '[' '*' ']'
  299.         | '(' ')'
  300.         | '(' parameter_type_list ')'
  301.         | direct_abstract_declarator '(' ')'
  302.         | direct_abstract_declarator '(' parameter_type_list ')'
  303.         ;

  304. initializer
  305.         : assignment_expression
  306.         | '{' initializer_list '}'
  307.         | '{' initializer_list ',' '}'
  308.         ;

  309. initializer_list
  310.         : initializer
  311.         | designation initializer
  312.         | initializer_list ',' initializer
  313.         | initializer_list ',' designation initializer
  314.         ;

  315. designation
  316.         : designator_list '='
  317.         ;

  318. designator_list
  319.         : designator
  320.         | designator_list designator
  321.         ;

  322. designator
  323.         : '[' constant_expression ']'
  324.         | '.' IDENTIFIER
  325.         ;

  326. statement
  327.         : labeled_statement
  328.         | compound_statement
  329.         | expression_statement
  330.         | selection_statement
  331.         | iteration_statement
  332.         | jump_statement
  333.         ;

  334. labeled_statement
  335.         : IDENTIFIER ':' statement
  336.         | CASE constant_expression ':' statement
  337.         | DEFAULT ':' statement
  338.         ;

  339. compound_statement
  340.         : '{' '}'
  341.         | '{' block_item_list '}'
  342.         ;

  343. block_item_list
  344.         : block_item
  345.         | block_item_list block_item
  346.         ;

  347. block_item
  348.         : declaration
  349.         | statement
  350.         ;

  351. expression_statement
  352.         : ';'
  353.         | expression ';'
  354.         ;

  355. selection_statement
  356.         : IF '(' expression ')' statement
  357.         | IF '(' expression ')' statement ELSE statement
  358.         | SWITCH '(' expression ')' statement
  359.         ;

  360. iteration_statement
  361.         : WHILE '(' expression ')' statement
  362.         | DO statement WHILE '(' expression ')' ';'
  363.         | FOR '(' expression_statement expression_statement ')' statement
  364.         | FOR '(' expression_statement expression_statement expression ')' statement
  365.         | FOR '(' declaration expression_statement ')' statement
  366.         | FOR '(' declaration expression_statement expression ')' statement
  367.         ;

  368. jump_statement
  369.         : GOTO IDENTIFIER ';'
  370.         | CONTINUE ';'
  371.         | BREAK ';'
  372.         | RETURN ';'
  373.         | RETURN expression ';'
  374.         ;

  375. translation_unit
  376.         : external_declaration
  377.         | translation_unit external_declaration
  378.         ;

  379. external_declaration
  380.         : function_definition
  381.         | declaration
  382.         ;

  383. function_definition
  384.         : declaration_specifiers declarator declaration_list compound_statement
  385.         | declaration_specifiers declarator compound_statement
  386.         ;

  387. declaration_list
  388.         : declaration
  389.         | declaration_list declaration
  390.         ;


  391. %%
  392. #include <stdio.h>

  393. extern char yytext[];
  394. extern int column;

  395. /*
  396.  * Report a parse error on stdout.  After a blank line, prints a '^' and then
  397.  * the message s on the following line, each right-aligned in a field of
  398.  * `column` characters (column is the extern counter declared above).
  399.  * Returns 0; the return type stays int so callers that relied on the old
  400.  * implicit-int definition remain compatible.
  401.  */
  402. int yyerror(const char *s)
  403. {
  404.         fflush(stdout);
  405.         printf("\n%*s\n%*s\n", column, "^", column, s);
  406.         return 0;
  407. }
复制代码


ANSI C grammar, Lex specification

  1. ANSI C grammar, Lex specification
  2. In 1985, Jeff Lee published this Lex specification together with a Yacc grammar for the April 30, 1985 ANSI C draft.  Tom Stockfisch reposted both to net.sources in 1987; that original, as mentioned in the answer to question 17.25 of the comp.lang.c FAQ, can be ftp'ed from ftp.uu.net, file usenet/net.sources/ansi.c.grammar.Z.

  3. The version you see here has been updated based on a 1998 draft of the standards document. It allows for restricted pointers, variable-length arrays, "inline", and designated initializers. The previous version's lex and yacc files (ANSI C as of ca. 1995) are still around as archived copies.

  4. I intend to keep this version as close to the current C Standard grammar as possible; please let me know if you discover discrepancies.

  5. Jutta Degener, 2004

  6. D                        [0-9]
  7. L                        [a-zA-Z_]
  8. H                        [a-fA-F0-9]
  9. E                        [Ee][+-]?{D}+
  10. FS                        (f|F|l|L)
  11. IS                        (u|U|l|L)*

  12. %{
  13. #include <stdio.h>
  14. #include "y.tab.h"

  15. void count(void); int comment(void); int check_type(void);  /* helpers defined after the rules */
  16. %}

  17. %%
  18. "/*"                        { comment(); }

  19. "auto"                        { count(); return(AUTO); }
  20. "_Bool"                        { count(); return(BOOL); }
  21. "break"                        { count(); return(BREAK); }
  22. "case"                        { count(); return(CASE); }
  23. "char"                        { count(); return(CHAR); }
  24. "_Complex"                { count(); return(COMPLEX); }
  25. "const"                        { count(); return(CONST); }
  26. "continue"                { count(); return(CONTINUE); }
  27. "default"                { count(); return(DEFAULT); }
  28. "do"                        { count(); return(DO); }
  29. "double"                { count(); return(DOUBLE); }
  30. "else"                        { count(); return(ELSE); }
  31. "enum"                        { count(); return(ENUM); }
  32. "extern"                { count(); return(EXTERN); }
  33. "float"                        { count(); return(FLOAT); }
  34. "for"                        { count(); return(FOR); }
  35. "goto"                        { count(); return(GOTO); }
  36. "if"                        { count(); return(IF); }
  37. "_Imaginary"                { count(); return(IMAGINARY); }
  38. "inline"                { count(); return(INLINE); }
  39. "int"                        { count(); return(INT); }
  40. "long"                        { count(); return(LONG); }
  41. "register"                { count(); return(REGISTER); }
  42. "restrict"                { count(); return(RESTRICT); }
  43. "return"                { count(); return(RETURN); }
  44. "short"                        { count(); return(SHORT); }
  45. "signed"                { count(); return(SIGNED); }
  46. "sizeof"                { count(); return(SIZEOF); }
  47. "static"                { count(); return(STATIC); }
  48. "struct"                { count(); return(STRUCT); }
  49. "switch"                { count(); return(SWITCH); }
  50. "typedef"                { count(); return(TYPEDEF); }
  51. "union"                        { count(); return(UNION); }
  52. "unsigned"                { count(); return(UNSIGNED); }
  53. "void"                        { count(); return(VOID); }
  54. "volatile"                { count(); return(VOLATILE); }
  55. "while"                        { count(); return(WHILE); }

  56. {L}({L}|{D})*                { count(); return(check_type()); }

  57. 0[xX]{H}+{IS}?                { count(); return(CONSTANT); }
  58. 0{D}+{IS}?                { count(); return(CONSTANT); }
  59. {D}+{IS}?                { count(); return(CONSTANT); }
  60. L?'(\\.|[^\\'])+'        { count(); return(CONSTANT); }

  61. {D}+{E}{FS}?                { count(); return(CONSTANT); }
  62. {D}*"."{D}+({E})?{FS}?        { count(); return(CONSTANT); }
  63. {D}+"."{D}*({E})?{FS}?        { count(); return(CONSTANT); }

  64. L?\"(\\.|[^\\"])*\"        { count(); return(STRING_LITERAL); }

  65. "..."                        { count(); return(ELLIPSIS); }
  66. ">>="                        { count(); return(RIGHT_ASSIGN); }
  67. "<<="                        { count(); return(LEFT_ASSIGN); }
  68. "+="                        { count(); return(ADD_ASSIGN); }
  69. "-="                        { count(); return(SUB_ASSIGN); }
  70. "*="                        { count(); return(MUL_ASSIGN); }
  71. "/="                        { count(); return(DIV_ASSIGN); }
  72. "%="                        { count(); return(MOD_ASSIGN); }
  73. "&="                        { count(); return(AND_ASSIGN); }
  74. "^="                        { count(); return(XOR_ASSIGN); }
  75. "|="                        { count(); return(OR_ASSIGN); }
  76. ">>"                        { count(); return(RIGHT_OP); }
  77. "<<"                        { count(); return(LEFT_OP); }
  78. "++"                        { count(); return(INC_OP); }
  79. "--"                        { count(); return(DEC_OP); }
  80. "->"                        { count(); return(PTR_OP); }
  81. "&&"                        { count(); return(AND_OP); }
  82. "||"                        { count(); return(OR_OP); }
  83. "<="                        { count(); return(LE_OP); }
  84. ">="                        { count(); return(GE_OP); }
  85. "=="                        { count(); return(EQ_OP); }
  86. "!="                        { count(); return(NE_OP); }
  87. ";"                        { count(); return(';'); }
  88. ("{"|"<%")                { count(); return('{'); }
  89. ("}"|"%>")                { count(); return('}'); }
  90. ","                        { count(); return(','); }
  91. ":"                        { count(); return(':'); }
  92. "="                        { count(); return('='); }
  93. "("                        { count(); return('('); }
  94. ")"                        { count(); return(')'); }
  95. ("["|"<:")                { count(); return('['); }
  96. ("]"|":>")                { count(); return(']'); }
  97. "."                        { count(); return('.'); }
  98. "&"                        { count(); return('&'); }
  99. "!"                        { count(); return('!'); }
  100. "~"                        { count(); return('~'); }
  101. "-"                        { count(); return('-'); }
  102. "+"                        { count(); return('+'); }
  103. "*"                        { count(); return('*'); }
  104. "/"                        { count(); return('/'); }
  105. "%"                        { count(); return('%'); }
  106. "<"                        { count(); return('<'); }
  107. ">"                        { count(); return('>'); }
  108. "^"                        { count(); return('^'); }
  109. "|"                        { count(); return('|'); }
  110. "?"                        { count(); return('?'); }

  111. [ \t\v\n\f]                { count(); }
  112. .                        { /* ignore bad characters */ }

  113. %%

  114. int yywrap(void)        /* end-of-input hook for the generated scanner */
  115. {
  116.         return 1;        /* always 1: there is never another input to continue with */
  117. }


  118. int comment(void)        /* Skip the rest of a comment whose opener was already matched. */
  119. {                        /* Skipped text is echoed with putchar(), except '*' characters. */
  120.         int c, c1 = 0;        /* int, not char: end of input may arrive as 0 or as EOF */

  121. loop:
  122.         while ((c = input()) != '*' && c != 0 && c != EOF)
  123.                 putchar(c);

  124.         if (c == '*' && (c1 = input()) != '/' && c1 != 0 && c1 != EOF) {
  125.                 unput(c1);        /* not the closing slash: rescan it as comment text */
  126.                 goto loop;
  127.         }

  128.         if (c == '*' && c1 == '/')
  129.                 putchar(c1);        /* echo the closing slash, as before */
  130.         return 0;                /* also reached, silently, on an unterminated comment */
  131. }


  132. int column = 0;

  133. void count()        /* Advance `column` over the text in yytext, then echo that text. */
  134. {
  135.         const char *p;        /* cursor over the characters of yytext */

  136.         for (p = yytext; *p != '\0'; p++)
  137.                 switch (*p) {
  138.                 case '\n': column = 0; break;                  /* newline resets the column */
  139.                 case '\t': column += 8 - (column % 8); break;  /* tab: up to the next multiple of 8 */
  140.                 default:   column++; break;
  141.                 }

  142.         /* ECHO is the lex-provided macro; it is invoked after the column update */
  143.         ECHO;
  144. }


  145. int check_type()
  146. {
  147. /*
  148. * Intended behaviour (pseudo code, not implemented):
  149. *
  150. *        if (yytext names a type)
  151. *                return(TYPE_NAME);
  152. *
  153. *        return(IDENTIFIER);
  154. */

  155. /*
  156. *        As written, nothing is looked up: the result is always IDENTIFIER.
  157. */

  158.         return IDENTIFIER;
  159. }

复制代码
发表于 2005-5-23 11:33:00 | 显示全部楼层
好东西,收藏。
不过我还不懂yacc,很不好意思的说:)
回复 支持 反对

使用道具 举报

发表于 2005-5-23 12:09:01 | 显示全部楼层
Post by Tetris
好东西,收藏。
不过我还不懂yacc,很不好意思的说:)

yacc 俺也不熟, lex 倒是熟点,用过
回复 支持 反对

使用道具 举报

发表于 2005-5-23 12:35:09 | 显示全部楼层
我都不懂
回复 支持 反对

使用道具 举报

发表于 2005-5-23 14:03:07 | 显示全部楼层
不错不错,收起来了。
回复 支持 反对

使用道具 举报

发表于 2005-8-30 15:56:27 | 显示全部楼层
大侠们介绍一下Lex 和 Yacc 吧
回复 支持 反对

使用道具 举报

发表于 2005-8-30 16:27:11 | 显示全部楼层
http://www-128.ibm.com/developerworks/library/l-lex.html 这里有Ashish Bansal写的一个入门教程
回复 支持 反对

使用道具 举报

发表于 2005-8-31 22:28:05 | 显示全部楼层
oh ,very cool
除了那个<lex和yacc>外,我再也没有看到这么cool的东西了!
回复 支持 反对

使用道具 举报

发表于 2006-2-8 13:23:25 | 显示全部楼层
收藏            
回复 支持 反对

使用道具 举报

发表于 2009-2-18 23:42:43 | 显示全部楼层
我要看看,呵呵
回复 支持 反对

使用道具 举报

您需要登录后才可以回帖 登录 | 注册

本版积分规则

快速回复 返回顶部 返回列表