;;; wisent-python.wy -- LALR grammar for Python
;;
;; Copyright (C) 2002, 2003, 2004, 2007 Richard Kim
;;
;; Author: Richard Kim
;; Maintainer: Richard Kim
;; Created: June 2002
;; Keywords: syntax
;; X-RCS: $Id: wisent-python.wy,v 1.35 2007/03/08 02:12:07 zappo Exp $
;;
;; This file is not part of GNU Emacs.
;;
;; This program is free software; you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as
;; published by the Free Software Foundation; either version 2, or (at
;; your option) any later version.
;;
;; This software is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING.  If not, write to the
;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;; Boston, MA 02110-1301, USA.

;;; Commentary:
;;
;; This is an LALR python parser that follows the official python
;; grammar closely with very few exceptions.
;;
;;; To do:
;;
;; * Verify that semantic-lex-python-number regexp is correct.

;; --------
;; Settings
;; --------

;;%package wisent-python-wy

%languagemode python-mode

;; Default start symbol: a whole statement.
%start goal

;; Alternate entry points used for partial re-parse.
%start function_parameter
%start paren_class
%start indented_block

;; Alternate entry points referenced by EXPANDFULL clauses.
%start function_parameters
%start paren_classes
%start indented_block_body

;; -------------------------------
;; Misc. Python specific terminals
;; -------------------------------
;; The values of the tokens declared next are for documentation only;
;; they are not used by the lexer.
;; Every %type/%token declaration carries an explicit <type> tag naming
;; its lexical class.  The `;;syntax ...' text following a %type is a
;; comment kept for documentation; it is not an active property.

%token <charquote>   BACKSLASH    "\\"
%token <newline>     NEWLINE      "\n"
%token <indentation> INDENT       "^\\s-+"
%token <indentation> DEDENT       "[^:INDENT:]"
%token <indentation> INDENT_BLOCK "(INDENT DEDENT)"

;; -----------------------------
;; Block & Parenthesis terminals
;; -----------------------------
%type  <block>       ;;syntax "\\s(\\|\\s)" matchdatatype block

%token <block>       PAREN_BLOCK "(LPAREN RPAREN)"
%token <block>       BRACE_BLOCK "(LBRACE RBRACE)"
%token <block>       BRACK_BLOCK "(LBRACK RBRACK)"

%token <open-paren>  LPAREN      "("
%token <close-paren> RPAREN      ")"
%token <open-paren>  LBRACE      "{"
%token <close-paren> RBRACE      "}"
%token <open-paren>  LBRACK      "["
%token <close-paren> RBRACK      "]"

;; ------------------
;; Operator terminals
;; ------------------
;; Longer operators are listed before their prefixes ("<<=" before "<<"
;; before "<"); keep that order when adding new ones.
%type  <punctuation> ;;syntax "\\(\\s.\\|\\s$\\|\\s'\\)+" matchdatatype string

%token <punctuation> LTLTEQ      "<<="
%token <punctuation> GTGTEQ      ">>="
%token <punctuation> EXPEQ       "**="
%token <punctuation> DIVDIVEQ    "//="
%token <punctuation> DIVDIV      "//"
%token <punctuation> LTLT        "<<"
%token <punctuation> GTGT        ">>"
%token <punctuation> EXPONENT    "**"
%token <punctuation> EQ          "=="
%token <punctuation> GE          ">="
%token <punctuation> LE          "<="
%token <punctuation> PLUSEQ      "+="
%token <punctuation> MINUSEQ     "-="
%token <punctuation> MULTEQ      "*="
%token <punctuation> DIVEQ       "/="
%token <punctuation> MODEQ       "%="
%token <punctuation> AMPEQ       "&="
%token <punctuation> OREQ        "|="
%token <punctuation> HATEQ       "^="
%token <punctuation> LTGT        "<>"
%token <punctuation> NE          "!="
%token <punctuation> HAT         "^"
%token <punctuation> LT          "<"
%token <punctuation> GT          ">"
%token <punctuation> AMP         "&"
%token <punctuation> MULT        "*"
%token <punctuation> DIV         "/"
%token <punctuation> MOD         "%"
%token <punctuation> PLUS        "+"
%token <punctuation> MINUS       "-"
%token <punctuation> PERIOD      "."
%token <punctuation> TILDE       "~"
%token <punctuation> BAR         "|"
%token <punctuation> COLON       ":"
%token <punctuation> SEMICOLON   ";"
%token <punctuation> COMMA       ","
%token <punctuation> ASSIGN      "="
%token <punctuation> BACKQUOTE   "`"

;; -----------------
;; Literal terminals
;; -----------------
%token <string>      STRING_LITERAL

%type  <number>      ;;syntax semantic-lex-number-expression
%token <number>      NUMBER_LITERAL

%type  <symbol>      ;;syntax "\\(\\sw\\|\\s_\\)+"
%token <symbol>      NAME

;; -----------------
;; Keyword terminals
;; -----------------
;; Each keyword is followed by a %put that attaches a one-line `summary'
;; property to that same keyword token; the token name given to %put
;; must match the %keyword it documents.
%type  <keyword>     ;;syntax "\\(\\sw\\|\\s_\\)+" matchdatatype keyword

%keyword AND         "and"
%put     AND summary
"Logical AND binary operator ... "

%keyword ASSERT      "assert"
%put     ASSERT summary
"Raise AssertionError exception if <expr> is false"

%keyword BREAK       "break"
%put     BREAK summary
"Terminate 'for' or 'while' loop"

%keyword CLASS       "class"
%put     CLASS summary
"Define a new class"

%keyword CONTINUE    "continue"
%put     CONTINUE summary
"Skip to the next iteration of enclosing for or while loop"

%keyword DEF         "def"
%put     DEF summary
"Define a new function"

%keyword DEL         "del"
%put     DEL summary
"Delete specified objects, i.e., undo what assignment did"

%keyword ELIF        "elif"
%put     ELIF summary
"Shorthand for 'else if' following an 'if' statement"

%keyword ELSE        "else"
%put     ELSE summary
"Start the 'else' clause following an 'if' statement"

%keyword EXCEPT      "except"
%put     EXCEPT summary
"Specify exception handlers along with 'try' keyword"

%keyword EXEC        "exec"
%put     EXEC summary
"Dynamically execute python code"

%keyword FINALLY     "finally"
%put     FINALLY summary
"Specify code to be executed after 'try' statements whether or not an exception occurred"

%keyword FOR         "for"
%put     FOR summary
"Start a 'for' loop"

%keyword FROM        "from"
%put     FROM summary
"Modify behavior of 'import' statement"

%keyword GLOBAL      "global"
%put     GLOBAL summary
"Declare one or more symbols as global symbols"

%keyword IF          "if"
%put     IF summary
"Start 'if' conditional statement"

%keyword IMPORT      "import"
%put     IMPORT summary
"Load specified modules"

%keyword IN          "in"
%put     IN summary
"Part of 'for' statement "

%keyword IS          "is"
%put     IS summary
"Binary operator that tests for object identity"

;; The summary must be attached to LAMBDA, the token declared on the
;; %keyword line (a misspelled name here would leave LAMBDA undocumented).
%keyword LAMBDA      "lambda"
%put     LAMBDA summary
"Create anonymous function"

%keyword NOT         "not"
%put     NOT summary
"Unary boolean negation operator"

%keyword OR          "or"
%put     OR summary
"Binary logical 'or' operator"

%keyword PASS        "pass"
%put     PASS summary
"Statement that does nothing"

%keyword PRINT       "print"
%put     PRINT summary
"Print each argument to standard output"

%keyword RAISE       "raise"
%put     RAISE summary
"Raise an exception"

%keyword RETURN      "return"
%put     RETURN summary
"Return from a function"
%keyword TRY         "try"
%put     TRY summary
"Start of statements protected by exception handlers"

%keyword WHILE       "while"
%put     WHILE summary
"Start a 'while' loop"

%keyword YIELD       "yield"
%put     YIELD summary
"Create a generator function"

%%

;;;****************************************************************************
;;;@ goal
;;;****************************************************************************

;; A top-level parse unit is a blank line or one statement.
;;   simple_stmt   -- statements that do not involve INDENT tokens
;;   compound_stmt -- statements that involve INDENT tokens
goal
  : NEWLINE
  | simple_stmt
  | compound_stmt
  ;

;;;****************************************************************************
;;;@ simple_stmt
;;;****************************************************************************

;; simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
simple_stmt
  : small_stmt_list semicolon_opt NEWLINE
  ;

;; small_stmt (';' small_stmt)*
small_stmt_list
  : small_stmt
  | small_stmt_list SEMICOLON small_stmt
  ;

small_stmt
  : expr_stmt
  | print_stmt
  | del_stmt
  | pass_stmt
  | flow_stmt
  | import_stmt
  | global_stmt
  | exec_stmt
  | assert_stmt
  ;

;;;============================================================================
;;;@@ print_stmt
;;;============================================================================

;; print_stmt: 'print' [ test (',' test)* [','] ]
;;           | '>>' test [ (',' test)+ [','] ]
;; Tagged as code named after the keyword itself.
print_stmt
  : PRINT print_stmt_trailer
    (CODE-TAG $1 nil)
  ;

;; [ test (',' test)* [','] ] | '>>' test [ (',' test)+ [','] ]
print_stmt_trailer
  : test_list_opt
    ()
  | GTGT test trailing_test_list_with_opt_comma_opt
    ()
  ;

;; [ (',' test)+ [','] ]
trailing_test_list_with_opt_comma_opt
  : ;;EMPTY
  | trailing_test_list comma_opt
    ()
  ;

;; (',' test)+
trailing_test_list
  : COMMA test
    ()
  | trailing_test_list COMMA test
    ()
  ;

;;;============================================================================
;;;@@ expr_stmt
;;;============================================================================

;; expr_stmt: testlist (augassign testlist | ('=' testlist)*)
expr_stmt
  : testlist expr_stmt_trailer
    (if (and $2 (stringp $1) (string-match "^\\(\\sw\\|\\s_\\)+$" $1))
        ;; An assignment (non-empty trailer) whose left side is a plain
        ;; symbol yields a 'variable tag; anything else is a 'code tag.
        (VARIABLE-TAG $1 nil nil)
      (CODE-TAG $1 nil))
  ;

;; (augassign testlist | ('=' testlist)*)
;; May match nothing, because eq_testlist_zom may be EMPTY.
expr_stmt_trailer
  : augassign testlist
  | eq_testlist_zom
  ;

;; ('=' testlist)*
;; May match nothing.
eq_testlist_zom
  : ;;EMPTY
  | eq_testlist_zom ASSIGN testlist
    (identity $3)
  ;

;; augassign: '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^='
;;          | '<<=' | '>>=' | '**=' | '//='
augassign
  : PLUSEQ
  | MINUSEQ
  | MULTEQ
  | DIVEQ
  | MODEQ
  | AMPEQ
  | OREQ
  | HATEQ
  | LTLTEQ
  | GTGTEQ
  | EXPEQ
  | DIVDIVEQ
  ;

;;;============================================================================
;;;@@ del_stmt
;;;============================================================================

;; del_stmt: 'del' exprlist
del_stmt
  : DEL exprlist
    (CODE-TAG $1 nil)
  ;

;; exprlist: expr (',' expr)* [',']
exprlist
  : expr_list comma_opt
    ()
  ;

;; expr (',' expr)*
expr_list
  : expr
    ()
  | expr_list COMMA expr
    ()
  ;

;;;============================================================================
;;;@@ pass_stmt
;;;============================================================================

;; pass_stmt: 'pass'
pass_stmt
  : PASS
    (CODE-TAG $1 nil)
  ;

;;;============================================================================
;;;@@ flow_stmt
;;;============================================================================

flow_stmt
  : break_stmt
  | continue_stmt
  | return_stmt
  | raise_stmt
  | yield_stmt
  ;

;; break_stmt: 'break'
break_stmt
  : BREAK
    (CODE-TAG $1 nil)
  ;

;; continue_stmt: 'continue'
continue_stmt
  : CONTINUE
    (CODE-TAG $1 nil)
  ;

;; return_stmt: 'return' [testlist]
return_stmt
  : RETURN testlist_opt
    (CODE-TAG $1 nil)
  ;

;; [testlist]
testlist_opt
  : ;;EMPTY
  | testlist
    ()
  ;

;; yield_stmt: 'yield' testlist
yield_stmt
  : YIELD testlist
    (CODE-TAG $1 nil)
  ;

;; raise_stmt: 'raise' [test [',' test [',' test]]]
raise_stmt
  : RAISE zero_one_two_or_three_tests
    (CODE-TAG $1 nil)
  ;

;; [test [',' test [',' test]]]
zero_one_two_or_three_tests
  : ;;EMPTY
  | test zero_one_or_two_tests
    ()
  ;

;; [',' test [',' test]]
zero_one_or_two_tests
  : ;;EMPTY
  | COMMA test zero_or_one_comma_test
    ()
  ;

;; [',' test]
zero_or_one_comma_test
  : ;;EMPTY
  | COMMA test
    ()
  ;

;;;============================================================================
;;;@@ import_stmt
;;;============================================================================

;; import_stmt : 'import' dotted_as_name (',' dotted_as_name)*
;;             | 'from' dotted_name 'import'
;;               ('*' | import_as_name (',' import_as_name)*)
;; Both forms produce an include tag built from $2.
import_stmt
  : IMPORT dotted_as_name_list
    (INCLUDE-TAG $2 nil)
  | FROM dotted_name IMPORT star_or_import_as_name_list
    (INCLUDE-TAG $2 nil)
  ;

;; dotted_as_name (',' dotted_as_name)*
dotted_as_name_list
  : dotted_as_name
  | dotted_as_name_list COMMA dotted_as_name
  ;

;; ('*' | import_as_name (',' import_as_name)*)
star_or_import_as_name_list
  : MULT
    ()
  | import_as_name_list
    ()
  ;

;; import_as_name (',' import_as_name)*
import_as_name_list
  : import_as_name
    ()
  | import_as_name_list COMMA import_as_name
    ()
  ;

;; import_as_name: NAME [NAME NAME]
import_as_name
  : NAME name_name_opt
    ()
  ;

;; dotted_as_name: dotted_name [NAME NAME]
dotted_as_name
  : dotted_name name_name_opt
  ;

;; [NAME NAME]
name_name_opt
  : ;;EMPTY
  | NAME NAME
    ()
  ;

;; dotted_name: NAME ('.' NAME)*
;; The value is the components joined back together with "."
dotted_name
  : NAME
  | dotted_name PERIOD NAME
    (format "%s.%s" $1 $3)
  ;

;;;============================================================================
;;;@@ global_stmt
;;;============================================================================

;; global_stmt: 'global' NAME (',' NAME)*
global_stmt
  : GLOBAL comma_sep_name_list
    (CODE-TAG $1 nil)
  ;

;; NAME (',' NAME)*
comma_sep_name_list
  : NAME
  | comma_sep_name_list COMMA NAME
  ;

;;;============================================================================
;;;@@ exec_stmt
;;;============================================================================

;; exec_stmt: 'exec' expr ['in' test [',' test]]
exec_stmt
  : EXEC expr exec_trailer
    (CODE-TAG $1 nil)
  ;

;; ['in' test [',' test]]
exec_trailer
  : ;;EMPTY
  | IN test comma_test_opt
    ()
  ;

;; [',' test]
comma_test_opt
  : ;;EMPTY
  | COMMA test
    ()
  ;

;;;============================================================================
;;;@@ assert_stmt
;;;============================================================================

;; assert_stmt: 'assert' test [',' test]
assert_stmt
  : ASSERT test comma_test_opt
    (CODE-TAG $1 nil)
  ;

;;;****************************************************************************
;;;@ compound_stmt
;;;****************************************************************************

compound_stmt
  : if_stmt
  | while_stmt
  | for_stmt
  | try_stmt
  | funcdef
  | class_declaration
  ;

;;;============================================================================
;;;@@ if_stmt
;;;============================================================================

;; if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
if_stmt
  : IF test COLON suite elif_suite_pair_list else_suite_pair_opt
    (CODE-TAG $1 nil)
  ;

;; ('elif' test ':' suite)*
elif_suite_pair_list
  : ;;EMPTY
  | elif_suite_pair_list ELIF test COLON suite
    ()
  ;

;; ['else' ':' suite]
else_suite_pair_opt
  : ;;EMPTY
  | ELSE COLON suite
    ()
  ;

;; This NT follows the COLON token for most compound statements.
;; suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
;; A one-line suite is wrapped in a list; an indented suite takes the
;; value produced by expanding its block.
suite
  : simple_stmt
    (list $1)
  | NEWLINE indented_block
    (progn $2)
  ;

;; The whole indented region arrives as a single INDENT_BLOCK token and
;; is re-parsed piece by piece with indented_block_body.
indented_block
  : INDENT_BLOCK
    (EXPANDFULL $1 indented_block_body)
  ;

;; One item inside an indented block.  The INDENT and DEDENT delimiters
;; themselves produce no value.
indented_block_body
  : INDENT
    ()
  | DEDENT
    ()
  | simple_stmt
  | compound_stmt
  ;

;;;============================================================================
;;;@@ while_stmt
;;;============================================================================

;; while_stmt: 'while' test ':' suite ['else' ':' suite]
while_stmt
  : WHILE test COLON suite else_suite_pair_opt
    (CODE-TAG $1 nil)
  ;

;;;============================================================================
;;;@@ for_stmt
;;;============================================================================

;; for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
for_stmt
  : FOR exprlist IN testlist COLON suite else_suite_pair_opt
    (CODE-TAG $1 nil)
  ;

;;;============================================================================
;;;@@ try_stmt
;;;============================================================================

;; try_stmt: ('try' ':' suite (except_clause ':' suite)+ #diagram:break
;;            ['else' ':' suite] | 'try' ':' suite 'finally' ':' suite)
try_stmt
  : TRY COLON suite except_clause_suite_pair_list else_suite_pair_opt
    (CODE-TAG $1 nil)
  | TRY COLON suite FINALLY COLON suite
    (CODE-TAG $1 nil)
  ;

;; (except_clause ':' suite)+
except_clause_suite_pair_list
  : except_clause COLON suite
    ()
  | except_clause_suite_pair_list except_clause COLON suite
    ()
  ;

;; # NB compile.c makes sure that the default except clause is last
;; except_clause: 'except' [test [',' test]]
except_clause
  : EXCEPT zero_one_or_two_test
    ()
  ;

;; [test [',' test]]
zero_one_or_two_test
  : ;;EMPTY
  | test zero_or_one_comma_test
    ()
  ;

;;;============================================================================
;;;@@ funcdef
;;;============================================================================

;; funcdef: 'def' NAME parameters ':' suite
;; Produces a function tag named $2 whose arguments are the expanded
;; parameter list $3.
funcdef
  : DEF NAME function_parameter_list COLON suite
    (FUNCTION-TAG $2 nil $3)
  ;

;; The parenthesized parameter list arrives as one PAREN_BLOCK token;
;; it is expanded with wisent-python-EXPANDING-block bound to t.
function_parameter_list
  : PAREN_BLOCK
    (let ((wisent-python-EXPANDING-block t))
      (EXPANDFULL $1 function_parameters))
  ;

;; parameters: '(' [varargslist] ')'
;; One step of the expansion: either a bare delimiter (no value) or a
;; single parameter followed by its ',' or the closing ')'.
function_parameters
  : LPAREN
    ()
  | RPAREN
    ()
  | function_parameter COMMA
  | function_parameter RPAREN
  ;

function_parameter
  : fpdef_opt_test
 ;;  : NAME
 ;;    (VARIABLE-TAG $1 nil nil)
  | MULT NAME
    (VARIABLE-TAG $2 nil nil)
  | EXPONENT NAME
    (VARIABLE-TAG $2 nil nil)
  ;

;;;============================================================================
;;;@@ class_declaration
;;;============================================================================

;; classdef: 'class' NAME ['(' testlist ')'] ':' suite
class_declaration
  : CLASS NAME paren_class_list_opt COLON suite
    (TYPE-TAG $2 $1             ;; Name "class"
              $5                ;; Members
              (cons $3 nil)     ;; (SUPERCLASSES . INTERFACES)
              )
  ;

;; ['(' testlist ')']
paren_class_list_opt
  : ;;EMPTY
  | paren_class_list
  ;

;; The base-class list arrives as one PAREN_BLOCK token.  It is
;; expanded into tags and then reduced to the list of their names.
paren_class_list
  : PAREN_BLOCK
    (let ((wisent-python-EXPANDING-block t))
      (mapcar 'semantic-tag-name (EXPANDFULL $1 paren_classes)))
  ;

;; One step of expanding the base-class list: either a bare delimiter
;; (no value) or one base class followed by its ',' or the closing ')'.
paren_classes
  : LPAREN
    ()
  | RPAREN
    ()
  | paren_class COMMA
    (VARIABLE-TAG $1 nil nil)
  | paren_class RPAREN
    (VARIABLE-TAG $1 nil nil)
  ;

;; In general, the base class can be specified by a general expression
;; which evaluates to a class object, i.e., base classes are not just names!
;; However base classes are names in most cases.  Thus the
;; non-terminals below work only with simple names.  Even if the
;; parser can parse general expressions, I don't see much benefit in
;; generating a string of expression as base class "name".
paren_class
  : dotted_name
  ;

;;;****************************************************************************
;;;@ test
;;;****************************************************************************

;; test: and_test ('or' and_test)* | lambdef
test
  : test_test
  | lambdef
  ;

;; and_test ('or' and_test)*
test_test
  : and_test
  | test_test OR and_test
    ()
  ;

;; and_test: not_test ('and' not_test)*
and_test
  : not_test
  | and_test AND not_test
    ()
  ;

;; not_test: 'not' not_test | comparison
not_test
  : NOT not_test
    ()
  | comparison
  ;

;; comparison: expr (comp_op expr)*
comparison
  : expr
  | comparison comp_op expr
    ()
  ;

;; comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
comp_op
  : LT
  | GT
  | EQ
  | GE
  | LE
  | LTGT
  | NE
  | IN
  | NOT IN
  | IS
  | IS NOT
  ;

;; expr: xor_expr ('|' xor_expr)*
expr
  : xor_expr
  | expr BAR xor_expr
    ()
  ;

;; xor_expr: and_expr ('^' and_expr)*
xor_expr
  : and_expr
  | xor_expr HAT and_expr
    ()
  ;

;; and_expr: shift_expr ('&' shift_expr)*
and_expr
  : shift_expr
  | and_expr AMP shift_expr
    ()
  ;

;; shift_expr: arith_expr (('<<'|'>>') arith_expr)*
shift_expr
  : arith_expr
  | shift_expr shift_expr_operators arith_expr
    ()
  ;

;; ('<<'|'>>')
shift_expr_operators
  : LTLT
  | GTGT
  ;

;; arith_expr: term (('+'|'-') term)*
arith_expr
  : term
  | arith_expr plus_or_minus term
    ()
  ;

;; ('+'|'-')
plus_or_minus
  : PLUS
  | MINUS
  ;

;; term: factor (('*'|'/'|'%'|'//') factor)*
term
  : factor
  | term term_operator factor
    ()
  ;

term_operator
  : MULT
  | DIV
  | MOD
  | DIVDIV
  ;

;; factor: ('+'|'-'|'~') factor | power
factor
  : prefix_operators factor
    ()
  | power
  ;

;; ('+'|'-'|'~')
prefix_operators
  : PLUS
  | MINUS
  | TILDE
  ;

;; power: atom trailer* ('**' factor)*
;; The value is the atom's text, followed by the trailer and exponent
;; values (space separated) when those are non-nil.
power
  : atom trailer_zom exponent_zom
    (concat $1
            (if $2 (concat " " $2 " ") "")
            (if $3 (concat " " $3) "")
            )
  ;

;; trailer*
trailer_zom
  : ;;EMPTY
  | trailer_zom trailer
    ()
  ;

;; ('**' factor)*
exponent_zom
  : ;;EMPTY
  | exponent_zom EXPONENT factor
    ()
  ;

;; trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
trailer
  : PAREN_BLOCK
    ()
  | BRACK_BLOCK
    ()
  | PERIOD NAME
    ()
  ;

;; atom: '(' [testlist] ')' | '[' [listmaker] ']' | '{' [dictmaker] '}'
;;     | '`' testlist '`' | NAME | NUMBER | STRING+
atom
  : PAREN_BLOCK
    ()
  | BRACK_BLOCK
    ()
  | BRACE_BLOCK
    ()
  | BACKQUOTE testlist BACKQUOTE
    ()
  | NAME
  | NUMBER_LITERAL
  | one_or_more_string
  ;

;; [testlist]
test_list_opt
  : ;;EMPTY
  | testlist
    ()
  ;

;; testlist: test (',' test)* [',']
testlist
  : comma_sep_test_list comma_opt
  ;

;; test (',' test)*
;; The value is the element values joined with ", ".
comma_sep_test_list
  : test
  | comma_sep_test_list COMMA test
    (format "%s, %s" $1 $3)
  ;

;; (read $1) and (read $2) were done before to peel away the double quotes.
;; However that does not work for single quotes, so it was taken out.
one_or_more_string
  : STRING_LITERAL
  | one_or_more_string STRING_LITERAL
    (concat $1 $2)
  ;

;;;****************************************************************************
;;;@ lambdef
;;;****************************************************************************

;; lambdef: 'lambda' [varargslist] ':' test
lambdef
  : LAMBDA varargslist_opt COLON test
    (format "%s %s" $1 (or $2 ""))
  ;

;; [varargslist]
varargslist_opt
  : ;;EMPTY
  | varargslist
  ;

;; varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] | '**' NAME)
;;             | fpdef ['=' test] (',' fpdef ['=' test])* [',']
varargslist
  : fpdef_opt_test_list_comma_zom rest_args
    (nconc $2 $1)
  | fpdef_opt_test_list comma_opt
  ;

;; ('*' NAME [',' '**' NAME] | '**' NAME)
rest_args
  : MULT NAME multmult_name_opt
    () ;;(VARIABLE-TAG $2 nil nil)
  | EXPONENT NAME
    () ;;(VARIABLE-TAG $2 nil nil)
  ;

;; [',' '**' NAME]
multmult_name_opt
  : ;;EMPTY
  | COMMA EXPONENT NAME
    (VARIABLE-TAG $3 nil nil)
  ;

;; (fpdef ['=' test] ',')*
fpdef_opt_test_list_comma_zom
  : ;;EMPTY
  | fpdef_opt_test_list_comma_zom fpdef_opt_test COMMA
    (nconc $2 $1)
  ;

;; fpdef ['=' test] (',' fpdef ['=' test])*
fpdef_opt_test_list
  : fpdef_opt_test
  | fpdef_opt_test_list COMMA fpdef_opt_test
    (nconc $3 $1)
  ;

;; fpdef ['=' test]
fpdef_opt_test
  : fpdef eq_test_opt
  ;

;; fpdef: NAME | '(' fplist ')'
fpdef
  : NAME
    (VARIABLE-TAG $1 nil nil)
 ;; Below breaks the parser.  Don't know why, but my guess is that
 ;; LPAREN/RPAREN clashes with the ones in function_parameters.
 ;;  | LPAREN fplist RPAREN
 ;;    (identity $2)
  ;

;; fplist: fpdef (',' fpdef)* [',']
fplist
  : fpdef_list comma_opt
  ;

;; fpdef (',' fpdef)*
fpdef_list
  : fpdef
  | fpdef_list COMMA fpdef
  ;

;; ['=' test]
eq_test_opt
  : ;;EMPTY
  | ASSIGN test
    ()
  ;

;;;****************************************************************************
;;;@ Misc
;;;****************************************************************************

;; [',']
comma_opt
  : ;;EMPTY
  | COMMA
  ;

;; [';']
semicolon_opt
  : ;;EMPTY
  | SEMICOLON
  ;

;;; wisent-python.wy ends here