add read me
This commit is contained in:
@@ -0,0 +1,2 @@
|
||||
from .latex_parser import parse_latex_lark, LarkLaTeXParser # noqa
|
||||
from .transformer import TransformToSymPyExpr # noqa
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,28 @@
|
||||
// Greek symbols
|
||||
// TODO: Should we include the uppercase variants for the symbols where the uppercase variant doesn't have a separate meaning?
|
||||
ALPHA: "\\alpha"
|
||||
BETA: "\\beta"
|
||||
GAMMA: "\\gamma"
|
||||
DELTA: "\\delta" // TODO: Should this be included? Delta usually denotes other things.
|
||||
EPSILON: "\\epsilon" | "\\varepsilon"
|
||||
ZETA: "\\zeta"
|
||||
ETA: "\\eta"
|
||||
THETA: "\\theta" | "\\vartheta"
|
||||
// TODO: Should I add iota to the list?
|
||||
KAPPA: "\\kappa"
|
||||
LAMBDA: "\\lambda" // TODO: What about the uppercase variant?
|
||||
MU: "\\mu"
|
||||
NU: "\\nu"
|
||||
XI: "\\xi"
|
||||
// TODO: Should there be a separate note for transforming \pi into sympy.pi?
|
||||
RHO: "\\rho" | "\\varrho"
|
||||
// TODO: What should we do about sigma?
|
||||
TAU: "\\tau"
|
||||
UPSILON: "\\upsilon"
|
||||
PHI: "\\phi" | "\\varphi"
|
||||
CHI: "\\chi"
|
||||
PSI: "\\psi"
|
||||
OMEGA: "\\omega"
|
||||
|
||||
GREEK_SYMBOL: ALPHA | BETA | GAMMA | DELTA | EPSILON | ZETA | ETA | THETA | KAPPA
|
||||
| LAMBDA | MU | NU | XI | RHO | TAU | UPSILON | PHI | CHI | PSI | OMEGA
|
||||
@@ -0,0 +1,403 @@
|
||||
%ignore /[ \t\n\r]+/
|
||||
|
||||
%ignore "\\," | "\\thinspace" | "\\:" | "\\medspace" | "\\;" | "\\thickspace"
|
||||
%ignore "\\quad" | "\\qquad"
|
||||
%ignore "\\!" | "\\negthinspace" | "\\negmedspace" | "\\negthickspace"
|
||||
%ignore "\\vrule" | "\\vcenter" | "\\vbox" | "\\vskip" | "\\vspace" | "\\hfill"
|
||||
%ignore "\\*" | "\\-" | "\\." | "\\/" | "\\(" | "\\="
|
||||
|
||||
%ignore "\\left" | "\\right"
|
||||
%ignore "\\limits" | "\\nolimits"
|
||||
%ignore "\\displaystyle"
|
||||
|
||||
///////////////////// tokens ///////////////////////
|
||||
|
||||
// basic binary operators
|
||||
ADD: "+"
|
||||
SUB: "-"
|
||||
MUL: "*"
|
||||
DIV: "/"
|
||||
|
||||
// tokens with distinct left and right symbols
|
||||
L_BRACE: "{"
|
||||
R_BRACE: "}"
|
||||
L_BRACE_LITERAL: "\\{"
|
||||
R_BRACE_LITERAL: "\\}"
|
||||
L_BRACKET: "["
|
||||
R_BRACKET: "]"
|
||||
L_CEIL: "\\lceil"
|
||||
R_CEIL: "\\rceil"
|
||||
L_FLOOR: "\\lfloor"
|
||||
R_FLOOR: "\\rfloor"
|
||||
L_PAREN: "("
|
||||
R_PAREN: ")"
|
||||
|
||||
// limit, integral, sum, and product symbols
|
||||
FUNC_LIM: "\\lim"
|
||||
LIM_APPROACH_SYM: "\\to" | "\\rightarrow" | "\\Rightarrow" | "\\longrightarrow" | "\\Longrightarrow"
|
||||
FUNC_INT: "\\int" | "\\intop"
|
||||
FUNC_SUM: "\\sum"
|
||||
FUNC_PROD: "\\prod"
|
||||
|
||||
// common functions
|
||||
FUNC_EXP: "\\exp"
|
||||
FUNC_LOG: "\\log"
|
||||
FUNC_LN: "\\ln"
|
||||
FUNC_LG: "\\lg"
|
||||
FUNC_MIN: "\\min"
|
||||
FUNC_MAX: "\\max"
|
||||
|
||||
// trigonometric functions
|
||||
FUNC_SIN: "\\sin"
|
||||
FUNC_COS: "\\cos"
|
||||
FUNC_TAN: "\\tan"
|
||||
FUNC_CSC: "\\csc"
|
||||
FUNC_SEC: "\\sec"
|
||||
FUNC_COT: "\\cot"
|
||||
|
||||
// inverse trigonometric functions
|
||||
FUNC_ARCSIN: "\\arcsin"
|
||||
FUNC_ARCCOS: "\\arccos"
|
||||
FUNC_ARCTAN: "\\arctan"
|
||||
FUNC_ARCCSC: "\\arccsc"
|
||||
FUNC_ARCSEC: "\\arcsec"
|
||||
FUNC_ARCCOT: "\\arccot"
|
||||
|
||||
// hyperbolic trigonometric functions
|
||||
FUNC_SINH: "\\sinh"
|
||||
FUNC_COSH: "\\cosh"
|
||||
FUNC_TANH: "\\tanh"
|
||||
FUNC_ARSINH: "\\arsinh"
|
||||
FUNC_ARCOSH: "\\arcosh"
|
||||
FUNC_ARTANH: "\\artanh"
|
||||
|
||||
FUNC_SQRT: "\\sqrt"
|
||||
|
||||
// miscellaneous symbols
|
||||
CMD_TIMES: "\\times"
|
||||
CMD_CDOT: "\\cdot"
|
||||
CMD_DIV: "\\div"
|
||||
CMD_FRAC: "\\frac" | "\\dfrac" | "\\tfrac" | "\\nicefrac"
|
||||
CMD_BINOM: "\\binom" | "\\dbinom" | "\\tbinom"
|
||||
CMD_OVERLINE: "\\overline"
|
||||
CMD_LANGLE: "\\langle"
|
||||
CMD_RANGLE: "\\rangle"
|
||||
|
||||
CMD_MATHIT: "\\mathit"
|
||||
|
||||
CMD_INFTY: "\\infty"
|
||||
|
||||
BANG: "!"
|
||||
BAR: "|"
|
||||
CARET: "^"
|
||||
COLON: ":"
|
||||
UNDERSCORE: "_"
|
||||
|
||||
// relational symbols
|
||||
EQUAL: "="
|
||||
NOT_EQUAL: "\\neq" | "\\ne"
|
||||
LT: "<"
|
||||
LTE: "\\leq" | "\\le" | "\\leqslant"
|
||||
GT: ">"
|
||||
GTE: "\\geq" | "\\ge" | "\\geqslant"
|
||||
|
||||
DIV_SYMBOL: CMD_DIV | DIV
|
||||
MUL_SYMBOL: MUL | CMD_TIMES | CMD_CDOT
|
||||
|
||||
%import .greek_symbols.GREEK_SYMBOL
|
||||
|
||||
UPRIGHT_DIFFERENTIAL_SYMBOL: "\\text{d}" | "\\mathrm{d}"
|
||||
DIFFERENTIAL_SYMBOL: "d" | UPRIGHT_DIFFERENTIAL_SYMBOL
|
||||
|
||||
// disallow "d" as a variable name because we want to parse "d" as a differential symbol.
|
||||
SYMBOL: /[a-zA-Z]'*/
|
||||
GREEK_SYMBOL_WITH_PRIMES: GREEK_SYMBOL "'"*
|
||||
LATIN_SYMBOL_WITH_LATIN_SUBSCRIPT: /([a-zA-Z]'*)_(([A-Za-z0-9]|[a-zA-Z]+)|\{([A-Za-z0-9]|[a-zA-Z]+'*)\})/
|
||||
LATIN_SYMBOL_WITH_GREEK_SUBSCRIPT: /([a-zA-Z]'*)_/ GREEK_SYMBOL | /([a-zA-Z]'*)_/ L_BRACE GREEK_SYMBOL_WITH_PRIMES R_BRACE
|
||||
// best to define the variant with braces like that instead of shoving it all into one case like in
|
||||
// /([a-zA-Z])_/ L_BRACE? GREEK_SYMBOL R_BRACE? because then we can easily error out on input like
|
||||
// r"h_{\theta"
|
||||
GREEK_SYMBOL_WITH_LATIN_SUBSCRIPT: GREEK_SYMBOL_WITH_PRIMES /_(([A-Za-z0-9]|[a-zA-Z]+)|\{([A-Za-z0-9]|[a-zA-Z]+'*)\})/
|
||||
GREEK_SYMBOL_WITH_GREEK_SUBSCRIPT: GREEK_SYMBOL_WITH_PRIMES /_/ (GREEK_SYMBOL | L_BRACE GREEK_SYMBOL_WITH_PRIMES R_BRACE)
|
||||
MULTI_LETTER_SYMBOL: /[a-zA-Z]+(\s+[a-zA-Z]+)*'*/
|
||||
|
||||
%import common.DIGIT -> DIGIT
|
||||
|
||||
CMD_PRIME: "\\prime"
|
||||
CMD_ASTERISK: "\\ast"
|
||||
|
||||
PRIMES: "'"+
|
||||
STARS: "*"+
|
||||
PRIMES_VIA_CMD: CMD_PRIME+
|
||||
STARS_VIA_CMD: CMD_ASTERISK+
|
||||
|
||||
CMD_IMAGINARY_UNIT: "\\imaginaryunit"
|
||||
|
||||
CMD_BEGIN: "\\begin"
|
||||
CMD_END: "\\end"
|
||||
|
||||
// matrices
|
||||
IGNORE_L: /[ \t\n\r]*/ L_BRACE* /[ \t\n\r]*/
|
||||
IGNORE_R: /[ \t\n\r]*/ R_BRACE* /[ \t\n\r]*/
|
||||
ARRAY_MATRIX_BEGIN: L_BRACE "array" R_BRACE L_BRACE /[^}]*/ R_BRACE
|
||||
ARRAY_MATRIX_END: L_BRACE "array" R_BRACE
|
||||
AMSMATH_MATRIX: L_BRACE "matrix" R_BRACE
|
||||
AMSMATH_PMATRIX: L_BRACE "pmatrix" R_BRACE
|
||||
AMSMATH_BMATRIX: L_BRACE "bmatrix" R_BRACE
|
||||
// Without the (L|R)_PARENs and (L|R)_BRACKETs, a matrix defined using
|
||||
// \begin{array}...\end{array} or \begin{matrix}...\end{matrix} must
|
||||
// not qualify as a complete matrix expression; this is done so that
|
||||
// if we have \begin{array}...\end{array} or \begin{matrix}...\end{matrix}
|
||||
// between BAR pairs, then they should be interpreted as determinants as
|
||||
// opposed to sympy.Abs (absolute value) applied to a matrix.
|
||||
CMD_BEGIN_AMSPMATRIX_AMSBMATRIX: CMD_BEGIN (AMSMATH_PMATRIX | AMSMATH_BMATRIX)
|
||||
CMD_BEGIN_ARRAY_AMSMATRIX: (L_PAREN | L_BRACKET) IGNORE_L CMD_BEGIN (ARRAY_MATRIX_BEGIN | AMSMATH_MATRIX)
|
||||
CMD_MATRIX_BEGIN: CMD_BEGIN_AMSPMATRIX_AMSBMATRIX | CMD_BEGIN_ARRAY_AMSMATRIX
|
||||
CMD_END_AMSPMATRIX_AMSBMATRIX: CMD_END (AMSMATH_PMATRIX | AMSMATH_BMATRIX)
|
||||
CMD_END_ARRAY_AMSMATRIX: CMD_END (ARRAY_MATRIX_END | AMSMATH_MATRIX) IGNORE_R "\\right"? (R_PAREN | R_BRACKET)
|
||||
CMD_MATRIX_END: CMD_END_AMSPMATRIX_AMSBMATRIX | CMD_END_ARRAY_AMSMATRIX
|
||||
MATRIX_COL_DELIM: "&"
|
||||
MATRIX_ROW_DELIM: "\\\\"
|
||||
FUNC_MATRIX_TRACE: "\\trace"
|
||||
FUNC_MATRIX_ADJUGATE: "\\adjugate"
|
||||
|
||||
// determinants
|
||||
AMSMATH_VMATRIX: L_BRACE "vmatrix" R_BRACE
|
||||
CMD_DETERMINANT_BEGIN_SIMPLE: CMD_BEGIN AMSMATH_VMATRIX
|
||||
CMD_DETERMINANT_BEGIN_VARIANT: BAR IGNORE_L CMD_BEGIN (ARRAY_MATRIX_BEGIN | AMSMATH_MATRIX)
|
||||
CMD_DETERMINANT_BEGIN: CMD_DETERMINANT_BEGIN_SIMPLE | CMD_DETERMINANT_BEGIN_VARIANT
|
||||
CMD_DETERMINANT_END_SIMPLE: CMD_END AMSMATH_VMATRIX
|
||||
CMD_DETERMINANT_END_VARIANT: CMD_END (ARRAY_MATRIX_END | AMSMATH_MATRIX) IGNORE_R "\\right"? BAR
|
||||
CMD_DETERMINANT_END: CMD_DETERMINANT_END_SIMPLE | CMD_DETERMINANT_END_VARIANT
|
||||
FUNC_DETERMINANT: "\\det"
|
||||
|
||||
//////////////////// grammar //////////////////////
|
||||
|
||||
latex_string: _relation | _expression
|
||||
|
||||
_one_letter_symbol: SYMBOL
|
||||
| LATIN_SYMBOL_WITH_LATIN_SUBSCRIPT
|
||||
| LATIN_SYMBOL_WITH_GREEK_SUBSCRIPT
|
||||
| GREEK_SYMBOL_WITH_LATIN_SUBSCRIPT
|
||||
| GREEK_SYMBOL_WITH_GREEK_SUBSCRIPT
|
||||
| GREEK_SYMBOL_WITH_PRIMES
|
||||
// LuaTeX-generated outputs of \mathit{foo'} and \mathit{foo}'
|
||||
// seem to be the same on the surface. We allow both styles.
|
||||
multi_letter_symbol: CMD_MATHIT L_BRACE MULTI_LETTER_SYMBOL R_BRACE
|
||||
| CMD_MATHIT L_BRACE MULTI_LETTER_SYMBOL R_BRACE /'+/
|
||||
number: /\d+(\.\d*)?/ | CMD_IMAGINARY_UNIT
|
||||
|
||||
_atomic_expr: _one_letter_symbol
|
||||
| multi_letter_symbol
|
||||
| number
|
||||
| CMD_INFTY
|
||||
|
||||
group_round_parentheses: L_PAREN _expression R_PAREN
|
||||
group_square_brackets: L_BRACKET _expression R_BRACKET
|
||||
group_curly_parentheses: L_BRACE _expression R_BRACE
|
||||
|
||||
_relation: eq | ne | lt | lte | gt | gte
|
||||
|
||||
eq: _expression EQUAL _expression
|
||||
ne: _expression NOT_EQUAL _expression
|
||||
lt: _expression LT _expression
|
||||
lte: _expression LTE _expression
|
||||
gt: _expression GT _expression
|
||||
gte: _expression GTE _expression
|
||||
|
||||
_expression_core: _atomic_expr | group_curly_parentheses
|
||||
|
||||
add: _expression ADD _expression_mul
|
||||
| ADD _expression_mul
|
||||
sub: _expression SUB _expression_mul
|
||||
| SUB _expression_mul
|
||||
mul: _expression_mul MUL_SYMBOL _expression_power
|
||||
div: _expression_mul DIV_SYMBOL _expression_power
|
||||
|
||||
adjacent_expressions: (_one_letter_symbol | number) _expression_mul
|
||||
| group_round_parentheses (group_round_parentheses | _one_letter_symbol)
|
||||
| _function _function
|
||||
| fraction _expression_mul
|
||||
|
||||
_expression_func: _expression_core
|
||||
| group_round_parentheses
|
||||
| fraction
|
||||
| binomial
|
||||
| _function
|
||||
| _integral// | derivative
|
||||
| limit
|
||||
| matrix
|
||||
|
||||
_expression_power: _expression_func | superscript | matrix_prime | symbol_prime
|
||||
|
||||
_expression_mul: _expression_power
|
||||
| mul | div | adjacent_expressions
|
||||
| summation | product
|
||||
|
||||
_expression: _expression_mul | add | sub
|
||||
|
||||
_limit_dir: "+" | "-" | L_BRACE ("+" | "-") R_BRACE
|
||||
|
||||
limit_dir_expr: _expression CARET _limit_dir
|
||||
|
||||
group_curly_parentheses_lim: L_BRACE _expression LIM_APPROACH_SYM (limit_dir_expr | _expression) R_BRACE
|
||||
|
||||
limit: FUNC_LIM UNDERSCORE group_curly_parentheses_lim _expression
|
||||
|
||||
differential: DIFFERENTIAL_SYMBOL _one_letter_symbol
|
||||
|
||||
//_derivative_operator: CMD_FRAC L_BRACE DIFFERENTIAL_SYMBOL R_BRACE L_BRACE differential R_BRACE
|
||||
|
||||
//derivative: _derivative_operator _expression
|
||||
|
||||
_integral: normal_integral | integral_with_special_fraction
|
||||
|
||||
normal_integral: FUNC_INT _expression DIFFERENTIAL_SYMBOL _one_letter_symbol
|
||||
| FUNC_INT (CARET _expression_core UNDERSCORE _expression_core)? _expression? DIFFERENTIAL_SYMBOL _one_letter_symbol
|
||||
| FUNC_INT (UNDERSCORE _expression_core CARET _expression_core)? _expression? DIFFERENTIAL_SYMBOL _one_letter_symbol
|
||||
|
||||
group_curly_parentheses_int: L_BRACE _expression? differential R_BRACE
|
||||
|
||||
special_fraction: CMD_FRAC group_curly_parentheses_int group_curly_parentheses
|
||||
|
||||
integral_with_special_fraction: FUNC_INT special_fraction
|
||||
| FUNC_INT (CARET _expression_core UNDERSCORE _expression_core)? special_fraction
|
||||
| FUNC_INT (UNDERSCORE _expression_core CARET _expression_core)? special_fraction
|
||||
|
||||
group_curly_parentheses_special: UNDERSCORE L_BRACE _atomic_expr EQUAL _atomic_expr R_BRACE CARET _expression_core
|
||||
| CARET _expression_core UNDERSCORE L_BRACE _atomic_expr EQUAL _atomic_expr R_BRACE
|
||||
|
||||
// Summation with explicit bounds given in either order (subscript first or
// superscript first), followed by the summand expression.
// The original listed this same alternative twice; the duplicate is dropped.
summation: FUNC_SUM group_curly_parentheses_special _expression
|
||||
|
||||
// Product with explicit bounds given in either order (subscript first or
// superscript first), followed by the factor expression.
// The original listed this same alternative twice; the duplicate is dropped.
product: FUNC_PROD group_curly_parentheses_special _expression
|
||||
|
||||
superscript: _expression_func CARET (_expression_power | CMD_PRIME | CMD_ASTERISK)
|
||||
| _expression_func CARET L_BRACE (PRIMES | STARS | PRIMES_VIA_CMD | STARS_VIA_CMD) R_BRACE
|
||||
|
||||
matrix_prime: (matrix | group_round_parentheses) PRIMES
|
||||
|
||||
symbol_prime: (LATIN_SYMBOL_WITH_LATIN_SUBSCRIPT
|
||||
| LATIN_SYMBOL_WITH_GREEK_SUBSCRIPT
|
||||
| GREEK_SYMBOL_WITH_LATIN_SUBSCRIPT
|
||||
| GREEK_SYMBOL_WITH_GREEK_SUBSCRIPT) PRIMES
|
||||
|
||||
fraction: _basic_fraction
|
||||
| _simple_fraction
|
||||
| _general_fraction
|
||||
|
||||
_basic_fraction: CMD_FRAC DIGIT (DIGIT | SYMBOL | GREEK_SYMBOL_WITH_PRIMES)
|
||||
|
||||
_simple_fraction: CMD_FRAC DIGIT group_curly_parentheses
|
||||
| CMD_FRAC group_curly_parentheses (DIGIT | SYMBOL | GREEK_SYMBOL_WITH_PRIMES)
|
||||
|
||||
_general_fraction: CMD_FRAC group_curly_parentheses group_curly_parentheses
|
||||
|
||||
binomial: _basic_binomial
|
||||
| _simple_binomial
|
||||
| _general_binomial
|
||||
|
||||
_basic_binomial: CMD_BINOM DIGIT (DIGIT | SYMBOL | GREEK_SYMBOL_WITH_PRIMES)
|
||||
|
||||
_simple_binomial: CMD_BINOM DIGIT group_curly_parentheses
|
||||
| CMD_BINOM group_curly_parentheses (DIGIT | SYMBOL | GREEK_SYMBOL_WITH_PRIMES)
|
||||
|
||||
_general_binomial: CMD_BINOM group_curly_parentheses group_curly_parentheses
|
||||
|
||||
list_of_expressions: _expression ("," _expression)*
|
||||
|
||||
function_applied: _one_letter_symbol L_PAREN list_of_expressions R_PAREN
|
||||
|
||||
min: FUNC_MIN L_PAREN list_of_expressions R_PAREN
|
||||
|
||||
max: FUNC_MAX L_PAREN list_of_expressions R_PAREN
|
||||
|
||||
bra: CMD_LANGLE _expression BAR
|
||||
|
||||
ket: BAR _expression CMD_RANGLE
|
||||
|
||||
inner_product: CMD_LANGLE _expression BAR _expression CMD_RANGLE
|
||||
|
||||
_function: function_applied
|
||||
| abs | floor | ceil
|
||||
| _trigonometric_function | _inverse_trigonometric_function
|
||||
| _trigonometric_function_power
|
||||
| _hyperbolic_trigonometric_function | _inverse_hyperbolic_trigonometric_function
|
||||
| exponential
|
||||
| log
|
||||
| square_root
|
||||
| factorial
|
||||
| conjugate
|
||||
| max | min
|
||||
| bra | ket | inner_product
|
||||
| determinant
|
||||
| trace
|
||||
| adjugate
|
||||
|
||||
exponential: FUNC_EXP _expression
|
||||
|
||||
log: FUNC_LOG _expression
|
||||
| FUNC_LN _expression
|
||||
| FUNC_LG _expression
|
||||
| FUNC_LOG UNDERSCORE (DIGIT | _one_letter_symbol) _expression
|
||||
| FUNC_LOG UNDERSCORE group_curly_parentheses _expression
|
||||
|
||||
square_root: FUNC_SQRT group_curly_parentheses
|
||||
| FUNC_SQRT group_square_brackets group_curly_parentheses
|
||||
|
||||
factorial: _expression_func BANG
|
||||
|
||||
conjugate: CMD_OVERLINE group_curly_parentheses
|
||||
| CMD_OVERLINE DIGIT
|
||||
|
||||
_trigonometric_function: sin | cos | tan | csc | sec | cot
|
||||
|
||||
sin: FUNC_SIN _expression
|
||||
cos: FUNC_COS _expression
|
||||
tan: FUNC_TAN _expression
|
||||
csc: FUNC_CSC _expression
|
||||
sec: FUNC_SEC _expression
|
||||
cot: FUNC_COT _expression
|
||||
|
||||
_trigonometric_function_power: sin_power | cos_power | tan_power | csc_power | sec_power | cot_power
|
||||
|
||||
sin_power: FUNC_SIN CARET _expression_core _expression
|
||||
cos_power: FUNC_COS CARET _expression_core _expression
|
||||
tan_power: FUNC_TAN CARET _expression_core _expression
|
||||
csc_power: FUNC_CSC CARET _expression_core _expression
|
||||
sec_power: FUNC_SEC CARET _expression_core _expression
|
||||
cot_power: FUNC_COT CARET _expression_core _expression
|
||||
|
||||
_hyperbolic_trigonometric_function: sinh | cosh | tanh
|
||||
|
||||
sinh: FUNC_SINH _expression
|
||||
cosh: FUNC_COSH _expression
|
||||
tanh: FUNC_TANH _expression
|
||||
|
||||
_inverse_trigonometric_function: arcsin | arccos | arctan | arccsc | arcsec | arccot
|
||||
|
||||
arcsin: FUNC_ARCSIN _expression
|
||||
arccos: FUNC_ARCCOS _expression
|
||||
arctan: FUNC_ARCTAN _expression
|
||||
arccsc: FUNC_ARCCSC _expression
|
||||
arcsec: FUNC_ARCSEC _expression
|
||||
arccot: FUNC_ARCCOT _expression
|
||||
|
||||
_inverse_hyperbolic_trigonometric_function: asinh | acosh | atanh
|
||||
|
||||
asinh: FUNC_ARSINH _expression
|
||||
acosh: FUNC_ARCOSH _expression
|
||||
atanh: FUNC_ARTANH _expression
|
||||
|
||||
abs: BAR _expression BAR
|
||||
floor: L_FLOOR _expression R_FLOOR
|
||||
ceil: L_CEIL _expression R_CEIL
|
||||
|
||||
matrix: CMD_MATRIX_BEGIN matrix_body CMD_MATRIX_END
|
||||
matrix_body: matrix_row (MATRIX_ROW_DELIM matrix_row)* (MATRIX_ROW_DELIM)?
|
||||
matrix_row: _expression (MATRIX_COL_DELIM _expression)*
|
||||
determinant: (CMD_DETERMINANT_BEGIN matrix_body CMD_DETERMINANT_END)
|
||||
| FUNC_DETERMINANT _expression
|
||||
trace: FUNC_MATRIX_TRACE _expression
|
||||
adjugate: FUNC_MATRIX_ADJUGATE _expression
|
||||
@@ -0,0 +1,145 @@
|
||||
import os
|
||||
import logging
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
from sympy.external import import_module
|
||||
from sympy.parsing.latex.lark.transformer import TransformToSymPyExpr
|
||||
|
||||
_lark = import_module("lark")
|
||||
|
||||
|
||||
class LarkLaTeXParser:
    r"""Turn `\mathrm{\LaTeX}` strings into SymPy expressions with a Lark grammar.

    An instance owns a configured ``Lark`` parser and a transformer instance;
    :py:meth:`doparse` runs the input through both.

    Parameters
    ==========

    print_debug_output : bool, optional

        When ``True``, the Lark logger is switched to ``DEBUG`` level on every
        :py:meth:`doparse` call and the input, parse tree and resulting
        expression are logged. Defaults to ``False``.

    transform : bool, optional

        When ``True`` (the default), the parse tree produced by ``Lark.parse``
        is handed to the transformer and the transformed result is returned.

        When ``False``, the raw parse tree is returned instead, which helps when
        debugging the `\mathrm{\LaTeX}` grammar.

    grammar_file : str, optional

        Path of the grammar file to load. If ``None``, ``grammar/latex.lark``
        next to this module is used.

    transformer : class, optional

        The Transformer class to use; it is instantiated with no arguments. If
        ``None``, :py:class:`TransformToSymPyExpr` is used.

    """
    def __init__(self, print_debug_output=False, transform=True, grammar_file=None, transformer=None):
        grammar_dir_path = os.path.join(os.path.dirname(__file__), "grammar/")

        # Use the bundled grammar unless the caller pointed at another file.
        grammar_path = os.path.join(grammar_dir_path, "latex.lark") if grammar_file is None else grammar_file
        latex_grammar = Path(grammar_path).read_text(encoding="utf-8")

        lark_options = {
            # The grammar directory is passed so that relative %import
            # statements in the grammar can be resolved.
            "source_path": grammar_dir_path,
            "parser": "earley",
            "start": "latex_string",
            "lexer": "auto",
            "ambiguity": "explicit",
            "propagate_positions": False,
            "maybe_placeholders": False,
            "keep_all_tokens": True,
        }
        self.parser = _lark.Lark(latex_grammar, **lark_options)

        self.print_debug_output = print_debug_output
        self.transform_expr = transform

        transformer_cls = TransformToSymPyExpr if transformer is None else transformer
        self.transformer = transformer_cls()

    def doparse(self, s: str):
        """Parse ``s`` and return either the transformed result or the parse tree.

        The parse tree is returned untouched when the instance was created with
        ``transform=False``; otherwise the transformer's output is returned.
        """
        verbose = self.print_debug_output
        if verbose:
            _lark.logger.setLevel(logging.DEBUG)

        tree = self.parser.parse(s)

        if self.transform_expr:
            if verbose:
                # Log the input and tree before the transformer gets a chance to fail.
                _lark.logger.debug("expression = %s", s)
                _lark.logger.debug(tree.pretty())

            result = self.transformer.transform(tree)

            if verbose:
                _lark.logger.debug("SymPy expression = %s", result)

            return result

        # No transformation requested: emit the debug records and hand back the tree.
        _lark.logger.debug("expression = %s", s)
        _lark.logger.debug(tree)
        _lark.logger.debug(tree.pretty())
        return tree
|
||||
|
||||
|
||||
# The shared parser instance is only built when Lark could be imported.
if _lark is not None:
    _lark_latex_parser = LarkLaTeXParser()


def parse_latex_lark(s: str):
    """
    Experimental LaTeX parser using Lark.

    Delegates to the module-level ``LarkLaTeXParser`` instance and returns
    whatever its ``doparse`` method returns. Raises ``ImportError`` when Lark
    could not be imported.

    This function is still under development and its API may change with the
    next releases of SymPy.
    """
    if _lark is not None:
        return _lark_latex_parser.doparse(s)
    raise ImportError("Lark is probably not installed")
|
||||
|
||||
|
||||
def _pretty_print_lark_trees(tree, indent=0, show_expr=True):
    """Render a Lark tree (or token) as a compact, single-string representation.

    Tokens are rendered as their value. Tree nodes are rendered as
    ``name(child,child,...)``; a leading ``expression`` in the node name is
    shortened to ``E``. Children of ``_ambig`` nodes are placed on separate
    lines, indented two spaces deeper than ``indent``. When ``show_expr`` is
    false, nodes whose name starts with ``expression`` contribute only their
    children, without the ``name(...)`` wrapper.
    """
    if isinstance(tree, _lark.Token):
        return tree.value

    label = str(tree.data)

    is_expr = label.startswith("expression")
    if is_expr:
        label = re.sub(r"^expression", "E", label)

    is_ambig = label == "_ambig"
    child_indent = indent + 2 if is_ambig else indent

    rendered = [_pretty_print_lark_trees(child, child_indent, show_expr) for child in tree.children]

    if is_ambig:
        # one alternative per line, each indented
        body = "\n" + "\n".join([" " * child_indent + text for text in rendered])
    else:
        body = ",".join(rendered)

    if is_expr and not show_expr:
        return body
    return str(label) + "(" + body + ")"
|
||||
@@ -0,0 +1,730 @@
|
||||
import re
|
||||
|
||||
import sympy
|
||||
from sympy.external import import_module
|
||||
from sympy.parsing.latex.errors import LaTeXParsingError
|
||||
|
||||
lark = import_module("lark")
|
||||
|
||||
if not lark:
    # Lark is unavailable: provide minimal stand-ins so that this module can
    # still be imported and classes deriving from Transformer can be defined.
    class Transformer:  # type: ignore
        def transform(self, *args):
            return None

    class Token:  # type: ignore
        pass

    class Tree:  # type: ignore
        pass
else:
    from lark import Transformer, Token, Tree  # type: ignore
|
||||
|
||||
|
||||
# noinspection PyPep8Naming,PyMethodMayBeStatic
|
||||
class TransformToSymPyExpr(Transformer):
|
||||
"""Returns a SymPy expression that is generated by traversing the ``lark.Tree``
|
||||
passed to the ``.transform()`` function.
|
||||
|
||||
Notes
|
||||
=====
|
||||
|
||||
**This class is never supposed to be used directly.**
|
||||
|
||||
In order to tweak the behavior of this class, it has to be subclassed and then after
|
||||
the required modifications are made, the name of the new class should be passed to
|
||||
the :py:class:`LarkLaTeXParser` class by using the ``transformer`` argument in the
|
||||
constructor.
|
||||
|
||||
Parameters
|
||||
==========
|
||||
|
||||
visit_tokens : bool, optional
|
||||
For information about what this option does, see `here
|
||||
<https://lark-parser.readthedocs.io/en/latest/visitors.html#lark.visitors.Transformer>`_.
|
||||
|
||||
Note that the option must be set to ``True`` for the default parser to work.
|
||||
"""
|
||||
|
||||
SYMBOL = sympy.Symbol
|
||||
DIGIT = sympy.core.numbers.Integer
|
||||
|
||||
def CMD_INFTY(self, tokens):
    """Return SymPy's positive infinity for the CMD_INFTY terminal; ``tokens`` is ignored."""
    infinity = sympy.oo
    return infinity
|
||||
|
||||
def GREEK_SYMBOL_WITH_PRIMES(self, tokens):
    r"""Build a Symbol from a Greek-letter token.

    ``tokens`` is treated as a single string-like token: its first character
    (the backslash) is dropped and every occurrence of ``var`` is removed, so
    that e.g. ``\varphi`` and ``\phi`` both give the name ``phi``. Anything
    following the letter name is kept in the symbol name.
    """
    without_backslash = tokens[1:]
    name = re.sub("var", "", without_backslash)
    return sympy.Symbol(name)
|
||||
|
||||
def LATIN_SYMBOL_WITH_LATIN_SUBSCRIPT(self, tokens):
    """Build a Symbol named ``base_{sub}`` from a subscripted Latin-letter token.

    The token value is split at the underscore; a subscript wrapped in braces
    has its first and last characters stripped before the name is formed.
    """
    base, sub = tokens.value.split("_")
    subscript = sub[1:-1] if sub.startswith("{") else sub
    return sympy.Symbol("%s_{%s}" % (base, subscript))
|
||||
|
||||
def GREEK_SYMBOL_WITH_LATIN_SUBSCRIPT(self, tokens):
    """Build a Symbol named ``greek_{sub}`` from a Greek base with a Latin subscript.

    The base loses its leading backslash and any ``var`` substring; a subscript
    wrapped in braces has its first and last characters stripped.
    """
    base, sub = tokens.value.split("_")
    greek_name = re.sub("var", "", base[1:])
    subscript = sub[1:-1] if sub.startswith("{") else sub
    return sympy.Symbol("%s_{%s}" % (greek_name, subscript))
|
||||
|
||||
def LATIN_SYMBOL_WITH_GREEK_SUBSCRIPT(self, tokens):
    """Build a Symbol named ``base_{greek}`` from a Latin base with a Greek subscript.

    For a braced subscript the first two characters and the last one are
    dropped; otherwise only the first character (the backslash) is dropped.
    Any ``var`` substring is then removed from the subscript name.
    """
    base, sub = tokens.value.split("_")
    raw_subscript = sub[2:-1] if sub.startswith("{") else sub[1:]
    greek_name = re.sub("var", "", raw_subscript)
    return sympy.Symbol("%s_{%s}" % (base, greek_name))
|
||||
|
||||
|
||||
def GREEK_SYMBOL_WITH_GREEK_SUBSCRIPT(self, tokens):
    """Build a Symbol named ``greek_{greek}`` from a Greek base with a Greek subscript.

    The base loses its leading backslash. For a braced subscript the first two
    characters and the last one are dropped; otherwise only the first character
    is dropped. Any ``var`` substring is removed from both names.
    """
    base, sub = tokens.value.split("_")
    base_name = re.sub("var", "", base[1:])
    raw_subscript = sub[2:-1] if sub.startswith("{") else sub[1:]
    subscript_name = re.sub("var", "", raw_subscript)
    return sympy.Symbol("%s_{%s}" % (base_name, subscript_name))
|
||||
|
||||
def multi_letter_symbol(self, tokens):
    """Build a Symbol from the children of a ``multi_letter_symbol`` node.

    With four children the name is the child at index 2; with five children
    the child at index 4 (the primes) is appended to it. Any other child count
    yields ``None``.
    """
    count = len(tokens)
    if count == 5:  # primes follow the closing brace
        return sympy.Symbol(tokens[2] + tokens[4])
    if count == 4:  # plain name, no primes
        return sympy.Symbol(tokens[2])
    return None
|
||||
|
||||
def number(self, tokens):
    """Convert the single child of a ``number`` node to a SymPy number.

    A ``CMD_IMAGINARY_UNIT`` token gives ``sympy.I``; a literal containing a
    decimal point gives a ``Float``; anything else gives an ``Integer``.
    """
    literal = tokens[0]
    if literal.type == "CMD_IMAGINARY_UNIT":
        return sympy.I

    number_cls = sympy.core.numbers.Float if "." in literal else sympy.core.numbers.Integer
    return number_cls(literal)
|
||||
|
||||
def latex_string(self, tokens):
    """Return the first child of the start rule unchanged."""
    result = tokens[0]
    return result
|
||||
|
||||
def group_round_parentheses(self, tokens):
    """Return the child at index 1, i.e. the content between the two delimiters."""
    inner = tokens[1]
    return inner
|
||||
|
||||
def group_square_brackets(self, tokens):
    """Return the child at index 1, i.e. the content between the two delimiters."""
    inner = tokens[1]
    return inner
|
||||
|
||||
def group_curly_parentheses(self, tokens):
    """Return the child at index 1, i.e. the content between the two delimiters."""
    inner = tokens[1]
    return inner
|
||||
|
||||
def eq(self, tokens):
    """Return ``sympy.Eq`` of the children at index 0 and 2 (index 1 is skipped)."""
    lhs, rhs = tokens[0], tokens[2]
    return sympy.Eq(lhs, rhs)
|
||||
|
||||
def ne(self, tokens):
    """Return ``sympy.Ne`` of the children at index 0 and 2 (index 1 is skipped)."""
    lhs, rhs = tokens[0], tokens[2]
    return sympy.Ne(lhs, rhs)
|
||||
|
||||
def lt(self, tokens):
    """Return ``sympy.Lt`` of the children at index 0 and 2 (index 1 is skipped)."""
    lhs, rhs = tokens[0], tokens[2]
    return sympy.Lt(lhs, rhs)
|
||||
|
||||
def lte(self, tokens):
    """Return ``sympy.Le`` of the children at index 0 and 2 (index 1 is skipped)."""
    lhs, rhs = tokens[0], tokens[2]
    return sympy.Le(lhs, rhs)
|
||||
|
||||
def gt(self, tokens):
    """Return ``sympy.Gt`` of the children at index 0 and 2 (index 1 is skipped)."""
    lhs, rhs = tokens[0], tokens[2]
    return sympy.Gt(lhs, rhs)
|
||||
|
||||
def gte(self, tokens):
    """Return ``sympy.Ge`` of the children at index 0 and 2 (index 1 is skipped)."""
    lhs, rhs = tokens[0], tokens[2]
    return sympy.Ge(lhs, rhs)
|
||||
|
||||
def add(self, tokens):
    """Handle unary plus (two children) and binary addition (three children).

    Unary plus returns its operand unchanged. Binary addition uses
    ``sympy.MatAdd`` when either operand is recognised as a matrix by
    ``_obj_is_sympy_Matrix`` and ``sympy.Add`` otherwise. Any other child count
    yields ``None``.
    """
    arity = len(tokens)
    if arity == 2:  # +a
        return tokens[1]
    if arity != 3:
        return None

    # a + b
    left, right = tokens[0], tokens[2]
    involves_matrix = self._obj_is_sympy_Matrix(left) or self._obj_is_sympy_Matrix(right)
    combine = sympy.MatAdd if involves_matrix else sympy.Add
    return combine(left, right)
|
||||
|
||||
def sub(self, tokens):
    """Handle unary minus (two children) and binary subtraction (three children).

    Operands recognised as matrices by ``_obj_is_sympy_Matrix`` are negated via
    ``sympy.MatMul(-1, ...)`` and combined with ``sympy.MatAdd``; everything
    else uses plain negation and ``sympy.Add``. Any other child count yields
    ``None``.
    """
    arity = len(tokens)

    if arity == 2:  # -a
        operand = tokens[1]
        if self._obj_is_sympy_Matrix(operand):
            return sympy.MatMul(-1, operand)
        return -operand

    if arity == 3:  # a - b
        left, right = tokens[0], tokens[2]
        involves_matrix = self._obj_is_sympy_Matrix(left) or self._obj_is_sympy_Matrix(right)
        if involves_matrix:
            return sympy.MatAdd(left, sympy.MatMul(-1, right))
        return sympy.Add(left, -right)

    return None
|
||||
|
||||
def mul(self, tokens):
    """Multiply the children at index 0 and 2.

    ``sympy.MatMul`` is used when either operand is recognised as a matrix by
    ``_obj_is_sympy_Matrix``; otherwise ``sympy.Mul`` is used.
    """
    left, right = tokens[0], tokens[2]
    involves_matrix = self._obj_is_sympy_Matrix(left) or self._obj_is_sympy_Matrix(right)
    product = sympy.MatMul if involves_matrix else sympy.Mul
    return product(left, right)
|
||||
|
||||
def div(self, tokens):
    """Divide the child at index 0 by the child at index 2 via ``_handle_division``."""
    numerator, denominator = tokens[0], tokens[2]
    return self._handle_division(numerator, denominator)
|
||||
|
||||
def adjacent_expressions(self, tokens):
    """Interpret two expressions written next to each other.

    Usually this is implicit multiplication, with three exceptions checked in
    order:

    * a ``Ket`` followed by a ``Bra`` becomes an ``OuterProduct``;
    * a left operand equal to the symbol ``d`` is assumed to start a
      differential, and the pair is returned as a tuple;
    * a left operand that is a tuple is treated as a derivative operator, and
      its second element is the differentiation variable.
    """
    from sympy.physics.quantum import Bra, Ket

    left, right = tokens[0], tokens[1]

    if isinstance(left, Ket) and isinstance(right, Bra):
        from sympy.physics.quantum import OuterProduct
        return OuterProduct(left, right)

    if left == sympy.Symbol("d"):
        # very likely a differential such as "dx"
        return left, right

    if isinstance(left, tuple):
        # derivative operator applied to the right-hand expression
        return sympy.Derivative(right, left[1])

    return sympy.Mul(left, right)
|
||||
|
||||
def superscript(self, tokens):
    r"""Interpret ``base^sup``.

    With three children the superscript is the child at index 2 (``a^b``,
    ``a^\prime``, ``a^\ast``); with five children it is the child at index 3
    (``a^{'}``, ``a^{**}``, ``a^{\prime\prime}``, ``a^{\ast}``, ...).

    For a base recognised as a matrix by ``_obj_is_sympy_Matrix``:

    * ``T`` gives the transpose and ``H`` gives the adjoint;
    * an odd number of primes gives the transpose, an even number the base;
    * an odd number of stars gives the adjoint, an even number ``base.doit()``
      (``.doit()`` keeps the result consistent with ``sympy.adjoint``, which
      returns the evaluated adjoint of a matrix).

    For any other base, a prime or star superscript raises
    ``LaTeXParsingError``; everything else becomes ``sympy.Pow(base, sup)``.
    """
    # token type -> (family, length of a single mark in the token's text)
    mark_table = {
        "PRIMES": ("prime", 1),
        "PRIMES_VIA_CMD": ("prime", len(r"\prime")),
        "CMD_PRIME": ("prime", len(r"\prime")),
        "STARS": ("star", 1),
        "STARS_VIA_CMD": ("star", len(r"\ast")),
        "CMD_ASTERISK": ("star", len(r"\ast")),
    }

    base = tokens[0]
    child_count = len(tokens)
    if child_count == 3:
        sup = tokens[2]
    elif child_count == 5:
        sup = tokens[3]

    base_is_matrix = self._obj_is_sympy_Matrix(base)
    if base_is_matrix:
        if sup == sympy.Symbol("T"):
            return sympy.Transpose(base)
        if sup == sympy.Symbol("H"):
            return sympy.adjoint(base)

    mark = mark_table.get(sup.type) if isinstance(sup, Token) else None
    if mark is None:
        return sympy.Pow(base, sup)

    if not base_is_matrix:
        raise LaTeXParsingError(f"{base} with superscript {sup} is not understood.")

    family, unit_length = mark
    even_count = (len(sup.value) / unit_length) % 2 == 0

    if family == "prime":
        return base if even_count else sympy.Transpose(base)
    return base.doit() if even_count else sympy.adjoint(base)
|
||||
|
||||
def matrix_prime(self, tokens):
    """Apply trailing primes to a matrix as repeated transposition.

    Raises ``LaTeXParsingError`` when the base is not recognised as a matrix by
    ``_obj_is_sympy_Matrix``. An even number of primes returns the base
    unchanged; an odd number returns its transpose.
    """
    base, prime_token = tokens[0], tokens[1]
    primes = prime_token.value

    if not self._obj_is_sympy_Matrix(base):
        raise LaTeXParsingError(f"({base}){primes} is not understood.")

    odd_count = len(primes) % 2 != 0
    return sympy.Transpose(base) if odd_count else base
|
||||
|
||||
def symbol_prime(self, tokens):
    """Return a new Symbol whose name is the base symbol's name followed by the primes."""
    symbol, prime_token = tokens[0], tokens[1]
    marks = prime_token.value
    return sympy.Symbol(f"{symbol.name}{marks}")
|
||||
|
||||
def fraction(self, tokens):
    """Evaluate a fraction node, or flag it as a derivative operator.

    When the child at index 2 is a tuple, only its second element (the variable
    being differentiated with respect to) is kept, and the pair
    ``("derivative", variable)`` is returned for a parent rule to consume.
    Otherwise the children at index 1 and 2 are divided via
    ``_handle_division``.
    """
    numerator = tokens[1]
    lower = tokens[2]

    if not isinstance(lower, tuple):
        return self._handle_division(numerator, lower)

    # pass the differentiation variable upwards
    _, variable = lower
    return "derivative", variable
|
||||
|
||||
def binomial(self, tokens):
    """Return ``sympy.binomial`` of the children at index 1 and 2."""
    upper, lower = tokens[1], tokens[2]
    return sympy.binomial(upper, lower)
|
||||
|
||||
def normal_integral(self, tokens):
    """Build a ``sympy.Integral`` from the tokens of an integral expression.

    The bounds are the items that follow the ``"_"`` and ``"^"`` tokens (in
    either order) and the variable of integration is the item that follows
    the differential symbol.  If nothing stands between the integral sign
    (or its bounds) and the differential symbol, the integrand is 1.

    Raises ``LaTeXParsingError`` if no differential symbol is found, or if
    only one of the two bounds was given.
    """
    # the item right after "_" / "^" holds the corresponding bound
    underscore_index = tokens.index("_") if "_" in tokens else None
    caret_index = tokens.index("^") if "^" in tokens else None

    # compare with None explicitly so a position is never confused with "missing"
    lower_bound = None if underscore_index is None else tokens[underscore_index + 1]
    upper_bound = None if caret_index is None else tokens[caret_index + 1]

    differential_symbol = self._extract_differential_symbol(tokens)

    if differential_symbol is None:
        raise LaTeXParsingError("Differential symbol was not found in the expression. "
                                "Valid differential symbols are \"d\", \"\\text{d}\", and \"\\mathrm{d}\".")

    # else we can assume that a differential symbol was found
    differential_variable_index = tokens.index(differential_symbol) + 1
    differential_variable = tokens[differential_variable_index]

    # we can't simply test the truthiness of the bounds here, because a bound
    # of 0 is perfectly valid
    if lower_bound is not None and upper_bound is None:
        raise LaTeXParsingError("Lower bound for the integral was found, but upper bound was not found.")

    if upper_bound is not None and lower_bound is None:
        raise LaTeXParsingError("Upper bound for the integral was found, but lower bound was not found.")

    # The item at differential_variable_index - 1 is the differential symbol
    # and the one at differential_variable_index - 2 should be the integrand.
    # If one more step backwards lands on "_" or "^", then the item at - 2 is
    # really a bound and there was no integrand (e.g. \int^7_0 dx or
    # \int_0^7 dx).  An index of 2 means "\int dx", since the integral sign
    # is always at index 0.
    bound_marker_indices = [index for index in (underscore_index, caret_index) if index is not None]
    no_integrand = (differential_variable_index - 3 in bound_marker_indices
                    or differential_variable_index == 2)
    integrand = 1 if no_integrand else tokens[differential_variable_index - 2]

    if lower_bound is not None:
        # definite integral; at this point either both bounds are given or neither is
        return sympy.Integral(integrand, (differential_variable, lower_bound, upper_bound))

    # indefinite integral
    return sympy.Integral(integrand, differential_variable)
|
||||
|
||||
def group_curly_parentheses_int(self, tokens):
    """Return ``(numerator expression, variable of integration)``.

    With three tokens the numerator expression is 1 and the variable is
    ``tokens[1]``; with four tokens they are ``tokens[1]`` and
    ``tokens[2]``.  Any other length yields ``None``.
    """
    token_count = len(tokens)

    if token_count == 4:
        return tokens[1], tokens[2]
    if token_count == 3:
        return 1, tokens[1]

    # no other lengths are expected here
    return None
|
||||
|
||||
def special_fraction(self, tokens):
    """Return ``(integrand, variable)`` for a fraction carrying a differential.

    ``tokens[1]`` is a ``(numerator, variable)`` pair and ``tokens[2]`` is
    the denominator.  The integrand is the numerator multiplied by the
    denominator raised to the power -1.
    """
    numerator, variable = tokens[1]
    denominator = tokens[2]

    # the integrand is passed upwards together with the variable of integration
    integrand = sympy.Mul(numerator, sympy.Pow(denominator, -1))
    return integrand, variable
|
||||
|
||||
def integral_with_special_fraction(self, tokens):
    """Build a ``sympy.Integral`` when the integrand arrives as a special fraction.

    ``tokens[-1]`` is an ``(integrand, differential_variable)`` pair.  The
    bounds are the items that follow the ``"_"`` and ``"^"`` tokens, when
    those tokens are present.

    Raises ``LaTeXParsingError`` if only one of the two bounds was given.
    """
    # the item right after "_" / "^" holds the corresponding bound
    underscore_index = tokens.index("_") if "_" in tokens else None
    caret_index = tokens.index("^") if "^" in tokens else None

    # compare with None explicitly so a position is never confused with "missing"
    lower_bound = None if underscore_index is None else tokens[underscore_index + 1]
    upper_bound = None if caret_index is None else tokens[caret_index + 1]

    # we can't simply test the truthiness of the bounds here, because a bound
    # of 0 is perfectly valid
    if lower_bound is not None and upper_bound is None:
        raise LaTeXParsingError("Lower bound for the integral was found, but upper bound was not found.")

    if upper_bound is not None and lower_bound is None:
        raise LaTeXParsingError("Upper bound for the integral was found, but lower bound was not found.")

    integrand, differential_variable = tokens[-1]

    if lower_bound is not None:
        # definite integral; at this point either both bounds are given or neither is
        return sympy.Integral(integrand, (differential_variable, lower_bound, upper_bound))

    # indefinite integral
    return sympy.Integral(integrand, differential_variable)
|
||||
|
||||
def group_curly_parentheses_special(self, tokens):
    """Return ``(index_variable, lower_limit, upper_limit)`` from the limit tokens.

    The lower limit is assumed to be wrapped in braces: the first and last
    items between the first ``"{"`` and first ``"}"`` found from the
    underscore onwards are the index variable and the lower limit.  Only
    the first item after the caret is taken as the upper limit.
    """
    underscore_pos = tokens.index("_")
    caret_pos = tokens.index("^")

    # isolate the braced lower-limit specification
    open_brace_pos = tokens.index("{", underscore_pos)
    close_brace_pos = tokens.index("}", underscore_pos)
    lower_spec = tokens[open_brace_pos + 1:close_brace_pos]

    # everything after the caret belongs to the upper limit
    upper_spec = tokens[caret_pos + 1:]

    return lower_spec[0], lower_spec[-1], upper_spec[0]
|
||||
|
||||
def summation(self, tokens):
    """Return ``sympy.Sum`` of ``tokens[2]`` over the limits in ``tokens[1]``."""
    summand, limits = tokens[2], tokens[1]
    return sympy.Sum(summand, limits)
|
||||
|
||||
def product(self, tokens):
    """Return ``sympy.Product`` of ``tokens[2]`` over the limits in ``tokens[1]``."""
    factor, limits = tokens[2], tokens[1]
    return sympy.Product(factor, limits)
|
||||
|
||||
def limit_dir_expr(self, tokens):
    """Return ``(tokens[0], direction)`` for a limit point with a direction mark.

    The direction is read from the item after the caret, or from the item
    after the first ``"{"`` at or beyond the caret when braces are present.
    It is reported as ``"+"``, ``"-"``, or ``"+-"`` for anything else.
    """
    caret_pos = tokens.index("^")

    if "{" in tokens:
        direction = tokens[tokens.index("{", caret_pos) + 1]
    else:
        direction = tokens[caret_pos + 1]

    for sign in ("+", "-"):
        if direction == sign:
            return tokens[0], sign

    return tokens[0], "+-"
|
||||
|
||||
def group_curly_parentheses_lim(self, tokens):
    """Return ``(limit_variable, destination, direction)``.

    ``tokens[1]`` is the limit variable.  ``tokens[3]`` is either a
    ``(destination, direction)`` tuple or a bare destination, in which
    case the direction is ``"+-"``.
    """
    limit_variable = tokens[1]
    target = tokens[3]

    if isinstance(target, tuple):
        destination, direction = target
    else:
        destination, direction = target, "+-"

    return limit_variable, destination, direction
|
||||
|
||||
def limit(self, tokens):
    """Return ``sympy.Limit`` of ``tokens[-1]``.

    ``tokens[2]`` is unpacked as ``(limit_variable, destination, direction)``.
    """
    variable, point, side = tokens[2]
    expression = tokens[-1]
    return sympy.Limit(expression, variable, point, side)
|
||||
|
||||
def differential(self, tokens):
    """Return ``tokens[1]``, the item that follows the first token."""
    differential_variable = tokens[1]
    return differential_variable
|
||||
|
||||
def derivative(self, tokens):
    """Return ``sympy.Derivative`` of ``tokens[-1]`` with respect to ``tokens[5]``."""
    # NOTE(review): index 5 depends on the token layout produced by the grammar rule - confirm
    expression, variable = tokens[-1], tokens[5]
    return sympy.Derivative(expression, variable)
|
||||
|
||||
def list_of_expressions(self, tokens):
    """Return the expressions in ``tokens`` as a list, with comma tokens removed.

    A single-item ``tokens`` is returned verbatim because the
    ``function_applied`` node expects a list.  Otherwise every ``Token`` in
    ``tokens`` must be a ``COMMA`` and is dropped; all other items are
    kept in order.

    The result is always a list (never a lazy iterator), so it can be
    traversed more than once and an unexpected token is reported here
    rather than when the result is later consumed.

    Raises ``LaTeXParsingError`` if a non-comma ``Token`` is encountered.
    """
    if len(tokens) == 1:
        return tokens

    expressions = []
    for item in tokens:
        if isinstance(item, Token):
            if item.type != "COMMA":
                # An unexpected token was encountered
                raise LaTeXParsingError("A comma token was expected, but some other token was encountered.")
            continue
        expressions.append(item)

    return expressions
|
||||
|
||||
def function_applied(self, tokens):
    """Apply an undefined function named by ``tokens[0]`` to the arguments in ``tokens[2]``."""
    function = sympy.Function(tokens[0])
    arguments = tokens[2]
    return function(*arguments)
|
||||
|
||||
def min(self, tokens):
    """Return ``sympy.Min`` of the arguments in ``tokens[2]``."""
    arguments = tokens[2]
    return sympy.Min(*arguments)
|
||||
|
||||
def max(self, tokens):
    """Return ``sympy.Max`` of the arguments in ``tokens[2]``."""
    arguments = tokens[2]
    return sympy.Max(*arguments)
|
||||
|
||||
def bra(self, tokens):
    """Return a ``sympy.physics.quantum.Bra`` built from ``tokens[1]``."""
    from sympy.physics.quantum import Bra

    label = tokens[1]
    return Bra(label)
|
||||
|
||||
def ket(self, tokens):
    """Return a ``sympy.physics.quantum.Ket`` built from ``tokens[1]``."""
    from sympy.physics.quantum import Ket

    label = tokens[1]
    return Ket(label)
|
||||
|
||||
def inner_product(self, tokens):
    """Return the ``InnerProduct`` of ``Bra(tokens[1])`` and ``Ket(tokens[3])``."""
    from sympy.physics.quantum import Bra, Ket, InnerProduct

    bra_part = Bra(tokens[1])
    ket_part = Ket(tokens[3])
    return InnerProduct(bra_part, ket_part)
|
||||
|
||||
def sin(self, tokens):
    """Return ``sympy.sin`` applied to ``tokens[1]``."""
    argument = tokens[1]
    return sympy.sin(argument)
|
||||
|
||||
def cos(self, tokens):
    """Return ``sympy.cos`` applied to ``tokens[1]``."""
    argument = tokens[1]
    return sympy.cos(argument)
|
||||
|
||||
def tan(self, tokens):
    """Return ``sympy.tan`` applied to ``tokens[1]``."""
    argument = tokens[1]
    return sympy.tan(argument)
|
||||
|
||||
def csc(self, tokens):
    """Return ``sympy.csc`` applied to ``tokens[1]``."""
    argument = tokens[1]
    return sympy.csc(argument)
|
||||
|
||||
def sec(self, tokens):
    """Return ``sympy.sec`` applied to ``tokens[1]``."""
    argument = tokens[1]
    return sympy.sec(argument)
|
||||
|
||||
def cot(self, tokens):
    """Return ``sympy.cot`` applied to ``tokens[1]``."""
    argument = tokens[1]
    return sympy.cot(argument)
|
||||
|
||||
def sin_power(self, tokens):
    """Handle a sine carrying an exponent.

    ``tokens[2]`` is the exponent and ``tokens[-1]`` the argument.  An
    exponent equal to -1 yields ``sympy.asin`` of the argument; any other
    exponent yields the sine raised to that power.
    """
    exponent, argument = tokens[2], tokens[-1]
    if exponent == -1:
        return sympy.asin(argument)
    return sympy.Pow(sympy.sin(argument), exponent)
|
||||
|
||||
def cos_power(self, tokens):
    """Handle a cosine carrying an exponent.

    ``tokens[2]`` is the exponent and ``tokens[-1]`` the argument.  An
    exponent equal to -1 yields ``sympy.acos`` of the argument; any other
    exponent yields the cosine raised to that power.
    """
    exponent, argument = tokens[2], tokens[-1]
    if exponent == -1:
        return sympy.acos(argument)
    return sympy.Pow(sympy.cos(argument), exponent)
|
||||
|
||||
def tan_power(self, tokens):
    """Handle a tangent carrying an exponent.

    ``tokens[2]`` is the exponent and ``tokens[-1]`` the argument.  An
    exponent equal to -1 yields ``sympy.atan`` of the argument; any other
    exponent yields the tangent raised to that power.
    """
    exponent, argument = tokens[2], tokens[-1]
    if exponent == -1:
        return sympy.atan(argument)
    return sympy.Pow(sympy.tan(argument), exponent)
|
||||
|
||||
def csc_power(self, tokens):
    """Handle a cosecant carrying an exponent.

    ``tokens[2]`` is the exponent and ``tokens[-1]`` the argument.  An
    exponent equal to -1 yields ``sympy.acsc`` of the argument; any other
    exponent yields the cosecant raised to that power.
    """
    exponent, argument = tokens[2], tokens[-1]
    if exponent == -1:
        return sympy.acsc(argument)
    return sympy.Pow(sympy.csc(argument), exponent)
|
||||
|
||||
def sec_power(self, tokens):
    """Handle a secant carrying an exponent.

    ``tokens[2]`` is the exponent and ``tokens[-1]`` the argument.  An
    exponent equal to -1 yields ``sympy.asec`` of the argument; any other
    exponent yields the secant raised to that power.
    """
    exponent, argument = tokens[2], tokens[-1]
    if exponent == -1:
        return sympy.asec(argument)
    return sympy.Pow(sympy.sec(argument), exponent)
|
||||
|
||||
def cot_power(self, tokens):
    """Handle a cotangent carrying an exponent.

    ``tokens[2]`` is the exponent and ``tokens[-1]`` the argument.  An
    exponent equal to -1 yields ``sympy.acot`` of the argument; any other
    exponent yields the cotangent raised to that power.
    """
    exponent, argument = tokens[2], tokens[-1]
    if exponent == -1:
        return sympy.acot(argument)
    return sympy.Pow(sympy.cot(argument), exponent)
|
||||
|
||||
def arcsin(self, tokens):
    """Return ``sympy.asin`` applied to ``tokens[1]``."""
    argument = tokens[1]
    return sympy.asin(argument)
|
||||
|
||||
def arccos(self, tokens):
    """Return ``sympy.acos`` applied to ``tokens[1]``."""
    argument = tokens[1]
    return sympy.acos(argument)
|
||||
|
||||
def arctan(self, tokens):
    """Return ``sympy.atan`` applied to ``tokens[1]``."""
    argument = tokens[1]
    return sympy.atan(argument)
|
||||
|
||||
def arccsc(self, tokens):
    """Return ``sympy.acsc`` applied to ``tokens[1]``."""
    argument = tokens[1]
    return sympy.acsc(argument)
|
||||
|
||||
def arcsec(self, tokens):
    """Return ``sympy.asec`` applied to ``tokens[1]``."""
    argument = tokens[1]
    return sympy.asec(argument)
|
||||
|
||||
def arccot(self, tokens):
    """Return ``sympy.acot`` applied to ``tokens[1]``."""
    argument = tokens[1]
    return sympy.acot(argument)
|
||||
|
||||
def sinh(self, tokens):
    """Return ``sympy.sinh`` applied to ``tokens[1]``."""
    argument = tokens[1]
    return sympy.sinh(argument)
|
||||
|
||||
def cosh(self, tokens):
    """Return ``sympy.cosh`` applied to ``tokens[1]``."""
    argument = tokens[1]
    return sympy.cosh(argument)
|
||||
|
||||
def tanh(self, tokens):
    """Return ``sympy.tanh`` applied to ``tokens[1]``."""
    argument = tokens[1]
    return sympy.tanh(argument)
|
||||
|
||||
def asinh(self, tokens):
    """Return ``sympy.asinh`` applied to ``tokens[1]``."""
    argument = tokens[1]
    return sympy.asinh(argument)
|
||||
|
||||
def acosh(self, tokens):
    """Return ``sympy.acosh`` applied to ``tokens[1]``."""
    argument = tokens[1]
    return sympy.acosh(argument)
|
||||
|
||||
def atanh(self, tokens):
    """Return ``sympy.atanh`` applied to ``tokens[1]``."""
    argument = tokens[1]
    return sympy.atanh(argument)
|
||||
|
||||
def abs(self, tokens):
    """Return ``sympy.Abs`` applied to ``tokens[1]``."""
    argument = tokens[1]
    return sympy.Abs(argument)
|
||||
|
||||
def floor(self, tokens):
    """Return ``sympy.floor`` applied to ``tokens[1]``."""
    argument = tokens[1]
    return sympy.floor(argument)
|
||||
|
||||
def ceil(self, tokens):
    """Return ``sympy.ceiling`` applied to ``tokens[1]``."""
    argument = tokens[1]
    return sympy.ceiling(argument)
|
||||
|
||||
def factorial(self, tokens):
    """Return ``sympy.factorial`` applied to ``tokens[0]``."""
    operand = tokens[0]
    return sympy.factorial(operand)
|
||||
|
||||
def conjugate(self, tokens):
    """Return ``sympy.conjugate`` applied to ``tokens[1]``."""
    argument = tokens[1]
    return sympy.conjugate(argument)
|
||||
|
||||
def square_root(self, tokens):
    """Handle a root with or without a square-bracket argument.

    With two tokens there was no square-bracket argument and the result is
    ``sympy.sqrt(tokens[1])``.  With three tokens the result is
    ``sympy.root(tokens[2], tokens[1])``.  Any other length yields ``None``.
    """
    token_count = len(tokens)

    if token_count == 3:
        # a square-bracket argument was supplied
        return sympy.root(tokens[2], tokens[1])
    if token_count == 2:
        return sympy.sqrt(tokens[1])

    return None
|
||||
|
||||
def exponential(self, tokens):
    """Return ``sympy.exp`` applied to ``tokens[1]``."""
    argument = tokens[1]
    return sympy.exp(argument)
|
||||
|
||||
def log(self, tokens):
    """Build a logarithm according to the type of the leading token.

    * ``FUNC_LG``: ``sympy.log(tokens[1], 10)``
    * ``FUNC_LN``: ``sympy.log(tokens[1])``
    * ``FUNC_LOG``: ``sympy.log(tokens[3], tokens[2])`` when a ``"_"`` token
      is present (a base was specified), else ``sympy.log(tokens[1])``

    Any other token type yields ``None``.
    """
    function_type = tokens[0].type

    if function_type == "FUNC_LN":
        return sympy.log(tokens[1])

    if function_type == "FUNC_LG":
        # an underscore would be meaningless here, so it is not checked for
        # TODO: ANTLR refers to ISO 80000-2:2019. should we keep base 10 or base 2?
        return sympy.log(tokens[1], 10)

    if function_type == "FUNC_LOG":
        base_was_specified = "_" in tokens
        if base_was_specified:
            return sympy.log(tokens[3], tokens[2])
        return sympy.log(tokens[1])

    return None
|
||||
|
||||
def _extract_differential_symbol(self, s: str):
|
||||
differential_symbols = {"d", r"\text{d}", r"\mathrm{d}"}
|
||||
|
||||
differential_symbol = next((symbol for symbol in differential_symbols if symbol in s), None)
|
||||
|
||||
return differential_symbol
|
||||
|
||||
def matrix(self, tokens):
    """Build a ``sympy.Matrix`` from the children of ``tokens[1]``.

    Only children that are ``Tree`` nodes with data ``"matrix_row"`` become
    rows; within a row, ``MATRIX_COL_DELIM`` tokens are dropped.
    """
    rows = []
    for node in tokens[1].children:
        if not (isinstance(node, Tree) and node.data == "matrix_row"):
            continue
        rows.append([entry for entry in node.children
                     if not (isinstance(entry, Token) and entry.type == "MATRIX_COL_DELIM")])

    return sympy.Matrix(rows)
|
||||
|
||||
def determinant(self, tokens):
    """Return the determinant for either supported token layout.

    Two tokens (``\\det A``): ``tokens[1]`` must be a matrix, and its
    ``det()`` is returned.  Three tokens (``| A |``): the tokens are first
    turned into a matrix with ``self.matrix``.  Any other length yields
    ``None``.

    Raises ``LaTeXParsingError`` when ``\\det`` is applied to a non-matrix.
    """
    token_count = len(tokens)

    if token_count == 2:  # \det A
        operand = tokens[1]
        if not self._obj_is_sympy_Matrix(operand):
            raise LaTeXParsingError("Cannot take determinant of non-matrix.")
        return operand.det()

    if token_count == 3:  # | A |
        return self.matrix(tokens).det()

    return None
|
||||
|
||||
def trace(self, tokens):
    """Return ``sympy.Trace(tokens[1])``.

    Raises ``LaTeXParsingError`` when ``tokens[1]`` is not a matrix.
    """
    operand = tokens[1]
    if self._obj_is_sympy_Matrix(operand):
        return sympy.Trace(operand)

    raise LaTeXParsingError("Cannot take trace of non-matrix.")
|
||||
|
||||
def adjugate(self, tokens):
    """Return the adjugate of the matrix in ``tokens[1]``.

    Raises ``LaTeXParsingError`` when ``tokens[1]`` is not a matrix.
    """
    operand = tokens[1]
    if not self._obj_is_sympy_Matrix(operand):
        raise LaTeXParsingError("Cannot take adjugate of non-matrix.")

    # evaluate first, since MatAdd does not support the .adjugate() method
    evaluated = operand.doit()
    return evaluated.adjugate()
|
||||
|
||||
def _obj_is_sympy_Matrix(self, obj):
|
||||
if hasattr(obj, "is_Matrix"):
|
||||
return obj.is_Matrix
|
||||
|
||||
return isinstance(obj, sympy.Matrix)
|
||||
|
||||
def _handle_division(self, numerator, denominator):
    """Return ``numerator`` multiplied by ``denominator`` raised to the power -1.

    The product is a ``sympy.MatMul`` when the numerator is a matrix and a
    ``sympy.Mul`` otherwise.

    Raises ``LaTeXParsingError`` when the denominator is a matrix.
    """
    if self._obj_is_sympy_Matrix(denominator):
        raise LaTeXParsingError("Cannot divide by matrices like this since "
                                "it is not clear if left or right multiplication "
                                "by the inverse is intended. Try explicitly "
                                "multiplying by the inverse instead.")

    product_type = sympy.MatMul if self._obj_is_sympy_Matrix(numerator) else sympy.Mul
    reciprocal = sympy.Pow(denominator, -1)
    return product_type(numerator, reciprocal)
|
||||
Reference in New Issue
Block a user