Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : * -------------------------------------------------------------------------- *
3 : * Lepton *
4 : * -------------------------------------------------------------------------- *
5 : * This is part of the Lepton expression parser originating from *
6 : * Simbios, the NIH National Center for Physics-Based Simulation of *
7 : * Biological Structures at Stanford, funded under the NIH Roadmap for *
8 : * Medical Research, grant U54 GM072970. See https://simtk.org. *
9 : * *
10 : * Portions copyright (c) 2013-2016 Stanford University and the Authors. *
11 : * Authors: Peter Eastman *
12 : * Contributors: *
13 : * *
14 : * Permission is hereby granted, free of charge, to any person obtaining a *
15 : * copy of this software and associated documentation files (the "Software"), *
16 : * to deal in the Software without restriction, including without limitation *
17 : * the rights to use, copy, modify, merge, publish, distribute, sublicense, *
18 : * and/or sell copies of the Software, and to permit persons to whom the *
19 : * Software is furnished to do so, subject to the following conditions: *
20 : * *
21 : * The above copyright notice and this permission notice shall be included in *
22 : * all copies or substantial portions of the Software. *
23 : * *
24 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
25 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
26 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
27 : * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
28 : * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
29 : * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
30 : * USE OR OTHER DEALINGS IN THE SOFTWARE. *
31 : * -------------------------------------------------------------------------- *
32 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
33 : /* -------------------------------------------------------------------------- *
34 : * lepton *
35 : * -------------------------------------------------------------------------- *
36 : * This is part of the lepton expression parser originating from *
37 : * Simbios, the NIH National Center for Physics-Based Simulation of *
38 : * Biological Structures at Stanford, funded under the NIH Roadmap for *
39 : * Medical Research, grant U54 GM072970. See https://simtk.org. *
40 : * *
41 : * Portions copyright (c) 2009-2015 Stanford University and the Authors. *
42 : * Authors: Peter Eastman *
43 : * Contributors: *
44 : * *
45 : * Permission is hereby granted, free of charge, to any person obtaining a *
46 : * copy of this software and associated documentation files (the "Software"), *
47 : * to deal in the Software without restriction, including without limitation *
48 : * the rights to use, copy, modify, merge, publish, distribute, sublicense, *
49 : * and/or sell copies of the Software, and to permit persons to whom the *
50 : * Software is furnished to do so, subject to the following conditions: *
51 : * *
52 : * The above copyright notice and this permission notice shall be included in *
53 : * all copies or substantial portions of the Software. *
54 : * *
55 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
56 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
57 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
58 : * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
59 : * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
60 : * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
61 : * USE OR OTHER DEALINGS IN THE SOFTWARE. *
62 : * -------------------------------------------------------------------------- */
63 :
64 : #include "Parser.h"
65 : #include "CustomFunction.h"
66 : #include "Exception.h"
67 : #include "ExpressionTreeNode.h"
68 : #include "Operation.h"
69 : #include "ParsedExpression.h"
70 : #include <cctype>
71 : #include <iostream>
72 :
73 : namespace PLMD {
74 : using namespace lepton;
75 : using namespace std;
76 :
77 : namespace lepton {
78 :
79 1839 : static const string Digits = "0123456789";
80 1839 : static const string Operators = "+-*/^";
81 : static const bool LeftAssociative[] = {true, true, true, true, false};
82 : static const int Precedence[] = {0, 0, 1, 1, 3};
83 : static const Operation::Id OperationId[] = {Operation::ADD, Operation::SUBTRACT, Operation::MULTIPLY, Operation::DIVIDE, Operation::POWER};
84 :
85 802 : const std::map<std::string, double> & Constants() {
86 : static const std::map<std::string, double> constants = {
87 : {"e", std::exp(1.0)},
88 : {"log2e", 1.0/std::log(2.0)},
89 : {"log10e", 1.0/std::log(10.0)},
90 : {"ln2", std::log(2.0)},
91 : {"ln10", std::log(10.0)},
92 : {"pi", 3.14159265358979323844},
93 : {"pi_2", 3.14159265358979323844*0.5},
94 : {"pi_4", 3.14159265358979323844*0.25},
95 : // {"1_pi", 1.0/pi},
96 : // {"2_pi", 2.0/pi},
97 : // {"2_sqrtpi", 2.0/std::sqrt(pi)},
98 : {"sqrt2", std::sqrt(2.0)},
99 : {"sqrt1_2", std::sqrt(0.5)}
100 882 : };
101 802 : return constants;
102 : }
103 :
104 : }
105 :
106 :
107 125313 : class lepton::ParseToken {
108 : public:
109 : enum Type {Number, Operator, Variable, Function, LeftParen, RightParen, Comma, Whitespace};
110 :
111 29010 : ParseToken(string text, Type type) : text(text), type(type) {
112 : }
113 : const string& getText() const {
114 14 : return text;
115 : }
116 : Type getType() const {
117 36356 : return type;
118 : }
119 : private:
120 : string text;
121 : Type type;
122 : };
123 :
124 4 : string Parser::trim(const string& expression) {
125 : // Remove leading and trailing spaces.
126 :
127 : int start, end;
128 12 : for (start = 0; start < (int) expression.size() && isspace(expression[start]); start++)
129 : ;
130 8 : for (end = (int) expression.size()-1; end > start && isspace(expression[end]); end--)
131 : ;
132 4 : if (start == end && isspace(expression[end]))
133 0 : return "";
134 4 : return expression.substr(start, end-start+1);
135 : }
136 :
137 14505 : ParseToken Parser::getNextToken(const string& expression, int start) {
138 29010 : char c = expression[start];
139 14505 : if (c == '(')
140 1242 : return ParseToken("(", ParseToken::LeftParen);
141 13884 : if (c == ')')
142 4290 : return ParseToken(")", ParseToken::RightParen);
143 11739 : if (c == ',')
144 16 : return ParseToken(",", ParseToken::Comma);
145 11731 : if (Operators.find(c) != string::npos)
146 4688 : return ParseToken(string(1, c), ParseToken::Operator);
147 7043 : if (isspace(c)) {
148 : // White space
149 :
150 56 : for (int pos = start+1; pos < (int) expression.size(); pos++) {
151 42 : if (!isspace(expression[pos]))
152 42 : return ParseToken(expression.substr(start, pos-start), ParseToken::Whitespace);
153 : }
154 14 : return ParseToken(expression.substr(start, string::npos), ParseToken::Whitespace);
155 : }
156 7015 : if (c == '.' || Digits.find(c) != string::npos) {
157 : // A number
158 :
159 2585 : bool foundDecimal = (c == '.');
160 : bool foundExp = false;
161 : int pos;
162 18498 : for (pos = start+1; pos < (int) expression.size(); pos++) {
163 18046 : c = expression[pos];
164 9023 : if (Digits.find(c) != string::npos)
165 4538 : continue;
166 4485 : if (c == '.' && !foundDecimal) {
167 : foundDecimal = true;
168 2126 : continue;
169 : }
170 2359 : if ((c == 'e' || c == 'E') && !foundExp) {
171 : foundExp = true;
172 0 : if (pos < (int) expression.size()-1 && (expression[pos+1] == '-' || expression[pos+1] == '+'))
173 : pos++;
174 0 : continue;
175 : }
176 : break;
177 : }
178 5170 : return ParseToken(expression.substr(start, pos-start), ParseToken::Number);
179 : }
180 :
181 : // A variable, function, or left parenthesis
182 :
183 21404 : for (int pos = start; pos < (int) expression.size(); pos++) {
184 25562 : c = expression[pos];
185 12781 : if (c == '(')
186 3048 : return ParseToken(expression.substr(start, pos-start+1), ParseToken::Function);
187 11257 : if (Operators.find(c) != string::npos || c == ',' || c == ')' || isspace(c))
188 5540 : return ParseToken(expression.substr(start, pos-start), ParseToken::Variable);
189 : }
190 272 : return ParseToken(expression.substr(start, string::npos), ParseToken::Variable);
191 : }
192 :
193 822 : vector<ParseToken> Parser::tokenize(const string& expression) {
194 : vector<ParseToken> tokens;
195 : int pos = 0;
196 29832 : while (pos < (int) expression.size()) {
197 14505 : ParseToken token = getNextToken(expression, pos);
198 14505 : if (token.getType() != ParseToken::Whitespace)
199 14477 : tokens.push_back(token);
200 14505 : pos += (int) token.getText().size();
201 : }
202 822 : return tokens;
203 : }
204 :
205 820 : ParsedExpression Parser::parse(const string& expression) {
206 1622 : return parse(expression, map<string, CustomFunction*>());
207 : }
208 :
209 820 : ParsedExpression Parser::parse(const string& expression, const map<string, CustomFunction*>& customFunctions) {
210 : try {
211 : // First split the expression into subexpressions.
212 :
213 : string primaryExpression = expression;
214 820 : vector<string> subexpressions;
215 : while (true) {
216 : string::size_type pos = primaryExpression.find_last_of(';');
217 822 : if (pos == string::npos)
218 : break;
219 22 : string sub = trim(primaryExpression.substr(pos+1));
220 2 : if (sub.size() > 0)
221 2 : subexpressions.push_back(sub);
222 4 : primaryExpression = primaryExpression.substr(0, pos);
223 2 : }
224 :
225 : // Parse the subexpressions.
226 :
227 : map<string, ExpressionTreeNode> subexpDefs;
228 824 : for (int i = 0; i < (int) subexpressions.size(); i++) {
229 4 : string::size_type equalsPos = subexpressions[i].find('=');
230 2 : if (equalsPos == string::npos)
231 0 : throw Exception("subexpression does not specify a name");
232 22 : string name = trim(subexpressions[i].substr(0, equalsPos));
233 2 : if (name.size() == 0)
234 0 : throw Exception("subexpression does not specify a name");
235 6 : vector<ParseToken> tokens = tokenize(subexpressions[i].substr(equalsPos+1));
236 2 : int pos = 0;
237 2 : subexpDefs[name] = parsePrecedence(tokens, pos, customFunctions, subexpDefs, 0);
238 4 : if (pos != tokens.size())
239 0 : throw Exception("unexpected text at end of subexpression: "+tokens[pos].getText());
240 : }
241 :
242 : // Now parse the primary expression.
243 :
244 1640 : vector<ParseToken> tokens = tokenize(primaryExpression);
245 820 : int pos = 0;
246 1636 : ExpressionTreeNode result = parsePrecedence(tokens, pos, customFunctions, subexpDefs, 0);
247 1632 : if (pos != tokens.size())
248 42 : throw Exception("unexpected text at end of expression: "+tokens[pos].getText());
249 1604 : return ParsedExpression(result);
250 : }
251 36 : catch (Exception& ex) {
252 72 : throw Exception("Parse error in expression \""+expression+"\": "+ex.what());
253 : }
254 : }
255 :
256 7663 : ExpressionTreeNode Parser::parsePrecedence(const vector<ParseToken>& tokens, int& pos, const map<string, CustomFunction*>& customFunctions,
257 : const map<string, ExpressionTreeNode>& subexpressionDefs, int precedence) {
258 15326 : if (pos == tokens.size())
259 8 : throw Exception("unexpected end of expression");
260 :
261 : // Parse the next value (number, variable, function, parenthesized expression)
262 :
263 : ParseToken token = tokens[pos];
264 7659 : ExpressionTreeNode result;
265 7659 : if (token.getType() == ParseToken::Number) {
266 : double value;
267 5170 : stringstream(token.getText()) >> value;
268 5170 : result = ExpressionTreeNode(new Operation::Constant(value));
269 2585 : pos++;
270 : }
271 5074 : else if (token.getType() == ParseToken::Variable) {
272 : map<string, ExpressionTreeNode>::const_iterator subexp = subexpressionDefs.find(token.getText());
273 2892 : if (subexp == subexpressionDefs.end()) {
274 2890 : Operation* op = new Operation::Variable(token.getText());
275 2890 : result = ExpressionTreeNode(op);
276 : }
277 : else
278 2 : result = subexp->second;
279 2892 : pos++;
280 : }
281 2182 : else if (token.getType() == ParseToken::LeftParen) {
282 621 : pos++;
283 621 : result = parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, 0);
284 1863 : if (pos == tokens.size() || tokens[pos].getType() != ParseToken::RightParen)
285 0 : throw Exception("unbalanced parentheses");
286 621 : pos++;
287 : }
288 1561 : else if (token.getType() == ParseToken::Function) {
289 1524 : pos++;
290 1524 : vector<ExpressionTreeNode> args;
291 : bool moreArgs;
292 1532 : do {
293 3064 : args.push_back(parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, 0));
294 4596 : moreArgs = (pos < (int) tokens.size() && tokens[pos].getType() == ParseToken::Comma);
295 : if (moreArgs)
296 8 : pos++;
297 : } while (moreArgs);
298 4572 : if (pos == tokens.size() || tokens[pos].getType() != ParseToken::RightParen)
299 0 : throw Exception("unbalanced parentheses");
300 1524 : pos++;
301 1524 : Operation* op = getFunctionOperation(token.getText(), customFunctions);
302 : try {
303 1524 : result = ExpressionTreeNode(op, args);
304 : }
305 0 : catch (...) {
306 0 : delete op;
307 0 : throw;
308 : }
309 : }
310 74 : else if (token.getType() == ParseToken::Operator && token.getText() == "-") {
311 37 : pos++;
312 74 : ExpressionTreeNode toNegate = parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, 2);
313 74 : result = ExpressionTreeNode(new Operation::Negate(), toNegate);
314 : }
315 : else
316 0 : throw Exception("unexpected token: "+token.getText());
317 :
318 : // Now deal with the next binary operator.
319 :
320 39786 : while (pos < (int) tokens.size() && tokens[pos].getType() == ParseToken::Operator) {
321 : token = tokens[pos];
322 6831 : int opIndex = (int) Operators.find(token.getText());
323 6831 : int opPrecedence = Precedence[opIndex];
324 6831 : if (opPrecedence < precedence)
325 2180 : return result;
326 4651 : pos++;
327 9302 : ExpressionTreeNode arg = parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, LeftAssociative[opIndex] ? opPrecedence+1 : opPrecedence);
328 4651 : Operation* op = getOperatorOperation(token.getText());
329 : try {
330 4651 : result = ExpressionTreeNode(op, result, arg);
331 : }
332 0 : catch (...) {
333 0 : delete op;
334 0 : throw;
335 : }
336 : }
337 : return result;
338 : }
339 :
340 4651 : Operation* Parser::getOperatorOperation(const std::string& name) {
341 4651 : switch (OperationId[Operators.find(name)]) {
342 974 : case Operation::ADD:
343 1948 : return new Operation::Add();
344 831 : case Operation::SUBTRACT:
345 1662 : return new Operation::Subtract();
346 2249 : case Operation::MULTIPLY:
347 4498 : return new Operation::Multiply();
348 415 : case Operation::DIVIDE:
349 830 : return new Operation::Divide();
350 182 : case Operation::POWER:
351 364 : return new Operation::Power();
352 0 : default:
353 0 : throw Exception("unknown operator");
354 : }
355 : }
356 :
357 1524 : Operation* Parser::getFunctionOperation(const std::string& name, const map<string, CustomFunction*>& customFunctions) {
358 :
359 : const static map<string, Operation::Id> opMap ={
360 : { "sqrt" , Operation::SQRT },
361 : { "exp" , Operation::EXP },
362 : { "log" , Operation::LOG },
363 : { "sin" , Operation::SIN },
364 : { "cos" , Operation::COS },
365 : { "sec" , Operation::SEC },
366 : { "csc" , Operation::CSC },
367 : { "tan" , Operation::TAN },
368 : { "cot" , Operation::COT },
369 : { "asin" , Operation::ASIN },
370 : { "acos" , Operation::ACOS },
371 : { "atan" , Operation::ATAN },
372 : { "sinh" , Operation::SINH },
373 : { "cosh" , Operation::COSH },
374 : { "tanh" , Operation::TANH },
375 : { "erf" , Operation::ERF },
376 : { "erfc" , Operation::ERFC },
377 : { "step" , Operation::STEP },
378 : { "delta" , Operation::DELTA },
379 : { "nandelta" , Operation::NANDELTA },
380 : { "square" , Operation::SQUARE },
381 : { "cube", Operation::CUBE },
382 : { "recip" , Operation::RECIPROCAL },
383 : { "min" , Operation::MIN },
384 : { "max" , Operation::MAX },
385 : { "abs" , Operation::ABS },
386 : { "floor" , Operation::FLOOR },
387 : { "ceil" , Operation::CEIL },
388 : { "select" , Operation::SELECT },
389 : { "acot" , Operation::ACOT },
390 : { "asec" , Operation::ASEC },
391 : { "acsc" , Operation::ACSC },
392 : { "coth" , Operation::COTH },
393 : { "sech" , Operation::SECH },
394 : { "csch" , Operation::CSCH },
395 : { "asinh" , Operation::ASINH },
396 : { "acosh" , Operation::ACOSH },
397 : { "atanh" , Operation::ATANH },
398 : { "acoth" , Operation::ACOTH },
399 : { "asech" , Operation::ASECH },
400 : { "acsch" , Operation::ACSCH },
401 : { "atan2" , Operation::ATAN2 },
402 1590 : };
403 1524 : string trimmed = name.substr(0, name.size()-1);
404 :
405 : // First check custom functions.
406 :
407 : map<string, CustomFunction*>::const_iterator custom = customFunctions.find(trimmed);
408 1524 : if (custom != customFunctions.end())
409 0 : return new Operation::Custom(trimmed, custom->second->clone());
410 :
411 : // Now try standard functions.
412 :
413 : map<string, Operation::Id>::const_iterator iter = opMap.find(trimmed);
414 1524 : if (iter == opMap.end())
415 0 : throw Exception("unknown function: "+trimmed);
416 1524 : switch (iter->second) {
417 52 : case Operation::SQRT:
418 104 : return new Operation::Sqrt();
419 24 : case Operation::EXP:
420 48 : return new Operation::Exp();
421 2 : case Operation::LOG:
422 4 : return new Operation::Log();
423 110 : case Operation::SIN:
424 220 : return new Operation::Sin();
425 1261 : case Operation::COS:
426 2522 : return new Operation::Cos();
427 2 : case Operation::SEC:
428 4 : return new Operation::Sec();
429 2 : case Operation::CSC:
430 4 : return new Operation::Csc();
431 2 : case Operation::TAN:
432 4 : return new Operation::Tan();
433 2 : case Operation::COT:
434 4 : return new Operation::Cot();
435 2 : case Operation::ASIN:
436 4 : return new Operation::Asin();
437 2 : case Operation::ACOS:
438 4 : return new Operation::Acos();
439 2 : case Operation::ATAN:
440 4 : return new Operation::Atan();
441 2 : case Operation::SINH:
442 4 : return new Operation::Sinh();
443 2 : case Operation::COSH:
444 4 : return new Operation::Cosh();
445 2 : case Operation::TANH:
446 4 : return new Operation::Tanh();
447 2 : case Operation::ERF:
448 4 : return new Operation::Erf();
449 0 : case Operation::ERFC:
450 0 : return new Operation::Erfc();
451 6 : case Operation::STEP:
452 12 : return new Operation::Step();
453 2 : case Operation::DELTA:
454 4 : return new Operation::Delta();
455 2 : case Operation::NANDELTA:
456 4 : return new Operation::Nandelta();
457 0 : case Operation::SQUARE:
458 0 : return new Operation::Square();
459 0 : case Operation::CUBE:
460 0 : return new Operation::Cube();
461 0 : case Operation::RECIPROCAL:
462 0 : return new Operation::Reciprocal();
463 0 : case Operation::MIN:
464 0 : return new Operation::Min();
465 0 : case Operation::MAX:
466 0 : return new Operation::Max();
467 11 : case Operation::ABS:
468 22 : return new Operation::Abs();
469 0 : case Operation::FLOOR:
470 0 : return new Operation::Floor();
471 0 : case Operation::CEIL:
472 0 : return new Operation::Ceil();
473 0 : case Operation::SELECT:
474 0 : return new Operation::Select();
475 2 : case Operation::ACOT:
476 4 : return new Operation::Acot();
477 2 : case Operation::ASEC:
478 4 : return new Operation::Asec();
479 2 : case Operation::ACSC:
480 4 : return new Operation::Acsc();
481 2 : case Operation::COTH:
482 4 : return new Operation::Coth();
483 2 : case Operation::SECH:
484 4 : return new Operation::Sech();
485 2 : case Operation::CSCH:
486 4 : return new Operation::Csch();
487 2 : case Operation::ASINH:
488 4 : return new Operation::Asinh();
489 2 : case Operation::ACOSH:
490 4 : return new Operation::Acosh();
491 2 : case Operation::ATANH:
492 4 : return new Operation::Atanh();
493 2 : case Operation::ACOTH:
494 4 : return new Operation::Acoth();
495 2 : case Operation::ASECH:
496 4 : return new Operation::Asech();
497 2 : case Operation::ACSCH:
498 4 : return new Operation::Acsch();
499 8 : case Operation::ATAN2:
500 16 : return new Operation::Atan2();
501 0 : default:
502 0 : throw Exception("unknown function");
503 : }
504 : }
505 5517 : }
|