A Reverse Descent Parser

Parsing the formula into tokens:

The parsing is done with a Finite State Machine. It takes the formula string and extracts the individual tokens (e.g. '+') onto a list.

The states are all created as static pointers.

This is the struct for all the nodes on the Token list and the RPN list.
Each node can be just a type, or have ether a double precision float, 64bit integer, or a string array associated with it.

typedef struct {
	struct Node node;		// a node for the double linked list.
	int type;
	union {
		double number;
		long long integer;
		char *str;
	};
} t_node;

The virtual base class

class StateStart : public StateBase {
	protected:
		string name = "Start";
	public:
		void enter (Tokenize *fsm) override {};
		void work (Tokenize *fsm, const char *str) override;
		void exit (Tokenize *fsm) override {};
};

This is the class definition for the FSM. On a state change it calls the exit() function on the previous state, then changes the state and calls the enter() on the new class.

class Tokenize {
	private:
		string name = "Tokenize";		// for debug
		void reset (void) { flush_list(); formula = ""; used = 0; } 
		void flush_list (void) {
			t_node *node;
			while (node = (t_node *)list->rem_head()) {
				free (node);
			}
		}
	protected:
		StateBase *state;
	public:
		List *list;
		string formula;
		int used = 0;
		Tokenize(List *list) { 
			create_all_states(); 
			this->list = list; 
			this->state = state_start; 
		}
		~Tokenize() { }
		void set_formula (string str) { reset(); formula = str; }
		void set_state (StateBase *n) { 
			if (state) state->exit(this);
			state = n;
			if (state) state->enter(this);
		}
		bool do_work (void) { 
			while (formula.length() > 0) {
				state->work(this, &formula.c_str()[used]);
			} 
			return formula.length() == 0;
		};
};

For each state it is subclassed from the StateBase virtual class. For most states the work and the exit state functions are overrided.

For this implementation there are

state_start: initial state
state_number: all numbers are assumed integer
state_plus_minus: handles + and -
state_mult: handles *
state_divide: handles /
state_float: number with a '.' in them
state_open: open bracket
state_close: close bracket

Convert the token list into a RPN list:

This takes the nodes on the token list, and re-orders those tokens onto the RPN list so they can be evaluated easier.
Using a recursive function chain to work out what to do.

The Reverse Descent Parser class.

class Parser {
	private:
	t_node *level_add_plus (t_node *);
	t_node *level_mult_div (t_node *);
	t_node *level_bracket (t_node *);
	t_node *level_push (t_node *);
	protected:
	public:
	List *token_list;
	List *rdp_list;
	Parser(List *token_list, List *rdp_list) { this->token_list = token_list; this->rdp_list = rdp_list; }
	Parser() {}
	~Parser() {}
	int parse (void);
	void set_token_list (List *list) { token_list = list; } ;
	void set_rdp_list (List *list) { rdp_list = list; } ;
};

The main parse function, this sets up for the parser chain.

int Parser::parse (void) {
	t_node *node = (t_node *) token_list->rem_head();
	if (node) {
		spaces = "";
		node = LEVEL_START (node);
	}
	return 0;			// 0 = success
}

This is the first in the function chain. It is the lowest in the priority in the order of precedence, and calls higher level_mult_div().

t_node *Parser::level_add_plus (t_node *node) {
	node = level_mult_div (node);
	while (node && (node->type == NT_ADD || node->type == NT_SUB)) {
		t_node *current = node;
		node = (t_node *) token_list->rem_head();
		node = level_mult_div (node);
		rdp_list->add_head (¤t->node);
	}
	return node;
}

This calls the next higher function level_bracket().

t_node *Parser::level_mult_div (t_node *node) {
	node = level_bracket (node);
	while (node && (node->type == NT_MULT || node->type == NT_DIV)) {
		t_node *current = node;
		node = (t_node *) token_list->rem_head();
		node = level_bracket (node);
		rdp_list->add_head (¤t->node);
	}
	spaces = spaces.substr(2);
	return node;
}

This calls the next higher function level_push().

t_node *Parser::level_bracket (t_node *node) {
	if (node && (node->type == NT_OPEN)) {
		free (node);												// not interested in the OPEN token
		node = LEVEL_START ((t_node *) token_list->rem_head());			// start at the top of the chain again with the next token
		if (node->type == NT_CLOSE) {
			free (node);
		}
		node = (t_node *)token_list->rem_head ();						// get the next token
	}
	else node = level_push(node);
	return node;
}

This is the last in the chain and puts the token on list.

t_node *Parser::level_push (t_node *node) {		/* bottom of the chain */
	rdp_list->add_head (&node->node);
	node = (t_node *) token_list->rem_head();
	return node;
}

Traverse the RPN list to get the answer:

This takes the RPN list and applies the mathimatical functions to work out the answer.

The class for RPN parser.

class Evaluate {
	private:
	List *list;
	t_node * create_number_node (double number);
	t_node * create_integer_node (long long integer);
	protected:
	public:
	Evaluate(List *list) { this->list = list; }
	t_node * evaluate (void);
};

This is a recursive functon to work though the RPN list evaluating the nodes as the are removed from the head of the list.
It removes a node from the list, and according to its type, it does the appropriate function.

t_node * Evaluate::evaluate (void) {
	t_node *node = (t_node *) list->rem_head();
	switch (node->type) {
		case NT_ADD: {
			t_node *node2 = evaluate();
			t_node *node1 = evaluate();
			t_node *result;
			if (node1->type == NT_INT && node2->type == NT_INT) {
				result = create_integer_node (node1->integer + node2->integer);
			}
			else {
				result = create_number_node (to_dbl(node1) + to_dbl(node2));
			}
			free (node1);
			free (node2);
			free (node);
			node = result;
			break;
		}
		case NT_SUB: {
			t_node *node2 = evaluate();
			t_node *node1 = evaluate();
			t_node *result;
			if (node1->type == NT_INT && node2->type == NT_INT) {
				 result = create_integer_node (node1->integer - node2->integer);
			}
			else {
				result = create_number_node (to_dbl(node1) - to_dbl(node2));
			}
			free (node1);
			free (node2);
			free (node);
			node = result;
			break;
		}
		case NT_MULT: {
			t_node *node2 = evaluate();
			t_node *node1 = evaluate();
			t_node *result;
			if (node1->type == NT_INT && node2->type == NT_INT) {
				 result = create_integer_node (node1->integer * node2->integer);
			}
			else {
				result = create_number_node (to_dbl(node1) - to_dbl(node2));
			}
			free (node1);
			free (node2);
			free (node);
			node = result;
			break;
		}
		case NT_DIV: {
			t_node *node2 = evaluate();
			t_node *node1 = evaluate();
			t_node *result;
			if (node1->type == NT_INT && node2->type == NT_INT) {
				 result = create_integer_node (node1->integer / node2->integer);
			}
			else {
				result = create_number_node (to_dbl(node1) / to_dbl(node2));
			}
			free (node1);
			free (node2);
			free (node);
			node = result;
			break;
		}
	}
	return node;
}

Creates an integer node that is returned by one call to the evaluate function.

t_node *Evaluate::create_integer_node (long long integer) {
	t_node *node = (t_node *) malloc (sizeof(t_node));
	node->type = NT_INT;
	node->integer = integer;
	return node;
}

Creates a floating point node that is returned by one call to the evaluate function.

t_node *Evaluate::create_number_node (double number) {
	t_node *node = (t_node *) malloc (sizeof(t_node));
	node->type = NT_FLOAT;
	node->number = number;
	return node;
}

A set of C++ classes to implement a Reverse Descent Parser.

Purpose:

Parsing the formula into tokens:

Convert the token list into a RPN list:

Traverse the RPN list to get the answer: