Code Review Asked on January 4, 2022
So this is my first time writing a Lexer, and I want to make sure I’m doing it right. The lexer is not complete for a programming language right now, because I think I can easily add more stuff later. Here is the code:
main.cpp
#include <iostream>
#include <fstream>
#include <memory>
#include <unordered_map>
#include "lexer.h"
std::vector<std::unique_ptr<table_entry>> symbol_table;
int main(int argc, char **argv) {
--argc;
++argv;
std::string input_string;
std::ifstream file(*argv);
std::string next_line;
char last_char;
while (std::getline(file, next_line)) {
last_char = next_line.back();
next_line.pop_back();
input_string.append(next_line + "n");
}
input_string.pop_back();
input_string.append(std::string(1, last_char));
// std::cout << input_string << std::endl;
init_lexer(&input_string);
table_entry* next_token;
while((next_token = next())->type != token_type::end) { // Just For Testing, I'll remove this later.
std::cout << next_token->lexme << " " << next_token->type << std::endl;
}
}
lexer.h
#ifndef LEXER
#define LEXER
#include <utility>
#include <utility>
#include <utility>
#include <vector>
#include <list>
#include <unordered_set>
#include <sstream>
#include <string>
#define letter_pair(num) std::make_pair(num, 'a'), std::make_pair(num, 'b'), std::make_pair(num, 'c'), std::make_pair(num, 'd'), std::make_pair(num, 'e'), std::make_pair(num, 'f'), std::make_pair(num, 'g'), std::make_pair(num, 'h'), std::make_pair(num, 'i'), std::make_pair(num, 'j'), std::make_pair(num, 'k'), std::make_pair(num, 'l'), std::make_pair(num, 'm'), std::make_pair(num, 'n'), std::make_pair(num, 'o'), std::make_pair(num, 'p'), std::make_pair(num, 'q'), std::make_pair(num, 'r'), std::make_pair(num, 's'), std::make_pair(num, 't'), std::make_pair(num, 'u'), std::make_pair(num, 'v'), std::make_pair(num, 'w'), std::make_pair(num, 'x'), std::make_pair(num, 'y'), std::make_pair(num, 'z')
enum token_type {
/// Relative Operations ///
not_op /* ! */,
lt /* < */,
le /* <= */,
eq /* == */,
ne /* != */,
gt /* > */,
ge /* >= */,
/// Assignment ///
assign, /* = */
/// Identifiers ///
id /* Variable/Function Names */,
num /* Numbers */,
string_lit /* strings */,
decimal /* decimal number */,
/// Conrol Flow ///
if_stmt /* if */,
elif_stmt /* elif */,
else_stmt /* else */,
while_stmt /* while */,
for_stmt /* for */,
/// Other ///
ws /* Space, New Line, Or Tabs */,
end /* End Of File */
};
struct table_entry {
std::string lexme;
token_type type;
int line;
int col;
table_entry(std::string &lexme, token_type token_type, int line, int col)
: lexme(lexme), type(token_type), line(line), col(col) {}
};
extern std::vector<std::unique_ptr<table_entry>> symbol_table;
int pos = 0, line = 1, col = 1;
std::vector<std::list<std::pair<int, char>>> state_machine;
std::unordered_set<int> accepting_states;
std::string *input;
void init_lexer(std::string *_input) {
input = _input;
/* Set the states of the finite state machine. You can view the output at
* State_Machine.png, but good luck understanding it since it was drawn by
* yours truly.
*/
state_machine.push_back({ /* State 0 */
std::make_pair(1, '_'),
std::make_pair(2, ' '),
std::make_pair(2, 't'),
std::make_pair(2, 'n'),
letter_pair(3),
std::make_pair(4, '!'),
std::make_pair(6, '='),
std::make_pair(8, '<'),
std::make_pair(10, '>')
});
state_machine.push_back({ /* State 1 */
std::make_pair(1, '_'),
letter_pair(3)
});
state_machine.push_back({ /* State 2 */
std::make_pair(2, 'n'),
std::make_pair(2, ' '),
std::make_pair(2, 't')
});
state_machine.push_back({ /* State 3 */
std::make_pair(3, '_'),
letter_pair(3)
});
state_machine.push_back({ /* State 4 */
std::make_pair(5, '=')
});
state_machine.emplace_back( /* State 5 */
);
state_machine.push_back({ /* State 6 */
std::make_pair(7, '=')
});
state_machine.emplace_back( /* State 7 */
);
state_machine.push_back({ /* State 8 */
std::make_pair(9, '=')
});
state_machine.emplace_back( /* State 9 */
);
state_machine.push_back({ /* State 10 */
std::make_pair(11, '=')
});
state_machine.push_back({
});
accepting_states.insert(2);
accepting_states.insert(3);
accepting_states.insert(4);
accepting_states.insert(5);
accepting_states.insert(6);
accepting_states.insert(7);
accepting_states.insert(8);
accepting_states.insert(9);
accepting_states.insert(10);
accepting_states.insert(11);
}
table_entry *generate_entry(int state, std::string &lexme) {
switch (state) {
case 2: return new table_entry(lexme, token_type::ws, line, col);
case 3:
if (lexme == "if") {
return new table_entry(lexme, token_type::if_stmt, line, col);
} else if (lexme == "elif") {
return new table_entry(lexme, token_type::elif_stmt, line, col);
} else if (lexme == "else") {
return new table_entry(lexme, token_type::else_stmt, line, col);
} else if (lexme == "for") {
return new table_entry(lexme, token_type::for_stmt, line, col);
} else if (lexme == "while") {
return new table_entry(lexme, token_type::while_stmt, line, col);
} else {
return new table_entry(lexme, token_type::id, line, col);
}
case 4:return new table_entry(lexme, token_type::not_op, line, col);
case 5:return new table_entry(lexme, token_type::ne, line, col);
case 6:return new table_entry(lexme, token_type::assign, line, col);
case 7:return new table_entry(lexme, token_type::eq, line, col);
case 8:return new table_entry(lexme, token_type::lt, line, col);
case 9:return new table_entry(lexme, token_type::le, line, col);
case 10:return new table_entry(lexme, token_type::gt, line, col);
case 11:return new table_entry(lexme, token_type::ge, line, col);
default:return nullptr;
}
}
table_entry *next() {
if (pos >= input->length()) {
return new table_entry(*(new std::string()), token_type::end, line, col);
}
int state = 0;
int _pos = pos; // Copy the positions to save the initial locations of the tokens
int _line = line;
int _col = col;
std::string next_lexme;
while (true) {
if (_pos >= input->length()) {
break;
}
char next = (*input)[_pos];
std::list<std::pair<int, char>> &neighbors = state_machine[state];
bool didFind = false;
for (auto &neighbor : neighbors) {
if (neighbor.second == next) {
state = neighbor.first;
didFind = true;
break;
}
}
if (!didFind) {
break;
}
if (next == 'n') {
_col = 1;
_line++;
} else {
_col++;
}
_pos++;
next_lexme.push_back(next);
}
if (accepting_states.contains(state)) {
table_entry *ans = generate_entry(state, next_lexme);
if (ans->type != token_type::id) {
symbol_table.emplace_back(ans);
}
pos = _pos;
line = _line;
col = _col;
return ans;
} else {
std::cerr << "Unexpected Token: " << next_lexme << " At Line " << line << " And Column " << col;
return new table_entry(*(new std::string()), token_type::end, line, col);
}
}
#endif // LEXER
A couple of things:
Get help from others!
Recent Answers
Recent Questions
© 2024 TransWikia.com. All rights reserved. Sites we Love: PCI Database, UKBizDB, Menu Kuliner, Sharing RPP