From 0dd91da74f4f96ff65cfc2163d85118ab868b707 Mon Sep 17 00:00:00 2001
From: lisk77
Date: Mon, 30 Jun 2025 19:43:09 +0200
Subject: [PATCH] feat: initial commit

---
 .gitignore               |   1 +
 CMakeLists.txt           |  24 ++++
 examples/booleans.lambda |   9 +++
 examples/numbers.lambda  |  13 ++++
 examples/pair.lambda     |  12 +++
 include/ast.hpp          |  39 ++++++++++
 include/evaluator.hpp    |  14 ++++
 include/lexer.hpp        |  18 +++++
 include/parser.hpp       |  31 ++++++++
 include/token.hpp        |  31 ++++++++
 src/ast.cpp              |   0
 src/evaluator.cpp        | 138 ++++++++++++++++++++++++++++++++++
 src/lexer.cpp            |  72 ++++++++++++++++++
 src/main.cpp             |  40 ++++++++++
 src/parser.cpp           | 158 +++++++++++++++++++++++++++++++++++++++
 src/print.cpp            |  56 ++++++++++++++
 src/token.cpp            |  27 +++++++
 17 files changed, 683 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 CMakeLists.txt
 create mode 100644 examples/booleans.lambda
 create mode 100644 examples/numbers.lambda
 create mode 100644 examples/pair.lambda
 create mode 100644 include/ast.hpp
 create mode 100644 include/evaluator.hpp
 create mode 100644 include/lexer.hpp
 create mode 100644 include/parser.hpp
 create mode 100644 include/token.hpp
 create mode 100644 src/ast.cpp
 create mode 100644 src/evaluator.cpp
 create mode 100644 src/lexer.cpp
 create mode 100644 src/main.cpp
 create mode 100644 src/parser.cpp
 create mode 100644 src/print.cpp
 create mode 100644 src/token.cpp

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..4951f80
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+./build/
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..9841405
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,24 @@
+cmake_minimum_required(VERSION 3.10)
+project(lambda VERSION 1.0 LANGUAGES CXX)
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+# 1) gather all your .cpp files
+file(GLOB_RECURSE PROJECT_SOURCES
+    ${PROJECT_SOURCE_DIR}/src/*.cpp
+)
+
+# sanity check
+if(NOT PROJECT_SOURCES)
+  message(FATAL_ERROR "No .cpp files found in src/")
+endif()
+
+# 2) create the executable from those sources
+add_executable(${PROJECT_NAME} ${PROJECT_SOURCES})
+
+# 3) point it at your headers
+target_include_directories(${PROJECT_NAME}
+  PRIVATE ${PROJECT_SOURCE_DIR}/include
+)
+
diff --git a/examples/booleans.lambda b/examples/booleans.lambda
new file mode 100644
index 0000000..5f28428
--- /dev/null
+++ b/examples/booleans.lambda
@@ -0,0 +1,9 @@
+true = \ x y . x;
+false = \ x y . y;
+
+not = \ p . p false true;
+and = \ p q . p q p;
+or = \ p q . p p q;
+xor = \ p q . p ( not q ) q;
+
+main = and true false;
diff --git a/examples/numbers.lambda b/examples/numbers.lambda
new file mode 100644
index 0000000..70a5e8c
--- /dev/null
+++ b/examples/numbers.lambda
@@ -0,0 +1,13 @@
+0 = \ f x . x;
+1 = \ f x . f x;
+2 = \ f x . f ( f x );
+
+isZero = \ n . n (\ x . false) true;
+succ = \ n f x . f ( n f x );
+pred = \ n f x . n (\ g h . h ( g f )) (\u . x) (\u . u);
+add = \ m n . n succ m;
+sub = \ m n . n pred m;
+mul = \ m n f . m ( n f );
+exp = \ b n . n b;
+
+main = add 2 1;
diff --git a/examples/pair.lambda b/examples/pair.lambda
new file mode 100644
index 0000000..a927006
--- /dev/null
+++ b/examples/pair.lambda
@@ -0,0 +1,12 @@
+pair = \ x y z . z x y;
+
+first = \ p . p (\ x y . x);
+second = \ p . p (\ x y . y);
+
+cons = pair;
+head = first;
+tail = second;
+nil = \ x y . y;
+isNil = \ l . l (\ h t d . (\ x y . y)) (\ x y . x);
+
+main = first (pair a b);
diff --git a/include/ast.hpp b/include/ast.hpp
new file mode 100644
index 0000000..fbde70d
--- /dev/null
+++ b/include/ast.hpp
@@ -0,0 +1,39 @@
+#ifndef AST_HPP
+#define AST_HPP
+
+#include <string>
+#include <memory>
+
+// ast stuff lul
+struct Expr {
+  virtual ~Expr() = default;
+};
+
+// represents a simple symbol like "x" or even "thisIsACoolFunctionTrustme100"
+// (ikik the true lambda calculus professionals would now scream at me and say
+// "lambda calculus has only one letter variables!", to which id say ¯\_(ツ)_/¯
+struct Variable : Expr {
+  std::string name;
+
+  Variable(std::string n) : name(std::move(n)) {}
+};
+
+// represents a function itself like \ x . x
+struct Abstraction : Expr {
+  std::string param;
+  std::unique_ptr<Expr> body;
+
+  Abstraction(std::string p, std::unique_ptr<Expr> b) : param(std::move(p)), body(std::move(b)) {}
+};
+
+// wouldnt be turing complete if we couldnt apply to these functions innit
+// (\ x . x) a would be such an application
+// note that parenthesis are your savior because (\ x y . x) (\ x . x) b and
+// (\ x y . x) ((\ x . x) b ) are not the same thing
+struct Application : Expr {
+  std::unique_ptr<Expr> left, right;
+
+  Application(std::unique_ptr<Expr> l, std::unique_ptr<Expr> r) : left(std::move(l)), right(std::move(r)) {}
+};
+
+#endif // AST_HPP
diff --git a/include/evaluator.hpp b/include/evaluator.hpp
new file mode 100644
index 0000000..fa4845b
--- /dev/null
+++ b/include/evaluator.hpp
@@ -0,0 +1,14 @@
+#ifndef EVALUATOR_HPP
+#define EVALUATOR_HPP
+
+#include "ast.hpp"
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+std::unique_ptr<Expr> normalize(const std::unique_ptr<Expr>&, const std::unordered_map<std::string, std::unique_ptr<Expr>>&);
+std::unique_ptr<Expr> substitute(const std::string&, const std::unique_ptr<Expr>&, const std::unique_ptr<Expr>&);
+std::unique_ptr<Expr> evaluate(const std::unique_ptr<Expr>&, const std::unordered_map<std::string, std::unique_ptr<Expr>>&);
+std::unique_ptr<Expr> evaluateMain(const std::unordered_map<std::string, std::unique_ptr<Expr>>&);
+
+#endif // EVALUATOR_HPP
diff --git a/include/lexer.hpp b/include/lexer.hpp
new file mode 100644
index 0000000..41f93c9
--- /dev/null
+++ b/include/lexer.hpp
@@ -0,0 +1,18 @@
+#ifndef LEXER_HPP
+#define LEXER_HPP
+
+#include <string>
+#include <vector>
+#include "token.hpp"
+
+class Lexer {
+  std::string src;
+  bool error_flag;
+  size_t position;
+
+public:
+  Lexer(std::string);
+  std::vector<Token> tokenize();
+};
+
+#endif // LEXER_HPP
diff --git a/include/parser.hpp b/include/parser.hpp
new file mode 100644
index 0000000..7156e8e
--- /dev/null
+++ b/include/parser.hpp
@@ -0,0 +1,31 @@
+#ifndef PARSER_HPP
+#define PARSER_HPP
+
+#include <vector>
+#include <unordered_map>
+
+#include "token.hpp"
+#include "ast.hpp"
+
+class Parser {
+  std::vector<Token> tokens;
+  size_t position;
+  bool error_flag;
+  std::unordered_map<std::string, std::unique_ptr<Expr>> defs;
+
+public:
+  Parser(std::vector<Token>);
+  const std::unordered_map<std::string, std::unique_ptr<Expr>>& definitions() const;
+  const Token& peek() const;
+  const Token& get();
+  bool accept(TokenType t);
+  void expect(TokenType t);
+  bool tryParseDefinition();
+  std::unique_ptr<Expr> parseSimple();
+  std::unique_ptr<Expr> parseApplication();
+  std::unique_ptr<Expr> parseTerm();
+  std::unique_ptr<Expr> parse();
+  std::vector<std::unique_ptr<Expr>> parseProgram();
+};
+
+#endif // PARSER_HPP
diff --git a/include/token.hpp b/include/token.hpp
new file mode 100644
index 0000000..2e496b9
--- /dev/null
+++ b/include/token.hpp
@@ -0,0 +1,31 @@
+#ifndef TOKEN_H
+#define TOKEN_H
+
+#include <string>
+#include <cstdio>
+#include <cstddef>
+
+typedef enum {
+  ERROR,
+  EOL,
+  EOC,
+  LAMBDA,
+  DOT,
+  LPAREN,
+  RPAREN,
+  VARIABLE,
+  EQUALS
+} TokenType;
+
+typedef struct {
+  TokenType type;
+  std::string lexeme;
+  size_t start;
+  size_t end;
+} Token;
+
+void print_lexeme(Token);
+std::string display_tokentype(TokenType);
+void print_token(Token);
+
+#endif // TOKEN_H
diff --git a/src/ast.cpp b/src/ast.cpp
new file mode 100644
index 0000000..e69de29
diff --git a/src/evaluator.cpp b/src/evaluator.cpp
new file mode 100644
index 0000000..f7c4030
--- /dev/null
+++ b/src/evaluator.cpp
@@ -0,0 +1,138 @@
+#include "evaluator.hpp"
+#include "ast.hpp"
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <stdexcept>
+
+// TODO: optimize that later lol
+
+static std::unique_ptr<Expr> clone(const std::unique_ptr<Expr>& e) {
+  if (auto v = dynamic_cast<Variable*>(e.get()))
+    return std::make_unique<Variable>(v->name);
+  if (auto ab = dynamic_cast<Abstraction*>(e.get()))
+    return std::make_unique<Abstraction>(ab->param, clone(ab->body));
+  if (auto ap = dynamic_cast<Application*>(e.get()))
+    return std::make_unique<Application>(clone(ap->left), clone(ap->right));
+  throw std::runtime_error("clone: unknown expression type");
+}
+
+// alpha conversion because lambda calc is all global scope
+static std::string newVar(const std::string& baseVar, const std::unordered_set<std::string>& avoid) {
+  std::string var = baseVar + "'";
+
+  while (avoid.count(var)) {
+    var += "'";
+  }
+
+  return var;
+}
+
+// gives you all free variables in the expression
+static std::unordered_set<std::string> freeVars(const std::unique_ptr<Expr>& expr) {
+  std::unordered_set<std::string> result;
+
+  if (auto v = dynamic_cast<Variable*>(expr.get())) {
+    result.insert(v->name);
+  }
+
+  else if (auto ab = dynamic_cast<Abstraction*>(expr.get())) {
+    auto bodyFVs = freeVars(ab->body);
+    bodyFVs.erase(ab->param);
+    result = bodyFVs;
+  }
+
+  else if (auto ap = dynamic_cast<Application*>(expr.get())) {
+    auto leftFVs = freeVars(ap->left);
+    auto rightFVs = freeVars(ap->right);
+    result.insert(leftFVs.begin(), leftFVs.end());
+    result.insert(rightFVs.begin(), rightFVs.end());
+  }
+
+  return result;
+}
+
+// beta reduction because we obviously want to apply parameters to functions
+std::unique_ptr<Expr> substitute(const std::string& var, const std::unique_ptr<Expr>& val, const std::unique_ptr<Expr>& expr) {
+  if (auto v = dynamic_cast<Variable*>(expr.get())) {
+    if (v->name == var) {
+      return clone(val);
+    }
+
+    else {
+      return clone(expr);
+    }
+  }
+
+  if (auto ab = dynamic_cast<Abstraction*>(expr.get())) {
+    if (ab->param == var) {
+      return clone(expr);
+    }
+
+    auto valFVs = freeVars(val);
+    if (valFVs.count(ab->param)) {
+      auto exprFVs = freeVars(expr);
+      auto allFVs = valFVs;
+      allFVs.insert(exprFVs.begin(), exprFVs.end());
+      allFVs.insert(var);
+      allFVs.erase(ab->param);
+
+      std::string newParam = newVar(ab->param, allFVs);
+      std::unique_ptr<Expr> renamedBody = substitute(ab->param, std::make_unique<Variable>(newParam), ab->body);
+      std::unique_ptr<Expr> newBody = substitute(var, val, renamedBody);
+      return std::make_unique<Abstraction>(newParam, std::move(newBody));
+    }
+
+    else {
+      std::unique_ptr<Expr> newBody = substitute(var, val, ab->body);
+      return std::make_unique<Abstraction>(ab->param, std::move(newBody));
+    }
+  }
+
+  if (auto ap = dynamic_cast<Application*>(expr.get())) {
+    std::unique_ptr<Expr> newLeft = substitute(var, val, ap->left);
+    std::unique_ptr<Expr> newRight = substitute(var, val, ap->right);
+    return std::make_unique<Application>(std::move(newLeft), std::move(newRight));
+  }
+
+  throw std::runtime_error("substitute: unknown expression type");
+}
+
+// idk about you but i would like my expressions to be simpler than just "put in for x and done"
+std::unique_ptr<Expr> normalize(const std::unique_ptr<Expr>& expr, const std::unordered_map<std::string, std::unique_ptr<Expr>>& env) {
+  if (auto v = dynamic_cast<Variable*>(expr.get())) {
+    auto it = env.find(v->name);
+    if (it != env.end()) {
+      return normalize(it->second, env);
+    }
+    return clone(expr);
+  }
+
+  if (auto ab = dynamic_cast<Abstraction*>(expr.get())) {
+    std::unique_ptr<Expr> normalizedBody = normalize(ab->body, env);
+    return std::make_unique<Abstraction>(ab->param, std::move(normalizedBody));
+  }
+
+  if (auto ap = dynamic_cast<Application*>(expr.get())) {
+    std::unique_ptr<Expr> func = normalize(ap->left, env);
+    if (auto ab = dynamic_cast<Abstraction*>(func.get())) {
+      std::unique_ptr<Expr> result = substitute(ab->param, ap->right, ab->body);
+      return normalize(result, env);
+    }
+
+    else {
+      std::unique_ptr<Expr> arg = normalize(ap->right, env);
+      return std::make_unique<Application>(std::move(func), std::move(arg));
+    }
+  }
+
+  throw std::runtime_error("normalize: unknown expression type");
+}
+
+// i think that is self explanatory
+std::unique_ptr<Expr> evaluateMain(const std::unordered_map<std::string, std::unique_ptr<Expr>>& defs) {
+  auto it = defs.find("main");
+  if (it == defs.end()) throw std::runtime_error("evaluate: no main function found in file");
+  return normalize(it->second, defs);
+}
diff --git a/src/lexer.cpp b/src/lexer.cpp
new file mode 100644
index 0000000..f21ba84
--- /dev/null
+++ b/src/lexer.cpp
@@ -0,0 +1,72 @@
+#include <iostream>
+
+#include "lexer.hpp"
+
+#define IS_SPACE(c) (c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v')
+
+Lexer::Lexer(std::string src) {
+  this->src = src;
+  this->position = 0;
+  this->error_flag = false;
+}
+
+std::vector<Token> Lexer::tokenize() {
+  std::vector<Token> tokens;
+  std::string src = this->src;
+  size_t len = src.length();
+
+  while (position < len && !error_flag) {
+    char curr = src[position];
+
+    if (curr == '\\') {
+      tokens.push_back({ LAMBDA, "\\", position, position+1 });
+      position++;
+    }
+    else if (curr == '.') {
+      tokens.push_back({ DOT, ".", position, position+1 });
+      position++;
+    }
+    else if (curr == '(') {
+      tokens.push_back({ LPAREN, "(", position, position+1 });
+      position++;
+    }
+    else if (curr == ')') {
+      tokens.push_back({ RPAREN, ")", position, position+1 });
+      position++;
+    }
+    else if (curr == '=') {
+      tokens.push_back({ EQUALS, "=", position, position+1 });
+      position++;
+    }
+    else if (curr == ';') {
+      tokens.push_back({ EOL, ";", position, position+1 });
+      position++;
+    }
+    else if (!IS_SPACE(curr)) {
+      size_t start = position;
+      while (position < len && !IS_SPACE(src[position])
+          && src[position] != '\\'
+          && src[position] != '('
+          && src[position] != ')'
+          && src[position] != '.'
+          && src[position] != '='
+          && src[position] != ';')
+      {
+        position++;
+      }
+      std::string lex = src.substr(start, position - start);
+      tokens.push_back({ VARIABLE, lex, start, position });
+    }
+    else if (IS_SPACE(curr)) {
+      position++;
+    }
+    else {
+      error_flag = true;
+      std::cerr << "Unknown symbol " << curr << std::endl;
+      tokens.push_back({ ERROR, std::string (1, curr), position, position });
+    }
+  }
+
+  tokens.push_back({ EOC, "", position, position });
+  return tokens;
+}
diff --git a/src/main.cpp b/src/main.cpp
new file mode 100644
index 0000000..ab437ff
--- /dev/null
+++ b/src/main.cpp
@@ -0,0 +1,40 @@
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <memory>
+#include <stdexcept>
+
+#include "lexer.hpp"
+#include "parser.hpp"
+#include "evaluator.hpp"
+#include "print.cpp"
+
+int main() {
+  // get that stream
+  std::ostringstream ss;
+  ss << std::cin.rdbuf();
+  const std::string src = ss.str();
+
+  // chop it up nicely
+  Lexer lex(src);
+  auto tokens = lex.tokenize();
+
+  // make sense of the chopping and scream at people if
+  // they are in the wrong order or smth
+  Parser parser(tokens);
+  auto trees = parser.parseProgram();
+
+  // try to print the evaluation
+  try {
+    std::unique_ptr<Expr> result = evaluateMain(parser.definitions());
+    std::cout << toString(result) << "\n";
+  }
+  catch (const std::exception &e) {
+    // eh.
+    std::cerr << "evaluation: " << e.what() << "\n";
+    return 1;
+  }
+
+  return 0;
+}
+
diff --git a/src/parser.cpp b/src/parser.cpp
new file mode 100644
index 0000000..bf2a896
--- /dev/null
+++ b/src/parser.cpp
@@ -0,0 +1,158 @@
+// src/parser.cpp
+#include "parser.hpp"
+#include "ast.hpp"
+
+#include <memory>
+#include <string>
+#include <stdexcept>
+#include <unordered_map>
+#include <iostream>
+
+Parser::Parser(std::vector<Token> toks)
+    : tokens(std::move(toks))
+    , position(0)
+    , error_flag(false)
+    , defs()
+{}
+
+const std::unordered_map<std::string, std::unique_ptr<Expr>>& Parser::definitions() const {
+    return defs;
+}
+
+const Token& Parser::peek() const {
+    if (position >= tokens.size())
+        throw std::runtime_error("parser: unexpected end of file");
+    return tokens[position];
+}
+
+const Token& Parser::get() {
+    if (position >= tokens.size())
+        throw std::runtime_error("parser: unexpected end of file");
+    return tokens[position++];
+}
+
+bool Parser::accept(TokenType t) {
+    if (position < tokens.size() && tokens[position].type == t) {
+        ++position;
+        return true;
+    }
+    return false;
+}
+
+void Parser::expect(TokenType t) {
+    if (!accept(t)) {
+        std::cout << display_tokentype(t) << std::endl;
+        throw std::runtime_error("parser: unexpected token at position "
+                                 + std::to_string(peek().start));
+    }
+}
+
+// Try to parse a definition of the form:
+//   <var> '=' <term> ';'
+// Returns true if a definition was parsed and stored in defs.
+bool Parser::tryParseDefinition() {
+    if (peek().type == TokenType::VARIABLE
+        && position+1 < tokens.size()
+        && tokens[position+1].type == TokenType::EQUALS)
+    {
+        std::string name = get().lexeme;   // consume VARIABLE
+        expect(TokenType::EQUALS);         // consume '='
+
+        // parse the right-hand term
+        std::unique_ptr<Expr> value = parseTerm();
+
+        // require a semicolon (EOL token)
+        expect(TokenType::EOL);
+
+        // store in definitions map
+        defs.emplace(std::move(name), std::move(value));
+        return true;
+    }
+    return false;
+}
+
+// <simple> ::= '\' <var>+ '.' <term>
+//            | <var>
+//            | '(' <term> ')'
+std::unique_ptr<Expr> Parser::parseSimple() {
+    // abstraction with one-or-more parameters
+    if (accept(TokenType::LAMBDA)) {
+        std::vector<std::string> params;
+        while (peek().type == TokenType::VARIABLE) {
+            params.push_back(get().lexeme);
+        }
+        expect(TokenType::DOT);
+
+        std::unique_ptr<Expr> body = parseTerm();
+        // right-nest them: \p1.\p2.…body
+        for (auto it = params.rbegin(); it != params.rend(); ++it) {
+            body = std::make_unique<Abstraction>(*it, std::move(body));
+        }
+        return body;
+    }
+
+    // variable
+    if (peek().type == TokenType::VARIABLE) {
+        const auto &tok = get();
+        return std::make_unique<Variable>(tok.lexeme);
+    }
+
+    // parenthesized term
+    if (accept(TokenType::LPAREN)) {
+        std::unique_ptr<Expr> e = parseTerm();
+        expect(TokenType::RPAREN);
+        return e;
+    }
+
+    throw std::runtime_error("parser: expected \\, variable, or '(' at position "
+                             + std::to_string(peek().start));
+}
+
+// <application> ::= <simple> { <simple> }
+std::unique_ptr<Expr> Parser::parseApplication() {
+    std::unique_ptr<Expr> expr = parseSimple();
+    while (true) {
+        TokenType t = peek().type;
+        if (t == TokenType::VARIABLE ||
+            t == TokenType::LAMBDA ||
+            t == TokenType::LPAREN)
+        {
+            std::unique_ptr<Expr> rhs = parseSimple();
+            expr = std::make_unique<Application>(std::move(expr), std::move(rhs));
+        } else {
+            break;
+        }
+    }
+    return expr;
+}
+
+// <term> ::= <application>
+std::unique_ptr<Expr> Parser::parseTerm() {
+    return parseApplication();
+}
+
+// parse exactly one term and expect EOC
+std::unique_ptr<Expr> Parser::parse() {
+    std::unique_ptr<Expr> root = parseTerm();
+    expect(TokenType::EOC);
+    return root;
+}
+
+// <program> ::= { <definition> | <term> EOL } EOC
+std::vector<std::unique_ptr<Expr>> Parser::parseProgram() {
+    std::vector<std::unique_ptr<Expr>> results;
+
+    while (peek().type != TokenType::EOC) {
+        // first try a definition
+        if (tryParseDefinition())
+            continue;
+
+        // otherwise a bare term ending in EOL
+        results.push_back(parseTerm());
+        expect(TokenType::EOL);
+    }
+
+    expect(TokenType::EOC);
+    return results;
+}
+
diff --git a/src/print.cpp b/src/print.cpp
new file mode 100644
index 0000000..3c2d863
--- /dev/null
+++ b/src/print.cpp
@@ -0,0 +1,56 @@
+#include "ast.hpp"
+#include <string>
+#include <sstream>
+#include <vector>
+
+// Forward‐declare
+static void _print(const Expr* e, std::ostream& out);
+
+// Public API: return string
+inline std::string toString(const std::unique_ptr<Expr>& e) {
+    std::ostringstream oss;
+    _print(e.get(), oss);
+    return oss.str();
+}
+
+// Internal recursive printer
+static void _print(const Expr* e, std::ostream& out) {
+    // 1) Abstraction(s): collect all params
+    if (auto ab = dynamic_cast<const Abstraction*>(e)) {
+        std::vector<std::string> params;
+        const Expr* body = ab;
+        while (auto a2 = dynamic_cast<const Abstraction*>(body)) {
+            params.push_back(a2->param);
+            body = a2->body.get();
+        }
+        // print "\x y z. "
+        out << '\\' << params[0];
+        for (size_t i = 1; i < params.size(); ++i) {
+            out << ' ' << params[i];
+        }
+        out << ". ";
+        // then the body
+        _print(body, out);
+        return;
+    }
+
+    // 2) Variable
+    if (auto v = dynamic_cast<const Variable*>(e)) {
+        out << v->name;
+        return;
+    }
+
+    // 3) Application
+    if (auto ap = dynamic_cast<const Application*>(e)) {
+        out << '(';
+        _print(ap->left.get(), out);
+        out << ' ';
+        _print(ap->right.get(), out);
+        out << ')';
+        return;
+    }
+
+    // fallback
+    out << "";
+}
+
diff --git a/src/token.cpp b/src/token.cpp
new file mode 100644
index 0000000..f1b8ddb
--- /dev/null
+++ b/src/token.cpp
@@ -0,0 +1,27 @@
+#include <string>
+#include <cstdio>
+
+#include "token.hpp"
+
+std::string display_tokentype(TokenType type) {
+  switch (type) {
+    case ERROR : return "ERROR";
+    case EOL : return "EOL";
+    case EOC : return "EOC";
+    case LAMBDA : return "LAMBDA";
+    case DOT : return "DOT";
+    case LPAREN : return "LPAREN";
+    case RPAREN : return "RPAREN";
+    case EQUALS : return "EQUALS";
+    case VARIABLE : return "VARIABLE";
+    default : return "";
+  }
+}
+
+void print_lexeme(Token self) {
+  printf("Lexeme: '%.*s'\n", (int)(self.end-self.start), self.lexeme);
+}
+
+void print_token(Token self) {
+  printf("[%s] %.*s %li..%li\n", display_tokentype(self.type), (int)(self.end-self.start), self.lexeme, self.start, self.end);
+}