From c4011152b6c8913d3d87bf8281ee0ec9c631b772 Mon Sep 17 00:00:00 2001 From: Karutoh Date: Sun, 27 Jul 2025 23:50:41 -0700 Subject: [PATCH] Huge update, backup commit. --- CMakeLists.txt | 7 + include/arctyx/compiler/Compiler.h | 39 +++- include/arctyx/compiler/Language.h | 20 +- include/arctyx/compiler/Register.h | 2 +- include/arctyx/compiler/Stack.h | 41 ++++ include/arctyx/compiler/StackItem.h | 33 +++ include/arctyx/compiler/StackParam.h | 11 + include/arctyx/compiler/Symbol.h | 5 +- include/arctyx/compiler/Token.h | 4 + include/arctyxLang/ArctyxCompiler.h | 7 + src/Arctyx.cpp | 10 +- src/ArctyxCompiler.cpp | 292 +++++++++++++++++++++++++++ src/ArctyxLang.cpp | 43 +++- src/compiler/Compiler.cpp | 216 ++++++++------------ src/compiler/Language.cpp | 91 +++++---- src/compiler/Stack.cpp | 49 +++++ src/compiler/StackItem.cpp | 74 +++++++ src/compiler/StackParam.cpp | 1 + src/compiler/Symbol.cpp | 22 +- src/compiler/Token.cpp | 5 + src/main.cpp | 3 +- 21 files changed, 775 insertions(+), 200 deletions(-) create mode 100644 include/arctyx/compiler/Stack.h create mode 100644 include/arctyx/compiler/StackItem.h create mode 100644 include/arctyx/compiler/StackParam.h create mode 100644 include/arctyxLang/ArctyxCompiler.h create mode 100644 src/ArctyxCompiler.cpp create mode 100644 src/compiler/Stack.cpp create mode 100644 src/compiler/StackItem.cpp create mode 100644 src/compiler/StackParam.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index eb36860..171a0eb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,6 +52,12 @@ add_library(Arctyx SHARED src/compiler/Register.cpp include/arctyx/compiler/Operator.h src/compiler/Operator.cpp + include/arctyx/compiler/StackItem.h + src/compiler/StackItem.cpp + include/arctyx/compiler/Stack.h + src/compiler/Stack.cpp + include/arctyx/compiler/StackParam.h + src/compiler/StackParam.cpp ) add_library(x64Arch SHARED @@ -63,6 +69,7 @@ set_target_properties(x64Arch PROPERTIES ) add_library(ArctyxLang SHARED + 
include/arctyxLang/ArctyxCompiler.h src/ArctyxCompiler.cpp src/ArctyxLang.cpp ) diff --git a/include/arctyx/compiler/Compiler.h b/include/arctyx/compiler/Compiler.h index 321370b..0e591a1 100644 --- a/include/arctyx/compiler/Compiler.h +++ b/include/arctyx/compiler/Compiler.h @@ -5,6 +5,7 @@ #include "Architecture.h" #include "Language.h" +#include "Stack.h" #include "Symbol.h" class Compiler @@ -12,12 +13,14 @@ class Compiler private: const Architecture *architecture; const Language *language; + ehs::UInt_64 entryPointId; + ehs::Str_8 entryPointName; ehs::Array symbols; public: Compiler(); - Compiler(const ehs::Str_8 &arch, const ehs::Str_8 &lang); + Compiler(const ehs::Str_8 &arch, const ehs::Str_8 &lang, ehs::Str_8 entryPoint); Compiler(Compiler &&other) noexcept; @@ -27,18 +30,42 @@ public: Compiler &operator=(const Compiler &other); - ehs::Array Compile(const ehs::Str_8 &code) const; + bool HasSymbol(const ehs::UInt_64 &id) const; + + bool HasSymbol(const ehs::Str_8 &name) const; + + bool AddSymbol(Symbol symbol); + + Symbol *GetSymbol(const ehs::UInt_64 &id) const; + + Symbol *GetSymbol(const ehs::Str_8 &name) const; + + ehs::Vector Compile(const ehs::Str_8 &code); private: - static bool IsEOL(const ehs::Array &eols, const ehs::Char_8 *c); + static bool IsNumber(const ehs::Char_8 *c); - static bool IsSeparator(const ehs::Array &separators, const ehs::Char_8 *c); + static bool IsOperator(const ehs::Char_8 *c); + + static bool IsAlphabet(const ehs::Char_8 *c); + + static bool IsAlphaNumeric(const ehs::Char_8 *c); static bool IsPrimitive(const ehs::Array &primitives, const ehs::Str_8 &value); static bool IsKeyword(const ehs::Array &keywords, const ehs::Str_8 &value); - static const Operator *IsOperator(const ehs::Array &operators, const ehs::Str_8 &value); + static bool IsEncapsulator(const ehs::Char_8 *c); - ehs::Vector Parse(const ehs::Str_8 &code) const; + static bool IsEOL(const ehs::Array &eols, const ehs::Char_8 *c); + + static void ParseNumber(ehs::Vector 
&tokens, const ehs::Array &eols, ehs::Char_8 **start, ehs::Char_8 **i); + + static void ParseOperator(ehs::Vector &tokens, const ehs::Array &eols, ehs::Char_8 **start, ehs::Char_8 **i); + + static void ParseText(ehs::Vector &tokens, const ehs::Array &eols, + const ehs::Array &primitives, const ehs::Array &keywords, ehs::Char_8 **start, + ehs::Char_8 **i); + + ehs::Vector ParseIntoTokens(const ehs::Str_8 &code) const; }; \ No newline at end of file diff --git a/include/arctyx/compiler/Language.h b/include/arctyx/compiler/Language.h index 1524241..d91783d 100644 --- a/include/arctyx/compiler/Language.h +++ b/include/arctyx/compiler/Language.h @@ -9,6 +9,9 @@ #include "Operator.h" #include "Primitive.h" +typedef ehs::Vector (*CompileIntoTokensCb)(Compiler *compiler, const Language *lang, const ehs::Str_8 &code); +typedef ehs::Vector (*CompileIntoMachineCodeCb)(Compiler *compiler, const Language *lang, const ehs::Vector &tokens); + class EHS_LIB_IO Language { private: @@ -18,11 +21,12 @@ private: ehs::Str_8 name; ehs::Version version; ehs::Array eols; - ehs::Array separators; ehs::Array primitives; ehs::Array keywords; ehs::Array operators; ehs::Array interpretations; + CompileIntoTokensCb compileIntoTokensCb; + CompileIntoMachineCodeCb compileIntoMachineCodeCb; static ehs::Array languages; @@ -51,12 +55,6 @@ public: bool AddEOL(const ehs::Char_8 &eol); - ehs::Array GetSeparators() const; - - bool HasSeparator(const ehs::Char_8 &separator) const; - - bool AddSeparator(const ehs::Char_8 &separator); - ehs::Array GetPrimitives() const; bool HasPrimitive(const ehs::UInt_64 &id) const; @@ -91,6 +89,14 @@ public: bool AddInterpretation(Interpretation interpretation); + void SetCompileIntoTokensCb(CompileIntoTokensCb cb); + + ehs::Vector CompileIntoTokens(Compiler *compiler, const ehs::Str_8 &code) const; + + void SetCompileIntoMachineCodeCb(CompileIntoMachineCodeCb cb); + + ehs::Vector CompileIntoMachineCode(Compiler *compiler, const ehs::Vector &tokens) const; + static 
bool Has(const ehs::UInt_64 &id); static bool Has(const ehs::Str_8 &name); diff --git a/include/arctyx/compiler/Register.h b/include/arctyx/compiler/Register.h index 2131ca7..ea3a582 100644 --- a/include/arctyx/compiler/Register.h +++ b/include/arctyx/compiler/Register.h @@ -8,7 +8,7 @@ private: ehs::UInt_64 id; ehs::Str_8 name; ehs::UInt_32 byteDepth; - ehs::UInt_64 code;; + ehs::UInt_64 code; public: Register(); diff --git a/include/arctyx/compiler/Stack.h b/include/arctyx/compiler/Stack.h new file mode 100644 index 0000000..0ebf975 --- /dev/null +++ b/include/arctyx/compiler/Stack.h @@ -0,0 +1,41 @@ +#pragma once + +#include +#include + +#include "arctyx/compiler/StackParam.h" +#include "arctyx/compiler/StackItem.h" + +class Stack +{ +private: + ehs::Array inputs; + ehs::Array outputs; + ehs::UInt_64 offset; + ehs::Vector items; + +public: + Stack(); + + Stack(const ehs::UInt_64 &offset); + + Stack(Stack &&other) noexcept; + + Stack(const Stack &other); + + Stack &operator=(Stack &&other) noexcept; + + Stack &operator=(const Stack &other); + + ehs::UInt_64 GetOffset() const; + + bool HasItem(const ehs::UInt_64 &id) const; + + bool HasItem(const ehs::Str_8 &name) const; + + bool AddItem(StackItem item); + + StackItem *GetItem(const ehs::UInt_64 &id) const; + + StackItem *GetItem(const ehs::Str_8 &name) const; +}; diff --git a/include/arctyx/compiler/StackItem.h b/include/arctyx/compiler/StackItem.h new file mode 100644 index 0000000..ee6f481 --- /dev/null +++ b/include/arctyx/compiler/StackItem.h @@ -0,0 +1,33 @@ +#pragma once + +#include + +class StackItem +{ +private: + ehs::UInt_64 id; + ehs::Str_8 name; + ehs::UInt_64 address; + ehs::UInt_64 size; + +public: + StackItem(); + + StackItem(ehs::Str_8 name, const ehs::UInt_64 &address, const ehs::UInt_64 &size); + + StackItem(StackItem &&other) noexcept; + + StackItem(const StackItem &other); + + StackItem &operator=(StackItem &&other) noexcept; + + StackItem &operator=(const StackItem &other); + + ehs::Str_8 
GetName() const; + + ehs::UInt_64 GetId() const; + + ehs::UInt_64 GetAddress() const; + + ehs::UInt_64 GetSize() const; +}; \ No newline at end of file diff --git a/include/arctyx/compiler/StackParam.h b/include/arctyx/compiler/StackParam.h new file mode 100644 index 0000000..ae44cad --- /dev/null +++ b/include/arctyx/compiler/StackParam.h @@ -0,0 +1,11 @@ +#pragma once + +#include "arctyx/compiler/Register.h" + +class StackParam +{ +private: + bool pointer; + const Register *reg; + +}; \ No newline at end of file diff --git a/include/arctyx/compiler/Symbol.h b/include/arctyx/compiler/Symbol.h index 94cb657..42476d4 100644 --- a/include/arctyx/compiler/Symbol.h +++ b/include/arctyx/compiler/Symbol.h @@ -20,11 +20,12 @@ private: ehs::UInt_64 id; ehs::Str_8 name; ehs::UInt_64 address; + ehs::UInt_64 size; public: Symbol(); - Symbol(const SymbolType &type, ehs::Str_8 name, const ehs::UInt_64 &address); + Symbol(const SymbolType &type, ehs::Str_8 name, const ehs::UInt_64 &address, const ehs::UInt_64 &size); Symbol(Symbol &&other) noexcept; @@ -41,4 +42,6 @@ public: ehs::UInt_64 GetId() const; ehs::UInt_64 GetAddress() const; + + ehs::UInt_64 GetSize() const; }; \ No newline at end of file diff --git a/include/arctyx/compiler/Token.h b/include/arctyx/compiler/Token.h index 94ed7d7..2983213 100644 --- a/include/arctyx/compiler/Token.h +++ b/include/arctyx/compiler/Token.h @@ -6,6 +6,7 @@ enum class TokenT : ehs::UInt_8 { UNKNOWN, SEPARATOR, + BOOLEAN, NUMBER, STRING, CHARACTER, @@ -14,6 +15,7 @@ enum class TokenT : ehs::UInt_8 IDENTIFIER, UNARY_OPERATOR, COMPOUND_OPERATOR, + ENCAPSULATOR, EOL }; @@ -36,6 +38,8 @@ public: Token &operator=(const Token &token); + void SetType(const TokenT &newType); + TokenT GetType() const; ehs::Str_8 GetValue() const; diff --git a/include/arctyxLang/ArctyxCompiler.h b/include/arctyxLang/ArctyxCompiler.h new file mode 100644 index 0000000..48047e1 --- /dev/null +++ b/include/arctyxLang/ArctyxCompiler.h @@ -0,0 +1,7 @@ +#pragma once + 
+#include "arctyx/compiler/Compiler.h" + +ehs::Vector CompileIntoTokens(Compiler *compiler, const Language *lang, const ehs::Str_8 &code); + +ehs::Vector CompileIntoMachineCode(Compiler *compiler, const Language *lang, const ehs::Vector &tokens); \ No newline at end of file diff --git a/src/Arctyx.cpp b/src/Arctyx.cpp index 1b1f210..37fa83f 100644 --- a/src/Arctyx.cpp +++ b/src/Arctyx.cpp @@ -20,11 +20,11 @@ typedef bool (*ShutdownPlugin)(); void Arctyx::LoadArchitecturePlugins() { -#if defined(EHS_OS_LINUX) - const ehs::Str_8 delimeter = ".so"; -#elif defined(EHS_OS_WINDOWS) - const ehs::Str_8 delimeter = ".dll"; -#endif + #if defined(EHS_OS_LINUX) + const ehs::Str_8 delimeter = ".so"; + #elif defined(EHS_OS_WINDOWS) + const ehs::Str_8 delimeter = ".dll"; + #endif ehs::Array files = ehs::Directory::GetAllFiles("plugins/architectures"); for (ehs::UInt_64 i = 0; i < files.Size(); ++i) diff --git a/src/ArctyxCompiler.cpp b/src/ArctyxCompiler.cpp new file mode 100644 index 0000000..42ba149 --- /dev/null +++ b/src/ArctyxCompiler.cpp @@ -0,0 +1,292 @@ +#include "arctyxLang/ArctyxCompiler.h" + +bool IsNumber(const ehs::Char_8* c) +{ + if (*c >= '0' && *c <= '9') + return true; + + return false; +} + +bool IsOperator(const ehs::Char_8* c) +{ + if (*c == '!' 
|| *c == ':' || *c == '\\' || *c == '|' || *c == '^' || *c == '`' || *c == '~' || *c == '*' + || *c == '+' || (*c >= '-' && *c <= '/') || (*c >= '#' && *c <= '&') || (*c >= '<' && *c <= '@')) + return true; + + return false; +} + +bool IsAlphabet(const ehs::Char_8* c) +{ + if ((*c >= 'A' && *c <= 'Z') || (*c >= 'a' && *c <= 'z') || *c == '_') + return true; + + return false; +} + +bool IsAlphaNumeric(const ehs::Char_8 *c) +{ + if ((*c >= 'A' && *c <= 'Z') || (*c >= 'a' && *c <= 'z') || *c == '_' || (*c >= '0' && *c <= '9')) + return true; + + return false; +} + +bool IsPrimitive(const ehs::Array& primitives, const ehs::Str_8& value) +{ + for (ehs::UInt_64 i = 0; i < primitives.Size(); ++i) + if (value == primitives[i].GetName()) + return true; + + return false; +} + +bool IsKeyword(const ehs::Array& keywords, const ehs::Str_8& value) +{ + for (ehs::UInt_64 i = 0; i < keywords.Size(); ++i) + if (value == keywords[i]) + return true; + + return false; +} + +bool IsEncapsulator(const ehs::Char_8* c) +{ + switch (*c) + { + case '(': + return true; + case '{': + return true; + case '[': + return true; + case ')': + return true; + case '}': + return true; + case ']': + return true; + default: + return false; + } +} + +bool IsEOL(const ehs::Array& eols, const ehs::Char_8* c) +{ + for (ehs::UInt_64 s = 0; s < eols.Size(); ++s) + if (*c == eols[s]) + return true; + + return false; +} + +void ParseNumber(ehs::Vector& tokens, const ehs::Array &eols, ehs::Char_8 **start, ehs::Char_8 **i) +{ + if (const bool isEOL = IsEOL(eols, *i), isSpace = **i == ' ', isEnd = *i != *start && !IsNumber(*i); isEOL || isSpace || isEnd) + { + tokens.Push({TokenT::NUMBER, ehs::Str_8(*start, *i - *start)}); + + if (isEOL) + tokens.Push({TokenT::EOL, {*i, 1}}); + else if (!isSpace && isEnd) + { + *start = *i; + + return; + } + + *start = *i + 1; + } + + ++*i; +} + +void ParseOperator(ehs::Vector& tokens, const ehs::Array &eols, ehs::Char_8 **start, ehs::Char_8 **i) +{ + if (const bool isEOL = 
IsEOL(eols, *i), isSpace = **i == ' ', isEnd = *i != *start && !IsOperator(*i); isEOL || isSpace || isEnd) + { + if (*i - *start > 1) + tokens.Push({TokenT::COMPOUND_OPERATOR, ehs::Str_8(*start, *i - *start)}); + else + tokens.Push({TokenT::UNARY_OPERATOR, ehs::Str_8(*start, *i - *start)}); + + if (isEOL) + tokens.Push({TokenT::EOL, {*i, 1}}); + else if (!isSpace && isEnd) + { + *start = *i; + + return; + } + + *start = *i + 1; + } + + ++*i; +} +/* ParseText: invoked per character while *start sits on an identifier-like run. Once *i reaches an EOL, a space or a non-alphanumeric character, the run [*start, *i) is emitted as TYPE, KEYWORD, BOOLEAN or IDENTIFIER; otherwise *i is simply advanced. */ +void ParseText(ehs::Vector& tokens, const ehs::Array& eols, + const ehs::Array &primitives, const ehs::Array &keywords, ehs::Char_8** start, + ehs::Char_8** i) +{ + if (const bool isEOL = IsEOL(eols, *i), isSpace = **i == ' ', isEnd = *i != *start && !IsAlphaNumeric(*i); isEOL || isSpace || isEnd) + { + ehs::Str_8 text(*start, *i - *start); + + if (IsPrimitive(primitives, text)) + tokens.Push({TokenT::TYPE, text}); + else if (IsKeyword(keywords, text)) + tokens.Push({TokenT::KEYWORD, text}); + else if (text == "true" || text == "false") + tokens.Push({TokenT::BOOLEAN, text}); + else + { + /* Two identifiers in a row: the first one is retagged as a user type. An empty token list has no previous token to inspect, so guard the lookup. */ if (tokens.Size() && tokens[tokens.End()].GetType() == TokenT::IDENTIFIER) + tokens[tokens.End()].SetType(TokenT::TYPE); + + tokens.Push({TokenT::IDENTIFIER, text}); + } + + if (isEOL) + tokens.Push({TokenT::EOL, {*i, 1}}); + else if (!isSpace && isEnd) + { + *start = *i; + + return; + } + + *start = *i + 1; + } + + ++*i; +} + +ehs::Vector CompileIntoTokens(Compiler *compiler, const Language *lang, const ehs::Str_8 &code) +{ + ehs::Vector tokens; + + const ehs::Array eols = lang->GetEOLs(); + const ehs::Array primitives = lang->GetPrimitives(); + const ehs::Array keywords = lang->GetKeywords(); + const ehs::Array operators = lang->GetOperators(); + + for (ehs::Char_8 *i = &code[0], *start = i; i < &code[code.Size()];) + { + if (*start == '\"') + { + if (start != i && *i == '\"') + { + ++start; + + tokens.Push({TokenT::STRING, ehs::Str_8(start, i - start)}); + + if (i + 1 < &code[code.Size()]) + start = i + 1; + } + + ++i; + } + else if (*start == '\'') + 
{ + if (start != i && *i == '\'') + { + ++start; + + /* start now sits on the first character after the opening quote and i on the closing quote, so the literal's length is i - start. */ const ehs::UInt_64 size = i - start; + if (size > 1) + { + EHS_LOG(ehs::LogType::ERR, 0, "Characters cannot extend to more than one."); + + return {}; + } + + tokens.Push({TokenT::CHARACTER, {start, size}}); + + if (i + 1 < &code[code.Size()]) + start = i + 1; + } + + ++i; + } + else if (IsNumber(start)) + ParseNumber(tokens, eols, &start, &i); + else if (IsEncapsulator(start)) + { + tokens.Push({TokenT::ENCAPSULATOR, {i++, 1}}); + start = i; + } + else if (IsOperator(start)) + ParseOperator(tokens, eols, &start, &i); + else if (IsAlphabet(start)) + ParseText(tokens, eols, primitives, keywords, &start, &i); + else if (*start == ',') + { + tokens.Push({TokenT::SEPARATOR, {i++, 1}}); + start = i; + } + else if (IsEOL(eols, start)) + { + /* Collapse repeated EOLs; with no token emitted yet there is nothing to terminate and no previous token to inspect. */ if (tokens.Size() && tokens[tokens.End()].GetType() != TokenT::EOL) + tokens.Push({TokenT::EOL, {i, 1}}); + + start = ++i; + } + else + start = ++i; + } + + EHS_LOG_SUCCESS(); + + return tokens; +} +/* SplitLines: cuts the token stream into one array per EOL token (the EOL itself is dropped). Tokens after the last EOL are not returned. */ +ehs::Vector> SplitLines(const ehs::Vector& tokens) +{ + ehs::Vector> result; + + for (ehs::UInt_64 i = 0, start = 0; i < tokens.Size(); ++i) + { + if (tokens[i].GetType() != TokenT::EOL) + continue; + + result.Push(ehs::Array(&tokens[start], i - start)); + + start = i + 1; + } + + return result; +} +/* IsFunction: true when the tokens contain a ',' separator, a TYPE IDENTIFIER pair, or a TYPE UNARY_OPERATOR IDENTIFIER triple. */ +bool IsFunction(const ehs::Array& line) +{ + for (ehs::UInt_64 i = 0; i < line.Size(); ++i) + if ((line[i].GetType() == TokenT::SEPARATOR && line[i].GetValue() == ",") || + (i + 1 < line.Size() && line[i].GetType() == TokenT::TYPE && line[i + 1].GetType() == TokenT::IDENTIFIER) || + (i + 2 < line.Size() && line[i].GetType() == TokenT::TYPE && line[i + 1].GetType() == TokenT::UNARY_OPERATOR && line[i + 2].GetType() == TokenT::IDENTIFIER)) + return true; + + return false; +} + +ehs::Vector CompileIntoMachineCode(Compiler *compiler, const Language *lang, const ehs::Vector &tokens) +{ + ehs::Vector stacks(1, 5); + + const ehs::Vector> lines = SplitLines(tokens); + for (ehs::UInt_64 i = 0; i < 
lines.Size(); ++i) + { + const ehs::Array line = lines[i]; + + /* A declaration needs at least a type and a name; shorter (or empty) lines are skipped instead of being indexed out of range. */ if (line.Size() >= 2 && line[0].GetType() == TokenT::TYPE && line[1].GetType() == TokenT::IDENTIFIER) + { + /* Only look past the opening encapsulator when tokens actually follow it, so line.Size() - 3 cannot wrap around. */ if (line.Size() > 3 && line[2].GetType() == TokenT::ENCAPSULATOR && IsFunction({&line[3], line.Size() - 3})) + compiler->AddSymbol({SymbolType::FUNCTION, line[1].GetValue(), 0, 0}); + else + compiler->AddSymbol({SymbolType::VARIABLE, line[1].GetValue(), 0, 0}); + } + } + + return {}; +} \ No newline at end of file diff --git a/src/ArctyxLang.cpp b/src/ArctyxLang.cpp index 48d58f6..b8d9831 100644 --- a/src/ArctyxLang.cpp +++ b/src/ArctyxLang.cpp @@ -4,6 +4,11 @@ #include "arctyx/Arctyx.h" #include "arctyx/compiler/Architecture.h" #include "arctyx/compiler/Language.h" +#include "arctyx/compiler/Stack.h" +#include "arctyx/compiler/Symbol.h" +#include "arctyx/compiler/Compiler.h" + +#include "arctyxLang/ArctyxCompiler.h" ehs::Version GetPluginVersion(); @@ -15,11 +20,10 @@ bool InitializePlugin() Language arctyx("Arctyx", GetPluginVersion()); - arctyx.AddSeparator(','); - arctyx.AddEOL('\n'); arctyx.AddEOL(';'); + arctyx.AddPrimitive({"Bool", 1, Signedness::UNSIGNED}); arctyx.AddPrimitive({"Byte", 1, Signedness::UNSIGNED}); arctyx.AddPrimitive({"Char_8", 1, Signedness::UNSIGNED}); arctyx.AddPrimitive({"Char_16", 2, Signedness::UNSIGNED}); @@ -32,8 +36,43 @@ bool InitializePlugin() arctyx.AddPrimitive({"SInt_16", 2, Signedness::SIGNED}); arctyx.AddPrimitive({"SInt_32", 4, Signedness::SIGNED}); arctyx.AddPrimitive({"SInt_64", 8, Signedness::SIGNED}); + arctyx.AddPrimitive({"Void", 0, Signedness::UNSIGNED}); arctyx.AddOperator({"=", "Assign"}); + arctyx.AddOperator({"+", "Add"}); + arctyx.AddOperator({"-", "Subtract"}); + arctyx.AddOperator({"++", "Increment"}); + arctyx.AddOperator({"--", "Decrement"}); + arctyx.AddOperator({"*", "Multiply"}); + arctyx.AddOperator({"/", "Divide"}); + arctyx.AddOperator({"%", "Mod"}); + arctyx.AddOperator({"<", "Less Than"}); + arctyx.AddOperator({">", "Greater Than"}); + arctyx.AddOperator({"<=", "Less Than 
or Equal"}); + arctyx.AddOperator({">=", "Greater Than or Equal"}); + + arctyx.AddKeyword("If"); + arctyx.AddKeyword("Elif"); + arctyx.AddKeyword("Else"); + arctyx.AddKeyword("While"); + arctyx.AddKeyword("For"); + arctyx.AddKeyword("Return"); + arctyx.AddKeyword("Break"); + arctyx.AddKeyword("Continue"); + arctyx.AddKeyword("Const"); + arctyx.AddKeyword("Class"); + arctyx.AddKeyword("Prot"); + arctyx.AddKeyword("Priv"); + arctyx.AddKeyword("Pub"); + arctyx.AddKeyword("Static"); + arctyx.AddKeyword("New"); + arctyx.AddKeyword("Del"); + arctyx.AddKeyword("DelArr"); + arctyx.AddKeyword("Enum"); + + arctyx.SetCompileIntoTokensCb(CompileIntoTokens); + + arctyx.SetCompileIntoMachineCodeCb(CompileIntoMachineCode); Language::Add((Language &&)arctyx); diff --git a/src/compiler/Compiler.cpp b/src/compiler/Compiler.cpp index d5dcfda..970948f 100644 --- a/src/compiler/Compiler.cpp +++ b/src/compiler/Compiler.cpp @@ -1,6 +1,5 @@ #include "arctyx/compiler/Compiler.h" -#include #include Compiler::Compiler() @@ -8,7 +7,7 @@ Compiler::Compiler() { } -Compiler::Compiler(const ehs::Str_8& arch, const ehs::Str_8& lang) +Compiler::Compiler(const ehs::Str_8& arch, const ehs::Str_8& lang, ehs::Str_8 entryPoint) { architecture = Architecture::Get(arch); if (!architecture) @@ -24,18 +23,24 @@ Compiler::Compiler(const ehs::Str_8& arch, const ehs::Str_8& lang) return; } + entryPointId = entryPoint.Hash_64(); + entryPointName = (ehs::Str_8 &&)entryPoint; + EHS_LOG_SUCCESS(); } Compiler::Compiler(Compiler&& other) noexcept - : architecture(other.architecture), language(other.language), symbols((ehs::Array &&)other.symbols) + : architecture(other.architecture), language(other.language), entryPointId(other.entryPointId), + entryPointName((ehs::Str_8 &&)other.entryPointName), symbols((ehs::Array &&)other.symbols) { other.architecture = nullptr; other.language = nullptr; + other.entryPointId = 0; } Compiler::Compiler(const Compiler& other) - : architecture(other.architecture), 
language(other.language), symbols(other.symbols) + : architecture(other.architecture), language(other.language), entryPointId(other.entryPointId), + entryPointName(other.entryPointName), symbols(other.symbols) { } @@ -46,10 +51,13 @@ Compiler& Compiler::operator=(Compiler&& other) noexcept architecture = other.architecture; language = other.language; + entryPointId = other.entryPointId; + entryPointName = (ehs::Str_8 &&)other.entryPointName; symbols = (ehs::Array &&)other.symbols; other.architecture = nullptr; other.language = nullptr; + other.entryPointId = 0; return *this; } @@ -61,24 +69,66 @@ Compiler& Compiler::operator=(const Compiler& other) architecture = other.architecture; language = other.language; + entryPointId = other.entryPointId; + entryPointName = other.entryPointName; symbols = other.symbols; return *this; } -ehs::Array Compiler::Compile(const ehs::Str_8 &code) const +bool Compiler::HasSymbol(const ehs::UInt_64 &id) const +{ + for (ehs::UInt_64 i = 0; i < symbols.Size(); ++i) + if (symbols[i].GetId() == id) + return true; + + return false; +} + +bool Compiler::HasSymbol(const ehs::Str_8 &name) const +{ + return HasSymbol(name.Hash_64()); +} + +bool Compiler::AddSymbol(Symbol symbol) +{ + if (HasSymbol(symbol.GetId())) + return false; + + symbols.Push((Symbol &&)symbol); + + return true; +} + +Symbol *Compiler::GetSymbol(const ehs::UInt_64 &id) const +{ + for (ehs::UInt_64 i = 0; i < symbols.Size(); ++i) + if (symbols[i].GetId() == id) + return &symbols[i]; + + return nullptr; +} + +Symbol *Compiler::GetSymbol(const ehs::Str_8 &name) const +{ + return GetSymbol(name.Hash_64()); +} + +ehs::Vector Compiler::Compile(const ehs::Str_8 &code) { ehs::Console::Write_8("Code:"); ehs::Console::Write_8(code); ehs::Console::Write_8("Tokens:"); - ehs::Vector tokens = Parse(code); + ehs::Vector tokens = language->CompileIntoTokens(this, code); for (ehs::UInt_64 i = 0; i < tokens.Size(); ++i) { if (tokens[i].GetType() == TokenT::UNKNOWN) 
ehs::Console::Write_8("UNKNOWN, ", false); else if (tokens[i].GetType() == TokenT::SEPARATOR) ehs::Console::Write_8("SEPARATOR, ", false); + else if (tokens[i].GetType() == TokenT::BOOLEAN) + ehs::Console::Write_8("BOOLEAN, ", false); else if (tokens[i].GetType() == TokenT::NUMBER) ehs::Console::Write_8("NUMBER, ", false); else if (tokens[i].GetType() == TokenT::STRING) @@ -95,144 +145,36 @@ ehs::Array Compiler::Compile(const ehs::Str_8 &code) const ehs::Console::Write_8("UNARY_OPERATOR, ", false); else if (tokens[i].GetType() == TokenT::COMPOUND_OPERATOR) ehs::Console::Write_8("COMPOUND_OPERATOR, ", false); + else if (tokens[i].GetType() == TokenT::ENCAPSULATOR) + ehs::Console::Write_8("ENCAPSULATOR, ", false); else if (tokens[i].GetType() == TokenT::EOL) ehs::Console::Write_8("EOL"); } - ehs::Array machineCode; + ehs::Vector machineCode = language->CompileIntoMachineCode(this, tokens); - return machineCode; -} - -bool Compiler::IsEOL(const ehs::Array& eols, const ehs::Char_8* c) -{ - for (ehs::UInt_64 s = 0; s < eols.Size(); ++s) - if (*c == eols[s]) - return true; - - return false; -} - -bool Compiler::IsSeparator(const ehs::Array &separators, const ehs::Char_8 *c) -{ - for (ehs::UInt_64 s = 0; s < separators.Size(); ++s) - if (*c == separators[s]) - return true; - - return false; -} - -bool Compiler::IsPrimitive(const ehs::Array& primitives, const ehs::Str_8& value) -{ - for (ehs::UInt_64 i = 0; i < primitives.Size(); ++i) - if (value == primitives[i].GetName()) - return true; - - return false; -} - -bool Compiler::IsKeyword(const ehs::Array& keywords, const ehs::Str_8& value) -{ - for (ehs::UInt_64 i = 0; i < keywords.Size(); ++i) - if (value == keywords[i]) - return true; - - return false; -} - -const Operator *Compiler::IsOperator(const ehs::Array& operators, const ehs::Str_8& value) -{ - for (ehs::UInt_64 i = 0; i < operators.Size(); ++i) - if (value == operators[i].GetDelimeter()) - return &operators[i]; - - return nullptr; -} - -ehs::Vector 
Compiler::Parse(const ehs::Str_8 &code) const -{ - ehs::Vector tokens; - - const ehs::Array eols = language->GetEOLs(); - const ehs::Array separators = language->GetSeparators(); - const ehs::Array primitives = language->GetPrimitives(); - const ehs::Array keywords = language->GetKeywords(); - const ehs::Array operators = language->GetOperators(); - - for (ehs::Char_8 *i = &code[0], *start = i; i < &code[code.Size()]; ++i) + ehs::Console::Write_8("\nSymbols:"); + for (ehs::UInt_64 i = 0; i < symbols.Size(); ++i) { - if (*start == '\"') - { - if (*i == '\"') - { - ++start; + if (symbols[i].GetType() == SymbolType::VARIABLE) + ehs::Console::Write_8("Variable ", false); + else if (symbols[i].GetType() == SymbolType::FUNCTION) + ehs::Console::Write_8("Function ", false); + else if (symbols[i].GetType() == SymbolType::CLASS) + ehs::Console::Write_8("Class ", false); + else if (symbols[i].GetType() == SymbolType::MEMBER) + ehs::Console::Write_8("Member ", false); + else if (symbols[i].GetType() == SymbolType::METHOD) + ehs::Console::Write_8("Method ", false); + else + ehs::Console::Write_8("Unknown ", false); - tokens.Push({TokenT::STRING, ehs::Str_8(start, i - 1 - start)}); - - if (i + 1 < &code[code.Size()]) - start = i + 1; - } - } - else if (*start == '\'') - { - if (*i == '\'') - { - ++start; - - const ehs::UInt_64 size = i - 1 - start; - if (size > 1) - { - EHS_LOG(ehs::LogType::ERR, 0, "Characters cannot extend to more than one."); - - return {}; - } - - tokens.Push({TokenT::CHARACTER, {start, size}}); - - if (i + 1 < &code[code.Size()]) - start = i + 1; - } - } - else if (const bool isEOL = IsEOL(eols, i), isSep = IsSeparator(separators, i); *i == ' ' || *i == '\t' || isEOL || isSep) - { - if ((*start == ' ' || *start == '\t') && i + 1 < &code[code.Size()]) - { - start = i + 1; - - continue; - } - - const ehs::Str_8 value(start, i - start); - - if (value.Size()) - { - if (IsPrimitive(primitives, value)) - tokens.Push({TokenT::TYPE, value}); - else if 
(IsKeyword(keywords, value)) - tokens.Push({TokenT::KEYWORD, value}); - else if (const Operator *op = IsOperator(operators, value); op) - { - if (op->IsUnary()) - tokens.Push({TokenT::UNARY_OPERATOR, value}); - else - tokens.Push({TokenT::COMPOUND_OPERATOR, value}); - } - else if (value[0] >= '0' && value[0] <= '9') - tokens.Push({TokenT::NUMBER, value}); - else - tokens.Push({TokenT::IDENTIFIER, value}); - } - - if (isEOL) - tokens.Push({TokenT::EOL, {i, 1}}); - else if (isSep) - tokens.Push({TokenT::SEPARATOR, {i, 1}}); - - start = i + 1; - } + ehs::Console::Write_8(symbols[i].GetName()); } - EHS_LOG_SUCCESS(); + Symbol *entryPoint = GetSymbol(entryPointId); + if (!entryPoint || entryPoint->GetType() != SymbolType::FUNCTION) + ehs::Console::Write_8("\nCompiler Error: Function symbol, \"" + entryPointName + "\", for the entry point was not found!"); - return tokens; -} + return machineCode; +} \ No newline at end of file diff --git a/src/compiler/Language.cpp b/src/compiler/Language.cpp index 9603ca2..7b477a7 100644 --- a/src/compiler/Language.cpp +++ b/src/compiler/Language.cpp @@ -3,29 +3,32 @@ ehs::Array Language::languages; Language::Language() - : id(0) + : id(0), compileIntoTokensCb(nullptr), compileIntoMachineCodeCb(nullptr) { } Language::Language(ehs::Str_8 name, const ehs::Version& version) - : id(name.Hash_64()), name((ehs::Str_8 &&)name), version(version) + : id(name.Hash_64()), name((ehs::Str_8 &&)name), version(version), compileIntoTokensCb(nullptr), + compileIntoMachineCodeCb(nullptr) { } Language::Language(Language&& lang) noexcept : id(lang.id), name((ehs::Str_8 &&)lang.name), version(lang.version), eols((ehs::Array &&)lang.eols), - separators((ehs::Array &&)lang.separators), primitives((ehs::Array &&)lang.primitives), - keywords((ehs::Array &&)lang.keywords), operators((ehs::Array &&)lang.operators), - interpretations((ehs::Array &&)lang.interpretations) + primitives((ehs::Array &&)lang.primitives), keywords((ehs::Array &&)lang.keywords), + 
operators((ehs::Array &&)lang.operators), interpretations((ehs::Array &&)lang.interpretations), + compileIntoTokensCb(lang.compileIntoTokensCb), compileIntoMachineCodeCb(lang.compileIntoMachineCodeCb) { lang.id = 0; lang.version = {}; + lang.compileIntoTokensCb = nullptr; + lang.compileIntoMachineCodeCb = nullptr; } Language::Language(const Language& lang) - : id(lang.id), name(lang.name), version(lang.version), eols(lang.eols), separators(lang.separators), - primitives(lang.primitives), keywords(lang.keywords), operators(lang.operators), - interpretations(lang.interpretations) + : id(lang.id), name(lang.name), version(lang.version), eols(lang.eols), primitives(lang.primitives), + keywords(lang.keywords), operators(lang.operators), interpretations(lang.interpretations), + compileIntoTokensCb(lang.compileIntoTokensCb), compileIntoMachineCodeCb(lang.compileIntoMachineCodeCb) { } @@ -38,14 +41,17 @@ Language& Language::operator=(Language&& lang) noexcept name = (ehs::Str_8 &&)lang.name; version = lang.version; eols = (ehs::Array &&)lang.eols; - separators = (ehs::Array &&)lang.separators; primitives = (ehs::Array &&)lang.primitives; keywords = (ehs::Array &&)lang.keywords; operators = (ehs::Array &&)lang.operators; interpretations = (ehs::Array &&)lang.interpretations; + compileIntoTokensCb = lang.compileIntoTokensCb; + compileIntoMachineCodeCb = lang.compileIntoMachineCodeCb; lang.id = 0; lang.version = {}; + lang.compileIntoTokensCb = nullptr; + lang.compileIntoMachineCodeCb = nullptr; return *this; } @@ -59,11 +65,12 @@ Language& Language::operator=(const Language& lang) name = lang.name; version = lang.version; eols = lang.eols; - separators = lang.separators; primitives = lang.primitives; keywords = lang.keywords; operators = lang.operators; interpretations = lang.interpretations; + compileIntoTokensCb = lang.compileIntoTokensCb; + compileIntoMachineCodeCb = lang.compileIntoMachineCodeCb; return *this; } @@ -107,30 +114,6 @@ bool Language::AddEOL(const 
ehs::Char_8 &eol) return true; } -ehs::Array Language::GetSeparators() const -{ - return separators; -} - -bool Language::HasSeparator(const ehs::Char_8& separator) const -{ - for (ehs::Size i = 0; i < separators.Size(); ++i) - if (separators[i] == separator) - return true; - - return false; -} - -bool Language::AddSeparator(const ehs::Char_8& separator) -{ - if (HasSeparator(separator)) - return false; - - separators.Push(separator); - - return true; -} - ehs::Array Language::GetPrimitives() const { return primitives; @@ -264,6 +247,44 @@ bool Language::AddInterpretation(Interpretation interpretation) return true; } +void Language::SetCompileIntoTokensCb(const CompileIntoTokensCb cb) +{ + compileIntoTokensCb = cb; +} + +ehs::Vector Language::CompileIntoTokens(Compiler *compiler, const ehs::Str_8& code) const +{ + if (compileIntoTokensCb) + { + EHS_LOG_SUCCESS(); + + return compileIntoTokensCb(compiler, this, code); + } + + EHS_LOG(ehs::LogType::ERR, 0, "The " + name + " language is incomplete and does not implement a " + name + " to token compiler."); + + return {}; +} + +void Language::SetCompileIntoMachineCodeCb(const CompileIntoMachineCodeCb cb) +{ + compileIntoMachineCodeCb = cb; +} + +ehs::Vector Language::CompileIntoMachineCode(Compiler *compiler, const ehs::Vector& tokens) const +{ + if (compileIntoTokensCb) + { + EHS_LOG_SUCCESS(); + + return compileIntoMachineCodeCb(compiler, this, tokens); + } + + EHS_LOG(ehs::LogType::ERR, 0, "The " + name + " language is incomplete and does not implement a token to machine code compiler."); + + return {}; +} + bool Language::Has(const ehs::UInt_64& id) { for (ehs::UInt_64 i = 0; i < languages.Size(); ++i) @@ -300,4 +321,4 @@ bool Language::Add(Language lang) languages.Push(new Language((Language &&)lang)); return true; -} +} \ No newline at end of file diff --git a/src/compiler/Stack.cpp b/src/compiler/Stack.cpp new file mode 100644 index 0000000..ef59123 --- /dev/null +++ b/src/compiler/Stack.cpp @@ -0,0 +1,49 @@ 
+#include "arctyx/compiler/Stack.h" + +Stack::Stack() +{ +} + +Stack::Stack(const ehs::UInt_64& offset) /* TODO(review): offset is accepted but not stored by this stub. */ +{ +} + +Stack::Stack(Stack&& other) noexcept +{ +} + +Stack::Stack(const Stack& other) +{ +} + +Stack& Stack::operator=(Stack&& other) noexcept +{ return *this; /* Stub: nothing is moved yet, but a value-returning function must not fall off its end. */ +} + +Stack& Stack::operator=(const Stack& other) +{ return *this; /* Stub: nothing is copied yet. */ +} + +ehs::UInt_64 Stack::GetOffset() const +{ return 0; /* TODO(review): placeholder, replace with the stored offset once the constructor keeps it. */ +} + +bool Stack::HasItem(const ehs::UInt_64& id) const +{ return false; /* Stub: AddItem stores nothing, so no id can match. */ +} + +bool Stack::HasItem(const ehs::Str_8& name) const +{ return false; /* Stub: AddItem stores nothing, so no name can match. */ +} + +bool Stack::AddItem(StackItem item) +{ return false; /* Stub: the item is not stored, so report that nothing was added. */ +} + +StackItem* Stack::GetItem(const ehs::UInt_64& id) const +{ return nullptr; /* Stub: no items exist to look up. */ +} + +StackItem* Stack::GetItem(const ehs::Str_8& name) const +{ return nullptr; /* Stub: no items exist to look up. */ +} diff --git a/src/compiler/StackItem.cpp b/src/compiler/StackItem.cpp new file mode 100644 index 0000000..36a218f --- /dev/null +++ b/src/compiler/StackItem.cpp @@ -0,0 +1,74 @@ +#include "arctyx/compiler/StackItem.h" + +StackItem::StackItem() /* Creates an unnamed item with id, address and size all zero. */ + : id(0), address(0), size(0) +{ +} + +StackItem::StackItem(ehs::Str_8 name, const ehs::UInt_64& address, const ehs::UInt_64 &size) /* The id is the 64 bit hash of name, taken before name is moved into the member. */ + : id(name.Hash_64()), name((ehs::Str_8 &&)name), address(address), size(size) +{ +} + +StackItem::StackItem(StackItem&& other) noexcept /* Takes over the fields of other and zeroes its numeric fields. */ + : id(other.id), name((ehs::Str_8 &&)other.name), address(other.address), size(other.size) +{ + other.id = 0; + other.address = 0; + other.size = 0; +} + +StackItem::StackItem(const StackItem &other) + : id(other.id), name(other.name), address(other.address), size(other.size) +{ +} + +StackItem &StackItem::operator=(StackItem &&other) noexcept +{ + if (this == &other) /* Self assignment is a no op. */ + return *this; + + name = (ehs::Str_8 &&)other.name; + id = other.id; + address = other.address; + size = other.size; + + other.id = 0; + other.address = 0; + other.size = 0; + + return *this; +} + +StackItem &StackItem::operator=(const StackItem& other) +{ + if (this == &other) /* Self assignment is a no op. */ + return *this; + + name = other.name; + id = other.id; + address = other.address; + size = other.size; + + return *this; +} + +ehs::Str_8 StackItem::GetName() const +{ + return name; 
+} + +ehs::UInt_64 StackItem::GetId() const +{ + return id; +} + +ehs::UInt_64 StackItem::GetAddress() const +{ + return address; +} + +ehs::UInt_64 StackItem::GetSize() const +{ + return size; +} \ No newline at end of file diff --git a/src/compiler/StackParam.cpp b/src/compiler/StackParam.cpp new file mode 100644 index 0000000..f5982d3 --- /dev/null +++ b/src/compiler/StackParam.cpp @@ -0,0 +1 @@ +#include "arctyx/compiler/StackParam.h" \ No newline at end of file diff --git a/src/compiler/Symbol.cpp b/src/compiler/Symbol.cpp index 46bd314..cac2c1c 100644 --- a/src/compiler/Symbol.cpp +++ b/src/compiler/Symbol.cpp @@ -1,22 +1,26 @@ #include "arctyx/compiler/Symbol.h" Symbol::Symbol() - : type(SymbolType::UNKNOWN), id(0), address(0) + : type(SymbolType::UNKNOWN), id(0), address(0), size(0) { } -Symbol::Symbol(const SymbolType& type, ehs::Str_8 name, const ehs::UInt_64& address) - : type(type), id(name.Hash_64()), name((ehs::Str_8 &&)name), address(address) +Symbol::Symbol(const SymbolType& type, ehs::Str_8 name, const ehs::UInt_64& address, const ehs::UInt_64 &size) + : type(type), id(name.Hash_64()), name((ehs::Str_8 &&)name), address(address), size(size) { } Symbol::Symbol(Symbol&& other) noexcept - : type(other.type), id(other.id), name((ehs::Str_8 &&)other.name), address(other.address) + : type(other.type), id(other.id), name((ehs::Str_8 &&)other.name), address(other.address), size(other.size) { + other.type = SymbolType::UNKNOWN; + other.id = 0; + other.address = 0; + other.size = 0; } Symbol::Symbol(const Symbol &other) - : type(other.type), id(other.id), name(other.name), address(other.address) + : type(other.type), id(other.id), name(other.name), address(other.address), size(other.size) { } @@ -29,10 +33,12 @@ Symbol &Symbol::operator=(Symbol &&other) noexcept name = (ehs::Str_8 &&)other.name; id = other.id; address = other.address; + size = other.size; other.type = SymbolType::UNKNOWN; other.id = 0; other.address = 0; + other.size = 0; return *this; } @@ 
-46,6 +52,7 @@ Symbol &Symbol::operator=(const Symbol& other) name = other.name; id = other.id; address = other.address; + size = other.size; return *this; } @@ -69,3 +76,8 @@ ehs::UInt_64 Symbol::GetAddress() const { return address; } + +ehs::UInt_64 Symbol::GetSize() const +{ + return size; +} diff --git a/src/compiler/Token.cpp b/src/compiler/Token.cpp index 374322c..2b8b717 100644 --- a/src/compiler/Token.cpp +++ b/src/compiler/Token.cpp @@ -45,6 +45,11 @@ Token & Token::operator=(const Token &token) return *this; } +void Token::SetType(const TokenT& newType) +{ + type = newType; +} + TokenT Token::GetType() const { return type; diff --git a/src/main.cpp b/src/main.cpp index 35e8d85..a2328d7 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include "arctyx/Arctyx.h" #include "arctyx/compiler/Compiler.h" @@ -19,7 +20,7 @@ int main() ehs::Str_8 code = file.ReadStr_8(file.Size()); file.Release(); - Compiler arctyxCompiler("x64", "Arctyx"); + Compiler arctyxCompiler("x64", "Arctyx", "test"); arctyxCompiler.Compile(code); /*