diff --git a/CMakeLists.txt b/CMakeLists.txt index 38b5045..129b1e3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,6 +7,7 @@ add_subdirectory(externals/glslang EXCLUDE_FROM_ALL) include_directories( externals/glslang/ externals/glslang/glslang/Public + externals/glslang/glslang/MachineIndependent externals/glslang/StandAlone ) diff --git a/README.md b/README.md index c045c4f..c6c81f1 100644 --- a/README.md +++ b/README.md @@ -7,12 +7,12 @@ Currently this LSP implementation can be interfaced with using either HTTP or st ### Current Features - Diagnostics - -### Planned Features - - Completion - Hover - Jump to def + +### Planned Features + - Workspace symbols - Find references diff --git a/src/includer.cpp b/src/includer.cpp new file mode 100644 index 0000000..253b7d5 --- /dev/null +++ b/src/includer.cpp @@ -0,0 +1,44 @@ +#include "includer.hpp" + +#include +#include "utils.hpp" + +namespace fs = std::filesystem; + +using IncludeResult = FileIncluder::IncludeResult; + +void FileIncluder::releaseInclude(IncludeResult* result) { + delete result; +} + +IncludeResult* FileIncluder::includeLocal( + const char* header_name, + const char* includer_name, + size_t depth) +{ + auto suffix = strip_prefix("file://", includer_name); + if (!suffix) return nullptr; + + fs::path path = suffix; + path.replace_filename(header_name); + path = fs::absolute(path); + + std::string uri = "file://"; + uri += path; + + auto& documents = this->workspace->documents(); + + auto existing = documents.find(uri); + if (existing == documents.end()) { + // load the file + if (auto contents = read_file_to_string(path.c_str())) { + documents[uri] = *contents; + existing = documents.find(uri); + } else { + return nullptr; + } + } + + const std::string& contents = existing->second; + return new IncludeResult{uri, contents.c_str(), contents.size(), nullptr}; +} diff --git a/src/includer.hpp b/src/includer.hpp new file mode 100644 index 0000000..5521eb6 --- /dev/null +++ b/src/includer.hpp @@ -0,0 
+1,19 @@ +#pragma once + +#include "ShaderLang.h" +#include "workspace.hpp" + +class FileIncluder : public glslang::TShader::Includer { + Workspace* workspace; + +public: + FileIncluder(Workspace* workspace) : workspace(workspace) {} + + virtual void releaseInclude(IncludeResult*) override; + + virtual IncludeResult* includeLocal( + const char* header_name, + const char* includer_name, + size_t depth) override; +}; + diff --git a/src/main.cpp b/src/main.cpp index a3364ec..f5d8fa9 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -9,29 +9,46 @@ #include "ResourceLimits.h" #include "ShaderLang.h" +#include "Initialize.h" #include -#include +#include #include #include #include #include #include +#include #include "messagebuffer.hpp" #include "workspace.hpp" #include "utils.hpp" +#include "symbols.hpp" +#include "includer.hpp" using json = nlohmann::json; -namespace fs = std::experimental::filesystem; +namespace fs = std::filesystem; + +/// By default we target the most recent graphics APIs to be maximally permissive. +struct TargetVersions { + // The target API (eg, Vulkan, OpenGL). 
+ glslang::EShClient client_api = glslang::EShClientVulkan; + glslang::EShTargetClientVersion client_api_version = glslang::EShTargetVulkan_1_3; + + // The target SPIR-V version + glslang::EShTargetLanguageVersion spv_version = glslang::EShTargetSpv_1_6; +}; struct AppState { Workspace workspace; bool verbose; bool use_logfile; std::ofstream logfile_stream; + TargetVersions target; }; +std::ofstream* tmp_log; + std::string make_response(const json& response) { json content = response; @@ -68,23 +85,31 @@ json get_diagnostics(std::string uri, std::string content, FILE fp_old = *stdout; *stdout = *fopen("/dev/null","w"); auto document = uri; - auto shader_cstring = content.c_str(); auto lang = find_language(document); - glslang::InitializeProcess(); + glslang::TShader shader(lang); - shader.setStrings(&shader_cstring, 1); + + auto target = appstate.target; + shader.setEnvClient(target.client_api, target.client_api_version); + shader.setEnvTarget(glslang::EShTargetSpv, target.spv_version); + + auto shader_cstring = content.c_str(); + auto shader_name = document.c_str(); + shader.setStringsWithLengthsAndNames(&shader_cstring, nullptr, &shader_name, 1); + + FileIncluder includer{&appstate.workspace}; + TBuiltInResource Resources = *GetDefaultResources(); EShMessages messages = EShMsgCascadingErrors; - shader.parse(&Resources, 110, false, messages); + shader.parse(&Resources, 110, false, messages, includer); std::string debug_log = shader.getInfoLog(); - glslang::FinalizeProcess(); *stdout = fp_old; if (appstate.use_logfile && appstate.verbose) { fmt::print(appstate.logfile_stream, "Diagnostics raw output: {}\n" , debug_log); } - std::regex re("(.*): 0:(\\d*): (.*)"); + std::regex re("([A-Z]*): (.*):(\\d*): (.*)"); std::smatch matches; auto error_lines = split_string(debug_log, "\n"); auto content_lines = split_string(content, "\n"); @@ -92,7 +117,10 @@ json get_diagnostics(std::string uri, std::string content, json diagnostics; for (auto error_line : error_lines) { 
std::regex_search(error_line, matches, re); - if (matches.size() == 4) { + if (matches.size() == 5) { + std::string file = matches[2]; + if (file != document) continue; // message is for another file + json diagnostic; std::string severity = matches[1]; int severity_no = -1; @@ -106,10 +134,11 @@ json get_diagnostics(std::string uri, std::string content, fmt::print(appstate.logfile_stream, "Error: Unknown severity '{}'\n", severity); } } - std::string message = trim(matches[3], " "); + + std::string message = trim(matches[4], " "); // -1 because lines are 0-indexed as per LSP specification. - int line_no = std::stoi(matches[2]) - 1; + int line_no = std::stoi(matches[3]) - 1; std::string source_line = content_lines[line_no]; int start_char = -1; @@ -150,12 +179,143 @@ json get_diagnostics(std::string uri, std::string content, } } if (appstate.use_logfile && appstate.verbose && !diagnostics.empty()) { - fmt::print(appstate.logfile_stream, "Sending diagnostics: {}\n" , diagnostics); + fmt::print(appstate.logfile_stream, "Sending diagnostics: {}\n" , diagnostics.dump(4)); } appstate.logfile_stream.flush(); return diagnostics; } +SymbolMap get_symbols(const std::string& uri, AppState& appstate){ + auto language = find_language(uri); + + // use the highest known version so that we get as many symbols as possible + int version = 460; + // same thing here: use compatibility profile for more symbols + EProfile profile = ECompatibilityProfile; + + glslang::SpvVersion spv_version{}; + spv_version.spv = appstate.target.spv_version; + spv_version.vulkanRelaxed = true; // be maximally permissive, allowing certain OpenGL in Vulkan + + glslang::TPoolAllocator pool{}; + glslang::SetThreadPoolAllocator(&pool); + pool.push(); + + const TBuiltInResource& resources = *GetDefaultResources(); + glslang::TBuiltIns builtins{}; + builtins.initialize(version, profile, spv_version); + builtins.initialize(resources, version, profile, spv_version, language); + + // TODO: cache builtin symbols 
between runs. + SymbolMap symbols; + add_builtin_types(symbols); + extract_symbols(builtins.getCommonString().c_str(), symbols); + extract_symbols(builtins.getStageString(language).c_str(), symbols); + + extract_symbols(appstate.workspace.documents()[uri].c_str(), symbols, uri.c_str()); + + glslang::GetThreadPoolAllocator().pop(); + glslang::SetThreadPoolAllocator(nullptr); + + return symbols; +} + +void find_completions(const SymbolMap& symbols, const std::string& prefix, std::vector& out) { + for (auto& entry : symbols) { + auto& name = entry.first; + auto& symbol = entry.second; + out.push_back(json { + { "label", name }, + { "kind", symbol.kind == Symbol::Unknown ? json(nullptr) : json(symbol.kind) }, + { "detail", symbol.details }, + }); + } +} + +json get_completions(const std::string &uri, int line, int character, AppState& appstate) +{ + const std::string& document = appstate.workspace.documents()[uri]; + int offset = find_position_offset(document.c_str(), line, character); + int word_start = get_last_word_start(document.c_str(), offset); + int length = offset - word_start; + + if (length <= 0) { + // no word under the cursor. + return nullptr; + } + + auto name = document.substr(word_start, length); + + std::vector matches; + auto symbols = get_symbols(uri, appstate); + find_completions(symbols, name, matches); + + return matches; +} + +std::optional get_word_under_cursor( + const std::string& uri, + int line, int character, + AppState& appstate) +{ + const std::string& document = appstate.workspace.documents()[uri]; + int offset = find_position_offset(document.c_str(), line, character); + int word_start = get_last_word_start(document.c_str(), offset); + int word_end = get_word_end(document.c_str(), word_start); + int length = word_end - word_start; + + if (length <= 0) { + // no word under the cursor. 
+ return std::nullopt; + } + + return document.substr(word_start, length); +} + +json get_hover_info(const std::string& uri, int line, int character, AppState& appstate) { + auto word = get_word_under_cursor(uri, line, character, appstate); + if (!word) return nullptr; + + auto symbols = get_symbols(uri, appstate); + auto symbol = symbols.find(*word); + if (symbol == symbols.end()) return nullptr; + + return json { + { "contents", { + { "language", "glsl" }, + { "value", symbol->second.details } + } } + }; +} + +json get_definition(const std::string& uri, int line, int character, AppState& appstate) { + auto word = get_word_under_cursor(uri, line, character, appstate); + if (!word) return nullptr; + + auto symbols = get_symbols(uri, appstate); + auto symbol_iter = symbols.find(*word); + if (symbol_iter == symbols.end()) return nullptr; + auto symbol = symbol_iter->second; + if (symbol.location.uri == nullptr) return nullptr; + + const std::string& text = appstate.workspace.documents()[symbol.location.uri]; + auto position = find_source_location(text.c_str(), symbol.location.offset); + int length = word->size(); + + json start { + { "line", position.line }, + { "character", position.character }, + }; + json end { + { "line", position.line }, + { "character", position.character + length }, + }; + return json { + { "uri", symbol.location.uri }, + { "range", { { "start", start }, { "end", end } } }, + }; +} + std::optional handle_message(const MessageBuffer& message_buffer, AppState& appstate) { json body = message_buffer.body(); @@ -200,10 +360,10 @@ std::optional handle_message(const MessageBuffer& message_buffer, A "capabilities", { { "textDocumentSync", text_document_sync }, - { "hoverProvider", false }, + { "hoverProvider", true }, { "completionProvider", completion_provider }, { "signatureHelpProvider", signature_help_provider }, - { "definitionProvider", false }, + { "definitionProvider", true }, { "referencesProvider", false }, { "documentHighlightProvider", 
false }, { "documentSymbolProvider", false }, @@ -260,8 +420,48 @@ std::optional handle_message(const MessageBuffer& message_buffer, A } } }; return make_response(result_body); + } else if (body["method"] == "textDocument/completion") { + auto uri = body["params"]["textDocument"]["uri"]; + auto position = body["params"]["position"]; + int line = position["line"]; + int character = position["character"]; + + json completions = get_completions(uri, line, character, appstate); + + json result_body{ + { "id", body["id"] }, + { "result", completions } + }; + return make_response(result_body); + } else if (body["method"] == "textDocument/hover") { + auto uri = body["params"]["textDocument"]["uri"]; + auto position = body["params"]["position"]; + int line = position["line"]; + int character = position["character"]; + + json hover = get_hover_info(uri, line, character, appstate); + + json result_body{ + { "id", body["id"] }, + { "result", hover } + }; + return make_response(result_body); + } else if (body["method"] == "textDocument/definition") { + auto uri = body["params"]["textDocument"]["uri"]; + auto position = body["params"]["position"]; + int line = position["line"]; + int character = position["character"]; + + json result = get_definition(uri, line, character, appstate); + + json result_body{ + { "id", body["id"] }, + { "result", result } + }; + return make_response(result_body); } + // If the workspace has not yet been initialized but the client sends a // message that doesn't have method "initialize" then we'll return an error // as per LSP spec. @@ -278,12 +478,22 @@ std::optional handle_message(const MessageBuffer& message_buffer, A // If we don't know the method requested, we end up here. if (body.count("method") == 1) { + // Requests have an ID field, but notifications do not. + bool is_notification = body.find("id") == body.end(); + if (is_notification) { + // We don't have to respond to notifications. So don't error on + // notifications we don't recognize. 
+ // https://microsoft.github.io/language-server-protocol/specifications/specification-3-15/#notificationMessage + return std::nullopt; + } + json error{ { "code", -32601 }, { "message", fmt::format("Method '{}' not supported.", body["method"].get()) }, }; json result_body{ - { "error", error } + { "id", body["id"] }, + { "error", error }, }; return make_response(result_body); } @@ -348,10 +558,26 @@ int main(int argc, char* argv[]) bool verbose = false; uint16_t port = 61313; std::string logfile; + + std::string client_api = "vulkan1.3"; + std::string spirv_version = "spv1.6"; + + std::string symbols_path; + std::string diagnostic_path; + auto stdin_option = app.add_flag("--stdin", use_stdin, "Don't launch an HTTP server and instead accept input on stdin"); app.add_flag("-v,--verbose", verbose, "Enable verbose logging"); app.add_option("-l,--log", logfile, "Log file"); + app.add_option("--debug-symbols", symbols_path, "Print the list of symbols for the given file"); + app.add_option("--debug-diagnostic", diagnostic_path, "Debug diagnostic output for the given file"); app.add_option("-p,--port", port, "Port", true)->excludes(stdin_option); + app.add_option("--target-env", client_api, + "Target client environment.\n" + " [vulkan vulkan1.0 vulkan1.1 vulkan1.2 vulkan1.3 opengl opengl4.5]", true); + app.add_option("--target-spv", spirv_version, + "The SPIR-V version to target.\n" + "Defaults to the highest possible for the target environment.\n" + " [spv1.0 spv1.1 spv1.2 spv1.3 spv1.4 spv1.5 spv1.6]", true); try { app.parse(argc, argv); @@ -364,9 +590,83 @@ int main(int argc, char* argv[]) appstate.use_logfile = !logfile.empty(); if (appstate.use_logfile) { appstate.logfile_stream.open(logfile); + tmp_log = &appstate.logfile_stream; + } + + if (!client_api.empty()) { + if (client_api == "vulkan1.3" || client_api == "vulkan") { + appstate.target.client_api = glslang::EShClientVulkan; + appstate.target.client_api_version = 
glslang::EShTargetVulkan_1_3; + appstate.target.spv_version = glslang::EShTargetSpv_1_6; + } else if (client_api == "vulkan1.2") { + appstate.target.client_api = glslang::EShClientVulkan; + appstate.target.client_api_version = glslang::EShTargetVulkan_1_2; + appstate.target.spv_version = glslang::EShTargetSpv_1_5; + } else if (client_api == "vulkan1.1") { + appstate.target.client_api = glslang::EShClientVulkan; + appstate.target.client_api_version = glslang::EShTargetVulkan_1_1; + appstate.target.spv_version = glslang::EShTargetSpv_1_3; + } else if (client_api == "vulkan1.0") { + appstate.target.client_api = glslang::EShClientVulkan; + appstate.target.client_api_version = glslang::EShTargetVulkan_1_0; + appstate.target.spv_version = glslang::EShTargetSpv_1_1; + } else if (client_api == "opengl4.5" || client_api == "opengl") { + appstate.target.client_api = glslang::EShClientOpenGL; + appstate.target.client_api_version = glslang::EShTargetOpenGL_450; + appstate.target.spv_version = glslang::EShTargetSpv_1_3; + } else { + fmt::print("unknown client api: {}\n", client_api); + return 1; + } + } + + if (!spirv_version.empty()) { + if (spirv_version == "spv1.6") { + appstate.target.spv_version = glslang::EShTargetSpv_1_6; + } else if (spirv_version == "spv1.5") { + appstate.target.spv_version = glslang::EShTargetSpv_1_5; + } else if (spirv_version == "spv1.4") { + appstate.target.spv_version = glslang::EShTargetSpv_1_4; + } else if (spirv_version == "spv1.3") { + appstate.target.spv_version = glslang::EShTargetSpv_1_3; + } else if (spirv_version == "spv1.2") { + appstate.target.spv_version = glslang::EShTargetSpv_1_2; + } else if (spirv_version == "spv1.1") { + appstate.target.spv_version = glslang::EShTargetSpv_1_1; + } else if (spirv_version == "spv1.0") { + appstate.target.spv_version = glslang::EShTargetSpv_1_0; + } else { + fmt::print("unknown SPIR-V version: {}\n", spirv_version); + return 1; + } } - if (!use_stdin) { + glslang::InitializeProcess(); + + if 
(!symbols_path.empty()) { + std::string contents = *read_file_to_string(symbols_path.c_str()); + std::string uri = make_path_uri(symbols_path); + appstate.workspace.add_document(uri, contents); + auto symbols = get_symbols(uri, appstate); + for (auto& entry : symbols) { + const auto& name = entry.first; + const auto& symbol = entry.second; + + if (symbol.location.uri) { + const auto& contents = appstate.workspace.documents()[symbol.location.uri]; + auto position = find_source_location(contents.c_str(), symbol.location.offset); + fmt::print("{} : {}:{} : {}\n", name, position.line, position.character, symbol.details); + } else { + fmt::print("{} : @{} : {}\n", name, symbol.location.offset, symbol.details); + } + } + } else if (!diagnostic_path.empty()) { + std::string contents = *read_file_to_string(diagnostic_path.c_str()); + std::string uri = make_path_uri(diagnostic_path); + appstate.workspace.add_document(uri, contents); + auto diagnostics = get_diagnostics(uri, contents, appstate); + fmt::print("diagnostics: {}\n", diagnostics.dump(4)); + } else if (!use_stdin) { struct mg_mgr mgr; struct mg_connection* nc; struct mg_bind_opts bind_opts; @@ -427,5 +727,7 @@ int main(int argc, char* argv[]) appstate.logfile_stream.close(); } + glslang::FinalizeProcess(); + return 0; } diff --git a/src/symbols.cpp b/src/symbols.cpp new file mode 100644 index 0000000..e968e08 --- /dev/null +++ b/src/symbols.cpp @@ -0,0 +1,286 @@ +#include "symbols.hpp" +#include "utils.hpp" + +#include + +void add_builtin_types(SymbolMap& symbols) { + symbols.emplace("bool", Symbol{Symbol::Type, ""}); + symbols.emplace("int", Symbol{Symbol::Type, ""}); + symbols.emplace("uint", Symbol{Symbol::Type, ""}); + symbols.emplace("float", Symbol{Symbol::Type, ""}); + symbols.emplace("double", Symbol{Symbol::Type, ""}); + + std::string vec_buffer = "TvecX"; + for (int i = 2; i <= 4; i++) { + vec_buffer[4] = '0' + i; + + // vec2, vec3, vec4, etc. 
+ symbols.emplace(&vec_buffer[1], Symbol{Symbol::Type, ""}); + // bvecX, ivecX, uvecX, dvecX + vec_buffer[0] = 'b'; + symbols.emplace(vec_buffer, Symbol{Symbol::Type, ""}); + vec_buffer[0] = 'i'; + symbols.emplace(vec_buffer, Symbol{Symbol::Type, ""}); + vec_buffer[0] = 'u'; + symbols.emplace(vec_buffer, Symbol{Symbol::Type, ""}); + vec_buffer[0] = 'd'; + symbols.emplace(vec_buffer, Symbol{Symbol::Type, ""}); + } + + std::string mat_buffer = "dmatXxX"; + for (int col = 2; col <= 4; col++) { + mat_buffer[4] = '0' + col; + + for (int row = 2; row <= 4; row++) { + mat_buffer[6] = '0' + row; + symbols.emplace(&mat_buffer[1], Symbol{Symbol::Type, ""}); + symbols.emplace(mat_buffer, Symbol{Symbol::Type, ""}); + } + + mat_buffer[5] = 0; + symbols.emplace(&mat_buffer[0], Symbol{Symbol::Type, ""}); + symbols.emplace(&mat_buffer[1], Symbol{Symbol::Type, ""}); + mat_buffer[5] = 'x'; + } + + const char* image_kinds[] = { + "1D", + "2D", + "3D", + "Cube", + "2DRect", + "1DArray", + "2DArray", + "CubeArray", + "Buffer", + "2DMS", + "2DMSArray", + }; + + int image_count = sizeof(image_kinds) / sizeof(image_kinds[0]); + for (int i = 0; i < image_count; i++) { + std::string buffer = "gimage"; + buffer += image_kinds[i]; + + symbols.emplace(&buffer[1], Symbol{Symbol::Type, ""}); + buffer[0] = 'i'; + symbols.emplace(buffer, Symbol{Symbol::Type, ""}); + buffer[0] = 'u'; + symbols.emplace(buffer, Symbol{Symbol::Type, ""}); + } + + for (int i = 0; i < image_count; i++) { + std::string buffer = "gsampler"; + buffer += image_kinds[i]; + + symbols.emplace(&buffer[1], Symbol{Symbol::Type, ""}); + buffer[0] = 'i'; + symbols.emplace(buffer, Symbol{Symbol::Type, ""}); + buffer[0] = 'u'; + symbols.emplace(buffer, Symbol{Symbol::Type, ""}); + } + + const char* shadow_samplers[] = { + "sampler1DShadow", + "sampler2DShadow", + "samplerCubeShadow", + "sampler2DRectShadow", + "sampler1DArrayShadow", + "sampler2DArrayShadow", + "samplerCubeArrayShadow", + }; + int shadow_sampler_count = 
sizeof(shadow_samplers) / sizeof(shadow_samplers[0]); + for (int i = 0; i < shadow_sampler_count; i++) { + symbols.emplace(shadow_samplers[i], Symbol{Symbol::Type, ""}); + } +} + +struct Word { + const char* start = nullptr; + const char* end = nullptr; + + bool is_equal(const char* text) const { + const char* s = start; + while (s != end && *s == *text) { + s++; + text++; + } + return s == end && *text == 0; + } +}; + +bool is_whitespace(char c) { + return c == ' ' || c == '\t' || c == '\n'; +} + +/// Extracts all global symbols from the given string, and inserts them into the symbol map. +/// This will not register symbols within function bodies, as they are context dependent. +/// +/// The current implementation uses naive heuristics and thus may not handle +/// certain cases that well, and also give wrong results. This should be +/// replaced with an actual parser, but is workable for now. +void extract_symbols(const char* text, SymbolMap& symbols, const char* uri) { + std::vector words; + int arguments = 0; + bool had_arguments = false; + Word array{}; + Word inside_block{}; + + const char* p = text; + while (*p) { + if (is_identifier_start_char(*p)) { + const char* start = p; + while (is_identifier_char(*p)) p++; + Word ident{start, p}; + + if (*p == '[') { + const char* array_start = p; + while (*p && *p != ']') p++; + array = Word{array_start, *p == ']' ? p+1 : p}; + } + + // don't confuse `layout(...)` for a function. 
+ if (ident.is_equal("layout")) { + while (is_whitespace(*p)) p++; + if (*p == '(') { + while (*p && *p != ')') p++; + } + continue; + } + + words.push_back(ident); + continue; + } + + // don't confuse numeric literals as identifiers + if ('0' <= *p && *p <= '9') { + p++; + while (is_identifier_char(*p)) p++; + continue; + } + + if (*p == '{') { + // TODO: handle function bodies + + if (words.size() >= 2 && arguments == 0) { + Word kind = words[words.size() - 2]; + if (kind.is_equal("in") + || kind.is_equal("out") + || kind.is_equal("uniform") + || kind.is_equal("buffer")) { + inside_block = words[words.size() - 1]; + words.clear(); + p++; + continue; + } + } + + // skip struct fields and function bodies (their contents are not global) + while (*p && *p != '}') p++; + continue; + } + + if (*p == '}' && inside_block.start) { + words.push_back(inside_block); + inside_block = Word{}; + } + + if (*p == '(') { + had_arguments = true; + p++; + const char* start = nullptr; + const char* end = nullptr; + while (*p) { + if (is_whitespace(*p)) { + p++; + continue; + } + + if (*p == ')' || *p == ',') { + if (start) { + words.push_back({start, p}); + arguments++; + } + + if (*p == ')') break; + + p++; + start = nullptr; + end = nullptr; + continue; + } + + if (!start) start = p; + end = p; + + p++; + } + } + + if (*p == ';' || *p == ')' || *p == '=') { + // end of declaration + int name_index = (int)words.size() - arguments - 1; + int type_index = name_index - 1; + + if (name_index >= 0) { + Word name_word = words[name_index]; + Word type_word = type_index >= 0 ? 
words[type_index] : Word{}; + + std::string name(name_word.start, name_word.end); + std::string type(type_word.start, type_word.end); + + if (!type.empty()) { + if (symbols.find(type) == symbols.end()) { + symbols.emplace(type, Symbol{Symbol::Type, ""}); + } + } + + if (arguments == 0 && array.start) { + type.append(array.start, array.end); + } + + for (int i = 0; i < arguments; i++) { + if (i == 0) { + type += " ("; + } else { + type += ", "; + } + + Word arg = words[name_index + 1 + i]; + const char* t = arg.start; + while (t != arg.end) { + if (is_whitespace(*t)) { + // only emit a single space + type.push_back(' '); + while (t != arg.end && is_whitespace(*t)) t++; + } else { + type.push_back(*t); + t++; + } + } + + if (i == arguments - 1) { + type += ")"; + } + } + + Symbol::Kind kind = *p == ')' ? Symbol::Function : Symbol::Constant; + int offset = name_word.start - text; + symbols.emplace(name, Symbol{kind, type, {uri, offset}}); + } + + words.clear(); + arguments = 0; + had_arguments = false; + array = Word{}; + + if (*p == '=') { + // if we have a constant assignment, skip over the expression + while (*p && *p != ';') p++; + } + } + + p++; + } +} + diff --git a/src/symbols.hpp b/src/symbols.hpp new file mode 100644 index 0000000..12585cf --- /dev/null +++ b/src/symbols.hpp @@ -0,0 +1,32 @@ +#pragma once + +#include +#include + +struct Symbol { + enum Kind { + Unknown = 0, + Function = 3, + Type = 7, + Constant = 21, + }; + + Kind kind = Unknown; + std::string details; + + struct Location { + /// Name of the file the symbol is defined in. If `null` this is undefined. + const char* uri = nullptr; + /// If the uri is not `null`, the offset into the file where the symbol is defined. + int offset = -1; + } location; +}; + +typedef std::map SymbolMap; + +/// Add the builtin types to the symbol map. +void add_builtin_types(SymbolMap& symbols); + +/// Extracts symbols from the given file. 
#include <cstdio>
#include <filesystem>
#include <optional>
#include <string>
#include <utility>

namespace fs = std::filesystem;

/// A position inside a text document.
struct SourceFileLocation {
    /// Zero indexed line index
    int line;
    /// Zero indexed character index from the start of the line
    int character;
};

/// Returns the byte offset for the given character on the given line.
///
/// Both `line` and `character` are zero indexed. Positions past the end of a
/// line stop at that line's newline; lines past the end of the text stop at
/// the terminating NUL.
// FIXME: use UTF-16 offsets
// https://fasterthanli.me/articles/the-bottom-emoji-breaks-rust-analyzer
int find_position_offset(const char* text, int line, int character) {
    int offset = 0;

    // Skip whole lines, stepping over each newline unless the text ended.
    for (; line > 0; line -= 1) {
        while (text[offset] && text[offset] != '\n') offset += 1;
        if (text[offset] == '\n') offset += 1;
    }

    // Advance within the line, never crossing the newline or the end of text.
    for (; character > 0 && text[offset] && text[offset] != '\n'; character -= 1) {
        offset += 1;
    }

    return offset;
}

/// Given a byte offset into a file, returns the corresponding line and column.
///
/// Offsets past the end of the text yield the location of the end of the text.
// FIXME: use UTF-16 offsets
// https://fasterthanli.me/articles/the-bottom-emoji-breaks-rust-analyzer
SourceFileLocation find_source_location(const char* text, int offset) {
    SourceFileLocation location{ 0, 0 };
    const char* end = text + offset;
    for (const char* p = text; *p && p < end; p++) {
        if (*p == '\n') {
            location.line += 1;
            location.character = 0;
        } else {
            location.character += 1;
        }
    }
    return location;
}

/// Returns `true` if the character may start an identifier.
bool is_identifier_start_char(char c) {
    return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '_';
}

/// Returns `true` if the character may be part of an identifier.
bool is_identifier_char(char c) {
    return is_identifier_start_char(c) || ('0' <= c && c <= '9');
}

/// Returns the offset in `text` where the last word before `offset` started.
int get_last_word_start(const char* text, int offset) {
    int start = offset;
    while (start > 0 && is_identifier_char(text[start - 1])) {
        start -= 1;
    }

    // If `text` was `123abc` and `offset` pointed at `b`, start would point at `1`.
    // We want to point to `a`, so advance past any characters that are not a
    // valid start of an identifier.
    while (start < offset && !is_identifier_start_char(text[start])) {
        start += 1;
    }

    return start;
}

/// Given an index inside a word, returns the index of the end of the word
/// (ie. one past the last identifier character).
int get_word_end(const char* text, int start) {
    int end = start;
    while (text[end] && is_identifier_char(text[end])) end++;
    return end;
}

/// Open the file with the given name, and return its contents as a string.
///
/// Returns `std::nullopt` if the file cannot be opened, its size cannot be
/// determined (eg. it is not seekable), or reading it fails. The file handle
/// is always closed before returning.
std::optional<std::string> read_file_to_string(const char* path) {
    FILE* f = fopen(path, "r");
    if (!f) return std::nullopt;

    std::optional<std::string> result;

    if (fseek(f, 0, SEEK_END) == 0) {
        // `ftell` reports failure as -1; never feed that into `resize`.
        const long size = ftell(f);
        if (size >= 0) {
            std::string contents(static_cast<size_t>(size), '\0');
            rewind(f);

            size_t actual = 0;
            if (!contents.empty()) {
                actual = fread(&contents[0], sizeof(char), contents.size(), f);
            }

            if (!ferror(f)) {
                // Text-mode reads may yield fewer bytes than the reported size.
                contents.resize(actual);
                result = std::move(contents);
            }
        }
    }

    fclose(f);
    return result;
}

/// Given a file path, returns its `file://` URI (the path is made absolute).
std::string make_path_uri(const std::string& path) {
    // Use an explicit conversion; the implicit one from `fs::path` only exists
    // where the native path character type is `char`.
    return "file://" + fs::absolute(path).string();
}

/// Returns a pointer into `haystack` with the `prefix` removed from the start.
/// If `haystack` does not begin with `prefix`, returns null.
const char* strip_prefix(const char* prefix, const char* haystack) {
    for (; *prefix; prefix++, haystack++) {
        if (*prefix != *haystack) return nullptr;
    }
    return haystack;
}
// Forward declaration so the prototypes below are self-contained.
struct SourceFileLocation;

/// Converts a zero indexed (line, character) position into a byte offset into `text`.
int find_position_offset(const char* text, int line, int character);

/// Converts a byte offset into `text` back into a zero indexed line and column.
SourceFileLocation find_source_location(const char* text, int offset);

/// Tells whether `c` is allowed as the first character of an identifier
/// (an ASCII letter or an underscore).
bool is_identifier_start_char(char c);

/// Tells whether `c` is allowed anywhere in an identifier
/// (an ASCII letter, an underscore or a digit).
bool is_identifier_char(char c);

/// Finds where the word that ends at `offset` begins, and returns that offset in `text`.
int get_last_word_start(const char* text, int offset);

/// Starting from an index inside a word, finds the index one past the word's
/// last character.
int get_word_end(const char* text, int start);

/// Reads the named file and returns its contents, or an empty optional if the
/// file could not be opened.
std::optional<std::string> read_file_to_string(const char* path);

/// Builds the `file://` URI for a file path.
std::string make_path_uri(const std::string& path);

/// Skips `prefix` at the start of `haystack` and returns a pointer to what follows.
/// Returns null when `haystack` does not start with `prefix`.
const char* strip_prefix(const char* prefix, const char* haystack);