From ceef2b3440d381b7f37f1cd6d2f0a7d2a20c1344 Mon Sep 17 00:00:00 2001 From: "chenyingzhuang.cyz" Date: Thu, 30 Nov 2023 15:02:08 +0800 Subject: [PATCH] add python&xml tutorial and fix typos --- .devcontainer/devcontainer.json | 2 +- tutorial/notebook/go_analysis.ipynb | 8 +- tutorial/notebook/python_analysis.ipynb | 214 ++++++++++++++++++ tutorial/notebook/xml_analysis.ipynb | 280 ++++++++++++++++++++++++ 4 files changed, 499 insertions(+), 5 deletions(-) create mode 100644 tutorial/notebook/python_analysis.ipynb create mode 100644 tutorial/notebook/xml_analysis.ipynb diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index cf953372..48d0ec20 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -8,4 +8,4 @@ "openFiles": ["tutorial/README.md"] } } - } \ No newline at end of file +} \ No newline at end of file diff --git a/tutorial/notebook/go_analysis.ipynb b/tutorial/notebook/go_analysis.ipynb index ba1078d7..aa03e4ee 100644 --- a/tutorial/notebook/go_analysis.ipynb +++ b/tutorial/notebook/go_analysis.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "这是一个使用CodeFuse-Query分析Go项目的教程。在教程中,你将体验到使用命令行工具对代码仓库进行数据化,然后使用Godel语言来分析这个仓库。" + "这是一个使用 CodeFuse-Query 分析 Go 项目的教程。在教程中,你将体验到使用命令行工具对代码仓库进行数据化,然后使用 Godel 语言来分析这个仓库。" ] }, { @@ -54,7 +54,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "STEP 1: 代码数据化。使用 `sparrow database create` 命令创建一个db文件,指定待分析的仓库地址(当前目录下的gorm子目录),分析的语言(go),以及db文件的存储路径(放置在当前目录下的/db/gorm)。执行该命令之后,竟会生成一份db文件,该文件存储着代码仓库的结构化数据,之后的分析就是针对这份数据进行。" + "STEP 1: 代码数据化。使用 `sparrow database create` 命令创建一个db文件,指定待分析的仓库地址(当前目录下的gorm子目录),分析的语言(go),以及db文件的存储路径(放置在当前目录下的/db/gorm)。执行该命令之后,就会生成一份db文件,该文件存储着代码仓库的结构化数据,之后的分析就是针对这份数据进行。" ] }, { @@ -75,7 +75,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "STEP 2: 使用Godel分析语言分析db文件。在本教程中,可以点击代码左侧的执行按钮,直接运行分析脚本。在命令行中,你可以使用 `sparrow query run` 命令来执行查询脚本,具体可以使用`sparrow query run -h` 来获取详细参数信息。" + "STEP 2: 
使用Godel分析语言分析db文件。在本教程中,可以点击代码左侧的执行按钮,或使用快捷键:`Shift+Enter`,直接运行分析脚本。这里使用 `%db /path/to/db` 魔法命令来设置COREF db路径,内核会读取这个值来进行query查询。" ] }, { @@ -150,7 +150,7 @@ "collapsed": false }, "source": [ - "STEP 3: 好了,你可以针对分析生成的结果,进行进一步的代码分析了,比如你可以结合pandas库,使用刚刚生成的 query.json 实现最大函数复杂度的排序查询:" + "STEP 3: 好了,你可以针对分析生成的结果,进行进一步的代码分析了,比如你可以结合pandas库,使用刚刚生成的 query.json 实现最大函数复杂度Top 10的排序查询:" ] }, { diff --git a/tutorial/notebook/python_analysis.ipynb b/tutorial/notebook/python_analysis.ipynb new file mode 100644 index 00000000..79d88848 --- /dev/null +++ b/tutorial/notebook/python_analysis.ipynb @@ -0,0 +1,214 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "这是一个使用 CodeFuse-Query 分析 Python 项目的教程。在教程中,你将体验到使用命令行工具对代码仓库进行数据化,然后使用 Godel 语言来分析这个仓库。" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "检查cli是否就绪" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "!which sparrow" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "STEP 0: 克隆要分析的仓库。我们以 python 仓库 [requests](https://github.com/psf/requests.git) 项目为例。" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-22T08:30:58.387715Z", + "start_time": "2023-11-22T08:30:44.572634Z" + } + }, + "outputs": [], + "source": [ + "!git clone https://github.com/psf/requests.git" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "STEP 1: 代码数据化。使用 `sparrow database create` 命令创建一个db文件,指定待分析的仓库地址(当前目录下的requests子目录),分析的语言(python),以及db文件的存储路径(放置在当前目录下的/db/requests)。执行该命令之后,就会生成一份db文件,该文件存储着代码仓库的结构化数据,之后的分析就是针对这份数据进行。" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-23T03:46:32.220317Z", + "start_time": "2023-11-23T03:46:12.785705Z" + } + }, + "outputs": [], + "source": [ + 
"!sparrow database create --source-root requests --data-language-type python --output ./db/requests" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "STEP 2: 使用Godel分析语言分析db文件。在本教程中,可以点击代码左侧的执行按钮,或使用快捷键:`Shift+Enter`,直接运行分析脚本。这里使用 `%db /path/to/db` 魔法命令来设置COREF db路径,内核会读取这个值来进行query查询。" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "示例 查询 [requests](https://github.com/psf/requests.git) 的函数圈复杂度信息。\n", + "\n", + "第一行通过内核魔法命令指定分析的db路径,后面写查询函数圈复杂度的 Godel 脚本。" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%db ./db/requests\n", + "// script\n", + "use coref::python::*\n", + "\n", + "fn default_db() -> PythonDB {\n", + " return PythonDB::load(\"coref_python_src.db\")\n", + "}\n", + "\n", + "/**\n", + " * Get cyclomatic complexity of functions\n", + " *\n", + " * @param name function name\n", + " * @param value cyclomatic complexity of function\n", + " * @param path path of file including this function\n", + " * @param sline function start line\n", + " * @param eline function end line\n", + " */\n", + "fn getCyclomaticComplexity(\n", + " name: string,\n", + " value: int,\n", + " path: string,\n", + " sline: int,\n", + " eline: int) -> bool {\n", + " // get metric function\n", + " for (c in MetricFunction(default_db())) {\n", + " if (path = c.getLocation().getFile().getRelativePath() &&\n", + " name = c.getQualifiedName() &&\n", + " value = c.getCyclomaticComplexity() &&\n", + " sline = c.getLocation().getStartLineNumber() &&\n", + " eline = c.getLocation().getEndLineNumber()) {\n", + " return true\n", + " }\n", + " }\n", + "}\n", + "\n", + "fn main() {\n", + " output(getCyclomaticComplexity())\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "将上一次运行的 query 结果保存到一个 JSON 文件" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%save_to
./query.json" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "STEP 3: 好了,你可以针对分析生成的结果,进行进一步的代码分析了,比如你可以结合pandas库,使用刚刚生成的 query.json 实现最大函数复杂度Top 10的排序查询:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-23T03:54:56.998681Z", + "start_time": "2023-11-23T03:54:56.976694Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "%%python\n", + "import pandas as pd\n", + "data = pd.read_json('./query.json')\n", + "data.sort_values('value', ascending=False, inplace=True)\n", + "top_10 = data.head(10)\n", + "print(top_10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "Enjoy!" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Godel kernel", + "language": "rust", + "name": "godel-jupyter" + }, + "language_info": { + "file_extension": ".gdl", + "help_links": [ + { + "text": "Godel kernel Magics", + "url": "https://sparrow.alipay.com" + } + ], + "mimetype": "text/rust", + "name": "rust", + "version": "0.0.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tutorial/notebook/xml_analysis.ipynb b/tutorial/notebook/xml_analysis.ipynb new file mode 100644 index 00000000..c81efd2f --- /dev/null +++ b/tutorial/notebook/xml_analysis.ipynb @@ -0,0 +1,280 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "这是一个使用 CodeFuse-Query 分析 XML 项目的教程。在教程中,你将体验到使用命令行工具对代码仓库进行数据化,然后使用 Godel 语言来分析这个仓库。" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "检查cli是否就绪" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "!which sparrow" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "STEP 0: 克隆要分析的仓库。我们以 [sofa-boot](https://github.com/sofastack/sofa-boot.git) 项目为例。" + ] + }, + {
+ "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-22T08:30:58.387715Z", + "start_time": "2023-11-22T08:30:44.572634Z" + } + }, + "outputs": [], + "source": [ + "!git clone https://github.com/sofastack/sofa-boot.git" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "STEP 1: 代码数据化。使用 `sparrow database create` 命令创建一个db文件,指定待分析的仓库地址(当前目录下的sofa-boot子目录),分析的语言(xml),以及db文件的存储路径(放置在当前目录下的/db/sofa-boot)。执行该命令之后,就会生成一份db文件,该文件存储着代码仓库的结构化数据,之后的分析就是针对这份数据进行。" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-23T03:46:32.220317Z", + "start_time": "2023-11-23T03:46:12.785705Z" + } + }, + "outputs": [], + "source": [ + "!sparrow database create --source-root sofa-boot --data-language-type xml --output ./db/sofa-boot" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "STEP 2: 使用Godel分析语言分析db文件。在本教程中,可以点击代码左侧的执行按钮,或使用快捷键:`Shift+Enter`,直接运行分析脚本。这里使用 `%db /path/to/db` 魔法命令来设置COREF db路径,内核会读取这个值来进行query查询。" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "示例 查询 [sofa-boot](https://github.com/sofastack/sofa-boot.git) 的POM信息(如文件路径,引用的jar资源,版本信息)。\n", + "\n", + "第一行通过内核魔法命令指定分析的db路径,后面写查询POM信息Godel脚本。" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%db ./db/sofa-boot\n", + "// script\n", + "use coref::xml::*\n", + "\n", + "schema DependencyElement extends XmlElement {}\n", + "\n", + "impl DependencyElement {\n", + " @data_constraint\n", + " pub fn __all__(db: XmlDB) -> *DependencyElement {\n", + " for(e in XmlElement(db)) {\n", + " if (e.getElementName() = \"dependency\") {\n", + " yield DependencyElement {\n", + " id: e.id,\n", + " location_id: e.location_id,\n", + " parent_id: e.parent_id,\n", + " index_order: e.index_order\n", + " }\n", + " }\n", + " }\n", + " }\n", + "}\n", + "\n", + "schema 
GroupElement extends XmlElement {}\n", + "\n", + "impl GroupElement {\n", + " @data_constraint\n", + " pub fn __all__(db: XmlDB) -> *GroupElement {\n", + " for(e in XmlElement(db)) {\n", + " if (e.getElementName() = \"groupId\") {\n", + " yield GroupElement {\n", + " id: e.id,\n", + " location_id: e.location_id,\n", + " parent_id: e.parent_id,\n", + " index_order: e.index_order\n", + " }\n", + " }\n", + " }\n", + " }\n", + "}\n", + "\n", + "schema VersionElement extends XmlElement {}\n", + "\n", + "impl VersionElement {\n", + " @data_constraint\n", + " pub fn __all__(db: XmlDB) -> *VersionElement {\n", + " for(e in XmlElement(db)) {\n", + " if (e.getElementName() = \"version\") {\n", + " yield VersionElement {\n", + " id: e.id,\n", + " location_id: e.location_id,\n", + " parent_id: e.parent_id,\n", + " index_order: e.index_order\n", + " }\n", + " }\n", + " }\n", + " }\n", + "}\n", + "\n", + "schema ArtifactElement extends XmlElement {}\n", + "\n", + "impl ArtifactElement {\n", + " @data_constraint\n", + " pub fn __all__(db: XmlDB) -> *ArtifactElement {\n", + " for(e in XmlElement(db)) {\n", + " if (e.getElementName() = \"artifactId\") {\n", + " yield ArtifactElement {\n", + " id: e.id,\n", + " location_id: e.location_id,\n", + " parent_id: e.parent_id,\n", + " index_order: e.index_order\n", + " }\n", + " }\n", + " }\n", + " }\n", + "}\n", + "\n", + "schema PomFile extends XmlFile {}\n", + "\n", + "impl PomFile {\n", + " @data_constraint\n", + " pub fn __all__(db: XmlDB) -> *PomFile {\n", + " for(f in XmlFile(db)) {\n", + " if (f.getFileName() = \"pom.xml\") {\n", + " yield PomFile {\n", + " id: f.id,\n", + " file_name: f.file_name,\n", + " relative_path: f.relative_path\n", + " }\n", + " }\n", + " }\n", + " }\n", + "}\n", + "\n", + "// output relative path of the file, referenced jar name and version\n", + "fn out(fileName: string, m1: string, m2: string, m3: string) -> bool {\n", + " let (db = XmlDB::load(\"coref_xml_src.db\")) {\n", + " for (f in PomFile(db),\n", 
+ " e1 in GroupElement(db),\n", + " e2 in VersionElement(db),\n", + " e3 in ArtifactElement(db),\n", + " c1 in XmlCharacter(db),\n", + " c2 in XmlCharacter(db),\n", + " c3 in XmlCharacter(db),\n", + " p in DependencyElement(db)) {\n", + " if (f.key_eq(p.getLocation().getFile()) &&\n", + " fileName = f.getRelativePath() &&\n", + " p.key_eq(e1.getParent()) &&\n", + " e1.key_eq(c1.getBelongedElement()) &&\n", + " m1 = c1.getText() &&\n", + " p.key_eq(e2.getParent()) &&\n", + " e2.key_eq(c2.getBelongedElement()) &&\n", + " m2 = c2.getText() &&\n", + " p.key_eq(e3.getParent()) &&\n", + " e3.key_eq(c3.getBelongedElement()) &&\n", + " m3 = c3.getText()) {\n", + " return true\n", + " }\n", + " }\n", + " }\n", + "}\n", + "\n", + "fn main() {\n", + " output(out())\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "将上一次运行的 query 结果保存到一个 JSON/CSV 文件" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%save_to ./query.csv" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "STEP 3: 好了,你可以针对分析生成的结果,进行进一步的代码分析了。" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "Enjoy!" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Godel kernel", + "language": "rust", + "name": "godel-jupyter" + }, + "language_info": { + "file_extension": ".gdl", + "help_links": [ + { + "text": "Godel kernel Magics", + "url": "https://sparrow.alipay.com" + } + ], + "mimetype": "text/rust", + "name": "rust", + "version": "0.0.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}