From c83d81b03e996d0d8044d42e11416b68a2c41a36 Mon Sep 17 00:00:00 2001 From: gouzhonglin Date: Mon, 9 Oct 2023 09:50:26 +0800 Subject: [PATCH] f --- PRREVIEW/.gitignore | 36 +++ PRREVIEW/Dockerfile | 36 +++ PRREVIEW/README.md | 0 PRREVIEW/config.yaml | 5 + PRREVIEW/requirements.txt | 6 + PRREVIEW/src/app.py | 22 ++ PRREVIEW/src/config/init_config.py | 27 +++ PRREVIEW/src/gitee/gitee_api.py | 47 ++++ PRREVIEW/src/gpt/chat_gpt.py | 92 +++++++ PRREVIEW/src/gpt/gpt.py | 28 +++ PRREVIEW/src/gpt/gpt_class_factory.py | 14 ++ PRREVIEW/src/gpt/my_gpt.py | 56 +++++ PRREVIEW/src/handle/comment_command.py | 78 ++++++ PRREVIEW/src/handle/diff.py | 93 ++++++++ PRREVIEW/src/handle/pull_request.py | 44 ++++ PRREVIEW/src/handle/task.py | 19 ++ PRREVIEW/src/main.py | 16 ++ PRREVIEW/src/reviewCode/__init__.py | 0 PRREVIEW/src/reviewCode/bot.py | 14 ++ PRREVIEW/src/reviewCode/commenter.py | 159 +++++++++++++ PRREVIEW/src/reviewCode/giteeApi.py | 80 +++++++ PRREVIEW/src/reviewCode/input.py | 48 ++++ PRREVIEW/src/reviewCode/main.py | 15 ++ PRREVIEW/src/reviewCode/options.py | 28 +++ PRREVIEW/src/reviewCode/prompts.py | 106 +++++++++ PRREVIEW/src/reviewCode/review.py | 316 +++++++++++++++++++++++++ PRREVIEW/src/reviewCode/tokenizer.py | 9 + PRREVIEW/src/router/router.py | 35 +++ PRREVIEW/src/utils/background_task.py | 7 + PRREVIEW/src/utils/utile_tool.py | 3 + 30 files changed, 1439 insertions(+) create mode 100644 PRREVIEW/.gitignore create mode 100644 PRREVIEW/Dockerfile create mode 100644 PRREVIEW/README.md create mode 100644 PRREVIEW/config.yaml create mode 100644 PRREVIEW/requirements.txt create mode 100644 PRREVIEW/src/app.py create mode 100644 PRREVIEW/src/config/init_config.py create mode 100644 PRREVIEW/src/gitee/gitee_api.py create mode 100644 PRREVIEW/src/gpt/chat_gpt.py create mode 100644 PRREVIEW/src/gpt/gpt.py create mode 100644 PRREVIEW/src/gpt/gpt_class_factory.py create mode 100644 PRREVIEW/src/gpt/my_gpt.py create mode 100644 PRREVIEW/src/handle/comment_command.py 
create mode 100644 PRREVIEW/src/handle/diff.py create mode 100644 PRREVIEW/src/handle/pull_request.py create mode 100644 PRREVIEW/src/handle/task.py create mode 100644 PRREVIEW/src/main.py create mode 100644 PRREVIEW/src/reviewCode/__init__.py create mode 100644 PRREVIEW/src/reviewCode/bot.py create mode 100644 PRREVIEW/src/reviewCode/commenter.py create mode 100644 PRREVIEW/src/reviewCode/giteeApi.py create mode 100644 PRREVIEW/src/reviewCode/input.py create mode 100644 PRREVIEW/src/reviewCode/main.py create mode 100644 PRREVIEW/src/reviewCode/options.py create mode 100644 PRREVIEW/src/reviewCode/prompts.py create mode 100644 PRREVIEW/src/reviewCode/review.py create mode 100644 PRREVIEW/src/reviewCode/tokenizer.py create mode 100644 PRREVIEW/src/router/router.py create mode 100644 PRREVIEW/src/utils/background_task.py create mode 100644 PRREVIEW/src/utils/utile_tool.py diff --git a/PRREVIEW/.gitignore b/PRREVIEW/.gitignore new file mode 100644 index 0000000..160f656 --- /dev/null +++ b/PRREVIEW/.gitignore @@ -0,0 +1,36 @@ +# 忽略Python字节码文件和缓存文件 +__pycache__/ +*.pyc +*.pyo +*.pyd + +# 忽略编辑器生成的文件和目录 +.idea/ +.vscode/ + +# 忽略由开发环境或工具生成的其他文件和目录 +venv/ +env/ +dist/ +build/ +*.egg-info/ + +# 忽略日志文件和临时文件 +*.log +*.tmp + +# 忽略敏感信息和配置文件 +config.py +secret.txt + +# 忽略文档和报告文件 +docs/ +reports/ + +# 忽略测试相关的文件和目录 +tests/ +test_*.py + +# 忽略其他自定义的文件和目录 +custom_directory/ +custom_file.txt diff --git a/PRREVIEW/Dockerfile b/PRREVIEW/Dockerfile new file mode 100644 index 0000000..5751bbf --- /dev/null +++ b/PRREVIEW/Dockerfile @@ -0,0 +1,36 @@ +FROM openeuler/openeuler:22.03 + +RUN groupadd -g 1001 pr \ + && useradd -u 1001 -g pr -s /bin/bash -m pr + +RUN cd /home/pr + +RUN yum update -y \ + && yum install -y make gcc zlib-devel openssl-devel bzip2-devel ncurses-devel gdbm-devel readline-devel sqlite-devel libffi-devel tk-devel xz-devel \ + && yum install -y openssl-devel openssl \ + && yum install -y wget + +RUN wget https://www.python.org/ftp/python/3.11.0/Python-3.11.0.tgz \ + && 
tar -xzf Python-3.11.0.tgz \ + && yum install readline-devel \ + && cd Python-3.11.0 \ + && ./configure --prefix=/home/pr/python --with-ssl \ + && make \ + && make install + +ENV PATH="/home/pr/python/bin:${PATH}" + + +RUN python3 -V + +WORKDIR /home/pr/pr-message + +COPY . . + +RUN pip3 install --no-cache-dir -r requirements.txt + +USER pr + +EXPOSE 8080 + +CMD ["python3", "src/main.py"] diff --git a/PRREVIEW/README.md b/PRREVIEW/README.md new file mode 100644 index 0000000..e69de29 diff --git a/PRREVIEW/config.yaml b/PRREVIEW/config.yaml new file mode 100644 index 0000000..bed2d15 --- /dev/null +++ b/PRREVIEW/config.yaml @@ -0,0 +1,5 @@ +access_token: 7e68411eb68f4f52834ed8510a0656ce +gitee_host: https://gitee.com + +max_token_length: 1024 +encoding_name: cl100k_base \ No newline at end of file diff --git a/PRREVIEW/requirements.txt b/PRREVIEW/requirements.txt new file mode 100644 index 0000000..730d5fa --- /dev/null +++ b/PRREVIEW/requirements.txt @@ -0,0 +1,6 @@ +click==8.1.3 +Flask==2.2.3 +loguru==0.7.0 +PyYAML==6.0.1 +Requests==2.31.0 +tiktoken==0.3.0 diff --git a/PRREVIEW/src/app.py b/PRREVIEW/src/app.py new file mode 100644 index 0000000..0a071e6 --- /dev/null +++ b/PRREVIEW/src/app.py @@ -0,0 +1,22 @@ +from flask import request +from flask import Flask +from config.init_config import init_config + +from handle.task import assgin_task + +app = Flask(__name__) + +init_config("config.yaml") + + +@app.route("/hook/analyze", methods=["POST"]) +def analyze(): + data = request.get_json() + + assgin_task(data) + + return "Processing completed" + + +def start_router(): + app.run("0.0.0.0", debug=True, port=8080) diff --git a/PRREVIEW/src/config/init_config.py b/PRREVIEW/src/config/init_config.py new file mode 100644 index 0000000..acacb82 --- /dev/null +++ b/PRREVIEW/src/config/init_config.py @@ -0,0 +1,27 @@ +import os +import yaml + +from gitee.gitee_api import GiteeApiCaller +from gpt.gpt import Gpt +from handle.diff import Diff_Prompt + + +def 
init_config(path): + print(os.getcwd()) + with open(path, "r", encoding="utf-8") as f: + config = yaml.safe_load(f) + + GiteeApiCaller.init_config_attr(config["access_token"], config["gitee_host"]) + + Gpt.init_config_attr( + config["gpt"]["use"], + config["gpt"]["max_token_length"], + config["gpt"]["encoding_name"], + config["gpt"]["host"], + config["gpt"]["Authorization"], + ) + + Diff_Prompt.init_config_attr() + + + diff --git a/PRREVIEW/src/gitee/gitee_api.py b/PRREVIEW/src/gitee/gitee_api.py new file mode 100644 index 0000000..aa7866d --- /dev/null +++ b/PRREVIEW/src/gitee/gitee_api.py @@ -0,0 +1,47 @@ +import requests +from loguru import logger + + +class GiteeApiCaller: + access_token = "" + gitee_host = "" + + def init_config_attr(access_token, gitee_host): + GiteeApiCaller.access_token = access_token + GiteeApiCaller.gitee_host = gitee_host + + +class PullRequestComments(GiteeApiCaller): + submit_pull_request_comments_url_template = ( + "{host}/api/v5/repos/{owner}/{repo}/pulls/{number}/comments" + ) + + def __init__(self, owner, repo, number, body, commit_id, path, position): + self.owner = owner + self.repo = repo + self.number = number + self.body = body + self.commit_id = commit_id + self.path = path + self.position = position + + def submit_pull_request_comments(self): + url = self.submit_pull_request_comments_url_template.format( + host=self.gitee_host, owner=self.owner, repo=self.repo, number=self.number + ) + + form_data = { + "access_token": self.access_token, + "body": self.body, + "commit_id": self.commit_id, + "path": self.path, + "position": self.position, + } + response = requests.post(url, data=form_data) + + if response.status_code == 201: + logger.info("post to gitee success") + else: + logger.info("post to gitee failed") + logger.info(response.status_code) + logger.info(response.text) diff --git a/PRREVIEW/src/gpt/chat_gpt.py b/PRREVIEW/src/gpt/chat_gpt.py new file mode 100644 index 0000000..d84cf44 --- /dev/null +++ 
b/PRREVIEW/src/gpt/chat_gpt.py @@ -0,0 +1,92 @@ +import requests +import tiktoken + +from gpt.gpt import Gpt + + +class ChatGpt(Gpt): + def get_answer(prompt): + url = "{openai_host}/v1/chat/completions".format(openai_host=ChatGpt.host) + + data = { + "model": "gpt-3.5-turbo", + "messages": [ + { + "role": "system", + "content": ( + "您将充当 git 中提交消息的作者。" + "您的任务是在传统git提交中创建清晰且全面的提交消息,详细清晰的解释更改内容。 我将向您发送“git diff --staged”命令的输出,然后您将其转换为提交消息。" + "行长度不得超过 74 个字符。" + "用中文回答。" + "使用如下模板:" + "修改了那个文件\n" + "- 修改细节1\n" + "- 修改细节2\n" + ), + }, + { + "role": "user", + "content": prompt, + } + ], + "temperature": 0.7, + } + + + response = requests.post( + url, json=data, headers={"Authorization": "Bearer " + ChatGpt.Authorization} + ) + + if response.status_code != 200: + print("get answer error") + print(response.status_code) + + pr = response.json() + + return pr["choices"][0]["message"]["content"] + + + + def get_summary(content): + url = "{openai_host}/v1/chat/completions".format(openai_host=ChatGpt.host) + data = { + "model": "gpt-3.5-turbo", + "messages": [ + { + "role": "system", + "content": ( + "您的任务是高度概括总结我给您的输入内容。" + "用中文回答。" + ), + }, + { + "role": "user", + "content": content, + } + ], + "temperature": 0.7, + } + + response = requests.post( + url, json=data, headers={"Authorization": "Bearer " + ChatGpt.Authorization} + ) + + if response.status_code != 200: + print("get answer error") + print(response.status_code) + + pr = response.json() + + return pr["choices"][0]["message"]["content"] + + + + + def num_tokens_from_string(string: str) -> int: + encoding = tiktoken.get_encoding(ChatGpt.encoding_name) + tokens = encoding.encode(string) + num_tokens = len(tokens) + return num_tokens + + def get_max_prompt_length(): + return ChatGpt.max_token_length diff --git a/PRREVIEW/src/gpt/gpt.py b/PRREVIEW/src/gpt/gpt.py new file mode 100644 index 0000000..6d7cbc4 --- /dev/null +++ b/PRREVIEW/src/gpt/gpt.py @@ -0,0 +1,28 @@ +from abc import ABCMeta, abstractmethod + + +class 
Gpt(metaclass=ABCMeta): + use = "" + max_token_length = 0 + encoding_name = "" + host = "" + Authorization = "" + + def init_config_attr(use, max_token_length, encoding_name, host, Authorization): + Gpt.use = use + Gpt.max_token_length = max_token_length + Gpt.encoding_name = encoding_name + Gpt.host = host + Gpt.Authorization = Authorization + + @abstractmethod + def get_answer(prompt): + pass + + @abstractmethod + def num_tokens_from_string(string: str) -> int: + pass + + @abstractmethod + def get_max_prompt_length(): + pass diff --git a/PRREVIEW/src/gpt/gpt_class_factory.py b/PRREVIEW/src/gpt/gpt_class_factory.py new file mode 100644 index 0000000..5a131c5 --- /dev/null +++ b/PRREVIEW/src/gpt/gpt_class_factory.py @@ -0,0 +1,14 @@ +from gpt.chat_gpt import ChatGpt +from gpt.gpt import Gpt +from gpt.my_gpt import MyGpt + + +class GptClassFactory: + @staticmethod + def create_class(): + if Gpt.use == "my_gpt": + return MyGpt + elif Gpt.use == "open_ai": + return ChatGpt + else: + raise ValueError("Invalid class name") diff --git a/PRREVIEW/src/gpt/my_gpt.py b/PRREVIEW/src/gpt/my_gpt.py new file mode 100644 index 0000000..dc16cbf --- /dev/null +++ b/PRREVIEW/src/gpt/my_gpt.py @@ -0,0 +1,56 @@ +import re +import requests +import tiktoken +from loguru import logger + +from gpt.gpt import Gpt + + +class MyGpt(Gpt): + question = ( + "You are to act as the author of a commit message in git." + "Your mission is to create clean and comprehensive commit messages in the conventional commit convention and explain WHAT were the changes and WHY the changes were done. I'll send you an output of 'git diff --staged' command, and you convert it into a commit message." + "Do not preface the commit with anything." + "Don't add any descriptions to the commit, only commit message." + "Use the present tense. Lines must not be longer than 74 characters." + "Use Chinese to answer." 
+ "The diff is: {diff_content}" + ) + + def get_answer(prompt): + url = "{host}/hcstream".format(host=MyGpt.host) + + data = {"question": MyGpt.question.format(diff_content=prompt), "history": []} + response = requests.post(url, json=data, stream=True) + if response.status_code != 200: + logger.error("get answer error") + logger.error(response.status_code) + return + + data_list = [] + pattern = r'"answer":\s+"([^"]+)"' + for line in response.iter_lines(): + if line: + line_text = line.decode("utf-8") + match = re.search(pattern, line_text) + if match: + result = match.group(1) + data_list.append(result) + + if len(data_list) < 2: + logger.info("no answer") + return + data_list.pop() + + combined_result = "".join(data_list) + + return combined_result + + def num_tokens_from_string(string: str) -> int: + encoding = tiktoken.get_encoding(MyGpt.encoding_name) + tokens = encoding.encode(string) + num_tokens = len(tokens) + return num_tokens + + def get_max_prompt_length(): + return MyGpt.max_token_length - MyGpt.num_tokens_from_string(MyGpt.question) diff --git a/PRREVIEW/src/handle/comment_command.py b/PRREVIEW/src/handle/comment_command.py new file mode 100644 index 0000000..4d6e08b --- /dev/null +++ b/PRREVIEW/src/handle/comment_command.py @@ -0,0 +1,78 @@ +import time +from loguru import logger +import requests +from gitee.gitee_api import PullRequestComments +from gpt.gpt_class_factory import GptClassFactory + +from handle.diff import Diff_Prompt, handle_diff + + +def summary_message(data): + pr = data.get("pull_request", None) + if pr is None: + logger.error("no pull_request") + return + + diff_url = pr.get("diff_url", None) + if diff_url is None: + logger.error("no diff") + return + + diff = requests.get(diff_url) + if diff.status_code != 200: + logger.error("get diff error") + return + + diff.encoding = "utf-8" + diff_text = diff.text + + results = handle_diff(diff_text) + if results is None: + logger.error("can't get prompts") + return + + comment_list = [] + 
+ + for result in results: + answer = GptClassFactory.create_class().get_answer(result) + if answer is None: + continue + comment_list.append(answer) + + time.sleep(10) + + comment = "" + + for single_comment in comment_list: + single_comment = str(single_comment).strip() + if single_comment == "": + continue + comment += single_comment + "\n\n" + + # summarize = GptClassFactory.create_class().get_summary(comment) + # comment += summarize + + project = data.get("project", None) + if project is None: + logger.error("no project") + return + + owner = project.get("namespace", None) + if owner is None: + logger.error("no owner") + return + + repo = project.get("path", None) + if repo is None: + logger.error("no repo") + return + + number = pr.get("number", None) + if number is None: + logger.error("no number") + return + + pr = PullRequestComments(owner, repo, number, comment, None, None, None) + + pr.submit_pull_request_comments() diff --git a/PRREVIEW/src/handle/diff.py b/PRREVIEW/src/handle/diff.py new file mode 100644 index 0000000..da509bf --- /dev/null +++ b/PRREVIEW/src/handle/diff.py @@ -0,0 +1,93 @@ +import re +from loguru import logger +from gpt.gpt_class_factory import GptClassFactory + +from utils.utile_tool import split_string + + +class Diff_Prompt: + max_template_token_length = 1024 + + def init_config_attr(): + Diff_Prompt.max_template_token_length = ( + GptClassFactory.create_class().get_max_prompt_length() + ) + + +def diff_content_out_of_length(diff_content): + return ( + GptClassFactory.create_class().num_tokens_from_string(diff_content) + >= Diff_Prompt.max_template_token_length + ) + + +def handle_diff(diff): + prompt_list = [] + + if diff_content_out_of_length(diff): + diff_array = cut_diff_by_file_diffs(diff) + + for single_diff in diff_array: + if diff_content_out_of_length(single_diff): + result_array = cut_single_diff(single_diff) + prompt_list.extend(result_array) + else: + prompt_list.append(single_diff) + + return prompt_list + + +def 
cut_diff_by_file_diffs(diff): + separator = "diff --git" + + diff_array = diff.split(separator) + diff_array.pop(0) + + diff_array = [separator + diff for diff in diff_array] + + return diff_array + + +def cut_one_diff_by_change(diff): + separator = "@@ -" + + change_array = diff.split(separator) + diff_title = change_array.pop(0) + + change_array = [separator + diff for diff in change_array] + + change_array[0] = diff_title + change_array[0] + + return change_array + + +def cut_single_diff(diff): + result_array = [] + change_array = cut_one_diff_by_change(diff) + + index = 0 + while index < len(change_array): + if diff_content_out_of_length(change_array[index]): + result_array.extend(cut_change(change_array[index])) + index += 1 + continue + + merge_change = change_array[index] + for i in range(index + 1, len(change_array)): + if diff_content_out_of_length(merge_change + change_array[i]): + index = i + break + else: + index = i + 1 + merge_change += change_array[i] + + result_array.append(merge_change) + + if index >= len(change_array) - 1: + break + + return result_array + + +def cut_change(change): + return split_string(change, Diff_Prompt.max_template_token_length) diff --git a/PRREVIEW/src/handle/pull_request.py b/PRREVIEW/src/handle/pull_request.py new file mode 100644 index 0000000..c1bd92f --- /dev/null +++ b/PRREVIEW/src/handle/pull_request.py @@ -0,0 +1,44 @@ +from loguru import logger + +from handle.comment_command import * +from reviewCode.main import reviewMain + +comment_method = {"/summary-message": summary_message, "@PRReviewAI reivew": reviewMain} + + +def merge_request_hooks(data): + pass + + +def note_hooks(data): + action = data.get("action", None) + if action != "comment": + logger.error("action is not comment") + return + + pr = data.get("pull_request", None) + if pr is None: + logger.error("no pull_request") + return + + state = pr.get("state", None) + if state != "open": + logger.error("pull request is not open") + return + + comment = 
data.get("comment", None) + if comment is None: + logger.error("no comment") + return + + comment_body = comment.get("body", None) + if comment_body is None: + logger.error("no comment body") + return + + comment_fuc = comment_method.get(str(comment_body).strip()) + if comment_fuc is None: + logger.error("system not suport this comment") + return + + comment_fuc(data) diff --git a/PRREVIEW/src/handle/task.py b/PRREVIEW/src/handle/task.py new file mode 100644 index 0000000..02ac65f --- /dev/null +++ b/PRREVIEW/src/handle/task.py @@ -0,0 +1,19 @@ +from loguru import logger + +from handle.pull_request import * + +hook_method = {"merge_request_hooks": merge_request_hooks, "note_hooks": note_hooks} + + +def assgin_task(data): + hook_name = data.get("hook_name", None) + + if hook_name is None: + logger.error("no hook_name") + return + + handle_fuc = hook_method.get(hook_name) + if handle_fuc is None: + logger.error("system not suport this hook_name") + return + handle_fuc(data) diff --git a/PRREVIEW/src/main.py b/PRREVIEW/src/main.py new file mode 100644 index 0000000..72365c7 --- /dev/null +++ b/PRREVIEW/src/main.py @@ -0,0 +1,16 @@ +import click +from router import router +from config import init_config + + +@click.command() +@click.option("--config", default="config.yaml", help="config file path") +def main(config): + # Init config from yaml file + init_config.init_config(config) + # Set up routing + router.start_router() + + +if __name__ == "__main__": + main() diff --git a/PRREVIEW/src/reviewCode/__init__.py b/PRREVIEW/src/reviewCode/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/PRREVIEW/src/reviewCode/bot.py b/PRREVIEW/src/reviewCode/bot.py new file mode 100644 index 0000000..7a4f47c --- /dev/null +++ b/PRREVIEW/src/reviewCode/bot.py @@ -0,0 +1,14 @@ +import requests +from loguru import logger + +class Bot: + def __init__(self): + self.answer = '' + + + def chat(self, prompt): + url = 'https://modelapi.osinfra.cn/hcstream' + data = 
{'question': prompt, 'history': []} + response = requests.post(url, json = data, stream = True) + return response + \ No newline at end of file diff --git a/PRREVIEW/src/reviewCode/commenter.py b/PRREVIEW/src/reviewCode/commenter.py new file mode 100644 index 0000000..a199a16 --- /dev/null +++ b/PRREVIEW/src/reviewCode/commenter.py @@ -0,0 +1,159 @@ +from loguru import logger +import requests +import chardet +import json +from reviewCode.giteeApi import GiteeApi + +class Commenter: + COMMENT_GREETING = '(PRReviewAI COMMENT_GREETING)' + + COMMENT_TAG = '(PRReviewAI COMMENT_TAG)' + + COMMENT_REPLY_TAG = '(PRReviewAI COMMENT_REPLY_TAG)' + + SUMMARIZE_TAG = '(PRReviewAI SUMMARIZE_TAG)' + + IN_PROGRESS_START_TAG = '(PRReviewAI IN_PROGRESS_START_TAG)' + + IN_PROGRESS_END_TAG = '(PRReviewAI IN_PROGRESS_END_TAG)' + + DESCRIPTION_START_TAG = '(PRReviewAI DESCRIPTION_START_TAG)' + + DESCRIPTION_END_TAG = '(PRReviewAI DESCRIPTION_END_TAG)' + + RAW_SUMMARY_START_TAG = '(PRReviewAI RAW_SUMMARY_START_TAG)' + + RAW_SUMMARY_END_TAG = '(PRReviewAI RAW_SUMMARY_END_TAG)' + + SHORT_SUMMARY_START_TAG = '(PRReviewAI SHORT_SUMMARY_START_TAG)' + + SHORT_SUMMARY_END_TAG = '(PRReviewAI SHORT_SUMMARY_END_TAG)' + + COMMIT_ID_START_TAG = '(PRReviewAI COMMIT_ID_START_TAG)' + + COMMIT_ID_END_TAG = '(PRReviewAI COMMIT_ID_END_TAG)' + + def __init__(self, giteeApi: GiteeApi): + self.giteeApi = giteeApi + + # def comment(self, message, tag, mode): + # target = -1 + # pr_number = gitee_hook.get('number', None) + # if pr_number is not None: + # target = pr_number + # else: + # logger.error('not pull request number') + + # if tag is None: + # tag = Commenter.COMMENT_GREETING + + # body = '''%s\n%s\n%s'''%(Commenter.COMMENT_GREETING, message, tag) + + # if mode == 'create': + # self.create(body, target) + # elif mode == 'replace': + # self.replace(body, tag, target) + # else: + # logger.warning('unknown mode: %s, use \'replace\' instead') + # self.replace(body, tag, target) + + def listComments(self, 
prNumber): + # 返回当前prnumber的所有commentR + # res = requests.get('https://gitee.com/api/v5/repos/ggzzll1/temp/pulls/2/comments?access_token=7e68411eb68f4f52834ed8510a0656ce&page=1&per_page=100&direction=desc') + # return json.loads(res.content.decode('utf-8') + return self.giteeApi.listComments() + + + + def findCommentWithTag(self, tag, target): + comments = self.listComments(target) + for comment in comments: + if comment.get('body', None) and tag in comment.get('body', None): + return comment + return '' + + + def getContentWithinTags(self, content: str, startTag: str, endTag: str) -> str: + start = content.find(startTag) + end = content.find(endTag) + if start >= 0 and end >= 0: + return content[start+len(startTag):end] + + def getRawSummary(self, summary): + return self.getContentWithinTags(summary, Commenter.RAW_SUMMARY_START_TAG, Commenter.RAW_SUMMARY_END_TAG) + + def getShortSummary(self, summary): + return self.getContentWithinTags(summary, Commenter.SHORT_SUMMARY_START_TAG, Commenter.SHORT_SUMMARY_END_TAG) + + def getReviewedCommitIdsBlock(self, commentBody): + start = commentBody.find(Commenter.COMMIT_ID_START_TAG) + end = commentBody.find(Commenter.COMMIT_ID_END_TAG) + if start == -1 or end == -1: + return '' + return commentBody[start : end + len(Commenter.COMMIT_ID_END_TAG)] + + def getAllCommitIds(self): + commits = self.giteeApi.getAllCommitIds() + allCommits = [] + for commit in commits: + allCommits.append(commit.get('sha', None)) + return allCommits + + def getHighestReviewedCommitId(self, commitIds, reviewedCommitIds): + for i in range(len(commitIds)): + if commitIds[i] in reviewedCommitIds: + return commitIds[i] + return '' + + def getReviewedCommitIds(self, commentBody): + start = commentBody.find(Commenter.COMMIT_ID_START_TAG) + end = commentBody.find(Commenter.COMMIT_ID_END_TAG) + if start == -1 or end == -1: + return [] + ids = commentBody[start + len(Commenter.COMMIT_ID_START_TAG): end] + return ids.split(',') + + def 
getCommentChainsWithinRange(self, prNumber, path, startLine, endLine, tag = ''): + existingComments = self.getCommentsWithinRange(prNumber, path, startLine, endLine) + topLevelComments = [] + for comment in existingComments: + if not comment.get('in_reply_to_id', None): + topLevelComments.append(comment) + allChains = '' + chainNum = 0 + for topLevelComment in topLevelComments: + chain = self.composeCommentChain(existingComments, topLevelComment) + if chain and tag in chain: + chainNum += 1 + allChains += 'Conversation Chain {}: {}'.format(chainNum, chain) + return allChains + + def composeCommentChain(self, reviewComments, topLevelComment): + conversationChain = ['{}: {}'.format(topLevelComment.get('user', None).get('login', None), topLevelComment.get('body', None))] + for comment in reviewComments: + if comment.get('in_reply_to_id', None) == topLevelComment.get('id', None): + conversationChain.append('{}: {}'.format(comment.get('user', None).get('login', None), comment.get('body', None))) + return '\n---\n'.join(conversationChain) + + def getCommentsWithinRange(self, prNumber, path, startLine, endLine): + comments = self.listReviewComments(prNumber) + requiredComments = [] + for comment in comments: + if comment.get('path', None) == path and comment.get('body', None) and \ + comment.get('comment_type', None) == 'diff_comment' and startLine <= comment.get('new_line', None) <= endLine: + requiredComments.append(comment) + return requiredComments + + def listReviewComments(self, prNumber): + return self.giteeApi.listReviewComments(prNumber) + + def addReviewedCommitId(self, commentBody, commitId): + start = commentBody.find(Commenter.COMMIT_ID_START_TAG) + end = commentBody.find(Commenter.COMMIT_ID_END_TAG) + if start == -1 or end == -1: + return '{}\n{}\n{}\n{}'.format(commentBody, Commenter.COMMIT_ID_START_TAG, commitId, Commenter.COMMIT_ID_END_TAG) + ids = commentBody[start + len(Commenter.COMMIT_ID_START_TAG): end] + ids = '{},{}'.format(ids, commitId) + return 
'{}{}{}'.format(commentBody[0:start + len(Commenter.COMMIT_ID_START_TAG)], ids, commentBody[end:]) + + \ No newline at end of file diff --git a/PRREVIEW/src/reviewCode/giteeApi.py b/PRREVIEW/src/reviewCode/giteeApi.py new file mode 100644 index 0000000..18063f8 --- /dev/null +++ b/PRREVIEW/src/reviewCode/giteeApi.py @@ -0,0 +1,80 @@ +from loguru import logger +import requests +import json + +class GiteeApi: + def __init__(self, data): + self.token = None + self.cookies = None + self.owner = None + self.repo = None + project = data.get('project', None) + if project: + self.owner = project.get('namespace', None) + self.repo = project.get('path', None) + self.pr = data.get('pull_request', None) + self.prNumber = self.pr.get('number', None) + if (not self.owner) or (not self.repo) or (not self.pr) or (not self.prNumber): + logger.error('not giteeApi param') + + def listComments(self): + # 此处用的是pr中的comment,而不是issue中的comment + page = 1 + allComments = [] + while True: + url = 'https://gitee.com/api/v5/repos/{}/{}/pulls/{}/comments?access_token={}&page={}&per_page=100&direction=desc'. \ + format(self.owner, self.repo, self.prNumber, self.token, page) + res = json.loads(requests.get(url = url).content.decode('utf-8')) + # res = requests.get(url = url).json() + allComments.extend(res) + page += 1 + if not res or len(res) < 100: + break + return allComments + + def getAllCommitIds(self): + url = 'https://gitee.com/api/v5/repos/{}/{}/pulls/{}/commits?access_token={}'. \ + format(self.owner, self.repo, self.prNumber, self.token) + return json.loads(requests.get(url = url).content.decode('utf-8')) + + def listReviewComments(self, prNumber): + page = 1 + allComments = [] + while True: + url = 'https://gitee.com/api/v5/repos/{}/{}/pulls/{}/comments?access_token={}&page={}&per_page=100'. 
\ + format(self.owner, self.repo, prNumber, self.token, page) + res = json.loads(requests.get(url = url).content.decode('utf-8')) + allComments.extend(res) + page += 1 + if not res or len(res) < 100: + break + return allComments + + def submitReview(self, body, commitId): + url = 'https://gitee.com/api/v5/repos/{}/{}/pulls/{}/comments'.format(self.owner, self.repo, self.prNumber) + data = { + 'access_token': self.token, + 'body': body, + 'commit_id': commitId + } + res = requests.post(url = url, data = data) + if res.status_code != 201: + logger.error('post to gitee failed') + logger.error(res.text) + logger.error(res.status_code) + else: + logger.info('post to gitee succeed') + + def fetchPR(self): + url = 'https://gitee.com/api/v5/repos/{}/{}/pulls/{}?access_token={}'. \ + format(self.owner, self.repo, self.prNumber, self.token) + return json.loads(requests.get(url = url).content.decode('utf-8')) + + def compare(self, formerSha, latterSha): + url = 'https://gitee.com/api/v5/repos/{}/{}/compare/{}...{}?access_token={}&straight=true'. 
\ + format(self.owner, self.repo, formerSha, latterSha, self.token) + return json.loads(requests.get(url = url).content.decode('utf-8')) + + def fetchFileContent(self, rawUrl): + url = '{}?access_token={}'.format(rawUrl, self.token) + return requests.get(url, cookies = self.cookies).content.decode('utf-8') diff --git a/PRREVIEW/src/reviewCode/input.py b/PRREVIEW/src/reviewCode/input.py new file mode 100644 index 0000000..f12af0e --- /dev/null +++ b/PRREVIEW/src/reviewCode/input.py @@ -0,0 +1,48 @@ +from loguru import logger +import requests + +class Input: + def __init__(self, data): + self.systemMessage = '' + self.title = data.get("title", None) + self.description = '' + if data.get("pull_request", None): + self.description = data.get("pull_request", None).get("body", None) + self.rawSummary = '' + self.shortSummary = '' + self.filename = '' + self.fileContent = '' + self.fileDiff = '' + self.patches = '' + self.diff = '' + self.commentChain = '' + self.comment = '' + + def render(self, content): + if not content: + return '' + if self.systemMessage: + content = content.replace('$systemMessage', self.systemMessage) + if self.title: + content = content.replace('$title', self.title) + if self.description: + content = content.replace('$description', self.description) + if self.rawSummary: + content = content.replace('$rawSummary', self.rawSummary) + if self.shortSummary: + content = content.replace('$shortSummary', self.shortSummary) + if self.filename: + content = content.replace('$filename', self.filename) + if self.fileDiff: + content = content.replace('$fileDiff', self.fileDiff) + if self.patches: + content = content.replace('$patches', self.patches) + if self.diff: + content = content.replace('$diff', self.diff) + if self.commentChain: + content = content.replace('$commentChain', self.commentChain) + if self.comment: + content = content.replace('$comment', self.comment) + return content + + diff --git a/PRREVIEW/src/reviewCode/main.py 
b/PRREVIEW/src/reviewCode/main.py new file mode 100644 index 0000000..c4afc11 --- /dev/null +++ b/PRREVIEW/src/reviewCode/main.py @@ -0,0 +1,15 @@ +from reviewCode.review import codeReview +from reviewCode.options import Options +from reviewCode.prompts import Prompts +from reviewCode.giteeApi import GiteeApi +from reviewCode.bot import Bot + +def reviewMain(data): + # options = Options() + # prompts = Prompts() + # bot = Bot() + # giteeApi = GiteeApi() + + + if data.get('noteable_type', None) == 'PullRequest': + codeReview(data) diff --git a/PRREVIEW/src/reviewCode/options.py b/PRREVIEW/src/reviewCode/options.py new file mode 100644 index 0000000..654c936 --- /dev/null +++ b/PRREVIEW/src/reviewCode/options.py @@ -0,0 +1,28 @@ +import fnmatch + + +class Options: + def __init__(self): + self.maxFiles = 1000 + self.pathFilters= '' + self.TokenLimits = 1024 + self.rules = {'*.txt': False, '*.py': False} + self.debug = False + + def checkPath(self, path): + # 默认所有路径均符合rules,值为True的路径才是过滤掉的路径 + if len(self.rules) == 0: + return True + included = False + excluded = False + inclusionRuleExists = False + + for aRule, exclude in self.rules.items(): + if fnmatch.fnmatch(path, aRule): + if exclude: + excluded = True + else: + included = True + if not exclude: + inclusionRuleExists = True + return ((not inclusionRuleExists) or included) and (not excluded) \ No newline at end of file diff --git a/PRREVIEW/src/reviewCode/prompts.py b/PRREVIEW/src/reviewCode/prompts.py new file mode 100644 index 0000000..e8e4a8e --- /dev/null +++ b/PRREVIEW/src/reviewCode/prompts.py @@ -0,0 +1,106 @@ +from reviewCode.input import Input + +class Prompts: + + def __init__(self): + self.summarize = '' + self.summarizeReleaseNotes = '' + self.summarizeFileDiff = ''' + Please summarize the following codes + ### pr title + $title + ### description + \'\'\'$description\'\'\' + ### diff + \'\'\'#fileDiff\'\'\' + ''' + self.triageFileDiff = ''' + Please triagle the diff as \'NEEDS_REVIEW\' or 
class Prompts:
    """Prompt templates sent to the model, plus helpers that render them.

    Templates use `$name` placeholders which are filled in by the `render`
    method of the object passed to the render* helpers.
    """

    def __init__(self):
        self.summarize = ''
        self.summarizeReleaseNotes = ''
        # Placeholders here must use the `$name` form, otherwise render()
        # leaves them in the prompt as literal text.
        self.summarizeFileDiff = '''
        Please summarize the following codes
        ### pr title
        $title
        ### description
        \'\'\'$description\'\'\'
        ### diff
        \'\'\'$fileDiff\'\'\'
        '''
        # NOTE(review): "triagle" is presumably a typo for "triage"; left
        # unchanged because this text is sent to the model verbatim.
        self.triageFileDiff = '''
        Please triagle the diff as \'NEEDS_REVIEW\' or \'APPROVED\'.
        '''
        self.reviewFileDiff = '''
        Input: New hunks annotated with line numbers and old hunks (replaced code). Hunks represent incomplete code fragments.
        Additional Context: PR title, description, summaries and comment chains.
        Task: Review new hunks for substantive issues using provided context and respond with comments if necessary.
        Output: Review comments in markdown with exact line number ranges in new hunks. Start and end line numbers must be within the same hunk. For single-line comments, start=end line number. Must use example response format below.
        Use fenced code blocks using the relevant language identifier where applicable.
        Don't annotate code snippets with line numbers. Format and indent code correctly.
        Do not use `suggestion` code blocks.
        For fixes, use `diff` code blocks, marking changes with `+` or `-`. The line number range for comments with fix snippets must exactly match the range to replace in the new hunk.

        - Do NOT provide general feedback, summaries, explanations of changes, or praises
        for making good additions.
        - Focus solely on offering specific, objective insights based on the
        given context and refrain from making broad comments about potential impacts on
        the system or question intentions behind the changes.

        If there are no issues found on a line range, you MUST respond with the
        text `LGTM!` for that line range in the review section.

        ## Example

        ### Example changes

        ---new_hunk---
        ```
          z = x / y
            return z

        20: def add(x, y):
        21:     z = x + y
        22:     retrn z
        23:
        24: def multiply(x, y):
        25:     return x * y

        def subtract(x, y):
          z = x - y
        ```

        ---old_hunk---
        ```
          z = x / y
            return z

        def add(x, y):
            return x + y

        def subtract(x, y):
            z = x - y
        ```

        ---comment_chains---
        ```
        Please review this change.
        ```

        ---end_change_section---

        ### Example response

        22-22:
        There's a syntax error in the add function.
        ```diff
        -    retrn z
        +    return z
        ```
        ---
        24-25:
        LGTM!
        ---

        ## Changes made to `$filename` for your review

        $patches
        '''

    def renderSummarizeFileDiff(self, inputIn, reviewSimpleChanges):
        """Render the file-summary prompt.

        Args:
            inputIn: object exposing `render(template) -> str`.
            reviewSimpleChanges: when falsy, the triage instruction is
                appended to the summary prompt before rendering.

        Returns:
            Whatever `inputIn.render` returns for the assembled template.
        """
        prompt = self.summarizeFileDiff
        if not reviewSimpleChanges:
            prompt += self.triageFileDiff
        # Fill the template's $placeholders with the values held by inputIn.
        return inputIn.render(prompt)

    def renderReviewFileDiff(self, inputIn: "Input"):
        """Render the per-file review prompt using `inputIn.render`."""
        return inputIn.render(self.reviewFileDiff)
def codeReview(data):
    """Review the changed files of a pull request and post the result.

    Steps, in order:
      1. Bail out when the PR description contains IGNORE_KEYWORD.
      2. Find the last reviewed commit from an existing summary comment.
      3. Diff that commit (or the base commit) against the PR head and keep
         the files accepted by `Options.checkPath`.
      4. Split every file patch into hunks and ask the bot to review as many
         hunks as fit into `Options.TokenLimits`.
      5. Submit the review text and the updated reviewed-commit block.

    Args:
        data: payload dict handed to `GiteeApi` and `Input`.

    Returns:
        None. The function returns early (after logging) whenever there is
        nothing to review.
    """
    options = Options()
    prompts = Prompts()
    bot = Bot()
    giteeApi = GiteeApi(data)
    inputs = Input(data)
    commenter = Commenter(giteeApi)

    # Skip this review when the PR body contains IGNORE_KEYWORD. The body
    # can be None when the PR has no description.
    if IGNORE_KEYWORD in (inputs.description or ''):
        logger.info("skipped: body contains ignore_keyword")
        return

    # Summary comment already present on the PR, if any.
    existingSummarizeComment = commenter.findCommentWithTag(Commenter.SUMMARIZE_TAG, giteeApi.prNumber)
    existingCommitIdsBlock = ''
    if existingSummarizeComment:
        existingSummarizeCommentBody = existingSummarizeComment.get('body', None)
        inputs.rawSummary = commenter.getRawSummary(existingSummarizeCommentBody)
        inputs.shortSummary = commenter.getShortSummary(existingSummarizeCommentBody)
        existingCommitIdsBlock = commenter.getReviewedCommitIdsBlock(existingSummarizeCommentBody)

    allCommitIds = commenter.getAllCommitIds()
    highestReviewedCommitId = ''
    if existingCommitIdsBlock:
        highestReviewedCommitId = commenter.getHighestReviewedCommitId(
            allCommitIds, commenter.getReviewedCommitIds(existingCommitIdsBlock))

    # Fetch the PR.
    #   head: the patch branch, i.e. the commit the author submitted
    #   base: the branch that receives the change
    prs = giteeApi.fetchPR()
    head = prs.get('head', None)
    base = prs.get('base', None)
    headSha = head.get('sha', None) if head else None
    baseSha = base.get('sha', None) if base else None

    if (not highestReviewedCommitId) or highestReviewedCommitId == headSha:
        logger.info('will review from the base commit: {}'.format(baseSha))
        highestReviewedCommitId = baseSha
    else:
        logger.info('will review from commit: {}'.format(highestReviewedCommitId))

    # Incremental diff (since the last reviewed commit) and full diff
    # (since the base commit), both against the PR head.
    incrementalDiff = giteeApi.compare(highestReviewedCommitId, headSha)
    targetBranchDiff = giteeApi.compare(baseSha, headSha)

    incrementalFiles = incrementalDiff.get('files', None)
    targetBranchFiles = targetBranchDiff.get('files', None)
    if (not incrementalFiles) and (not targetBranchFiles):
        logger.warning('skipped: files data is missing')
        return

    # Either list may still be missing on its own, hence the `or []`.
    incrementalFilesNames = {
        aFile.get('filename')
        for aFile in (incrementalFiles or [])
        if aFile.get('filename', None)
    }
    files = [
        aFile for aFile in (targetBranchFiles or [])
        if aFile.get('filename', None) in incrementalFilesNames
    ]
    if not files:
        logger.warning('skipped: files is null')
        return

    filterSelectedFiles = []
    filterIgnoredFiles = []
    for aFile in files:
        if options.checkPath(aFile.get('filename', None)):
            filterSelectedFiles.append(aFile)
        else:
            logger.info('skip for excluded path: %s'%(aFile.get('filename', None)))
            filterIgnoredFiles.append(aFile)
    if not filterSelectedFiles:
        logger.warning('skipped: filterSelectedFiles is null')
        return

    # Commit ids covered by this review.
    commits = [commit.get('sha', None) for commit in (incrementalDiff.get('commits', None) or [])]
    if not commits:
        logger.warning('skipped: commits is null')
        return

    # Split every file's patch into hunks.
    filesAndChanges = []
    for aFile in filterSelectedFiles:
        if not giteeApi.prNumber:
            logger.warning('skipped: pr is null')
            continue

        fileContent = ''
        try:
            # content_url returned nothing, so raw_url is used instead.
            rawUrl = aFile.get('raw_url', None)
            if rawUrl:
                fileContent = giteeApi.fetchFileContent(rawUrl)
        except Exception as e:
            # Best effort: the review can proceed without the full file.
            logger.warning('failed to get file contents: %s'%(e))

        fileDiff = aFile.get('patch', '')
        patches = []
        for patch in splitPatch(fileDiff):
            patchLines = patchStartEndLine(patch)
            if not patchLines:
                continue
            hunks = parsePatch(patch)
            if not hunks:
                continue
            hunksStr = '''---new_hunk---\n\'\'\'\n%s\n\'\'\'\n---old_hunk---\n\'\'\'\n%s\n\'\'\''''%(hunks.get('newHunk', None), hunks.get('oldHunk', None))
            newHunkRange = patchLines.get('newHunk', None)
            patches.append([newHunkRange.get('startLine', None), newHunkRange.get('endLine', None), hunksStr])
        if patches:
            filesAndChanges.append([aFile.get('filename', None), fileContent, fileDiff, patches])

    if not filesAndChanges:
        logger.error('skipped: no files to review')
        return

    # The summary stage is skipped; files go straight to review.
    reviewsFailed = []
    reviewContent = []

    def doReview(filename, fileDiff, patches):
        """Review one file: pack hunks into the prompt and query the bot."""
        logger.info('reviewing: {}'.format(filename))
        ins = copy.deepcopy(inputs)
        ins.filename = filename
        ins.fileDiff = fileDiff

        tokens = getTokenCount(prompts.renderReviewFileDiff(ins))

        # Count how many hunk strings fit into the token budget.
        patchesToPack = 0
        for _, _, patch in patches:
            patchTokens = getTokenCount(patch)
            if tokens + patchTokens > options.TokenLimits:
                logger.info('only packing {}/{} patches, tokens: {}/{}'.format(
                    patchesToPack, len(patches), tokens, options.TokenLimits))
                break
            tokens += patchTokens
            patchesToPack += 1

        patchesPacked = 0
        for startLine, endLine, patch in patches:
            if patchesPacked >= patchesToPack:
                logger.info('unable to pack more patches into this request, packed: {}, total patches: {}, skipping'.format(patchesPacked, len(patches)))
                if options.debug:
                    logger.info('prompt so far: {}'.format(prompts.renderReviewFileDiff(ins)))
                break
            patchesPacked += 1

            commentChain = ''
            allChains = commenter.getCommentChainsWithinRange(
                giteeApi.prNumber, filename, startLine, endLine, commenter.COMMENT_REPLY_TAG)
            if len(allChains) > 0:
                logger.info('Found comment chains: {} for {}'.format(allChains, filename))
                commentChain = allChains
            # Only attach the comment chain when it still fits the budget.
            commentChainTokens = getTokenCount(commentChain)
            if tokens + commentChainTokens > options.TokenLimits:
                commentChain = ''
            else:
                tokens += commentChainTokens

            ins.patches += patch
            if commentChain:
                ins.patches += '---comment_chains---\n\'\'\'{}\'\'\'---end_change_section---'.format(commentChain)

        if patchesPacked > 0:
            prompt = prompts.renderReviewFileDiff(ins)
            if options.debug:
                logger.info('review prompt: {}'.format(prompt))
            res = bot.chat(prompt)

            if res.status_code != 200:
                logger.info('review: nothing obtained from openai')
                reviewsFailed.append('{} (no response)'.format(filename))
                return
            ans = parseReview(res)
            if not ans:
                # parseReview yields nothing when the reply had no usable answer.
                reviewsFailed.append('{} (no answer)'.format(filename))
                return
            reviewContent.append(ans)

    for filename, _, fileDiff, patches in filesAndChanges:
        doReview(filename, fileDiff, patches)

    if reviewsFailed:
        logger.warning('review failed for: {}'.format(', '.join(reviewsFailed)))

    # Record headSha in the reviewed-commit block of the summary comment.
    summarizeComment = ''
    summarizeComment += commenter.addReviewedCommitId(existingCommitIdsBlock, headSha)

    # NOTE(review): reviewContent is a list of per-file answers and is passed
    # as-is, exactly as before — confirm submitReview accepts a list body.
    giteeApi.submitReview(body = reviewContent, commitId = commits[-1])
    giteeApi.submitReview(body = summarizeComment, commitId = commits[-1])
# Unified-diff hunk header such as `@@ -12,7 +12,9 @@`. The `,count` part is
# optional because it is omitted when the count is 1 (`@@ -3 +3 @@`).
_HUNK_HEADER = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@')


def parseReview(response):
    """Extract the answer text from a streamed bot response.

    Every non-empty line yielded by `response.iter_lines()` is decoded as
    UTF-8 and searched for an `"answer": "<text>"` pair. All matches except
    the last one are concatenated.
    NOTE(review): presumably the final chunk repeats the complete answer,
    which is why it is dropped — confirm against the bot's stream format.

    Args:
        response: object exposing `iter_lines()` that yields bytes.

    Returns:
        The combined answer string, or None when fewer than two answer
        fragments were found.
    """
    answerPattern = re.compile(r'"answer":\s+"([^"]+)"')
    fragments = []
    for rawLine in response.iter_lines():
        if not rawLine:
            continue
        found = answerPattern.search(rawLine.decode("utf-8"))
        if found:
            fragments.append(found.group(1))

    if len(fragments) < 2:
        logger.info("no answer")
        return
    return "".join(fragments[:-1])


def splitPatch(patch):
    """Split a file patch into one string per hunk.

    Each returned string starts with its `@@ ... @@` header line. Text that
    precedes the first header is discarded. Trailing empty lines and a
    trailing "\\ No newline at end of file" marker are removed; real content
    lines at the end of the patch are kept.

    Args:
        patch: unified-diff text of one file; None or '' yields [].

    Returns:
        List of hunk strings, possibly empty.
    """
    if not patch:
        return []
    lines = patch.split('\n')
    while lines and (lines[-1] == '' or lines[-1].startswith('\\ No newline')):
        lines.pop()

    headerIndexes = [i for i, line in enumerate(lines) if _HUNK_HEADER.match(line)]
    hunkEnds = headerIndexes[1:] + [len(lines)]
    return ['\n'.join(lines[begin:end]) for begin, end in zip(headerIndexes, hunkEnds)]


def patchStartEndLine(patch):
    """Return the old/new line ranges announced by a hunk header.

    Args:
        patch: hunk text that begins with an `@@ -a,b +c,d @@` header.

    Returns:
        {'oldHunk': {'startLine', 'endLine'}, 'newHunk': {...}} where
        endLine is `start + count - 1`, or None when `patch` does not start
        with a hunk header.
    """
    header = _HUNK_HEADER.match(patch)
    if header is None:
        return None
    oldBegin, oldCount, newBegin, newCount = header.groups()
    oldBegin = int(oldBegin)
    newBegin = int(newBegin)
    # A missing count means the hunk side covers exactly one line.
    oldCount = 1 if oldCount is None else int(oldCount)
    newCount = 1 if newCount is None else int(newCount)
    return {
        'oldHunk': {'startLine': oldBegin, 'endLine': oldBegin + oldCount - 1},
        'newHunk': {'startLine': newBegin, 'endLine': newBegin + newCount - 1},
    }


def parsePatch(patch):
    """Convert one hunk into its old text and a line-numbered new text.

    Removed lines go to the old hunk only. Added lines go to the new hunk
    prefixed with `<new line number>: `. Context lines go to both; in the
    new hunk they are numbered only when they are not within the first or
    last three lines of the hunk, or when the hunk adds nothing at all.

    Args:
        patch: hunk text starting with its `@@ ... @@` header.

    Returns:
        {'oldHunk': str, 'newHunk': str}, or None when `patch` has no valid
        hunk header.
    """
    hunkInfo = patchStartEndLine(patch)
    if not hunkInfo:
        return
    newLine = hunkInfo['newHunk']['startLine']

    lines = patch.split('\n')[1:]  # drop the @@ header line
    if lines and lines[-1] == '':  # drop the empty trailing line
        lines = lines[:-1]

    skipStart = 3
    skipEnd = 3
    # True when the hunk only deletes content and adds nothing.
    removalOnly = not any(line.startswith('+') for line in lines)

    oldHunkLines = []
    newHunkLines = []
    for currentLine, line in enumerate(lines, start=1):
        if line.startswith('-'):
            oldHunkLines.append(line[1:])
            continue
        if line.startswith('+'):
            # Added lines carry their number so reviews can cite line ranges.
            newHunkLines.append(str(newLine) + ': ' + line[1:])
        else:
            oldHunkLines.append(line)
            if removalOnly or (skipStart < currentLine <= len(lines) - skipEnd):
                newHunkLines.append(str(newLine) + ': ' + line)
            else:
                newHunkLines.append(line)
        newLine += 1
    return {"oldHunk": '\n'.join(oldHunkLines), "newHunk": '\n'.join(newHunkLines)}
def start_router():
    """Start the Flask server on all interfaces, port 8080.

    Debug mode is off unless the FLASK_DEBUG environment variable is set to
    1/true/yes. The server listens on 0.0.0.0, and Flask's debug mode enables
    the interactive debugger, which must not be reachable from outside a
    development machine.
    """
    import os  # local import: this module only imports flask names at top level

    debug = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes")
    app.run("0.0.0.0", debug=debug, port=8080)