如何使用 Python 检索 GitHub 存储库数据
您的组织是否拥有太多 GitHub 存储库,并且您需要一种简单的方法来总结和记录每个存储库的内容,以用于报告、仪表板或审计目的?下面是一个使用 GitHub API 完成该操作的快速脚本。
功能:
get_repo_info(owner, repo):
get_collaborators(collaborators_url):
get_languages(languages_url):
立即学习“Python免费学习笔记(深入)”;
get_open_issues(owner, repo):
get_repo_data(repo_url):
import json
import os

import requests
from pymongo import MongoClient

# MongoDB setup (replace with your actual connection details)
client = MongoClient("mongodb://localhost:27017/")
db = client["github_repos"]  # Database name
collection = db["repos"]  # Collection name

GITHUB_API = "https://api.github.com"
# Seconds to wait for GitHub before giving up. requests applies no timeout by
# default, so without this a stalled connection would block the script forever.
REQUEST_TIMEOUT = 10


def _github_get(url):
    """Send a GET request to the GitHub API with shared headers and a timeout.

    If the ``GITHUB_TOKEN`` environment variable is set, its value is sent as
    a bearer token so that endpoints requiring authentication can succeed.
    When it is unset the request is anonymous.

    Args:
        url: Fully-qualified GitHub API URL.

    Returns:
        The ``requests.Response`` object; callers inspect ``status_code``.

    Raises:
        requests.exceptions.RequestException: On connection failure or timeout.
    """
    headers = {"Accept": "application/vnd.github+json"}
    token = os.environ.get("GITHUB_TOKEN")
    if token:
        headers["Authorization"] = f"Bearer {token}"
    return requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)


def _split_repo_url(repo_url):
    """Extract ``(owner, repo)`` from the last two path segments of a URL.

    Trailing slashes and a trailing ``.git`` suffix are ignored so that
    ``https://github.com/o/r/`` and ``https://github.com/o/r.git`` both
    yield ``("o", "r")``.

    Raises:
        ValueError: If the URL has fewer than two ``/``-separated segments.
    """
    segments = repo_url.strip().rstrip("/").split("/")
    if len(segments) < 2:
        raise ValueError(f"Cannot extract owner/repo from {repo_url!r}")
    owner, repo = segments[-2:]
    if repo.endswith(".git"):
        repo = repo[: -len(".git")]
    return owner, repo


def get_repo_info(owner, repo):
    """Fetch the repository metadata document for ``owner/repo``.

    Returns:
        The decoded JSON dict on HTTP 200; otherwise prints the status code
        and returns ``None``.
    """
    response = _github_get(f"{GITHUB_API}/repos/{owner}/{repo}")
    if response.status_code == 200:
        return response.json()
    print(f"Error: {response.status_code}")
    return None


def get_collaborators(collaborators_url):
    """Return the login names of the collaborators listed at the given URL.

    NOTE: per the GitHub REST documentation this endpoint requires an
    authenticated request; set ``GITHUB_TOKEN`` to get a non-empty result.

    Returns:
        A list of login strings on HTTP 200, otherwise an empty list.
    """
    response = _github_get(collaborators_url)
    if response.status_code == 200:
        return [collaborator["login"] for collaborator in response.json()]
    return []


def get_languages(languages_url):
    """Return the names of the languages reported at the given URL.

    Returns:
        A list of language names (the keys of the JSON object) on HTTP 200,
        otherwise an empty list.
    """
    response = _github_get(languages_url)
    if response.status_code == 200:
        return list(response.json().keys())
    return []


def get_open_issues(owner, repo):
    """Return the open issues of ``owner/repo`` as decoded JSON objects.

    NOTE(review): only a single request is made, so at most one page of
    results is returned, and GitHub's issues endpoint also lists pull
    requests -- confirm whether either needs handling for your report.

    Returns:
        A list of issue dicts on HTTP 200; otherwise prints the status code
        and returns an empty list.
    """
    response = _github_get(f"{GITHUB_API}/repos/{owner}/{repo}/issues?state=open")
    if response.status_code == 200:
        return response.json()
    print(f"Error: {response.status_code}")
    return []


def get_latest_release(owner, repo):
    """Return the tag name of the latest published release of ``owner/repo``.

    Returns:
        The release's ``tag_name`` on HTTP 200, or ``None`` when the request
        does not succeed (for example when the repository has no releases).
    """
    response = _github_get(f"{GITHUB_API}/repos/{owner}/{repo}/releases/latest")
    if response.status_code == 200:
        return response.json().get("tag_name")
    return None


def get_repo_data(repo_url):
    """Collect a summary of a GitHub repository and store it in MongoDB.

    Args:
        repo_url: Repository URL whose last two path segments are the owner
            and the repository name.

    Returns:
        The summary dict that was inserted, or ``None`` if the repository
        metadata could not be fetched. ``insert_one`` adds an ``_id`` key
        (a BSON ObjectId) to the returned dict.

    Raises:
        ValueError: If ``repo_url`` does not contain an owner and a name.
    """
    owner, repo = _split_repo_url(repo_url)
    repo_info = get_repo_info(owner, repo)
    if not repo_info:
        return None

    data = {
        "Github URL": repo_url,
        "Project name": repo_info["name"],
        "Project owner": repo_info["owner"]["login"],
        # The API returns a URI template ("...{/collaborator}"); drop the
        # template part to obtain the plain listing URL.
        "List users with access": get_collaborators(
            repo_info["collaborators_url"].split("{")[0]
        ),
        "Programming languages used": get_languages(repo_info["languages_url"]),
        "Security/visibility level": repo_info["visibility"],
        "Summary": repo_info["description"],
        "Last maintained": repo_info["pushed_at"],
        # Previously this held the default branch name, which is unrelated
        # to releases; query the releases endpoint instead.
        "Last release": get_latest_release(owner, repo),
        "Open issues": get_open_issues(owner, repo),
    }

    # Insert the data into MongoDB
    collection.insert_one(data)
    print("Data inserted into MongoDB successfully.")
    return data


if __name__ == "__main__":
    # Example usage (replace with a real https://github.com/<owner>/<repo> URL)
    repo_url = "https://github.com/URL"
    repo_data = get_repo_data(repo_url)
    if repo_data:
        # insert_one() added an ObjectId under "_id", which json cannot
        # encode natively; default=str renders it as its hex string.
        print(json.dumps(repo_data, indent=4, default=str))