fix: remove redundant code
andyhuang18 committed Dec 5, 2024
1 parent a046607 commit bc18fac
Showing 3 changed files with 0 additions and 294 deletions.
98 changes: 0 additions & 98 deletions dashboard/company/scripts/workflow_1.py
@@ -235,104 +235,6 @@ def fetch_and_upload_mergedPRcount_to_clickhouse(csv_file_path):

print("CSV 数据已成功上传到目标 ClickHouse 数据库")

# 获取最近6个月的日期

def get_last_six_months():
today = datetime.today()
return [(today - timedelta(days=30 * i)).strftime('%Y-%m') for i in range(6)][::-1]

last_six_months = get_last_six_months()

# API请求的基础URL模板
activity_url_template = "https://oss.x-lab.info/open_digger/github/{}/activity.json"
openrank_url_template = "https://oss.x-lab.info/open_digger/github/{}/openrank.json"

# 数据存储
data = []

# 从URL获取数据并过滤最近六个月的数据
def fetch_and_filter(url, months):
try:
response = requests.get(url)
response.raise_for_status()
json_data = response.json()
return {month: json_data.get(month, None) for month in months}
except requests.RequestException as e:
print(f"请求失败 {url}: {e}")
return {month: None for month in months}

# 遍历每个社区和仓库
for community, repos in data.items():
for repo_name in repos:
print(f"处理仓库: {repo_name}")

# 获取数据
activity_url = activity_url_template.format(repo_name)
openrank_url = openrank_url_template.format(repo_name)

activity_data = fetch_and_filter(activity_url, last_six_months)
openrank_data = fetch_and_filter(openrank_url, last_six_months)

# 将数据存储到目标格式
for month in last_six_months:
data.append({
"community": community,
"repo_name": repo_name,
"month": month,
"activity": activity_data.get(month),
"openrank": openrank_data.get(month)
})

# 转换为DataFrame
df = pd.DataFrame(data)

# 删除 activity 和 openrank 列中有空值的行
df_cleaned = df.dropna(subset=['activity', 'openrank'])

# 保存清洗后的结果到 CSV 文件
df_cleaned.to_csv(csv_file_path, index=False)
print(f"数据已保存到 {csv_file_path}")

# Step 2: 删除目标表并重新创建
try:
# 删除现有表
target_client.execute(
"DROP TABLE IF EXISTS community_repository_data_cleaned")
print("旧表已删除")
except Exception as e:
print(f"删除表时出错: {e}")

# 重新创建目标表
create_table_query = """
CREATE TABLE community_repository_data_cleaned
(
community String,
repo_name String,
month String,
activity String,
openrank String
) ENGINE = MergeTree()
ORDER BY (community, repo_name, month);
"""
target_client.execute(create_table_query)
print("目标表已重新创建")

# Step 3: 读取清洗后的 CSV 文件并将数据插入到目标数据库
with open(csv_file_path, 'r') as csvfile:
csv_reader = csv.DictReader(csvfile)

# 将清洗后的 CSV 数据插入到目标 ClickHouse 表
for row in csv_reader:
# 构造插入的 SQL 语句
insert_query = """
INSERT INTO community_repository_data_cleaned (community, repo_name, month, activity, openrank)
VALUES
"""
values = f"('{row['community']}', '{row['repo_name']}', '{row['month']}', '{row['activity']}', '{row['openrank']}')"
target_client.execute(insert_query + values)

print("清洗后的 CSV 数据已成功上传到目标 ClickHouse 数据库")


# 配置源 ClickHouse 的连接参数
source_client = Client(
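
The removed loop above sends one INSERT per CSV row and splices the values directly into the SQL string, which both round-trips per row and breaks on values containing quotes. Below is a minimal sketch of a batched, parameterized alternative with clickhouse-driver; the table name and column list come from the code above, while the connection parameters and CSV path are illustrative placeholders.

# A sketch, not the project's code: batched, parameterized insert with
# clickhouse-driver. Host and CSV path are assumptions for illustration.
import csv

from clickhouse_driver import Client

target_client = Client(host='localhost')  # connection params assumed

with open('community_repository_data_cleaned.csv', 'r') as csvfile:
    rows = [
        (row['community'], row['repo_name'], row['month'],
         row['activity'], row['openrank'])
        for row in csv.DictReader(csvfile)
    ]

# clickhouse-driver sends all rows as typed parameters in a single query,
# so no value is ever spliced into the SQL text.
target_client.execute(
    "INSERT INTO community_repository_data_cleaned "
    "(community, repo_name, month, activity, openrank) VALUES",
    rows,
)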
98 changes: 0 additions & 98 deletions dashboard/company/scripts/workflow_2.py
@@ -255,104 +255,6 @@ def fetch_and_upload_issueinfo_to_clickhouse(csv_file_path):

print("CSV 数据已成功上传到目标 ClickHouse 数据库")

# 获取最近6个月的日期

def get_last_six_months():
today = datetime.today()
return [(today - timedelta(days=30 * i)).strftime('%Y-%m') for i in range(6)][::-1]

last_six_months = get_last_six_months()

# API请求的基础URL模板
activity_url_template = "https://oss.x-lab.info/open_digger/github/{}/activity.json"
openrank_url_template = "https://oss.x-lab.info/open_digger/github/{}/openrank.json"

# 数据存储
data = []

# 从URL获取数据并过滤最近六个月的数据
def fetch_and_filter(url, months):
try:
response = requests.get(url)
response.raise_for_status()
json_data = response.json()
return {month: json_data.get(month, None) for month in months}
except requests.RequestException as e:
print(f"请求失败 {url}: {e}")
return {month: None for month in months}

# 遍历每个社区和仓库
for community, repos in data.items():
for repo_name in repos:
print(f"处理仓库: {repo_name}")

# 获取数据
activity_url = activity_url_template.format(repo_name)
openrank_url = openrank_url_template.format(repo_name)

activity_data = fetch_and_filter(activity_url, last_six_months)
openrank_data = fetch_and_filter(openrank_url, last_six_months)

# 将数据存储到目标格式
for month in last_six_months:
data.append({
"community": community,
"repo_name": repo_name,
"month": month,
"activity": activity_data.get(month),
"openrank": openrank_data.get(month)
})

# 转换为DataFrame
df = pd.DataFrame(data)

# 删除 activity 和 openrank 列中有空值的行
df_cleaned = df.dropna(subset=['activity', 'openrank'])

# 保存清洗后的结果到 CSV 文件
df_cleaned.to_csv(csv_file_path, index=False)
print(f"数据已保存到 {csv_file_path}")

# Step 2: 删除目标表并重新创建
try:
# 删除现有表
target_client.execute(
"DROP TABLE IF EXISTS community_repository_data_cleaned")
print("旧表已删除")
except Exception as e:
print(f"删除表时出错: {e}")

# 重新创建目标表
create_table_query = """
CREATE TABLE community_repository_data_cleaned
(
community String,
repo_name String,
month String,
activity String,
openrank String
) ENGINE = MergeTree()
ORDER BY (community, repo_name, month);
"""
target_client.execute(create_table_query)
print("目标表已重新创建")

# Step 3: 读取清洗后的 CSV 文件并将数据插入到目标数据库
with open(csv_file_path, 'r') as csvfile:
csv_reader = csv.DictReader(csvfile)

# 将清洗后的 CSV 数据插入到目标 ClickHouse 表
for row in csv_reader:
# 构造插入的 SQL 语句
insert_query = """
INSERT INTO community_repository_data_cleaned (community, repo_name, month, activity, openrank)
VALUES
"""
values = f"('{row['community']}', '{row['repo_name']}', '{row['month']}', '{row['activity']}', '{row['openrank']}')"
target_client.execute(insert_query + values)

print("清洗后的 CSV 数据已成功上传到目标 ClickHouse 数据库")


# 配置源 ClickHouse 的连接参数
source_client = Client(
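
The get_last_six_months() helper in these diffs approximates a month as 30 days, so near month boundaries the list can skip or repeat a calendar month (from March 1, stepping back 30 days lands on January 30, skipping February entirely). Below is a stdlib-only sketch that walks real calendar months instead; the function name is illustrative.

# A sketch of a calendar-accurate alternative to get_last_six_months().
from datetime import date

def last_six_calendar_months(today=None):
    """Most recent six calendar months as YYYY-MM strings, oldest first."""
    today = today or date.today()
    year, month = today.year, today.month
    months = []
    for _ in range(6):
        months.append(f"{year:04d}-{month:02d}")
        # Step back one real calendar month, rolling over the year boundary.
        month -= 1
        if month == 0:
            year, month = year - 1, 12
    return months[::-1]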
98 changes: 0 additions & 98 deletions dashboard/company/scripts/workflow_3.py
@@ -262,104 +262,6 @@ def fetch_and_upload_contributorinfo_to_clickhouse(csv_file_path):

print("CSV 数据已成功上传到目标 ClickHouse 数据库")

# 获取最近6个月的日期

def get_last_six_months():
today = datetime.today()
return [(today - timedelta(days=30 * i)).strftime('%Y-%m') for i in range(6)][::-1]

last_six_months = get_last_six_months()

# API请求的基础URL模板
activity_url_template = "https://oss.x-lab.info/open_digger/github/{}/activity.json"
openrank_url_template = "https://oss.x-lab.info/open_digger/github/{}/openrank.json"

# 数据存储
data = []

# 从URL获取数据并过滤最近六个月的数据
def fetch_and_filter(url, months):
try:
response = requests.get(url)
response.raise_for_status()
json_data = response.json()
return {month: json_data.get(month, None) for month in months}
except requests.RequestException as e:
print(f"请求失败 {url}: {e}")
return {month: None for month in months}

# 遍历每个社区和仓库
for community, repos in data.items():
for repo_name in repos:
print(f"处理仓库: {repo_name}")

# 获取数据
activity_url = activity_url_template.format(repo_name)
openrank_url = openrank_url_template.format(repo_name)

activity_data = fetch_and_filter(activity_url, last_six_months)
openrank_data = fetch_and_filter(openrank_url, last_six_months)

# 将数据存储到目标格式
for month in last_six_months:
data.append({
"community": community,
"repo_name": repo_name,
"month": month,
"activity": activity_data.get(month),
"openrank": openrank_data.get(month)
})

# 转换为DataFrame
df = pd.DataFrame(data)

# 删除 activity 和 openrank 列中有空值的行
df_cleaned = df.dropna(subset=['activity', 'openrank'])

# 保存清洗后的结果到 CSV 文件
df_cleaned.to_csv(csv_file_path, index=False)
print(f"数据已保存到 {csv_file_path}")

# Step 2: 删除目标表并重新创建
try:
# 删除现有表
target_client.execute(
"DROP TABLE IF EXISTS community_repository_data_cleaned")
print("旧表已删除")
except Exception as e:
print(f"删除表时出错: {e}")

# 重新创建目标表
create_table_query = """
CREATE TABLE community_repository_data_cleaned
(
community String,
repo_name String,
month String,
activity String,
openrank String
) ENGINE = MergeTree()
ORDER BY (community, repo_name, month);
"""
target_client.execute(create_table_query)
print("目标表已重新创建")

# Step 3: 读取清洗后的 CSV 文件并将数据插入到目标数据库
with open(csv_file_path, 'r') as csvfile:
csv_reader = csv.DictReader(csvfile)

# 将清洗后的 CSV 数据插入到目标 ClickHouse 表
for row in csv_reader:
# 构造插入的 SQL 语句
insert_query = """
INSERT INTO community_repository_data_cleaned (community, repo_name, month, activity, openrank)
VALUES
"""
values = f"('{row['community']}', '{row['repo_name']}', '{row['month']}', '{row['activity']}', '{row['openrank']}')"
target_client.execute(insert_query + values)

print("清洗后的 CSV 数据已成功上传到目标 ClickHouse 数据库")


# 配置源 ClickHouse 的连接参数
source_client = Client(
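
The deleted block is a byte-for-byte copy across workflow_1.py, workflow_2.py, and workflow_3.py, which is the redundancy this commit removes. Had the logic still been needed, one option would have been to keep a single copy in a shared helper that each workflow imports; the module path and names in this sketch are illustrative, not from the repository.

# Hypothetical shared module, e.g. dashboard/company/scripts/opendigger_utils.py
# (path and names are illustrative). Each workflow_*.py would then do:
#     from opendigger_utils import get_last_six_months, fetch_and_filter
from datetime import datetime, timedelta

import requests

ACTIVITY_URL_TEMPLATE = "https://oss.x-lab.info/open_digger/github/{}/activity.json"
OPENRANK_URL_TEMPLATE = "https://oss.x-lab.info/open_digger/github/{}/openrank.json"

def get_last_six_months():
    """Most recent six months as YYYY-MM strings, oldest first (30-day steps)."""
    today = datetime.today()
    return [(today - timedelta(days=30 * i)).strftime('%Y-%m') for i in range(6)][::-1]

def fetch_and_filter(url, months):
    """Fetch OpenDigger JSON from url and keep only the requested months."""
    try:
        response = requests.get(url)
        response.raise_for_status()
        json_data = response.json()
        return {month: json_data.get(month) for month in months}
    except requests.RequestException as e:
        print(f"Request failed {url}: {e}")
        return {month: None for month in months}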
