forked from cotes2020/jekyll-theme-chirpy
-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfind_files_by_tag.py
143 lines (118 loc) · 4.76 KB
/
find_files_by_tag.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#!/usr/bin/env python3
import os
import re
import yaml
import sys
import argparse
from pathlib import Path
def extract_tags_from_md(file_path):
    """Extract tags from a markdown file's front matter.

    The file must start with a YAML front matter block delimited by ``---``
    markers. The ``tags`` entry may be a YAML list, a comma-separated string,
    a whitespace-separated string, or a single scalar value.

    Args:
        file_path: Path of the markdown file to read (decoded as UTF-8).

    Returns:
        A list of lower-cased, whitespace-stripped, non-empty tag strings.
        The list is empty when the file has no front matter, the front
        matter is not a mapping, ``tags`` is missing or has no value, or the
        file cannot be read or parsed. Failures are reported with ``print``
        rather than raised, so one bad file never aborts a directory scan.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        # Without re.MULTILINE, ``^`` only matches at the very start of the
        # text, so the front matter has to be the first thing in the file.
        match = re.search(r'^---\s+(.*?)\s+---', content, re.DOTALL)
        if not match:
            return []

        try:
            metadata = yaml.safe_load(match.group(1))
        except yaml.YAMLError:
            print(f"Error parsing YAML front matter in {file_path}")
            return []

        # Front matter that parses to a scalar or a list has no keys; a plain
        # ``'tags' in metadata`` would do a substring test on a string.
        if not isinstance(metadata, dict):
            return []

        tags = metadata.get('tags')
        if tags is None:
            # Covers both a missing key and a bare ``tags:`` with no value,
            # which would otherwise be reported as the tag 'none'.
            return []

        if isinstance(tags, str):
            # A single string is either comma-separated or space-separated.
            raw_tags = tags.split(',') if ',' in tags else tags.split()
        elif isinstance(tags, (list, tuple, set)):
            # Skip empty list items (``- `` parses to None).
            raw_tags = [tag for tag in tags if tag is not None]
        else:
            raw_tags = [tags]

        # ``str()`` lets numeric or boolean tags (e.g. ``2023``) through
        # instead of failing on ``.strip()`` and losing every tag in the file.
        normalized = (str(tag).strip().lower() for tag in raw_tags)
        return [tag for tag in normalized if tag]
    except Exception as e:
        # Deliberate best-effort boundary: report and continue with no tags.
        print(f"Error processing {file_path}: {e}")
        return []
def find_files_by_tag(tag, posts_dir):
    """Return the paths of markdown posts that carry the given tag.

    Walks ``posts_dir`` recursively, looks only at files whose name ends in
    ``.md``, and compares the lower-cased ``tag`` against the tags returned
    by ``extract_tags_from_md`` for each file.

    Args:
        tag: Tag to look for; compared case-insensitively.
        posts_dir: Root directory to scan.

    Returns:
        A list of matching file paths, in the order ``os.walk`` yields them.
    """
    hits = []
    for dirpath, _subdirs, filenames in os.walk(posts_dir):
        for name in filenames:
            # Only markdown posts can have front matter tags.
            if not name.endswith('.md'):
                continue
            candidate = os.path.join(dirpath, name)
            post_tags = extract_tags_from_md(candidate)
            if tag.lower() in post_tags:
                hits.append(candidate)
    return hits
def list_all_tags(posts_dir):
    """Collect every distinct tag used by the markdown posts in a directory.

    Walks ``posts_dir`` recursively and gathers the tags that
    ``extract_tags_from_md`` reports for each file ending in ``.md``.

    Args:
        posts_dir: Root directory to scan.

    Returns:
        A sorted list of the unique tags found.
    """
    seen = set()
    for dirpath, _subdirs, filenames in os.walk(posts_dir):
        for name in filenames:
            # Non-markdown files are ignored entirely.
            if not name.endswith('.md'):
                continue
            post_path = os.path.join(dirpath, name)
            seen.update(extract_tags_from_md(post_path))
    return sorted(seen)
def _read_post_title(file_path):
    """Return the ``title`` from a post's YAML front matter, or None.

    Best-effort: an unreadable file, missing front matter, invalid YAML, or
    front matter that is not a mapping all yield None instead of raising.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        match = re.search(r'^---\s+(.*?)\s+---', content, re.DOTALL)
        if not match:
            return None
        metadata = yaml.safe_load(match.group(1))
    except (OSError, UnicodeDecodeError, yaml.YAMLError):
        # Only expected read/parse failures are suppressed; anything else
        # (including KeyboardInterrupt) propagates.
        return None
    if not isinstance(metadata, dict):
        return None
    return metadata.get('title')


def main():
    """Command-line entry point: list all tags or find posts with a tag.

    Posts are read from the ``_posts`` directory located next to this
    script. With ``--list-tags`` every known tag is printed; otherwise the
    positional ``tag`` argument is searched for and each matching file is
    printed with its title when one is available. If nothing matches, tags
    containing the search term are suggested.

    Returns:
        Exit status: 0 on success (including "no files found"), 1 when the
        posts directory is missing or no tag was supplied.
    """
    parser = argparse.ArgumentParser(description='Find blog posts containing specific tags')
    parser.add_argument('tag', nargs='?', help='The tag to search for')
    parser.add_argument('--list-tags', action='store_true', help='List all available tags')
    args = parser.parse_args()

    blog_root = os.path.dirname(os.path.abspath(__file__))
    posts_dir = os.path.join(blog_root, "_posts")

    # Check if _posts directory exists
    if not os.path.isdir(posts_dir):
        print(f"Error: Posts directory not found at {posts_dir}")
        return 1

    # List all available tags if requested
    if args.list_tags:
        print("\nAvailable tags:")
        for tag in list_all_tags(posts_dir):
            print(f" {tag}")
        return 0

    # Check if tag argument is provided
    if not args.tag:
        print("Please provide a tag to search for, or use --list-tags to see all available tags.")
        print("Usage: python find_files_by_tag.py <tag>")
        return 1

    # Find files with the specified tag
    tag = args.tag
    matching_files = find_files_by_tag(tag, posts_dir)

    if matching_files:
        print(f"\nFound {len(matching_files)} files with tag '{tag}':")
        for file_path in matching_files:
            print(f" {os.path.abspath(file_path)}")
            # The title is optional extra context; a post without a usable
            # title is still listed by path.
            title = _read_post_title(file_path)
            if title is not None:
                print(f" Title: {title}")
            print()  # Empty line between entries for readability
        return 0

    print(f"\nNo files found with tag '{tag}'")

    # Suggest tags that contain the search term as a substring.
    needle = tag.lower()
    similar_tags = [t for t in list_all_tags(posts_dir) if needle in t.lower()]
    if similar_tags:
        print("\nDid you mean one of these tags?")
        for similar_tag in similar_tags:
            print(f" {similar_tag}")
    return 0
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    exit_code = main()
    sys.exit(exit_code)