Skip to content

Commit

Permalink
sync changes:
Browse files Browse the repository at this point in the history
- M  "site/categories/algorithm analysis.html"

- M  site/categories/algorithms.html

- M  "site/categories/computer science.html"
  • Loading branch information
elimelt committed Jan 11, 2025
1 parent e60f6ed commit 6003248
Show file tree
Hide file tree
Showing 95 changed files with 223 additions and 95 deletions.
2 changes: 1 addition & 1 deletion site/categories/algorithm analysis.html
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@
</div>
<h1>Category: Algorithm Analysis</h1>
<div class="meta">
<span>Last modified: 2025-01-08</span>
<span>Last modified: 2025-01-11</span>

</div>
<div class="content">
Expand Down
2 changes: 1 addition & 1 deletion site/categories/algorithms.html
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@
</div>
<h1>Category: algorithms</h1>
<div class="meta">
<span>Last modified: 2025-01-08</span>
<span>Last modified: 2025-01-11</span>

</div>
<div class="content">
Expand Down
2 changes: 1 addition & 1 deletion site/categories/computer science.html
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@
</div>
<h1>Category: Computer Science</h1>
<div class="meta">
<span>Last modified: 2025-01-08</span>
<span>Last modified: 2025-01-11</span>

</div>
<div class="content">
Expand Down
2 changes: 1 addition & 1 deletion site/categories/database design.html
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@
</div>
<h1>Category: Database Design</h1>
<div class="meta">
<span>Last modified: 2025-01-08</span>
<span>Last modified: 2025-01-11</span>

</div>
<div class="content">
Expand Down
2 changes: 1 addition & 1 deletion site/categories/database systems.html
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@
</div>
<h1>Category: Database Systems</h1>
<div class="meta">
<span>Last modified: 2025-01-08</span>
<span>Last modified: 2025-01-11</span>

</div>
<div class="content">
Expand Down
2 changes: 1 addition & 1 deletion site/categories/distributed systems.html
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@
</div>
<h1>Category: Distributed Systems</h1>
<div class="meta">
<span>Last modified: 2025-01-08</span>
<span>Last modified: 2025-01-11</span>

</div>
<div class="content">
Expand Down
2 changes: 1 addition & 1 deletion site/categories/graph theory.html
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@
</div>
<h1>Category: Graph Theory</h1>
<div class="meta">
<span>Last modified: 2025-01-08</span>
<span>Last modified: 2025-01-11</span>

</div>
<div class="content">
Expand Down
2 changes: 1 addition & 1 deletion site/categories/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@
</div>
<h1>Categories</h1>
<div class="meta">
<span>Last modified: 2025-01-08</span>
<span>Last modified: 2025-01-11</span>

</div>
<div class="content">
Expand Down
2 changes: 1 addition & 1 deletion site/categories/mathematics.html
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@
</div>
<h1>Category: Mathematics</h1>
<div class="meta">
<span>Last modified: 2025-01-08</span>
<span>Last modified: 2025-01-11</span>

</div>
<div class="content">
Expand Down
2 changes: 1 addition & 1 deletion site/categories/operations research.html
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@
</div>
<h1>Category: Operations Research</h1>
<div class="meta">
<span>Last modified: 2025-01-08</span>
<span>Last modified: 2025-01-11</span>

</div>
<div class="content">
Expand Down
2 changes: 1 addition & 1 deletion site/categories/software engineering.html
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@
</div>
<h1>Category: Software Engineering</h1>
<div class="meta">
<span>Last modified: 2025-01-08</span>
<span>Last modified: 2025-01-11</span>

</div>
<div class="content">
Expand Down
4 changes: 2 additions & 2 deletions site/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@
</div>
<h1>My Notes</h1>
<div class="meta">
<span>Last modified: 2025-01-08</span>
<span>Last modified: 2025-01-11</span>

</div>
<div class="content">
Expand All @@ -203,7 +203,7 @@ <h2>Recent Notes</h2>
<ul class='recent-posts'>
<li>
<a href="/systems-research/hints-for-computer-system-design.html">Hints For Computer System Design</a>
<span class="date">2025-01-08</span>
<span class="date">2025-01-09</span>

</li>
<li>
Expand Down
128 changes: 128 additions & 0 deletions site/natural-language-processing/tf-idf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
import numpy as np
import os
from typing import Dict, List, Tuple


def read_markdown_files(root_dir: str, max_depth: int = 8) -> Dict[str, str]:
    """Recursively collect the text of every ``.md`` file under *root_dir*.

    The walk is best-effort: directories that cannot be listed and files
    that cannot be read or decoded as UTF-8 are skipped instead of raising.

    Args:
        root_dir: Directory where the walk starts (depth 0).
        max_depth: Deepest directory level that is still scanned.
            Directories nested more than ``max_depth`` levels below
            *root_dir* are ignored, which also bounds symlink cycles.

    Returns:
        Mapping from each markdown file's path (built by joining onto
        *root_dir*) to the file's full text.
    """
    corpus: Dict[str, str] = {}

    def process_dir(path: str, depth: int = 0) -> None:
        if depth > max_depth:
            return

        try:
            entries = os.listdir(path)
        except (OSError, ValueError):
            # Missing/unreadable directory, or a malformed path: skip it.
            return

        for entry in entries:
            full_path = os.path.join(path, entry)
            if os.path.isdir(full_path):
                process_dir(full_path, depth + 1)
            elif entry.endswith(".md"):
                try:
                    # Pin the encoding so results do not depend on the
                    # platform's locale default.
                    with open(full_path, "r", encoding="utf-8") as f:
                        corpus[full_path] = f.read()
                except (OSError, ValueError):
                    # ValueError covers UnicodeDecodeError for non-UTF-8 files.
                    continue

    process_dir(root_dir)
    return corpus


def create_index(corpus: Dict[str, str]) -> Tuple[Dict[str, Dict[str, int]], List[str]]:
    """Build an inverted index over whitespace-separated tokens.

    Args:
        corpus: Mapping of document name to document text.

    Returns:
        A pair ``(inv_idx, word_list)``. ``inv_idx`` maps each token to a
        dict of ``{document name: occurrence count}``; ``word_list`` holds
        the tokens in order of first appearance.
    """
    postings = {}
    for name in corpus:
        for token in corpus[name].split():
            per_doc = postings.setdefault(token, {})
            if name in per_doc:
                per_doc[name] += 1
            else:
                per_doc[name] = 1

    vocabulary = list(postings)
    return postings, vocabulary


def calculate_tfidf(
    corpus: Dict[str, str], inv_idx: Dict[str, Dict[str, int]], word_list: List[str]
) -> Dict[str, np.ndarray]:
    """Compute a TF-IDF vector for every document in *corpus*.

    For a term at position ``i`` of *word_list* and a document ``d``:

    * ``tf`` is the term's count in ``d`` (from *inv_idx*) divided by the
      number of whitespace-separated tokens in ``d``;
    * ``idf`` is ``log(N / df)`` where ``N`` is ``len(corpus)`` and ``df``
      is the number of documents listed for the term in *inv_idx*.

    Args:
        corpus: Mapping of document name to document text.
        inv_idx: Mapping of term to ``{document name: occurrence count}``.
        word_list: Vocabulary; fixes the order of the vector components.

    Returns:
        Mapping of document name to a vector of length ``len(word_list)``.
        Components for terms absent from the document are ``0.0``.
    """
    n_docs = len(corpus)
    vocab_size = len(word_list)

    tfidf = {doc_name: np.zeros(vocab_size) for doc_name in corpus}
    doc_lens = {
        doc_name: len(content.split()) for doc_name, content in corpus.items()
    }

    # Walk the posting lists rather than scanning the whole vocabulary for
    # every document: work is proportional to the number of (term, doc)
    # pairs, and each term's IDF is computed exactly once.
    for i, word in enumerate(word_list):
        postings = inv_idx.get(word)
        if not postings:
            # Unknown term or empty posting list: leave the column at zero.
            continue
        idf = np.log(n_docs / len(postings))
        for doc_name, count in postings.items():
            if doc_name in tfidf:  # ignore postings for docs not in corpus
                tf = count / doc_lens[doc_name]
                tfidf[doc_name][i] = tf * idf

    return tfidf


def search(
    query: str,
    tfidf: Dict[str, np.ndarray],
    word_list: List[str],
    inv_idx: Dict[str, Dict[str, int]],
    num_results: int = 5,
) -> List[Tuple[str, float]]:
    """Rank documents against a whitespace-tokenised *query*.

    The query is turned into a vector over *word_list* whose component for
    each query term found in *inv_idx* is that term's IDF,
    ``log(N / df)`` with ``N = len(tfidf)``. Each document is scored by the
    dot product of its TF-IDF vector with the query vector.

    Args:
        query: Free-text query; terms are matched exactly (case-sensitive).
        tfidf: Mapping of document name to TF-IDF vector.
        word_list: Vocabulary giving the order of vector components.
        inv_idx: Mapping of term to ``{document name: occurrence count}``.
        num_results: Maximum number of results to return.

    Returns:
        Up to *num_results* ``(document name, score)`` pairs, highest score
        first. Ties keep the iteration order of *tfidf*. Documents with a
        score of ``0.0`` are still included.
    """
    n_docs = len(tfidf)
    # Tokenise once; a set gives O(1) membership tests inside the loop.
    query_terms = set(query.split())
    query_vec = np.zeros(len(word_list))

    for i, word in enumerate(word_list):
        if word not in query_terms:
            continue
        postings = inv_idx.get(word)
        if postings:  # skip unknown terms and empty posting lists
            query_vec[i] = np.log(n_docs / len(postings))

    similarities = [
        (doc_name, float(np.dot(doc_vec, query_vec)))
        for doc_name, doc_vec in tfidf.items()
    ]

    return sorted(similarities, key=lambda x: x[1], reverse=True)[:num_results]


def main():
    """Run the interactive command-line search session.

    Prompts for a root directory (blank input means the current directory),
    indexes the markdown files found beneath it, then answers queries until
    the user types ``exit``. Ctrl-C ends the session with a short message;
    any other exception is reported on stdout instead of propagating.
    """
    try:
        answer = input("Enter root directory to search (default '.'): ")
        root_dir = answer.strip()
        if not root_dir:
            root_dir = "."
        print(f"Reading markdown files from {root_dir}...")

        documents = read_markdown_files(root_dir)
        if not documents:
            print("No markdown files found!")
            return

        print(f"Found {len(documents)} markdown files")
        print("Building search index...")

        index, vocabulary = create_index(documents)
        vectors = calculate_tfidf(documents, index, vocabulary)

        print("Search system ready!")

        prompt = '\nEnter search query (or "exit" to quit): '
        query = input(prompt).strip()
        while query.lower() != "exit":
            hits = search(query, vectors, vocabulary, index)

            print("\nSearch results:")
            for name, score in hits:
                print(f"{name}: {score:.4f}")

            query = input(prompt).strip()

    except KeyboardInterrupt:
        print("\nExiting...")
    except Exception as err:
        # Top-level boundary: show the message rather than a traceback.
        print(f"\nError: {err!s}")

# Start the interactive session only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@
</div>
<h1>Hints For Computer System Design</h1>
<div class="meta">
<span>Last modified: 2025-01-08</span>
<span>Last modified: 2025-01-09</span>

</div>
<div class="content">
Expand Down
2 changes: 1 addition & 1 deletion site/tags/acyclic graphs.html
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@
</div>
<h1>Tag: acyclic graphs</h1>
<div class="meta">
<span>Last modified: 2025-01-08</span>
<span>Last modified: 2025-01-11</span>

</div>
<div class="content">
Expand Down
2 changes: 1 addition & 1 deletion site/tags/algorithm-analysis.html
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@
</div>
<h1>Tag: algorithm-analysis</h1>
<div class="meta">
<span>Last modified: 2025-01-08</span>
<span>Last modified: 2025-01-11</span>

</div>
<div class="content">
Expand Down
2 changes: 1 addition & 1 deletion site/tags/algorithm.html
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@
</div>
<h1>Tag: algorithm</h1>
<div class="meta">
<span>Last modified: 2025-01-08</span>
<span>Last modified: 2025-01-11</span>

</div>
<div class="content">
Expand Down
2 changes: 1 addition & 1 deletion site/tags/algorithms.html
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@
</div>
<h1>Tag: algorithms</h1>
<div class="meta">
<span>Last modified: 2025-01-08</span>
<span>Last modified: 2025-01-11</span>

</div>
<div class="content">
Expand Down
2 changes: 1 addition & 1 deletion site/tags/approximation.html
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@
</div>
<h1>Tag: approximation</h1>
<div class="meta">
<span>Last modified: 2025-01-08</span>
<span>Last modified: 2025-01-11</span>

</div>
<div class="content">
Expand Down
2 changes: 1 addition & 1 deletion site/tags/asymptotic notation.html
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@
</div>
<h1>Tag: asymptotic notation</h1>
<div class="meta">
<span>Last modified: 2025-01-08</span>
<span>Last modified: 2025-01-11</span>

</div>
<div class="content">
Expand Down
2 changes: 1 addition & 1 deletion site/tags/batch processing.html
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@
</div>
<h1>Tag: batch processing</h1>
<div class="meta">
<span>Last modified: 2025-01-08</span>
<span>Last modified: 2025-01-11</span>

</div>
<div class="content">
Expand Down
2 changes: 1 addition & 1 deletion site/tags/bipartite graphs.html
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@
</div>
<h1>Tag: bipartite graphs</h1>
<div class="meta">
<span>Last modified: 2025-01-08</span>
<span>Last modified: 2025-01-11</span>

</div>
<div class="content">
Expand Down
2 changes: 1 addition & 1 deletion site/tags/bipartite matching.html
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@
</div>
<h1>Tag: bipartite matching</h1>
<div class="meta">
<span>Last modified: 2025-01-08</span>
<span>Last modified: 2025-01-11</span>

</div>
<div class="content">
Expand Down
2 changes: 1 addition & 1 deletion site/tags/breadth-first search.html
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@
</div>
<h1>Tag: breadth-first search</h1>
<div class="meta">
<span>Last modified: 2025-01-08</span>
<span>Last modified: 2025-01-11</span>

</div>
<div class="content">
Expand Down
2 changes: 1 addition & 1 deletion site/tags/column-oriented storage.html
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@
</div>
<h1>Tag: column-oriented storage</h1>
<div class="meta">
<span>Last modified: 2025-01-08</span>
<span>Last modified: 2025-01-11</span>

</div>
<div class="content">
Expand Down
2 changes: 1 addition & 1 deletion site/tags/compatibility.html
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@
</div>
<h1>Tag: compatibility</h1>
<div class="meta">
<span>Last modified: 2025-01-08</span>
<span>Last modified: 2025-01-11</span>

</div>
<div class="content">
Expand Down
2 changes: 1 addition & 1 deletion site/tags/complexity analysis.html
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@
</div>
<h1>Tag: complexity analysis</h1>
<div class="meta">
<span>Last modified: 2025-01-08</span>
<span>Last modified: 2025-01-11</span>

</div>
<div class="content">
Expand Down
2 changes: 1 addition & 1 deletion site/tags/complexity-analysis.html
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@
</div>
<h1>Tag: complexity-analysis</h1>
<div class="meta">
<span>Last modified: 2025-01-08</span>
<span>Last modified: 2025-01-11</span>

</div>
<div class="content">
Expand Down
2 changes: 1 addition & 1 deletion site/tags/connected components.html
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@
</div>
<h1>Tag: connected components</h1>
<div class="meta">
<span>Last modified: 2025-01-08</span>
<span>Last modified: 2025-01-11</span>

</div>
<div class="content">
Expand Down
2 changes: 1 addition & 1 deletion site/tags/connected graphs.html
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@
</div>
<h1>Tag: connected graphs</h1>
<div class="meta">
<span>Last modified: 2025-01-08</span>
<span>Last modified: 2025-01-11</span>

</div>
<div class="content">
Expand Down
Loading

0 comments on commit 6003248

Please sign in to comment.