-
Notifications
You must be signed in to change notification settings - Fork 10
/
easier_website_editing.py
100 lines (72 loc) · 2.91 KB
/
easier_website_editing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
'''
Use 1: Editing the text of an entire Jupyter Book is a PITA.
My plan:
1. This file combines all markdown content in the book (text) into one long
ipynb file, and then converts that into a Word file using pandoc.
2. Open the Word file and let Grammarly do the work of finding issues!
Use 2: Edit the text of several ipynb files (but not the whole book). This
is valuable when adding or tweaking some pages but a full book edit is not
necessary.
(not implemented yet)
1. new input: only_these = None or list of pages
2. if only_these:
convert_these = [s for s in convert_these if s in only_these]
'''
# Root folder of the book; it is walked recursively for .ipynb and .md files.
# NOTE(review): machine-specific absolute path - edit before running elsewhere.
directory = 'C:/Users/DonsLaptop/Desktop/GitHub/ledatascifi-2024/content/'
# Skip any file whose full path contains one of these substrings.
filters = ['old ', 'ipynb_checkpoint']
# Name of the Word document produced by pandoc (a relative path, so it is
# created in the current working directory).
out_doc = 'jupyter_book_to_edit.docx'
import nbformat, os, subprocess
def extract_markdown_cells(nb):
    """Return the markdown cells of a notebook, in their original order.

    Args:
        nb: An object exposing a ``cells`` iterable whose items carry a
            ``cell_type`` attribute.

    Returns:
        A new list with every cell whose ``cell_type`` is ``'markdown'``;
        cells of any other type are dropped. The cells themselves are not
        copied.
    """
    return [cell for cell in nb.cells if cell.cell_type == 'markdown']
def markdown_to_cell(markdown_filename):
    """Wrap the contents of a .md file in a single notebook markdown cell.

    The file is read as UTF-8 and its entire text becomes the cell source.
    """
    with open(markdown_filename, encoding='utf-8') as handle:
        return nbformat.v4.new_markdown_cell(handle.read())
def combine_markdown_cells(markdown_cells):
    """Build a new v4 notebook holding the given cells in the given order."""
    notebook = nbformat.v4.new_notebook()
    notebook.cells.extend(markdown_cells)
    return notebook
# Walk the content tree and keep every notebook / markdown file whose full
# path contains none of the substrings listed in `filters`.
convert_these = []
for folder, _subfolders, names in os.walk(directory):
    for name in names:
        if not name.endswith(('.ipynb', '.md')):
            continue
        path = os.path.join(folder, name)
        if any(skip in path for skip in filters):
            continue
        convert_these.append(path)
# Build one flat list of markdown cells: a "FILENAME" heading cell for each
# file, followed by that file's markdown content.
markdown_cells = []
for filename in convert_these:
    title = f'# FILENAME: {os.path.basename(filename)}'
    markdown_cells.append(nbformat.v4.new_markdown_cell(title))

    if filename.endswith('.md'):
        # a .md file becomes one markdown cell
        markdown_cells.append(markdown_to_cell(filename))
        continue

    # a notebook contributes only its markdown cells
    with open(filename, 'r', encoding='utf-8') as f:
        nb = nbformat.reads(f.read(), as_version=4)
    markdown_cells.extend(extract_markdown_cells(nb))
# Write the combined notebook to a temporary file, convert it to Word with
# pandoc, and always delete the temporary file afterwards.
combined_nb = combine_markdown_cells(markdown_cells)
with open('temp.ipynb', 'w', encoding='utf-8') as f:
    f.write(nbformat.writes(combined_nb))

try:
    # delete any previous output before converting
    if os.path.exists(out_doc):
        os.remove(out_doc)

    # pandoc must be on PATH; the input format is inferred from the extension
    result = subprocess.run(['pandoc', 'temp.ipynb', '-s', '-o', out_doc])
    if result.returncode == 0:
        print('Conversion successful!')
    else:
        print('Conversion failed with error code', result.returncode)
finally:
    # Runs even when subprocess.run raises (e.g. FileNotFoundError because
    # pandoc is not installed) or the old output cannot be removed, so
    # temp.ipynb is never left behind.
    os.remove('temp.ipynb')