#!/usr/bin/env python
'''dcmpandas is a scraper and query system for interacting with a
directory of dicom images. It builds a database of the tags
of all images in the database, and then allows querying
these tags through the flexible pandas syntax.
The following example imports dcmpandas and builds a database of all
dicom images in the current directory. (Obviously the building of the
database only needs to be performed once.) It then loads the database
into memory, and performs a few queries and views the images of one
series.
In [1]: import dcmpandas as dp
In [2]: dp.scrape('.')
Processing 48862992
Processing 48857684
Processing 48858202
Processing 48862824
:
In [3]: tags,df = dp.load()
In [4]: df.SeriesDescription.drop_duplicates()
Out[4]:
0 Ax t2_tse_spair
1 Ax t2_tse
2 Ax 3D GRE T1
180 Ax Diff ep2d_mddw_30
181 Ax Diff ep2d_mddw_30_a/p rpt
Name: SeriesDescription, dtype: object
'''
#
# a utility library for reading dicom files and place their
# information in a pandas database.
#
# Copyright (c) 2014 Dov Grobgeld <[email protected]>
#
# This library is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by the
# Free Software Foundation; either version 2.1 of the License, or (at your
# option) any later version.
#
# This library is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
# for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this library; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
#
# Utility functions for using pandas with a directory of dicom
# images.
from __future__ import print_function

import fnmatch
import glob
import os
import pdb
import pickle
import shlex
import sys

import numpy as np
import pandas as pd
import pydicom
# Default viewer is giv. See github.com/dov/giv .
viewer = 'giv'
def view(filenames):
    """Launch the external viewer (module global `viewer`) on the given
    filenames, in the background.

    filenames -- iterable of paths to pass to the viewer.

    The filenames are shell-quoted so paths containing spaces or shell
    metacharacters are passed through intact (the previous raw join was
    both broken for such names and shell-injectable). `viewer` itself is
    left unquoted so it may contain extra command-line options.
    """
    cmd = ' '.join([viewer] + [shlex.quote(f) for f in filenames])
    os.system(cmd + ' &')
def scrape(directory = '.',
           database_file='dicom.pickle',
           glob_pattern = '*',
           verbose=1,
           guess_convert = True,
           sort_slice_location = True,
           recursive = True):
    """Scrape a directory of dicom data into a tag database.

    Walks `directory`, reads every dicom file whose name matches
    `glob_pattern`, and collects all non-pixel tags of each file into
    one row of a pandas DataFrame.

    Parameters:
    directory -- root directory to scan.
    database_file -- pickle file for the [tags, df] pair; if None the
        pair (tags, df) is returned instead of being saved.
    glob_pattern -- fnmatch pattern file names must match.
    verbose -- print progress when truthy.
    guess_convert -- translate some common fields into floats and ints.
    sort_slice_location -- order rows by the SliceLocation tag.
    recursive -- descend into subdirectories of `directory`.
    """
    db = []      # one dict of tags per file (or per read failure)
    tags = {}    # sanitized tag name -> (group, element, tag-string, VR)
    # One C-level pass instead of eight chained .replace() calls: strip
    # the characters that would make a tag name an invalid column name.
    name_cleanup = str.maketrans('', '', " '/[]()-")
    for root, dirnames, filenames in os.walk(directory):
        if verbose:
            print('Visiting ' + root)
        if root != directory and not recursive:
            continue
        for fn in fnmatch.filter(filenames, glob_pattern):
            f = os.path.join(root, fn)
            if not pydicom.misc.is_dicom(f):
                if verbose:
                    print('Skipping non-dicom file: '+f)
                continue
            if verbose:
                print('Processing', f)
            # Bind these before the try so the except clause always sees
            # the *current* file. Previously ds and h could be unbound
            # (NameError on the very first file) or stale from the
            # preceding iteration if read_file itself raised.
            ds = None
            h = {'Filename': f}
            try:
                # dcmread replaces the deprecated read_file; defer_size
                # keeps large elements out of memory until accessed.
                ds = pydicom.dcmread(f, defer_size='10KB')
                for k in ds.keys():
                    # Skip the pixel data itself; only tags go in the db.
                    if (k.group,k.elem) == (0x7fe0,0x0010):
                        continue
                    v = ds[k]
                    key = v.name.translate(name_cleanup)
                    value = v.value
                    # Carry out some common conversions.
                    if guess_convert:
                        if (k.group,k.elem) in [(0x20,0x1041), # Slice location
                                                (0x18,0x50)    # Slice thickness
                                                ]:
                            value = float(value)
                        elif (k.group,k.elem) in [(0x28,0x30), # Pixel spacing
                                                  ]:
                            value = tuple(float(x) for x in v.value)
                        elif not ('\\' in str(value) or '[' in str(value)):
                            if v.VR in ['IS','SL','US']:
                                value = int(value)
                            elif isinstance(value, pydicom.valuerep.DSfloat):
                                value = float(value)
                            elif isinstance(value, bytes):
                                value = value.decode('utf-8')
                            else:
                                value = str(value)
                    # Hack to get around pickle limitation. Deliberately
                    # an exact type() check, not isinstance(): pydicom
                    # subclasses (e.g. DSfloat is a float subclass) must
                    # still be stringified to pickle safely.
                    if not type(value) in [str,list,dict,tuple,int,float]:
                        value = str(value)
                    # Turn lists to tuples to make immutable
                    if type(value)==list:
                        value = tuple(value)
                    h[key] = value
                    # Use both name and group,element syntax
                    h['X%04x_%04x'%(k.group,k.elem)]=h[key]
                    tags[key] = (k.group,
                                 k.elem,
                                 '(%04x_%04x)'%(k.group,k.elem),
                                 v.VR)
                db.append(h)
                if verbose:
                    print('  Successful', h['Filename'])
            except Exception as e:
                # Record the failure as a row so the scan continues and
                # the failure is visible in the dataframe. Broadened from
                # RuntimeError: pydicom read errors (e.g.
                # InvalidDicomError) are not RuntimeErrors.
                db.append({'AccessionNumber': getattr(ds, 'AccessionNumber', None),
                           'Filename': f,
                           'ReadError': e})
                if verbose:
                    print('  Failed ', f, e)
    # Create a dataset of everything
    if sort_slice_location:
        # Rows without a SliceLocation sort as 0.
        db = sorted(db, key=lambda row: row.get('SliceLocation', 0))
    df = pd.DataFrame(db)
    if 'ReadError' not in df.columns: # true iff there were no failures
        df['ReadError'] = np.nan
    tags = pd.DataFrame(tags,index=['Group','Element','Tag','VR'])
    if database_file is None:
        return tags,df
    # Save to disk; the with-block guarantees the handle is closed.
    with open(database_file,'wb') as fh:
        pickle.dump([tags, df], fh)
    if verbose:
        print('Scanning done. Load result by `tags,df = dp.load()`')
def load(database_file='dicom.pickle'):
    '''Read a previously scraped database back from disk.

    Returns the [tags, df] pair that scrape() pickled.'''
    contents = pd.read_pickle(database_file)
    return contents
def load_image(filename):
    '''Scrape a single dicom file and return its tags as a pandas Series.'''
    _, frame = scrape(database_file=None,
                      glob_pattern=filename,
                      verbose=0)
    return frame.iloc[0]
if __name__=='__main__':
pass