|
89 | 89 | " [False, False, True, True, True, True],\n",
|
90 | 90 | " [False, False, True, False, True, True],\n",
|
91 | 91 | " [False, False, True, False, True, False],\n",
|
92 |
| - " [ True, True, False, False, False, False]], dtype=bool)" |
| 92 | + " [ True, True, False, False, False, False]])" |
93 | 93 | ]
|
94 | 94 | },
|
95 | 95 | "execution_count": 2,
|
|
141 | 141 | "cell_type": "markdown",
|
142 | 142 | "metadata": {},
|
143 | 143 | "source": [
|
144 |
| - "After fitting, the unique column names that correspond to the data array shown above can be accessed via the `columns_` attribute:" |
| 144 | + "After fitting, the unique column names that correspond to the data array shown above can be accessed via the `columns_` attribute, or the `get_feature_names_out` method:" |
145 | 145 | ]
|
146 | 146 | },
|
147 | 147 | {
|
|
161 | 161 | }
|
162 | 162 | ],
|
163 | 163 | "source": [
|
164 |
| - "te.columns_" |
| 164 | + "te.columns_ # list of strings" |
| 165 | + ] |
| 166 | + }, |
| 167 | + { |
| 168 | + "cell_type": "code", |
| 169 | + "execution_count": 5, |
| 170 | + "metadata": {}, |
| 171 | + "outputs": [ |
| 172 | + { |
| 173 | + "data": { |
| 174 | + "text/plain": [ |
| 175 | + "array(['Apple', 'Bananas', 'Beer', 'Chicken', 'Milk', 'Rice'],\n", |
| 176 | + " dtype=object)" |
| 177 | + ] |
| 178 | + }, |
| 179 | + "execution_count": 5, |
| 180 | + "metadata": {}, |
| 181 | + "output_type": "execute_result" |
| 182 | + } |
| 183 | + ], |
| 184 | + "source": [ |
| 185 | + "te.get_feature_names_out() # numpy.array of strings (objects)." |
165 | 186 | ]
|
166 | 187 | },
|
167 | 188 | {
|
168 | 189 | "cell_type": "markdown",
|
169 | 190 | "metadata": {},
|
170 | 191 | "source": [
|
171 |
| - "For our convenience, we can turn theencoded array into a pandas `DataFrame`:" |
| 192 | + "If we desire, we can turn the one-hot encoded array back into a transaction list of lists via the `inverse_transform` function:" |
172 | 193 | ]
|
173 | 194 | },
|
174 | 195 | {
|
175 | 196 | "cell_type": "code",
|
176 |
| - "execution_count": 5, |
| 197 | + "execution_count": 6, |
| 198 | + "metadata": {}, |
| 199 | + "outputs": [ |
| 200 | + { |
| 201 | + "data": { |
| 202 | + "text/plain": [ |
| 203 | + "[['Apple', 'Beer', 'Chicken', 'Rice'],\n", |
| 204 | + " ['Apple', 'Beer', 'Rice'],\n", |
| 205 | + " ['Apple', 'Beer'],\n", |
| 206 | + " ['Apple', 'Bananas']]" |
| 207 | + ] |
| 208 | + }, |
| 209 | + "execution_count": 6, |
| 210 | + "metadata": {}, |
| 211 | + "output_type": "execute_result" |
| 212 | + } |
| 213 | + ], |
| 214 | + "source": [ |
| 215 | + "first4 = te_ary[:4]\n", |
| 216 | + "te.inverse_transform(first4)" |
| 217 | + ] |
| 218 | + }, |
| 219 | + { |
| 220 | + "cell_type": "markdown", |
| 221 | + "metadata": {}, |
| 222 | + "source": [ |
| 223 | + "For our convenience, we can set the default output to a pandas `DataFrame` with the `set_output` method:" |
| 224 | + ] |
| 225 | + }, |
| 226 | + { |
| 227 | + "cell_type": "code", |
| 228 | + "execution_count": 7, |
177 | 229 | "metadata": {},
|
178 | 230 | "outputs": [
|
179 | 231 | {
|
|
294 | 346 | "7 True True False False False False"
|
295 | 347 | ]
|
296 | 348 | },
|
297 |
| - "execution_count": 5, |
| 349 | + "execution_count": 7, |
298 | 350 | "metadata": {},
|
299 | 351 | "output_type": "execute_result"
|
300 | 352 | }
|
301 | 353 | ],
|
302 | 354 | "source": [
|
303 |
| - "import pandas as pd\n", |
304 |
| - "\n", |
305 |
| - "pd.DataFrame(te_ary, columns=te.columns_)" |
306 |
| - ] |
307 |
| - }, |
308 |
| - { |
309 |
| - "cell_type": "markdown", |
310 |
| - "metadata": {}, |
311 |
| - "source": [ |
312 |
| - "If we desire, we can turn the one-hot encoded array back into a transaction list of lists via the `inverse_transform` function:" |
313 |
| - ] |
314 |
| - }, |
315 |
| - { |
316 |
| - "cell_type": "code", |
317 |
| - "execution_count": 6, |
318 |
| - "metadata": {}, |
319 |
| - "outputs": [ |
320 |
| - { |
321 |
| - "data": { |
322 |
| - "text/plain": [ |
323 |
| - "[['Apple', 'Beer', 'Chicken', 'Rice'],\n", |
324 |
| - " ['Apple', 'Beer', 'Rice'],\n", |
325 |
| - " ['Apple', 'Beer'],\n", |
326 |
| - " ['Apple', 'Bananas']]" |
327 |
| - ] |
328 |
| - }, |
329 |
| - "execution_count": 6, |
330 |
| - "metadata": {}, |
331 |
| - "output_type": "execute_result" |
332 |
| - } |
333 |
| - ], |
334 |
| - "source": [ |
335 |
| - "first4 = te_ary[:4]\n", |
336 |
| - "te.inverse_transform(first4)" |
| 355 | + "te = TransactionEncoder().set_output(transform=\"pandas\")\n", |
| 356 | + "te_df = te.fit(dataset).transform(dataset)\n", |
| 357 | + "te_df" |
337 | 358 | ]
|
338 | 359 | },
|
339 | 360 | {
|
|
346 | 367 | {
|
347 | 368 | "cell_type": "code",
|
348 | 369 | "execution_count": 3,
|
349 |
| - "metadata": {}, |
| 370 | + "metadata": { |
| 371 | + "scrolled": true |
| 372 | + }, |
350 | 373 | "outputs": [
|
351 | 374 | {
|
352 | 375 | "name": "stdout",
|
|
545 | 568 | "with open('../../api_modules/mlxtend.preprocessing/TransactionEncoder.md', 'r') as f:\n",
|
546 | 569 | " print(f.read())"
|
547 | 570 | ]
|
548 |
| - }, |
549 |
| - { |
550 |
| - "cell_type": "code", |
551 |
| - "execution_count": null, |
552 |
| - "metadata": {}, |
553 |
| - "outputs": [], |
554 |
| - "source": [] |
555 | 571 | }
|
556 | 572 | ],
|
557 | 573 | "metadata": {
|
|
571 | 587 | "name": "python",
|
572 | 588 | "nbconvert_exporter": "python",
|
573 | 589 | "pygments_lexer": "ipython3",
|
574 |
| - "version": "3.9.7" |
| 590 | + "version": "3.11.7" |
575 | 591 | },
|
576 | 592 | "toc": {
|
577 | 593 | "nav_menu": {},
|
|
0 commit comments