From 30c03a2e5341ec808c299c72dbd9da4fb414b874 Mon Sep 17 00:00:00 2001 From: Zaheer Ud Din Date: Sun, 5 Dec 2021 04:27:36 +0500 Subject: [PATCH] Strip quoted delims in file type magic 0x3C --- .gitignore | 3 ++- .spelling | 9 +++++++++ Makefile | 2 +- bin/xlsx.njs | 2 +- bits/41_lotus.js | 13 +++++++++++-- bits/75_xlml.js | 1 + bits/76_xls.js | 2 ++ bits/87_read.js | 14 +++++++++++--- bits/99_footer.js | 2 +- docbits/80_parseopts.md | 4 +++- docbits/85_filetype.md | 25 +++++++++++++++++++++++++ 11 files changed, 67 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index 785798e3a..9a1526669 100644 --- a/.gitignore +++ b/.gitignore @@ -15,7 +15,7 @@ tmp *.[pP][dD][fF] *.[sS][lL][kK] *.socialcalc -*.[xX][lL][sSwWcCaAtTmM] +*.[xX][lL][sSwWcCaAtTmMrR] *.[xX][lL][sSaAtT][xXmMbB] *.[oO][dD][sS] *.[fF][oO][dD][sS] @@ -23,6 +23,7 @@ tmp *.[uU][oO][sS] *.[wW][kKqQbB][S1234567890] *.[qQ][pP][wW] +*.[fF][mM][3tT] *.[bB][iI][fF][fF][23458] *.[rR][tT][fF] *.[eE][tT][hH] diff --git a/.spelling b/.spelling index 896b6599c..8e9930925 100644 --- a/.spelling +++ b/.spelling @@ -59,6 +59,15 @@ webpack weex # Other terms +1.x +2.x +3.x +4.x +5.x +6.x +7.x +8.x +9.x ActiveX APIs ArrayBuffer diff --git a/Makefile b/Makefile index 628726a94..93485e95b 100644 --- a/Makefile +++ b/Makefile @@ -73,7 +73,7 @@ DISTHDR=misc/suppress_export.js .PHONY: dist dist: dist-deps $(TARGET) bower.json ## Prepare JS files for distribution mkdir -p dist - <$(TARGET) sed "s/require('stream')/{}/g;s/require('....*')/undefined/g" > dist/$(TARGET) + <$(TARGET) sed "s/require('....*')/undefined/g" > dist/$(TARGET) cp LICENSE dist/ uglifyjs shim.js $(UGLIFYOPTS) -o dist/shim.min.js --preamble "$$(head -n 1 bits/00_header.js)" uglifyjs $(DISTHDR) dist/$(TARGET) $(UGLIFYOPTS) -o dist/$(LIB).min.js --source-map dist/$(LIB).min.map --preamble "$$(head -n 1 bits/00_header.js)" diff --git a/bin/xlsx.njs b/bin/xlsx.njs index 1231aa514..2f5c18a8c 100755 --- a/bin/xlsx.njs +++ b/bin/xlsx.njs @@ -5,7 
+5,7 @@ var n = "xlsx"; var X = require('../'); try { X = require('../xlsx.flow'); } catch(e) {} -require('exit-on-epipe'); +try { require('exit-on-epipe'); } catch(e) {} var fs = require('fs'), program; try { program = require('commander'); } catch(e) { [ diff --git a/bits/41_lotus.js b/bits/41_lotus.js index 54f3d2b57..473c0333a 100644 --- a/bits/41_lotus.js +++ b/bits/41_lotus.js @@ -34,6 +34,12 @@ var WK_ = /*#__PURE__*/ (function() { var refguess = {s: {r:0, c:0}, e: {r:0, c:0} }; var sheetRows = o.sheetRows || 0; + if(d[2] == 0x00) { + if(d[3] == 0x08 || d[3] == 0x09) { + if(d.length >= 16 && d[14] == 0x05 && d[15] === 0x6c) throw new Error("Unsupported Works 3 for Mac file"); + } + } + if(d[2] == 0x02) { o.Enum = WK1Enum; lotushopper(d, function(val, R, RT) { switch(RT) { @@ -126,7 +132,8 @@ var WK_ = /*#__PURE__*/ (function() { write_biff_rec(ba, 0x00, write_BOF_WK1(0x0406)); write_biff_rec(ba, 0x06, write_RANGE(range)); - for(var R = range.s.r; R <= range.e.r; ++R) { + var max_R = Math.min(range.e.r, 8191); + for(var R = range.s.r; R <= max_R; ++R) { var rr = encode_row(R); for(var C = range.s.c; C <= range.e.c; ++C) { if(R === range.s.r) cols[C] = encode_col(C); @@ -165,7 +172,8 @@ var WK_ = /*#__PURE__*/ (function() { var range = safe_decode_range(ws["!ref"]); var dense = Array.isArray(ws); var cols = []; - for(var R = range.s.r; R <= range.e.r; ++R) { + var max_R = Math.min(range.e.r, 8191); + for(var R = range.s.r; R <= max_R; ++R) { var rr = encode_row(R); for(var C = range.s.c; C <= range.e.c; ++C) { if(R === range.s.r) cols[C] = encode_col(C); @@ -211,6 +219,7 @@ var WK_ = /*#__PURE__*/ (function() { if(rows < range.e.r) rows = range.e.r; if(cols < range.e.c) cols = range.e.c; } + if(rows > 8191) rows = 8191; out.write_shift(2, rows); out.write_shift(1, wscnt); out.write_shift(1, cols); diff --git a/bits/75_xlml.js b/bits/75_xlml.js index 2487ca7b9..3fdd98af6 100644 --- a/bits/75_xlml.js +++ b/bits/75_xlml.js @@ -183,6 +183,7 @@ function 
parse_xlml_xml(d, _opts)/*:Workbook*/ { else str = utf8read(str); } var opening = str.slice(0, 1024).toLowerCase(), ishtml = false; + opening = opening.replace(/".*?"/g, ""); if((opening.indexOf(">") & 1023) > Math.min((opening.indexOf(",") & 1023), (opening.indexOf(";")&1023))) { var _o = dup(opts); _o.type = "string"; return PRN.to_workbook(str, _o); } if(opening.indexOf("= 0) ishtml = true; }); if(ishtml) return HTML_.to_workbook(str, opts); diff --git a/bits/76_xls.js b/bits/76_xls.js index b2b591665..d71285dd3 100644 --- a/bits/76_xls.js +++ b/bits/76_xls.js @@ -950,6 +950,8 @@ else/*:: if(cfb instanceof CFBContainer) */ { else if((_data=CFB.find(cfb, 'PerfectOffice_MAIN')) && _data.content) WorkbookP = WK_.to_workbook(_data.content, (options.type = T, options)); /* Quattro Pro 9 */ else if((_data=CFB.find(cfb, 'NativeContent_MAIN')) && _data.content) WorkbookP = WK_.to_workbook(_data.content, (options.type = T, options)); + /* Works 4 for Mac */ + else if((_data=CFB.find(cfb, 'MN0')) && _data.content) throw new Error("Unsupported Works 4 for Mac file"); else throw new Error("Cannot find Workbook stream"); if(options.bookVBA && cfb.FullPaths && CFB.find(cfb, '/_VBA_PROJECT_CUR/VBA/dir')) WorkbookP.vbaraw = make_vba_xls(cfb); } diff --git a/bits/87_read.js b/bits/87_read.js index bb680a226..5ddcd1bf7 100644 --- a/bits/87_read.js +++ b/bits/87_read.js @@ -91,12 +91,20 @@ function readSync(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ { case 0x54: if(n[1] === 0x41 && n[2] === 0x42 && n[3] === 0x4C) return DIF.to_workbook(d, o); break; case 0x50: return (n[1] === 0x4B && n[2] < 0x09 && n[3] < 0x09) ? read_zip(d, o) : read_prn(data, d, o, str); case 0xEF: return n[3] === 0x3C ? 
parse_xlml(d, o) : read_prn(data, d, o, str); - case 0xFF: if(n[1] === 0xFE) { return read_utf16(d, o); } break; - case 0x00: if(n[1] === 0x00 && n[2] >= 0x02 && n[3] === 0x00) return WK_.to_workbook(d, o); break; + case 0xFF: + if(n[1] === 0xFE) { return read_utf16(d, o); } + else if(n[1] === 0x00 && n[2] === 0x02 && n[3] === 0x00) return WK_.to_workbook(d, o); + break; + case 0x00: + if(n[1] === 0x00) { + if(n[2] >= 0x02 && n[3] === 0x00) return WK_.to_workbook(d, o); + if(n[2] === 0x00 && (n[3] === 0x08 || n[3] === 0x09)) return WK_.to_workbook(d, o); + } + break; case 0x03: case 0x83: case 0x8B: case 0x8C: return DBF.to_workbook(d, o); case 0x7B: if(n[1] === 0x5C && n[2] === 0x72 && n[3] === 0x74) return RTF.to_workbook(d, o); break; case 0x0A: case 0x0D: case 0x20: return read_plaintext_raw(d, o); - case 0x89: if(n[1] === 0x50 && n[2] === 0x4E && n[3] === 0x47) throw new Error("PNG Image File is not a spreadsheet"); break; + case 0x89: if(n[1] === 0x50 && n[2] === 0x4E && n[3] === 0x47) throw new Error("PNG Image File is not a spreadsheet"); break; } if(DBF.versions.indexOf(n[0]) > -1 && n[2] <= 12 && n[3] <= 31) return DBF.to_workbook(d, o); return read_prn(data, d, o, str); diff --git a/bits/99_footer.js b/bits/99_footer.js index 75b3151e6..dc20acb70 100644 --- a/bits/99_footer.js +++ b/bits/99_footer.js @@ -6,6 +6,6 @@ else if(typeof module !== 'undefined' && module.exports) make_xlsx_lib(module.ex else if(typeof define === 'function' && define.amd) define('xlsx', function() { if(!XLSX.version) make_xlsx_lib(XLSX); return XLSX; }); else make_xlsx_lib(XLSX); /* NOTE: the following extra line is needed for "Lightning Locker Service" */ -if(typeof window !== 'undefined' && !window.XLSX) window.XLSX = XLSX; +if(typeof window !== 'undefined' && !window.XLSX) try { window.XLSX = XLSX; } catch(e) {} /*exported XLS, ODS */ var XLS = XLSX, ODS = XLSX; diff --git a/docbits/80_parseopts.md b/docbits/80_parseopts.md index e9374c68c..d0dc641ad 100644 --- 
a/docbits/80_parseopts.md +++ b/docbits/80_parseopts.md @@ -85,7 +85,7 @@ file but Excel will know how to handle it. This library applies similar logic: | Byte 0 | Raw File Type | Spreadsheet Types | |:-------|:--------------|:----------------------------------------------------| -| `0xD0` | CFB Container | BIFF 5/8 or password-protected XLSX/XLSB or WQ3/QPW | +| `0xD0` | CFB Container | BIFF 5/8 or protected XLSX/XLSB or WQ3/QPW or XLR | | `0x09` | BIFF Stream | BIFF 2/3/4/5 | | `0x3C` | XML/HTML | SpreadsheetML / Flat ODS / UOS1 / HTML / plain text | | `0x50` | ZIP Archive | XLSB or XLSX/M or ODS or UOS2 or plain text | @@ -102,6 +102,8 @@ file but Excel will know how to handle it. This library applies similar logic: DBF files are detected based on the first byte as well as the third and fourth bytes (corresponding to month and day of the file date) +Works for Windows files are detected based on the BOF record with type `0xFF` + Plain text format guessing follows the priority order: | Format | Test | diff --git a/docbits/85_filetype.md b/docbits/85_filetype.md index 9beec6d80..0e4fe83e0 100644 --- a/docbits/85_filetype.md +++ b/docbits/85_filetype.md @@ -27,6 +27,8 @@ Despite the library name `xlsx`, it supports numerous spreadsheet file formats: | Lotus 1-2-3 (WK1/WK3) | ✔ | ✔ | | Lotus 1-2-3 (WKS/WK2/WK4/123) | ✔ | | | Quattro Pro Spreadsheet (WQ1/WQ2/WB1/WB2/WB3/QPW) | ✔ | | +| Works 1.x-3.x DOS / 2.x-5.x Windows Spreadsheet (WKS) | ✔ | | +| Works 6.x-9.x Spreadsheet (XLR) | ✔ | | | **Other Common Spreadsheet Output Formats** |:-----:|:-----:| | HTML Tables | ✔ | ✔ | | Rich Text Format tables (RTF) | | ✔ | @@ -44,6 +46,8 @@ range limits will be silently truncated: | Excel 4.0 (XLS BIFF4) | IV16384 | 256 | 16384 | | Excel 3.0 (XLS BIFF3) | IV16384 | 256 | 16384 | | Excel 2.0/2.1 (XLS BIFF2) | IV16384 | 256 | 16384 | +| Lotus 1-2-3 R2-R5 (WK1/WK3/WK4) | IV8192 | 256 | 8192 | +| Lotus 1-2-3 R1 (WKS) | IV2048 | 256 | 2048 | Excel 2003 SpreadsheetML range limits 
are governed by the version of Excel and are not enforced by the writer. @@ -180,6 +184,27 @@ BIFF8 XLS. +#### Works for DOS / Windows Spreadsheet (WKS/XLR) + +
+ (click to show) + +All versions of Works were limited to a single worksheet. + +Works for DOS 1.x - 3.x and Works for Windows 2.x extend the Lotus WKS format +with additional record types. + +Works for Windows 3.x - 5.x use the same format and WKS extension. The BOF +record has type `0xFF`. + +Works for Windows 6.x - 9.x use the XLR format. XLR is nearly identical to +BIFF8 XLS: it uses the CFB container with a Workbook stream. Works 9 saves the +exact Workbook stream for the XLR and the 97-2003 XLS export. Works 6 XLS +includes two empty worksheets but the main worksheet has an identical encoding. +XLR also includes a `WksSSWorkBook` stream similar to Lotus FM3/FMT files. + +
+ #### OpenDocument Spreadsheet (ODS/FODS)