docs: Move examples to Discussions

"reading from a buffer of an online PDF" --> https://github.com/adrienjoly/npm-pdfreader/discussions/categories/examples-of-use
adrienjoly · Mar 25, 2022 · 7dcf2e3 · 7dcf2e3
1 parent 4afea2c
commit 7dcf2e3
Showing 1 changed file with 19 additions and 140 deletions.
diff --git a/README.md b/README.md
@@ -54,7 +54,20 @@ new PdfReader().parseFileItems("test/sample.pdf", (err, item) => {
 });
 ```
 
-### Raw PDF reading from a PDF already in memory (buffer)
+### Parsing a password-protected PDF file
+
+```javascript
+new PdfReader({ password: "YOUR_PASSWORD" }).parseFileItems(
+  "test/sample-with-password.pdf",
+  function (err, item) {
+    if (err) console.error(err);
+    else if (!item) console.warn("end of file");
+    else if (item.text) console.log(item.text);
+  }
+);
+```
+
+### Raw PDF reading from a PDF buffer
 
 As above, but reading from a buffer in memory rather than from a file referenced by path. For example:
 
@@ -72,155 +85,21 @@ fs.readFile("test/sample.pdf", (err, pdfBuffer) => {
 });
 ```
 
-### Example: reading from a buffer of an online PDF
-
-```javascript
-const get = (url) =>
-  new Promise((resolve, reject) =>
-    https
-      .get(url, (res) => {
-        const data = [];
-        res
-          .on("data", (chunk) => data.push(chunk))
-          .on("end", () => resolve(Buffer.concat(data)));
-      })
-      .on("error", reject)
-  );
-
-function addTextToLines(textLines, item) {
-  const existingLine = textLines.find(({ y }) => y === item.y);
-  if (existingLine) {
-    existingLine.text += " " + item.text;
-  } else {
-    textLines.push(item);
-  }
-}
-
-const parseLinesPerPage = (buffer) =>
-  new Promise((resolve, reject) => {
-    const linesPerPage = [];
-    let pageNumber = 0;
-    new PdfReader().parseBuffer(buffer, (err, item) => {
-      if (err) reject(err);
-      else if (!item) {
-        resolve(linesPerPage.map((page) => page.map((line) => line.text)));
-      } else if (item.page) {
-        pageNumber = item.page - 1;
-        linesPerPage[pageNumber] = [];
-      } else if (item.text) {
-        addTextToLines(linesPerPage[pageNumber], item);
-      }
-    });
-  });
-
-const url = new URL(
-  "https://raw.githubusercontent.com/adrienjoly/npm-pdfreader/master/test/sample.pdf"
-);
-const buffer = get(url)
-  .then((buffer) => parseLinesPerPage(buffer))
-  .then((linesPerPage) => console.log(linesPerPage));
-```
-
-### Example: parsing lines of text from a PDF file
+### Other examples of use
 
 ![example cv resume parse convert pdf to text](https://github.com/adrienjoly/npm-pdfreader-example/raw/master/parseRows.png)
 
-Here is the code required to convert this PDF file into text:
-
-```js
-const { PdfReader } = require("pdfreader");
-
-let rows = {}; // indexed by y-position
-
-function flushRows() {
-  Object.keys(rows) // => array of y-positions (type: float)
-    .sort((y1, y2) => parseFloat(y1) - parseFloat(y2)) // sort float positions
-    .forEach((y) => console.log((rows[y] || []).join("")));
-  rows = {}; // clear rows for next page
-}
-
-new PdfReader().parseFileItems("test/sample.pdf", (err, item) => {
-  if (err) {
-    console.error({ err });
-  } else if (!item) {
-    flushRows();
-    console.log("END OF FILE");
-  } else if (item.page) {
-    flushRows(); // print the rows of the previous page
-    console.log("PAGE:", item.page);
-  } else if (item.text) {
-    // accumulate text items into rows object, per line
-    (rows[item.y] = rows[item.y] || []).push(item.text);
-  }
-});
-```
-
-Fork this example from [parsing a CV/résumé](https://github.com/adrienjoly/npm-pdfreader-example).
-
-### Example: parsing a table from a PDF file
-
 ![example cv resume parse convert pdf table to text](https://github.com/adrienjoly/npm-pdfreader-example/raw/master/parseTable.png)
 
-Here is the code required to convert this PDF file into a textual table:
-
-```js
-var pdfreader = require("pdfreader");
-
-const nbCols = 2;
-const cellPadding = 40; // each cell is padded to fit 40 characters
-const columnQuantitizer = (item) => parseFloat(item.x) >= 20;
-
-const padColumns = (array, nb) =>
-  Array.apply(null, { length: nb }).map((val, i) => array[i] || []);
-// .. because map() skips undefined elements
-
-const mergeCells = (cells) =>
-  (cells || [])
-    .map((cell) => cell.text)
-    .join("") // merge cells
-    .substr(0, cellPadding)
-    .padEnd(cellPadding, " "); // padding
-
-const renderMatrix = (matrix) =>
-  (matrix || [])
-    .map((row, y) => padColumns(row, nbCols).map(mergeCells).join(" | "))
-    .join("\n");
-
-var table = new pdfreader.TableParser();
-
-new pdfreader.PdfReader().parseFileItems(filename, function (err, item) {
-  if (!item || item.page) {
-    // end of file, or page
-    console.log(renderMatrix(table.getMatrix()));
-    console.log("PAGE:", item.page);
-    table = new pdfreader.TableParser(); // new/clear table for next page
-  } else if (item.text) {
-    // accumulate text items into rows object, per line
-    table.processItem(item, columnQuantitizer(item));
-  }
-});
-```
+Source code of the examples above: [parsing a CV/résumé](https://github.com/adrienjoly/npm-pdfreader-example).
 
-Fork this example from [parsing a CV/résumé](https://github.com/adrienjoly/npm-pdfreader-example).
-
-## Example: opening a PDF file with a password
-
-```javascript
-new PdfReader({ password: "YOUR_PASSWORD" }).parseFileItems(
-  "test/sample-with-password.pdf",
-  function (err, item) {
-    if (err) console.error(err);
-    else if (!item) console.warn("end of file");
-    else if (item.text) console.log(item.text);
-  }
-);
-```
+For more, see [Examples of use](https://github.com/adrienjoly/npm-pdfreader/discussions/categories/examples-of-use).
 
 ## Rule-based data extraction
 
-The Rule class can be used to define and process data extraction rules, while parsing a PDF document.
+The `Rule` class can be used to define and process data extraction rules, while parsing a PDF document.
 
-Rule instances expose "accumulators": methods that defines the data extraction strategy to be used for each rule.
+`Rule` instances expose "accumulators": methods that define the data extraction strategy to be used for each rule.
 
 Example: