Merge pull request #17 from timed-and-secured-assets/watermark

Invisible Watermarking
timed-and-secured-assets · Feb 18, 2024 · 0b4465c · 0b4465c
2 parents eb28c87 + a4fecc4
commit 0b4465c
Show file tree

Hide file tree

Showing 14 changed files with 1,276 additions and 82 deletions.
diff --git a/.gitignore b/.gitignore
@@ -35,3 +35,4 @@ webroot/assets/lib/bower_components
 /bin/migra.php
 /*.key
 docker/db/*
+.idea/
diff --git a/Dockerfile b/Dockerfile
@@ -6,10 +6,10 @@
 FROM ubuntu:23.04
 
 # Update apt repository
-RUN apt update
+RUN apt-get update
 
 # Install dependencies
-RUN apt install -y git php apache2 mysql-server composer php-curl php-fpm php-mbstring php-mysql php-zip php-xml php-gd php-intl php-xml pandoc
+RUN apt-get install -y --fix-missing git php apache2 mysql-server composer php-curl php-fpm php-mbstring php-mysql php-zip php-xml php-gd php-intl php-xml pandoc libgl1-mesa-glx
 
 #Composer error
 ENV COMPOSER_ALLOW_SUPERUSER=1
@@ -25,11 +25,18 @@ RUN echo "<Directory /var/www/html>" >> /etc/apache2/sites-enabled/000-default.c
     echo "    allow from all" >> /etc/apache2/sites-enabled/000-default.conf && \
     echo "</Directory>" >> /etc/apache2/sites-enabled/000-default.conf
 
+RUN apt-get install python3-pip -y
+
 # Copy formr source into the container
 # TODO: Think about another way to avoid a rebuild for changes to apply. Currently a volume 
 # mount is problematic because we create a symlink to the webroot directory in the build step.
 COPY . /var/www/formr.org/
 
+# Install python dependencies (without a venv to avoid issues with exec; the
+# --break-system-packages flag lets pip install into the system Python).
+# The two pip invocations must be chained with "&&"; otherwise the line
+# continuation merges them into a single "pip install" command.
+RUN cd /var/www/formr.org/scripts/watermark/ && \
+    pip install --upgrade pip --break-system-packages && \
+    pip install -r requirements.txt --break-system-packages
+
 # Create symbolic link to webroot
 RUN ln -s /var/www/formr.org/webroot /var/www/html/formr
 

diff --git a/application/Model/Run.php b/application/Model/Run.php
@@ -62,6 +62,8 @@ class Run extends Model {
         'months' => 'Months',
         'years' => 'Years',
     );
+    public $watermark_method = 0;
+    public $watermark_content = "";
     protected $description_parsed = null;
     protected $footer_text_parsed = null;
     protected $public_blurb_parsed = null;
@@ -78,6 +80,7 @@ class Run extends Model {
         "use_material_design", "expire_cookie",
         "expire_cookie_value", "expire_cookie_unit",
         "expiresOn",
+        "watermark_method", "watermark_content",
     );
     public $renderedDescAndFooterAlready = false;
     public $expiresOn = null;
@@ -117,7 +120,7 @@ protected function load() {
             return;
         }
 
-        $columns = "id, user_id, created, modified, name, api_secret_hash, public, cron_active, cron_fork, locked, header_image_path, title, description, description_parsed, footer_text, footer_text_parsed, public_blurb, public_blurb_parsed, privacy, privacy_parsed, tos, tos_parsed, imprint, imprint_parsed, custom_css_path, custom_js_path, osf_project_id, use_material_design, expire_cookie, expiresOn";
+        $columns = "id, user_id, created, modified, name, api_secret_hash, public, cron_active, cron_fork, locked, header_image_path, title, description, description_parsed, footer_text, footer_text_parsed, public_blurb, public_blurb_parsed, privacy, privacy_parsed, tos, tos_parsed, imprint, imprint_parsed, custom_css_path, custom_js_path, osf_project_id, use_material_design, expire_cookie, expiresOn, watermark_method, watermark_content";
         $where = $this->id ? array('id' => $this->id) : array('name' => $this->name);
         $vars = $this->db->findRow('survey_runs', $where, $columns);
 
@@ -233,6 +236,8 @@ public function create($options) {
             'public' => 0,
             'footer_text' => "Remember to add your contact info here! Contact the [study administration](mailto:[email protected]) in case of questions.",
             'footer_text_parsed' => "Remember to add your contact info here! Contact the <a href='mailto:[email protected]'>study administration</a> in case of questions.",
+            'watermark_method' => "none",
+            'watermark_content' => "formr.org",
         ));
         $this->id = $this->db->pdo()->lastInsertId();
         $this->name = $name;
@@ -303,9 +308,47 @@ public function uploadFiles($files) {
                 $new_file_path = 'assets/tmp/admin/' . crypto_token(33, true) . $this->file_endings[$mime];
             }
 
+            $fileSaved = false;
+
             // save file
-            $destination_dir = APPLICATION_ROOT . 'webroot/' . $new_file_path;
-            if (move_uploaded_file($files['tmp_name'][$i], $destination_dir)) {
+            $destination_path = APPLICATION_ROOT . 'webroot/' . $new_file_path;
+            if (move_uploaded_file($files['tmp_name'][$i], $destination_path)) {
+                $fileSaved = true;
+            } else {
+                $this->errors[] = __("Unable to move uploaded file '%s' to storage location.", $files['name'][$i]);
+            }
+
+            if($fileSaved) {
+                // generate watermark version of image via python script if enabled in settings
+                if (($mime === 'image/jpeg' || $mime === 'image/png' || $mime === 'image/gif') && $this->watermark_method != "none") {
+                    $scriptPath = '/var/www/formr.org/scripts/watermark/main.py';
+                    $originalImage = escapeshellarg($destination_path);
+                    $watermarkedImage = $destination_path; // overwrite original file
+                    $content = escapeshellarg($this->watermark_content);
+
+                    // watermarking method, default to sift method
+                    $method = "sift";
+                    if(str_contains($this->watermark_method, "blind")) {
+                        $method = "blind";
+                    }
+
+                    $cmd = "/usr/bin/python3 $scriptPath embed -i $originalImage -o " . escapeshellarg($watermarkedImage) . " -w '$content' -m '$method'";
+                    exec($cmd, $output, $return_var);
+
+                    // cli returns 0 if successful, 1 if failed and 2 lines of output if successful
+                    if ($return_var === 0 && count($output) === 2) {
+                        // watermarking was successful
+                        $watermark_content = $output[1];
+
+                        // write additional watermark data to file (used for later detection of the watermark)
+                        $file = fopen($watermarkedImage . "_watermarkdata", 'w');
+                        fwrite($file, $watermark_content);
+                        fclose($file);
+                    } else {
+                        // watermarking failed
+                        $this->errors[] = __("Unable to watermark uploaded file '%s'.", $files['name'][$i]);
+                    }
+                }
                 $this->db->insert_update('survey_uploaded_files', array(
                     'run_id' => $this->id,
                     'created' => mysql_now(),
@@ -314,8 +357,6 @@ public function uploadFiles($files) {
                         ), array(
                     'modified' => mysql_now()
                 ));
-            } else {
-                $this->errors[] = __("Unable to move uploaded file '%s' to storage location.", $files['name'][$i]);
             }
         }
 

diff --git a/docker/init.sql b/docker/init.sql
@@ -191,6 +191,8 @@ CREATE TABLE `survey_runs` (
   `cron_fork` tinyint(3) unsigned NOT NULL DEFAULT '1',
   `use_material_design` tinyint(1) NOT NULL DEFAULT '0',
   `expire_cookie` INT UNSIGNED NOT NULL DEFAULT '0',
+  `watermark_method` ENUM('none', 'only_visible', 'only_sift', 'only_blind', 'visible_and_sift', 'visible_and_blind') NOT NULL DEFAULT 'none',
+  `watermark_content` varchar(255) NOT NULL DEFAULT "formr.org";
   PRIMARY KEY (`id`),
   KEY `fk_runs_survey_users1_idx` (`user_id`),
   KEY `fk_survey_runs_survey_units1_idx` (`reminder_email`),

diff --git a/scripts/watermark/README.md b/scripts/watermark/README.md
@@ -0,0 +1,79 @@
+# Image Watermarking - Python
+
+With this tool, watermarks that are not visible to humans can be embedded into or extracted from images.
+This can be used, for example, to prove ownership of an image.
+
+A user-friendly Command Line Interface (CLI) is available for use.
+
+The tool currently offers two methods for embedding/extracting:
+
+* Blind Watermark
+* SIFT Watermark
+
+## Setup
+
+Make sure that [Python 3.10](https://www.python.org/downloads/) or newer is installed.
+
+Run the following commands:
+
+```bash
+python3 -m venv venv
+```
+
+```bash
+source venv/bin/activate
+```
+
+```bash
+python3 -m pip install --upgrade pip
+```
+
+```bash
+python3 -m pip install -r requirements.txt
+```
+
+## Embed watermark
+
+To embed a watermark, run the following command:
+
+Blind watermark:
+
+```bash
+python3 main.py embed -i path/to/image.png -o path/to/output.png -w "watermark" -m "blind"
+```
+
+SIFT watermark:
+
+```bash
+python3 main.py embed -i path/to/image.png -o path/to/output.png -w "watermark" -m "sift"
+```
+
+If successful, the watermarked image is saved to the output path, and both the output path and a watermark key are
+printed. This watermark key is method-specific and is needed for the extraction.
+
+## Extract watermark
+
+To extract a watermark, run the following command:
+
+Blind watermark:
+
+```bash
+python3 main.py extract -i path/to/image.png -m "blind" -w "21" -o path/to/output.png
+```
+
+SIFT watermark:
+
+```bash
+python3 main.py extract -i path/to/image.png -m "sift" -w "[...]"
+```
+
+Pass the method-specific watermark key printed by the embed command as `-w`. If successful, the detected watermark(s) will be printed.
+
+## Help
+
+To print the help page, run the following command:
+
+```bash
+python3 main.py -h
+```
diff --git a/scripts/watermark/blind_watermark.py b/scripts/watermark/blind_watermark.py
@@ -0,0 +1,109 @@
+import pathlib
+import math
+
+import cv2
+import qrcode
+import numpy as np
+
+import blind_watermark_core
+
+
+def embed_watermark(image_path, watermark_text):
+    """
+    Embed a watermark text into an image.
+
+    The embedding process uses a QR-Code to embed the text as an image and make it more robust.
+    The QR-Code image is then embedded using a robust and blind watermark algorithm using DWT, DCT and SVD.
+
+    :param image_path: The path to the image to add the watermark to.
+    :param watermark_text: The watermark text to embed.
+    :return: The watermarked image, the watermark key needed to extract the watermark.
+    """
+    # Create image path and check if the file exists
+    image_path = pathlib.Path(image_path)
+    if not image_path.exists():
+        raise FileNotFoundError(f"{image_path} does not exist")
+
+    # Read image and check if the image was read successfully
+    image = cv2.imread(str(image_path), flags=cv2.IMREAD_UNCHANGED)
+    if image is None:
+        raise IOError(f"Could not read image at {image_path}")
+
+    # Define QR-Code with maximum error correction and create it using the watermark text
+    # Make the QR-Code as small as possible by setting version=None, box_size=1, border=0 and fit=True
+    qr = qrcode.QRCode(version=None, error_correction=qrcode.constants.ERROR_CORRECT_H, box_size=1, border=0)
+    qr.add_data(watermark_text)
+    qr.make(fit=True)
+    qr_code = qr.make_image(fill_color=(0, 0, 0), back_color=(255, 255, 255))
+
+    # Convert the QR-Code to a grayscale CV2 image
+    watermark = cv2.cvtColor(np.array(qr_code), cv2.COLOR_RGB2GRAY)
+
+    # Calculate the minimal width and height to store the watermark 4 times in the image
+    # Respect the original aspect ratio and block shape used in the embedding
+    watermark_size = (watermark.shape[0] + 1) * (watermark.shape[1] + 1) * 4
+    min_width = math.sqrt(watermark_size * (image.shape[0] / image.shape[1]))
+    min_height = watermark_size / min_width
+    min_width = math.ceil(min_width) * 4 * 2
+    min_height = math.ceil(min_height) * 4 * 2
+
+    # If the image is smaller than the minimal width and height, resize it to these dimensions
+    if image.shape[0] < min_height or image.shape[1] < min_width:
+        image = cv2.resize(image, (min_height, min_width), interpolation=cv2.INTER_LINEAR)
+
+    # Embed the watermark into the image
+    embedded_image = blind_watermark_core.embed_watermark(image, watermark)
+
+    return embedded_image, watermark.shape[0]
+
+
+def extract_watermark(image_path, watermark_key, output_path=None):
+    """
+    Extract a watermark text from an image.
+
+    The extraction process extracts a QR-Code from the image which stores the watermark text.
+    The QR-Code image is extracted using an inverse robust and blind watermark algorithm using DWT, DCT and SVD.
+
+    If an output_path is given the extracted QR-Code is saved to this path.
+
+    :param image_path: The path to the image to extract the watermark from.
+    :param watermark_key: The watermark key returned by the embedding process.
+    :param output_path: The path where to store extracted QR-Code (optional).
+    :return: The watermark text, the used output path.
+    """
+    # Create image path and check if the file exists
+    image_path = pathlib.Path(image_path)
+    if not image_path.exists():
+        raise FileNotFoundError(f"{image_path} does not exist")
+
+    # Read image and check if the image was read successfully
+    image = cv2.imread(str(image_path), flags=cv2.IMREAD_UNCHANGED)
+    if image is None:
+        raise IOError(f"Could not read image at {image_path}")
+
+    # Extract the watermark from the image
+    watermark = blind_watermark_core.extract_watermark(image, (watermark_key, watermark_key))
+
+    # Convert the extracted watermark image to a standard QR-Code
+    # Therefore convert to BW, add a border of 4 boxes and use a box size of 10
+    watermark = np.where(watermark > 127, 255, 0).astype(np.uint8)
+    watermark = np.pad(watermark, 4, constant_values=255)
+    watermark = np.repeat(watermark, 10, axis=0)
+    watermark = np.repeat(watermark, 10, axis=1)
+
+    # If an output path is given, write the QR-Code image to that path
+    if output_path is not None:
+        output_path = pathlib.Path(output_path)
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+        output_path = str(output_path)
+        successful = cv2.imwrite(output_path, img=watermark)
+        if not successful:
+            raise IOError(f"Could not write image to {output_path}")
+
+    # Detect and decode the QR-Code data from the QR-Code image and check if the QR-Code was detected successfully
+    detector = cv2.QRCodeDetector()
+    data, vertices_array, _ = detector.detectAndDecode(watermark)
+    if vertices_array is None:
+        raise Exception("Could not detect QR-Code")
+
+    return data, output_path