From 4274cb5d2142c0f5951670cd9c5d692ad21c53d6 Mon Sep 17 00:00:00 2001
From: Benjamin Bellamy <ben@podlibre.org>
Date: Fri, 1 Apr 2022 13:42:17 +0000
Subject: [PATCH] docs(gdpr.txt): add purpose block for analytics data

---
 GDPR.txt                    | 23 +++++++++++++++++++++++
 public/.well-known/GDPR.yml | 24 ++++++++++++++++++++++++
 2 files changed, 47 insertions(+)

diff --git a/GDPR.txt b/GDPR.txt
index 9cce01c3d5..f8f7f13c83 100644
--- a/GDPR.txt
+++ b/GDPR.txt
@@ -6,6 +6,29 @@
 # in particular. As a hosting provider, you must inform your users of their
 # rights and how their data are used and protected.
 
+purpose:
+    Deduplicate number of audio file downloads made by the same listener
+    for analytics purposes
+lawfulness: legitimate interest
+
+data: (User IP address + Browser User Agent)
+required: yes
+visibility: none
+description:
+    In order to produce analytics data comparable to the podcasting
+    ecosystem standards, the User IP address (REMOTE_ADDR) and the
+    browser User Agent (HTTP_USER_AGENT) are stored when an audio file
+    is downloaded.
+mitigation:
+    The data (User IP address + Browser User Agent) is never stored in plain
+    format.
+    The data is concatenated with a cryptographic salt, the current date,
+    and the podcast or episode IDs.
+    The data is hashed (using sha1) after being concatenated and before
+    being stored.
+    The data is stored in a cache database (e.g. Redis).
+    The data expires every day at midnight (server time).
+
 purpose: Connect users to their accounts
 lawfulness: legitimate interest
 
diff --git a/public/.well-known/GDPR.yml b/public/.well-known/GDPR.yml
index 8ef0206382..40c822bafb 100644
--- a/public/.well-known/GDPR.yml
+++ b/public/.well-known/GDPR.yml
@@ -7,6 +7,30 @@
 # rights and how their data are used and protected.
 
 purposes:
+  - description: |
+      Deduplicate number of audio file downloads made by the same listener for
+      analytics purposes
+    lawfulness: legitimate interest
+    data:
+      - field: (User IP address + Browser User Agent)
+        required: yes
+        visibility: none
+        description: |
+          In order to produce analytics data comparable to the podcasting
+          ecosystem standards, the User IP address (REMOTE_ADDR) and the
+          browser User Agent (HTTP_USER_AGENT) are stored when an audio file
+          is downloaded.
+        mitigation: |
+          The data (User IP address + Browser User Agent) is never stored in
+          plain format.
+          The data is concatenated with a cryptographic salt, the current date,
+          and the podcast or episode IDs.
+          The data is hashed (using sha1) after being concatenated and before
+          being stored.
+          The data is stored in a cache database (e.g. Redis).
+          The data expires every day at midnight (server time).
+        retention: 24 hours maximum
+
   - description: Connect users to their accounts
     lawfulness: legitimate interest
     data:
-- 
GitLab