Skip to content
Snippets Groups Projects
analytics_helper.php 11 KiB
Newer Older
  • Learn to ignore specific revisions
    /**
     * @copyright  2020 Podlibre
     * @license    https://www.gnu.org/licenses/agpl-3.0.en.html AGPL3
     * @link       https://castopod.org/
     */
    
    
    /**
     * Polyfill for getallheaders(), which PHP-FPM v7.2 and below does not provide.
     */
    if (!function_exists('getallheaders')) {
        /**
         * Rebuilds the request headers from the HTTP_* entries of $_SERVER.
         *
         * Each key has its "HTTP_" prefix removed and is converted from
         * UPPER_SNAKE_CASE to Dash-Separated-Title-Case
         * (e.g. HTTP_USER_AGENT becomes User-Agent).
         *
         * @return array header name => header value
         */
        function getallheaders()
        {
            $collected = [];

            foreach ($_SERVER as $key => $headerValue) {
                // Only the HTTP_* entries of $_SERVER are treated as headers:
                if (strncmp($key, 'HTTP_', 5) !== 0) {
                    continue;
                }

                $spacedWords = str_replace('_', ' ', substr($key, 5));
                $titleCased = ucwords(strtolower($spacedWords));
                $headerName = str_replace(' ', '-', $titleCased);

                $collected[$headerName] = $headerValue;
            }

            return $collected;
        }
    }
    
    
    /**
     * Set in session variable whether the user IP address was found in the
     * IP database (deny list), for analytics purpose.
     *
     * The lookup is only performed when the session has no 'denyListIp' entry yet.
     */
    function set_user_session_deny_list_ip()
    {
        $session = \Config\Services::session();
        $session->start();

        // Already resolved for this session, nothing to do:
        if ($session->has('denyListIp')) {
            return;
        }

        $ipMatch = \Podlibre\Ipcat\IpDb::find($_SERVER['REMOTE_ADDR']);
        $session->set('denyListIp', $ipMatch != null);
    }
    
    /**
     * Set user location (country, region and rounded coordinates) in session
     * variable, for analytics purpose.
     *
     * The GeoLite2 City lookup is only performed when the session has no
     * 'location' entry yet. When the lookup fails, the placeholder values
     * ('N/A' codes, null coordinates) are stored instead so that the request
     * can carry on.
     */
    function set_user_session_location()
    {
        $session = \Config\Services::session();
        $session->start();

        // Already resolved for this session, nothing to do:
        if ($session->has('location')) {
            return;
        }

        // Placeholder used when the lookup fails:
        $location = [
            'countryCode' => 'N/A',
            'regionCode' => 'N/A',
            'latitude' => null,
            'longitude' => null,
        ];

        // Finds location:
        try {
            $cityReader = new \GeoIp2\Database\Reader(
                WRITEPATH . 'uploads/GeoLite2-City/GeoLite2-City.mmdb'
            );
            $city = $cityReader->city($_SERVER['REMOTE_ADDR']);

            $latitude = $city->location->latitude;
            $longitude = $city->location->longitude;

            $location = [
                'countryCode' => empty($city->country->isoCode)
                    ? 'N/A'
                    : $city->country->isoCode,
                'regionCode' => empty($city->subdivisions[0]->isoCode)
                    ? 'N/A'
                    : $city->subdivisions[0]->isoCode,
                // Coordinates are rounded to 3 decimals; an unknown coordinate
                // stays null instead of being turned into 0.0 by round():
                'latitude' => $latitude === null ? null : round($latitude, 3),
                'longitude' => $longitude === null ? null : round($longitude, 3),
            ];
        } catch (\Exception $e) {
            // If things go wrong the show must go on and the user must be able to download the file
        }

        $session->set('location', $location);
    }
    
    /**
     * Set user player in session variable, for analytics purpose.
     *
     * The User-Agent lookup is only performed when the session has no 'player'
     * entry yet. When no player is found, a '- unknown -' placeholder is stored
     * and the user agent is passed to the `analytics_unknown_useragents`
     * stored procedure.
     */
    function set_user_session_player()
    {
        $session = \Config\Services::session();
        $session->start();

        // Already resolved for this session, nothing to do:
        if ($session->has('player')) {
            return;
        }

        $playerFound = null;
        // The User-Agent header is optional, so fall back to an empty string
        // rather than reading an undefined index:
        $userAgent = isset($_SERVER['HTTP_USER_AGENT'])
            ? $_SERVER['HTTP_USER_AGENT']
            : '';

        try {
            $playerFound = \Podlibre\UserAgentsPhp\UserAgents::find($userAgent);
        } catch (\Exception $e) {
            // If things go wrong the show must go on and the user must be able to download the file
        }

        if ($playerFound) {
            $session->set('player', $playerFound);
            return;
        }

        $session->set('player', [
            'app' => '- unknown -',
            'device' => '',
            'os' => '',
            'bot' => 0,
        ]);

        // Add to unknown list
        try {
            $db = \Config\Database::connect();
            $procedureNameAnalyticsUnknownUseragents = $db->prefixTable(
                'analytics_unknown_useragents'
            );
            $db->query("CALL $procedureNameAnalyticsUnknownUseragents(?)", [
                $userAgent,
            ]);
        } catch (\Exception $e) {
            // If things go wrong the show must go on and the user must be able to download the file
        }
    }
    
    /**
     * Set user browser name in session variable, for analytics purpose.
     *
     * The detection is only performed when the session has no 'browser' entry
     * yet. A placeholder name is stored when the parser throws or yields no name.
     */
    function set_user_session_browser()
    {
        $session = \Config\Services::session();
        $session->start();

        // Already resolved for this session, nothing to do:
        if ($session->has('browser')) {
            return;
        }

        $fallbackName = '- Could not get browser name -';

        try {
            $parser = new \WhichBrowser\Parser(getallheaders());
            $detectedName = $parser->browser->name;
        } catch (\Exception $e) {
            $detectedName = $fallbackName;
        }

        // The parser may also succeed without being able to name the browser:
        if ($detectedName == null) {
            $detectedName = $fallbackName;
        }

        $session->set('browser', $detectedName);
    }
    
    /**
     * Set user referer in session variable, for analytics purpose.
     *
     * A missing referer, or one whose host equals the host of the current URL,
     * is reported as '- Direct -'. A '- Direct -' value never overwrites a
     * referer that is already stored in the session.
     */
    function set_user_session_referer()
    {
        $session = \Config\Services::session();
        $session->start();

        $candidate = '- Direct -';
        if (isset($_SERVER['HTTP_REFERER'])) {
            $candidate = $_SERVER['HTTP_REFERER'];
        }

        // Navigation coming from the current host is not an external referer:
        $candidateHost = parse_url($candidate, PHP_URL_HOST);
        $currentHost = parse_url(current_url(false), PHP_URL_HOST);
        if ($candidateHost == $currentHost) {
            $candidate = '- Direct -';
        }

        if (!$session->has('referer') || $candidate != '- Direct -') {
            $session->set('referer', $candidate);
        }
    }
    
    
    /**
     * Set user entry page in session variable, for analytics purpose.
     *
     * The request URI is only stored when the session has no 'entryPage'
     * entry yet.
     */
    function set_user_session_entry_page()
    {
        $session = \Config\Services::session();
        $session->start();

        $requestUri = $_SERVER['REQUEST_URI'];

        // An entry page that is already stored is never replaced:
        if ($session->has('entryPage')) {
            return;
        }

        $session->set('entryPage', $requestUri);
    }
    
    
    /**
     * Counts a website page hit for analytics purposes.
     *
     * Nothing is recorded when the session's 'denyListIp' entry is truthy.
     * Otherwise the browser, entry page and referer stored in the session are
     * passed to the `analytics_website` stored procedure, together with the
     * referer host and the value of its `q` query parameter (if any).
     *
     * @param int $podcast_id The podcast ID
     *
     * @return void
     */
    function webpage_hit($podcast_id)
    {
        $session = \Config\Services::session();
        $session->start();

        if ($session->get('denyListIp')) {
            return;
        }

        $db = \Config\Database::connect();

        $referer = $session->get('referer');
        // The session may hold no referer; parse an empty string in that case
        // instead of handing null to parse_url():
        $refererUrl = is_string($referer) ? $referer : '';

        $host = parse_url($refererUrl, PHP_URL_HOST);
        $domain = empty($host) ? null : $host;

        // parse_url() yields null when the URL has no query string, so only
        // call parse_str() when there is something to parse:
        $queries = [];
        $queryString = parse_url($refererUrl, PHP_URL_QUERY);
        if (is_string($queryString)) {
            parse_str($queryString, $queries);
        }
        $keywords = empty($queries['q']) ? null : $queries['q'];

        $procedureName = $db->prefixTable('analytics_website');
        $db->query("call $procedureName(?,?,?,?,?,?)", [
            $podcast_id,
            $session->get('browser'),
            $session->get('entryPage'),
            $referer,
            $domain,
            $keywords,
        ]);
    }
    
    /**
     * Counting podcast episode downloads for analytics purposes
     * ✅ No IP address is ever stored on the server.
     * ✅ Only aggregate data is stored in the database.
     * We follow IAB Podcast Measurement Technical Guidelines Version 2.0:
     *   https://iabtechlab.com/standards/podcast-measurement-guidelines/
     *   https://iabtechlab.com/wp-content/uploads/2017/12/Podcast_Measurement_v2-Dec-20-2017.pdf
     *   ✅ Rolling 24-hour window
     *   ✅ Castopod does not do pre-load
     *   ✅ IP deny list https://github.com/client9/ipcat
     *   ✅ User-agent Filtering https://github.com/opawg/user-agents
     *   ✅ Ignores 2 bytes range "Range: 0-1"  (performed by official Apple iOS Podcast app)
     *   ✅ In case of partial content, adds up all requests to check >1mn was downloaded
     *   ✅ Identifying Uniques is done with a combination of IP Address and User Agent
     * @param int $podcastId The podcast ID
     * @param int $episodeId The Episode ID
     * @param int $bytesThreshold The minimum total number of bytes that must be downloaded so that an episode is counted (>1mn)
     * @param int $fileSize The podcast complete file size
     *
     * @return void
     */
    function podcast_hit($podcastId, $episodeId, $bytesThreshold, $fileSize)
    
    {
        $session = \Config\Services::session();
        $session->start();
    
    
        // We try to count (but if things went wrong the show should go on and the user should be able to download the file):
        try {
            // If the user IP is denied it's probably a bot:
            if ($session->get('denyListIp')) {
                $session->get('player')['bot'] = true;
            }
            $httpRange = $_SERVER['HTTP_RANGE'];
            // We create a sha1 hash for this IP_Address+User_Agent+Episode_ID:
            $hashID =
                '_IpUaEp_' .
                sha1(
                    $_SERVER['REMOTE_ADDR'] .
                        '_' .
                        $_SERVER['HTTP_USER_AGENT'] .
                        '_' .
                        $episodeId
                );
            // Was this episode downloaded in the past 24h:
            $downloadedBytes = cache($hashID);
            // Rolling window is 24 hours (86400 seconds):
            $ttl = 86400;
            if ($downloadedBytes) {
                // In case it was already downloaded, TTL should be adjusted (rolling window is 24h since 1st download):
                $ttl = cache()->getMetadata($hashID)['expire'] - time();
    
                // If it was never downloaded that means that zero byte were downloaded:
                $downloadedBytes = 0;
    
            // If the number of downloaded bytes was previously below the 1mn threshold we go on:
            // (Otherwise it means that this was already counted, therefore we don't do anything)
            if ($downloadedBytes < $bytesThreshold) {
                // If HTTP_RANGE is null we are downloading the complete file:
                if (!isset($httpRange)) {
                    $downloadedBytes = $fileSize;
                } else {
                    // [0-1] bytes range requests are used (by Apple) to check that file exists and that 206 partial content is working.
                    // We don't count these requests:
                    if ($httpRange != 'bytes=0-1') {
                        // We calculate how many bytes are being downloaded based on HTTP_RANGE values:
                        $ranges = explode(',', substr($httpRange, 6));
                        foreach ($ranges as $range) {
                            $parts = explode('-', $range);
                            $downloadedBytes += empty($parts[1])
                                ? $fileSize
                                : $parts[1] - (empty($parts[0]) ? 0 : $parts[0]);
                        }
                    }
                }
                // We save the number of downloaded bytes for this user and this episode:
                cache()->save($hashID, $downloadedBytes, $ttl);
    
                // If more that 1mn was downloaded, we send that to the database:
                if ($downloadedBytes >= $bytesThreshold) {
                    $db = \Config\Database::connect();
                    $procedureName = $db->prefixTable('analytics_podcasts');
    
                    $app = $session->get('player')['app'];
                    $device = $session->get('player')['device'];
                    $os = $session->get('player')['os'];
                    $bot = $session->get('player')['bot'];
    
                    $db->query("CALL $procedureName(?,?,?,?,?,?,?,?,?,?);", [
                        $podcastId,
                        $episodeId,
                        $session->get('location')['countryCode'],
                        $session->get('location')['regionCode'],
                        $session->get('location')['latitude'],
                        $session->get('location')['longitude'],
                        $app == null ? '' : $app,
                        $device == null ? '' : $device,
                        $os == null ? '' : $os,
                        $bot == null ? 0 : $bot,
                    ]);
                }
    
        } catch (\Exception $e) {
            // If things go wrong the show must go on and the user must be able to download the file