Skip to content
Snippets Groups Projects
analytics_helper.php 14.6 KiB
Newer Older
  • Learn to ignore specific revisions
  • use CodeIgniter\Router\Exceptions\RouterException;
    use Config\Database;
    
    use Config\Services;
    use GeoIp2\Database\Reader;
    use Opawg\UserAgentsPhp\UserAgents;
    
    /**
     * @copyright  2020 Podlibre
     * @license    https://www.gnu.org/licenses/agpl-3.0.en.html AGPL3
     * @link       https://castopod.org/
     */
    
    
    if (! function_exists('base64_url_encode')) {
        /**
         * Encodes a string as base64 and then swaps the three characters that
         * are awkward inside a URL for safe ones: "+" becomes ".", "/" becomes
         * "_" and the "=" padding becomes "-".
         */
        function base64_url_encode(string $input): string
        {
            $encoded = base64_encode($input);

            // None of the replacement characters belongs to the base64 alphabet,
            // so the sequential replacement cannot re-substitute its own output.
            return str_replace(['+', '/', '='], ['.', '_', '-'], $encoded);
        }
    }
    
    
    if (! function_exists('base64_url_decode')) {
    
        function base64_url_decode(string $input): string
    
            return base64_decode(strtr($input, '._-', '+/='), true);
    
    if (! function_exists('generate_episode_analytics_url')) {
    
         * Builds the episode analytics url that redirects to the audio file url after analytics hit.
    
         *
         * @throws RouterException
         */
        function generate_episode_analytics_url(
    
            int $audioFileSize,
            int $audioFileHeaderSize,
            \CodeIgniter\I18n\Time $publicationDate
        ): string {
    
            return url_to(
                'episode-analytics-hit',
                base64_url_encode(
                    pack(
                        'I*',
                        $podcastId,
                        $episodeId,
                        // bytes_threshold: number of bytes that must be downloaded for an episode to be counted in download analytics
    
                        // - if file is shorter than 60sec, then it's audio_file_size
                        // - if file is longer than 60 seconds then it's audio_file_header_size + 60 seconds
                        $audioFileDuration <= 60
    
                                floor((($audioFileSize - $audioFileHeaderSize) / $audioFileDuration) * 60),
    
    if (! function_exists('set_user_session_deny_list_ip')) {
    
        /**
         * Set whether the user's IP address is on the deny list in session variable, for analytic purposes
         */
    
                $session->set('denyListIp', IpDb::find($_SERVER['REMOTE_ADDR']) !== null);
    
    if (! function_exists('set_user_session_location')) {
    
        /**
         * Set user location (country, region, latitude and longitude) in session variable, for analytic purposes
         */
    
            $session->start();
    
            $location = [
                'countryCode' => 'N/A',
                'regionCode' => 'N/A',
                'latitude' => null,
                'longitude' => null,
            ];
    
            // Finds location:
    
                    $cityReader = new Reader(WRITEPATH . 'uploads/GeoLite2-City/GeoLite2-City.mmdb');
    
                    $city = $cityReader->city($_SERVER['REMOTE_ADDR']);
    
                    $location = [
    
                        'countryCode' => $city->country->isoCode === null
    
                        'regionCode' => $city->subdivisions[0]->isoCode === null
    
                            ? 'N/A'
                            : $city->subdivisions[0]->isoCode,
                        'latitude' => round($city->location->latitude, 3),
                        'longitude' => round($city->location->longitude, 3),
                    ];
                    // If things go wrong the show must go on and the user must be able to download the file
    
    if (! function_exists('set_user_session_player')) {
    
        /**
         * Set user player in session variable, for analytic purposes
         */
    
                $playerFound = null;
                $userAgent = $_SERVER['HTTP_USER_AGENT'];
    
                try {
    
                    // If things go wrong the show must go on and the user must be able to download the file
    
                }
                if ($playerFound) {
                    $session->set('player', $playerFound);
                } else {
                    $session->set('player', [
                        'app' => '- unknown -',
                        'device' => '',
                        'os' => '',
                        'bot' => 0,
                    ]);
                    // Add to unknown list
                    try {
    
                        $procedureNameAnalyticsUnknownUseragents = $db->prefixTable('analytics_unknown_useragents');
                        $db->query("CALL {$procedureNameAnalyticsUnknownUseragents}(?)", [$userAgent]);
    
                        // If things go wrong the show must go on and the user must be able to download the file
    
    if (! function_exists('set_user_session_browser')) {
    
        /**
         * Set user browser in session variable, for analytic purposes
    
         *
         * FIXME: session key should be null instead of "Could not get browser name"
    
                    $browserName = $whichbrowser->browser->name;
    
                    $browserName = '- Could not get browser name -';
                }
    
                    $browserName = '- Could not get browser name -';
                }
                $session->set('browser', $browserName);
            }
        }
    }
    
    
    if (! function_exists('set_user_session_referer')) {
    
        /**
         * Set user referer in session variable, for analytic purposes
         */
    
            $session->start();
    
            $newreferer = isset($_SERVER['HTTP_REFERER'])
                ? $_SERVER['HTTP_REFERER']
                : '- Direct -';
            $newreferer =
    
                parse_url(current_url(false), PHP_URL_HOST)
                    ? '- Direct -'
                    : $newreferer;
    
            if (! $session->has('referer') || $newreferer !== '- Direct -') {
    
    if (! function_exists('set_user_session_entry_page')) {
    
        /**
         * Set user entry page in session variable, for analytic purposes
         */
    
            $session->start();
    
            $entryPage = $_SERVER['REQUEST_URI'];
    
    if (! function_exists('podcast_hit')) {
    
         * Counting podcast episode downloads for analytic purposes ✅ No IP address is ever stored on the server. ✅ Only
         * aggregate data is stored in the database. We follow IAB Podcast Measurement Technical Guidelines Version 2.0:
         * https://iabtechlab.com/standards/podcast-measurement-guidelines/
         * https://iabtechlab.com/wp-content/uploads/2017/12/Podcast_Measurement_v2-Dec-20-2017.pdf ✅ Rolling 24-hour
         * window ✅ Castopod does not do pre-load ✅ IP deny list https://github.com/client9/ipcat ✅ User-agent
         * Filtering https://github.com/opawg/user-agents ✅ RSS User-agent https://github.com/opawg/podcast-rss-useragents
         * ✅ Ignores 2 bytes range "Range: 0-1"  (performed by official Apple iOS Podcast app) ✅ In case of partial
         * content, adds up all requests to check >1mn was downloaded ✅ Identifying Uniques is done with a combination of
         * IP Address and User Agent
         *
    
         * @param integer $podcastId The podcast ID
         * @param integer $episodeId The Episode ID
         * @param integer $bytesThreshold The minimum total number of bytes that must be downloaded so that an episode is counted (>1mn)
         * @param integer $fileSize The podcast complete file size
    
         * @param double $duration The episode duration in seconds
    
         * @param int $publicationTime The episode's publication time as a UNIX timestamp
    
         * @param string $serviceName The name of the service that had fetched the RSS feed
         */
        function podcast_hit(
    
            int $podcastId,
            int $episodeId,
            int $bytesThreshold,
            int $fileSize,
    
            string $serviceName
        ): void {
            $session = Services::session();
    
            $session->start();
    
            // We try to count (but if things went wrong the show should go on and the user should be able to download the file):
            try {
                // If the user IP is denied it's probably a bot:
                if ($session->get('denyListIp')) {
                    $session->get('player')['bot'] = true;
                }
                //We get the HTTP header field `Range`:
                $httpRange = isset($_SERVER['HTTP_RANGE'])
                    ? $_SERVER['HTTP_RANGE']
                    : null;
    
                // We create a sha1 hash for this IP_Address+User_Agent+Episode_ID (used to count only once multiple episode downloads):
                $episodeHashId =
                    '_IpUaEp_' .
    
                    sha1($_SERVER['REMOTE_ADDR'] . '_' . $_SERVER['HTTP_USER_AGENT'] . '_' . $episodeId);
    
                // Was this episode downloaded in the past 24h:
                $downloadedBytes = cache($episodeHashId);
                // Rolling window is 24 hours (86400 seconds):
                $rollingTTL = 86400;
                if ($downloadedBytes) {
                    // In case it was already downloaded, TTL should be adjusted (rolling window is 24h since 1st download):
                    $rollingTTL =
    
                        cache()
                            ->getMetadata($episodeHashId)['expire'] - time();
    
                } else {
                    // If it was never downloaded, that means that zero bytes were downloaded:
                    $downloadedBytes = 0;
                }
                // If the number of downloaded bytes was previously below the 1mn threshold we go on:
                // (Otherwise it means that this was already counted, therefore we don't do anything)
                if ($downloadedBytes < $bytesThreshold) {
                    // If HTTP_RANGE is null we are downloading the complete file:
    
                    } elseif ($httpRange !== 'bytes=0-1') {
    
                        // [0-1] bytes range requests are used (by Apple) to check that file exists and that 206 partial content is working.
    
                        // We don't count these requests.
                        // We calculate how many bytes are being downloaded based on HTTP_RANGE values:
                        $ranges = explode(',', substr($httpRange, 6));
                        foreach ($ranges as $range) {
                            $parts = explode('-', $range);
    
                            $downloadedBytes += array_key_exists(1, $parts)
    
                                    (array_key_exists(0, $parts) ? 0 : (int) $parts[0]);
    
                        }
                    }
                    // We save the number of downloaded bytes for this user and this episode:
    
                    cache()
                        ->save($episodeHashId, $downloadedBytes, $rollingTTL);
    
    
                    // If more than 1mn was downloaded, that's a hit, we send that to the database:
                    if ($downloadedBytes >= $bytesThreshold) {
    
                        $procedureName = $db->prefixTable('analytics_podcasts');
    
    
                        $age = intdiv(time() - $publicationTime, 86400);
    
    
                        // We create a sha1 hash for this IP_Address+User_Agent+Podcast_ID (used to count unique listeners):
                        $listenerHashId =
                            '_IpUaPo_' .
    
                            sha1($_SERVER['REMOTE_ADDR'] . '_' . $_SERVER['HTTP_USER_AGENT'] . '_' . $podcastId);
    
                        $newListener = 1;
                        // Has this listener already downloaded an episode today:
                        $downloadsByUser = cache($listenerHashId);
                        // We add one download
                        if ($downloadsByUser) {
                            $newListener = 0;
    
                        } else {
                            $downloadsByUser = 1;
                        }
                        // Listener count is calculated from 00h00 to 23h59:
                        $midnightTTL = strtotime('tomorrow') - time();
                        // We save the download count for this user until midnight:
    
                            ->save($listenerHashId, $downloadsByUser, $midnightTTL);
    
                            "CALL {$procedureName}(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?);",
    
                            [
                                $podcastId,
                                $episodeId,
                                $session->get('location')['countryCode'],
                                $session->get('location')['regionCode'],
                                $session->get('location')['latitude'],
                                $session->get('location')['longitude'],
                                $serviceName,
                                $session->get('player')['app'],
                                $session->get('player')['device'],
                                $session->get('player')['os'],
                                $session->get('player')['bot'],
                                $fileSize,
                                $duration,
                                $age,
                                $newListener,
                            ],
                        );
                    }
                }
    
                // If things go wrong the show must go on and the user must be able to download the file
    
                log_message('critical', $exception->getMessage());