Commit 3a492581 authored by Benjamin Bellamy's avatar Benjamin Bellamy 💬 Committed by Yassine Doghri
Browse files

feat: add unique listeners analytics

- add unique listener
- add some charts
- correct minor bugs
parent 9660aa97
......@@ -121,6 +121,14 @@ $routes->group(
'as' => 'podcast-analytics',
'filter' => 'permission:podcasts-view,podcast-view',
]);
$routes->get(
'analytics-data/(:segment)',
'AnalyticsData::getData/$1/$2',
[
'as' => 'analytics-full-data',
'filter' => 'permission:podcasts-view,podcast-view',
]
);
$routes->get(
'analytics-data/(:segment)/(:segment)',
'AnalyticsData::getData/$1/$2/$3',
......
......@@ -23,14 +23,15 @@ class AnalyticsData extends BaseController
public function _remap($method, ...$params)
{
if (count($params) > 2) {
if (count($params) > 1) {
if (!($this->podcast = (new PodcastModel())->find($params[0]))) {
throw \CodeIgniter\Exceptions\PageNotFoundException::forPageNotFound(
'Podcast not found: ' . $params[0]
);
}
$this->className = '\App\Models\Analytics' . $params[1] . 'Model';
$this->methodName = 'getData' . $params[2];
$this->methodName =
'getData' . (empty($params[2]) ? '' : $params[2]);
if (count($params) > 3) {
if (
!($this->episode = (new EpisodeModel())
......
<?php
/**
* Class AddAnalyticsPodcastsByCountry
* Creates analytics_podcasts_by_country table in database
* Class AddAnalyticsPodcasts
* Creates analytics_podcasts table in database
* @copyright 2020 Podlibre
* @license https://www.gnu.org/licenses/agpl-3.0.en.html AGPL3
* @link https://castopod.org/
......@@ -30,6 +30,11 @@ class AddAnalyticsPodcasts extends Migration
'constraint' => 10,
'default' => 1,
],
'unique_listeners' => [
'type' => 'INT',
'constraint' => 10,
'default' => 1,
],
]);
$this->forge->addPrimaryKey(['podcast_id', 'date']);
$this->forge->addField(
......
<?php
/**
* Class AddAnalyticsEpisodesByCountry
* Creates analytics_episodes_by_country table in database
* Class AddAnalyticsPodcastsByEpisode
* Creates analytics_podcasts_by_episode table in database
* @copyright 2020 Podlibre
* @license https://www.gnu.org/licenses/agpl-3.0.en.html AGPL3
* @link https://castopod.org/
......
<?php
/**
* Class AddAnalyticsWebsiteByReferer
* Creates analytics_website_by_referer table in database
* Class AddAnalyticsWebsiteByEntryPage
* Creates analytics_website_by_entry_page table in database
* @copyright 2020 Podlibre
* @license https://www.gnu.org/licenses/agpl-3.0.en.html AGPL3
* @link https://castopod.org/
......
......@@ -17,7 +17,7 @@ class AddAnalyticsPodcastsStoredProcedure extends Migration
public function up()
{
// Creates Stored Procedure for data insertion
// Example: CALL analytics_podcasts(1,2,'FR','phone/android/Deezer');
// Example: CALL analytics_podcasts(1, 2, 'FR', 'IDF', 48.853, 2.349, 'PodcastAddict', 'phone', 'android', 0, 1);
$prefix = $this->db->getPrefix();
$createQuery = <<<EOD
......@@ -31,7 +31,8 @@ CREATE PROCEDURE `{$prefix}analytics_podcasts` (
IN `p_app` VARCHAR(128) CHARSET utf8mb4,
IN `p_device` VARCHAR(32) CHARSET utf8mb4,
IN `p_os` VARCHAR(32) CHARSET utf8mb4,
IN `p_bot` TINYINT(1) UNSIGNED
IN `p_bot` TINYINT(1) UNSIGNED,
IN `p_new_listener` TINYINT(1) UNSIGNED
) MODIFIES SQL DATA
DETERMINISTIC
SQL SECURITY INVOKER
......@@ -40,7 +41,7 @@ BEGIN
IF NOT `p_bot` THEN
INSERT INTO `{$prefix}analytics_podcasts`(`podcast_id`, `date`)
VALUES (p_podcast_id, DATE(NOW()))
ON DUPLICATE KEY UPDATE `hits`=`hits`+1;
ON DUPLICATE KEY UPDATE `hits`=`hits`+1, `unique_listeners`=`unique_listeners`+`p_new_listener`;
INSERT INTO `{$prefix}analytics_podcasts_by_episode`(`podcast_id`, `episode_id`, `date`, `age`)
SELECT p_podcast_id, p_episode_id, DATE(NOW()), datediff(now(),`published_at`) FROM `{$prefix}episodes` WHERE `id`= p_episode_id
ON DUPLICATE KEY UPDATE `hits`=`hits`+1;
......
......@@ -114,6 +114,7 @@ class FakePodcastsAnalyticsSeeder extends Seeder
'podcast_id' => $podcast->id,
'date' => date('Y-m-d', $date),
'hits' => $hits,
'unique_listeners' => $hits,
];
$analytics_podcasts_by_country[] = [
'podcast_id' => $podcast->id,
......
......@@ -18,5 +18,6 @@ class AnalyticsPodcasts extends Entity
'podcast_id' => 'integer',
'date' => 'datetime',
'hits' => 'integer',
'unique_listeners' => 'integer',
];
}
......@@ -199,7 +199,7 @@ function webpage_hit($podcast_id)
$referer = $session->get('referer');
$domain = empty(parse_url($referer, PHP_URL_HOST))
? null
? '- Direct -'
: parse_url($referer, PHP_URL_HOST);
parse_str(parse_url($referer, PHP_URL_QUERY), $queries);
$keywords = empty($queries['q']) ? null : $queries['q'];
......@@ -248,9 +248,13 @@ function podcast_hit($podcastId, $episodeId, $bytesThreshold, $fileSize)
if ($session->get('denyListIp')) {
$session->get('player')['bot'] = true;
}
$httpRange = $_SERVER['HTTP_RANGE'];
// We create a sha1 hash for this IP_Address+User_Agent+Episode_ID:
$hashID =
//We get the HTTP header field `Range`:
$httpRange = isset($_SERVER['HTTP_RANGE'])
? $_SERVER['HTTP_RANGE']
: null;
// We create a sha1 hash for this IP_Address+User_Agent+Episode_ID (used to count only once multiple episode downloads):
$episodeHashId =
'_IpUaEp_' .
sha1(
$_SERVER['REMOTE_ADDR'] .
......@@ -260,12 +264,13 @@ function podcast_hit($podcastId, $episodeId, $bytesThreshold, $fileSize)
$episodeId
);
// Was this episode downloaded in the past 24h:
$downloadedBytes = cache($hashID);
$downloadedBytes = cache($episodeHashId);
// Rolling window is 24 hours (86400 seconds):
$ttl = 86400;
$rollingTTL = 86400;
if ($downloadedBytes) {
// In case it was already downloaded, TTL should be adjusted (rolling window is 24h since 1st download):
$ttl = cache()->getMetadata($hashID)['expire'] - time();
$rollingTTL =
cache()->getMetadata($episodeHashId)['expire'] - time();
} else {
// If it was never downloaded, that means that zero bytes were downloaded:
$downloadedBytes = 0;
......@@ -274,7 +279,7 @@ function podcast_hit($podcastId, $episodeId, $bytesThreshold, $fileSize)
// (Otherwise it means that this was already counted, therefore we don't do anything)
if ($downloadedBytes < $bytesThreshold) {
// If HTTP_RANGE is null we are downloading the complete file:
if (!isset($httpRange)) {
if (!$httpRange) {
$downloadedBytes = $fileSize;
} else {
// [0-1] bytes range requests are used (by Apple) to check that file exists and that 206 partial content is working.
......@@ -291,19 +296,44 @@ function podcast_hit($podcastId, $episodeId, $bytesThreshold, $fileSize)
}
}
// We save the number of downloaded bytes for this user and this episode:
cache()->save($hashID, $downloadedBytes, $ttl);
cache()->save($episodeHashId, $downloadedBytes, $rollingTTL);
// If more that 1mn was downloaded, we send that to the database:
// If more than 1 min was downloaded, that's a hit, so we send it to the database:
if ($downloadedBytes >= $bytesThreshold) {
$db = \Config\Database::connect();
$procedureName = $db->prefixTable('analytics_podcasts');
// We create a sha1 hash for this IP_Address+User_Agent+Podcast_ID (used to count unique listeners):
$listenerHashId =
'_IpUaPo_' .
sha1(
$_SERVER['REMOTE_ADDR'] .
'_' .
$_SERVER['HTTP_USER_AGENT'] .
'_' .
$podcastId
);
$newListener = 1;
// Has this listener already downloaded an episode today:
$downloadsByUser = cache($listenerHashId);
// We add one download
if ($downloadsByUser) {
$newListener = 0;
$downloadsByUser++;
} else {
$downloadsByUser = 1;
}
// Listener count is calculated from 00h00 to 23h59:
$midnightTTL = strtotime('tomorrow') - time();
// We save the download count for this user until midnight:
cache()->save($listenerHashId, $downloadsByUser, $midnightTTL);
$app = $session->get('player')['app'];
$device = $session->get('player')['device'];
$os = $session->get('player')['os'];
$bot = $session->get('player')['bot'];
$db->query("CALL $procedureName(?,?,?,?,?,?,?,?,?,?);", [
$db->query("CALL $procedureName(?,?,?,?,?,?,?,?,?,?,?);", [
$podcastId,
$episodeId,
$session->get('location')['countryCode'],
......@@ -314,10 +344,12 @@ function podcast_hit($podcastId, $episodeId, $bytesThreshold, $fileSize)
$device == null ? '' : $device,
$os == null ? '' : $os,
$bot == null ? 0 : $bot,
$newListener,
]);
}
}
} catch (\Exception $e) {
// If things go wrong the show must go on and the user must be able to download the file
log_message('critical', $e);
}
}
<?php
/**
* @copyright 2020 Podlibre
* @license https://www.gnu.org/licenses/agpl-3.0.en.html AGPL3
* @link https://castopod.org/
*/
// English label strings, keyed by identifier.
// NOTE(review): presumably used as chart titles on the analytics pages — confirm against the views.
return [
// Downloads / listeners labels.
'by_player' => 'Podcast downloads by player (for the past week)',
'unique_daily_listeners' => 'Daily unique listeners',
'unique_monthly_listeners' => 'Monthly unique listeners',
// Website usage label.
'by_browser' => 'Website usage by browser (for the past week)',
'podcast_by_day' => 'Podcast daily downloads',
'podcast_by_month' => 'Podcast monthly downloads',
'episodes_by_day' =>
'5 latest episodes downloads (during their first 60 days)',
'by_country' => 'Podcast downloads by country (for the past week)',
// Website visits label.
'by_domain' => 'Website visits by origin (for the past week)',
];
<?php
/**
* Class AnalyticsEpisodesByCountryModel
* Model for analytics_episodes_by_country table in database
* @copyright 2020 Podlibre
* @license https://www.gnu.org/licenses/agpl-3.0.en.html AGPL3
* @link https://castopod.org/
*/
namespace App\Models;
use CodeIgniter\Model;
/**
 * Model bound to the analytics_episodes_by_country table.
 * It only declares configuration and defines no methods of its own.
 */
class AnalyticsEpisodesByCountryModel extends Model
{
// Table this model is bound to.
protected $table = 'analytics_episodes_by_country';
// NOTE(review): confirm that this table really has an `id` column to use as primary key.
protected $primaryKey = 'id';
// No field is allowed for writes through this model.
// NOTE(review): rows are presumably written elsewhere (e.g. by a stored procedure) — confirm.
protected $allowedFields = [];
// Entity class used for returned rows.
protected $returnType = \App\Entities\AnalyticsEpisodesByCountry::class;
// Soft deletes and automatic timestamp columns are both disabled.
protected $useSoftDeletes = false;
protected $useTimestamps = false;
}
<?php
/**
* Class AnalyticsEpisodesByPlayerModel
* Model for analytics_episodes_by_player table in database
* @copyright 2020 Podlibre
* @license https://www.gnu.org/licenses/agpl-3.0.en.html AGPL3
* @link https://castopod.org/
*/
namespace App\Models;
use CodeIgniter\Model;
/**
 * Model bound to the analytics_episodes_by_player table.
 * It only declares configuration and defines no methods of its own.
 */
class AnalyticsEpisodesByPlayerModel extends Model
{
// Table this model is bound to.
protected $table = 'analytics_episodes_by_player';
// NOTE(review): confirm that this table really has an `id` column to use as primary key.
protected $primaryKey = 'id';
// No field is allowed for writes through this model.
// NOTE(review): rows are presumably written elsewhere (e.g. by a stored procedure) — confirm.
protected $allowedFields = [];
// Entity class used for returned rows.
protected $returnType = \App\Entities\AnalyticsEpisodesByPlayer::class;
// Soft deletes and automatic timestamp columns are both disabled.
protected $useSoftDeletes = false;
protected $useTimestamps = false;
}
<?php
/**
* Class AnalyticsPodcastsModel
* Model for analytics_podcasts table in database
* Class AnalyticsPodcastByCountryModel
* Model for analytics_podcasts_by_country table in database
* @copyright 2020 Podlibre
* @license https://www.gnu.org/licenses/agpl-3.0.en.html AGPL3
* @link https://castopod.org/
......@@ -12,44 +12,43 @@ namespace App\Models;
use CodeIgniter\Model;
class AnalyticsPodcastsModel extends Model
class AnalyticsPodcastByCountryModel extends Model
{
protected $table = 'analytics_podcasts';
protected $table = 'analytics_podcasts_by_country';
protected $allowedFields = [];
protected $returnType = \App\Entities\AnalyticsPodcasts::class;
protected $returnType = \App\Entities\AnalyticsPodcastsByCountry::class;
protected $useSoftDeletes = false;
protected $useTimestamps = false;
/**
* Gets all data for a podcast
* Gets country data for a podcast
*
* @param int $podcastId
*
* @return array
*/
public function getDataByDay(int $podcastId): array
public function getData(int $podcastId): array
{
if (!($found = cache("{$podcastId}_analytics_podcast_by_day"))) {
$found = $this->select('`date` as `labels`')
if (!($found = cache("{$podcastId}_analytics_podcast_by_country"))) {
$found = $this->select('`country_code` as `labels`')
->selectSum('`hits`', '`values`')
->groupBy('`country_code`')
->where([
'`podcast_id`' => $podcastId,
'`date` >' => date('Y-m-d', strtotime('-1 year')),
'`date` >' => date('Y-m-d', strtotime('-1 week')),
])
->groupBy('`labels`')
->orderBy('`labels``', 'ASC')
->orderBy('`labels`', 'ASC')
->findAll();
cache()->save(
"{$podcastId}_analytics_podcast_by_day",
"{$podcastId}_analytics_podcast_by_country",
$found,
14400
600
);
}
return $found;
}
}
<?php
/**
* Class AnalyticsPodcastsByEpisodeModel
* Class AnalyticsPodcastByEpisodeModel
* Model for analytics_podcasts_by_episode table in database
* @copyright 2020 Podlibre
* @license https://www.gnu.org/licenses/agpl-3.0.en.html AGPL3
......@@ -12,7 +12,7 @@ namespace App\Models;
use CodeIgniter\Model;
class AnalyticsPodcastsByEpisodeModel extends Model
class AnalyticsPodcastByEpisodeModel extends Model
{
protected $table = 'analytics_podcasts_by_episode';
......@@ -81,7 +81,7 @@ class AnalyticsPodcastsByEpisodeModel extends Model
cache()->save(
"{$podcastId}_analytics_podcast_by_episode_by_day",
$found,
14400
600
);
}
return $found;
......@@ -104,7 +104,7 @@ class AnalyticsPodcastsByEpisodeModel extends Model
cache()->save(
"{$podcastId}_{$episodeId}_analytics_podcast_by_episode_by_day",
$found,
14400
600
);
}
return $found;
......
<?php
/**
* Class AnalyticsPodcastsByPlayerModel
* Class AnalyticsPodcastByPlayerModel
* Model for analytics_podcasts_by_player table in database
* @copyright 2020 Podlibre
* @license https://www.gnu.org/licenses/agpl-3.0.en.html AGPL3
......@@ -12,7 +12,7 @@ namespace App\Models;
use CodeIgniter\Model;
class AnalyticsPodcastsByPlayerModel extends Model
class AnalyticsPodcastByPlayerModel extends Model
{
protected $table = 'analytics_podcasts_by_player';
......@@ -24,7 +24,7 @@ class AnalyticsPodcastsByPlayerModel extends Model
protected $useTimestamps = false;
/**
* Gets all data for a podcast
* Gets player data for a podcast
*
* @param int $podcastId
*
......@@ -41,18 +41,18 @@ class AnalyticsPodcastsByPlayerModel extends Model
->selectSum('`hits`', '`values`')
->where([
'`podcast_id`' => $podcastId,
'`app` !=' => null,
'`app` !=' => '',
'`bot`' => 0,
'`date` >' => date('Y-m-d', strtotime('-1 week')),
])
->groupBy('`labels`')
->orderBy('`values``', 'DESC')
->orderBy('`values`', 'DESC')
->findAll(10);
cache()->save(
"{$podcastId}_analytics_podcasts_by_player_by_app",
$found,
14400
600
);
}
......@@ -60,7 +60,7 @@ class AnalyticsPodcastsByPlayerModel extends Model
}
/**
* Gets all data for a podcast
* Gets device data for a podcast
*
* @param int $podcastId
*
......@@ -84,7 +84,7 @@ class AnalyticsPodcastsByPlayerModel extends Model
'`date` >' => date('Y-m-d', strtotime('-1 week')),
])
->groupBy('`ids`')
->orderBy('`values``', 'DESC')
->orderBy('`values`', 'DESC')
->findAll();
$foundOs = $this->select(
......@@ -98,7 +98,7 @@ class AnalyticsPodcastsByPlayerModel extends Model
'`date` >' => date('Y-m-d', strtotime('-1 week')),
])
->groupBy('`ids`')
->orderBy('`values``', 'DESC')
->orderBy('`values`', 'DESC')
->findAll();
$foundDevice = $this->select(
......@@ -112,7 +112,7 @@ class AnalyticsPodcastsByPlayerModel extends Model
'`date` >' => date('Y-m-d', strtotime('-1 week')),
])
->groupBy('`ids`')
->orderBy('`values``', 'DESC')
->orderBy('`values`', 'DESC')
->findAll();
$foundBot = $this->select(
......@@ -125,14 +125,14 @@ class AnalyticsPodcastsByPlayerModel extends Model
'`date` >' => date('Y-m-d', strtotime('-1 week')),
])
->groupBy('`ids`')
->orderBy('`values``', 'DESC')
->orderBy('`values`', 'DESC')
->findAll();
$found = array_merge($foundApp, $foundOs, $foundDevice, $foundBot);
cache()->save(
"{$podcastId}_analytics_podcasts_by_player_by_device",
$found,
14400
600
);
}
......
<?php
/**
* Class AnalyticsPodcastsByRegionModel
* Class AnalyticsPodcastByRegionModel
* Model for analytics_podcasts_by_region table in database
* @copyright 2020 Podlibre
* @license https://www.gnu.org/licenses/agpl-3.0.en.html AGPL3
......@@ -12,7 +12,7 @@ namespace App\Models;
use CodeIgniter\Model;
class AnalyticsPodcastsByRegionModel extends Model
class AnalyticsPodcastByRegionModel extends Model
{
protected $table = 'analytics_podcasts_by_region';
......@@ -22,4 +22,37 @@ class AnalyticsPodcastsByRegionModel extends Model
protected $useSoftDeletes = false;
protected $useTimestamps = false;
/**
 * Returns the hits of a podcast summed per region.
 *
 * Only rows whose date is later than the date one week ago are counted.
 * Rows are grouped by country code, region code, latitude and longitude,
 * and sorted by country code then region code. The result is cached for
 * 600 seconds under a key that includes the podcast id.
 *
 * @param int $podcastId
 *
 * @return array
 */
public function getData(int $podcastId): array
{
    $cacheKey = "{$podcastId}_analytics_podcast_by_region";

    // Serve the cached result when one is available.
    $found = cache($cacheKey);
    if ($found) {
        return $found;
    }

    // The same column list is used for both the selection and the grouping.
    $regionColumns = '`country_code`, `region_code`, `latitude`, `longitude`';
    $oneWeekAgo = date('Y-m-d', strtotime('-1 week'));

    $found = $this->select($regionColumns)
        ->selectSum('`hits`', '`values`')
        ->groupBy($regionColumns)
        ->where([
            '`podcast_id`' => $podcastId,
            '`date` >' => $oneWeekAgo,
        ])
        ->orderBy('`country_code`, `region_code`', 'ASC')
        ->findAll();

    cache()->save($cacheKey, $found, 600);

    return $found;
}
}
<?php
/**
* Class AnalyticsPodcastModel
* Model for analytics_podcasts table in database
* @copyright 2020 Podlibre
* @license https://www.gnu.org/licenses/agpl-3.0.en.html AGPL3
* @link https://castopod.org/
*/
namespace App\Models;
use CodeIgniter\Model;
class AnalyticsPodcastModel extends Model
{
protected $table = 'analytics_podcasts';
protected $allowedFields = [];
protected $returnType = \App\Entities\AnalyticsPodcasts::class;
protected $useSoftDeletes = false;
protected $useTimestamps = false;
/**
* Gets hits data for a podcast
*
* @param int $podcastId
*
* @return array
*/
public function getDataByDay(int $podcastId): array
{
if (!($found = cache("{$podcastId}_analytics_podcast_by_day"))) {
$found = $this->select('`date` as `labels`, `hits` as `values`')
->where([
'`podcast_id`' => $podcastId,
'`date` >' => date('Y-m-d', strtotime('-1 year')),
])