Fix incorrect analytics count. Closes #712.

This commit is contained in:
Kailash Nadh 2022-07-30 19:01:20 +05:30
parent fb3c429116
commit 3550d5453d
3 changed files with 43 additions and 30 deletions

View file

@ -283,19 +283,25 @@ func readQueries(sqlFile string, db *sqlx.DB, fs stuffbin.FileSystem) goyesql.Qu
// prepareQueries queries prepares a query map and returns a *Queries
func prepareQueries(qMap goyesql.Queries, db *sqlx.DB, ko *koanf.Koanf) *models.Queries {
// The campaign view/click count queries have a COUNT(%s) placeholder that should either
// be substituted with * to pull non-unique rows when individual subscriber tracking is off
// as all subscriber_ids will be null, or with DISTINCT subscriber_id when tracking is on
// to only pull unique rows per subscriber.
sel := "*"
var (
countQuery = "get-campaign-analytics-counts"
linkSel = "*"
)
if ko.Bool("privacy.individual_tracking") {
sel = "DISTINCT subscriber_id"
countQuery = "get-campaign-analytics-unique-counts"
linkSel = "DISTINCT subscriber_id"
}
keys := []string{"get-campaign-view-counts", "get-campaign-click-counts", "get-campaign-link-counts"}
for _, k := range keys {
qMap[k].Query = fmt.Sprintf(qMap[k].Query, sel)
// These don't exist in the SQL file but are in the queries struct to be prepared.
qMap["get-campaign-view-counts"] = &goyesql.Query{
Query: fmt.Sprintf(qMap[countQuery].Query, "campaign_views"),
Tags: map[string]string{"name": "get-campaign-view-counts"},
}
qMap["get-campaign-click-counts"] = &goyesql.Query{
Query: fmt.Sprintf(qMap[countQuery].Query, "link_clicks"),
Tags: map[string]string{"name": "get-campaign-click-counts"},
}
qMap["get-campaign-link-counts"].Query = fmt.Sprintf(qMap["get-campaign-link-counts"].Query, linkSel)
// Scan and prepare all queries.
var q models.Queries

View file

@ -56,10 +56,16 @@ type Queries struct {
GetCampaignForPreview *sqlx.Stmt `query:"get-campaign-for-preview"`
GetCampaignStats *sqlx.Stmt `query:"get-campaign-stats"`
GetCampaignStatus *sqlx.Stmt `query:"get-campaign-status"`
// These two queries are read as strings and based on settings.individual_tracking=on/off,
// are interpolated and copied to view and click counts. Same query, different tables.
GetCampaignAnalyticsCounts string `query:"get-campaign-analytics-counts"`
GetCampaignAnalyticsCountsUnique string `query:"get-campaign-analytics-unique-counts"`
GetCampaignViewCounts *sqlx.Stmt `query:"get-campaign-view-counts"`
GetCampaignClickCounts *sqlx.Stmt `query:"get-campaign-click-counts"`
GetCampaignLinkCounts *sqlx.Stmt `query:"get-campaign-link-counts"`
GetCampaignBounceCounts *sqlx.Stmt `query:"get-campaign-bounce-counts"`
NextCampaigns *sqlx.Stmt `query:"next-campaigns"`
NextCampaignSubscribers *sqlx.Stmt `query:"next-campaign-subscribers"`
GetOneCampaignSubscriber *sqlx.Stmt `query:"get-one-campaign-subscriber"`

View file

@ -569,27 +569,28 @@ u AS (
)
SELECT * FROM camps;
-- name: get-campaign-view-counts
-- raw: true
-- %s = * or DISTINCT subscriber_id (prepared based on based on individual tracking=on/off). Prepared on boot.
-- name: get-campaign-analytics-unique-counts
WITH intval AS (
-- For intervals < a week, aggregate counts hourly, otherwise daily.
SELECT CASE WHEN (EXTRACT (EPOCH FROM ($3::TIMESTAMP - $2::TIMESTAMP)) / 86400) >= 7 THEN 'day' ELSE 'hour' END
)
SELECT campaign_id, COUNT(%s) AS "count", DATE_TRUNC((SELECT * FROM intval), created_at) AS "timestamp"
FROM campaign_views
),
uniqIDs AS (
SELECT DISTINCT ON(subscriber_id) subscriber_id, campaign_id, DATE_TRUNC((SELECT * FROM intval), created_at) AS "timestamp"
FROM %s
WHERE campaign_id=ANY($1) AND created_at >= $2 AND created_at <= $3
GROUP BY campaign_id, "timestamp" ORDER BY "timestamp" ASC;
ORDER BY subscriber_id, "timestamp"
)
SELECT COUNT(*) AS "count", campaign_id, "timestamp"
FROM uniqIDs GROUP BY campaign_id, "timestamp";
-- name: get-campaign-click-counts
-- name: get-campaign-analytics-counts
-- raw: true
-- %s = * or DISTINCT subscriber_id (prepared based on based on individual tracking=on/off). Prepared on boot.
WITH intval AS (
-- For intervals < a week, aggregate counts hourly, otherwise daily.
SELECT CASE WHEN (EXTRACT (EPOCH FROM ($3::TIMESTAMP - $2::TIMESTAMP)) / 86400) >= 7 THEN 'day' ELSE 'hour' END
)
SELECT campaign_id, COUNT(%s) AS "count", DATE_TRUNC((SELECT * FROM intval), created_at) AS "timestamp"
FROM link_clicks
SELECT campaign_id, COUNT(*) AS "count", DATE_TRUNC((SELECT * FROM intval), created_at) AS "timestamp"
FROM %s
WHERE campaign_id=ANY($1) AND created_at >= $2 AND created_at <= $3
GROUP BY campaign_id, "timestamp" ORDER BY "timestamp" ASC;