Get rid of archive table & introduce total_* tables for each metric.

This commit is contained in:
Danny van Kooten 2016-12-24 13:07:33 +02:00
parent c07b220c2d
commit 1b107a55d5
12 changed files with 209 additions and 216 deletions

View File

@ -5,9 +5,7 @@ This is a general draft document for thoughts and todo's, without any structure
### What's cooking?
- Never query `pageviews` table directly.
- Process tracking requests in bulk (Redis or in-memory)
- Split up `archive` table into specialised tables
- Process tracking requests in bulk (Redis or memory)
- Allow for multiple sites in same Ana instance
- Custom date range picker
- Settle on a better name than "Ana"

View File

@ -2,10 +2,11 @@ package api
import (
"encoding/json"
"net/http"
"github.com/dannyvankooten/ana/count"
"github.com/dannyvankooten/ana/db"
"github.com/dannyvankooten/ana/models"
"net/http"
)
// URL: /api/pageviews
@ -16,26 +17,15 @@ var GetPageviewsHandler = http.HandlerFunc(func(w http.ResponseWriter, r *http.R
SELECT
p.hostname,
p.path,
SUM(a.count) AS count,
"0" AS count_unique
FROM archive a
LEFT JOIN pages p ON p.id = a.value
WHERE metric = 'pageviews.page' AND UNIX_TIMESTAMP(a.date) <= ? AND UNIX_TIMESTAMP(a.date) >= ?
SUM(t.count) AS count,
SUM(t.count_unique) AS count_unique
FROM total_pageviews t
LEFT JOIN pages p ON p.id = t.page_id
WHERE UNIX_TIMESTAMP(t.date) <= ? AND UNIX_TIMESTAMP(t.date) >= ?
GROUP BY p.path, p.hostname
ORDER BY count DESC
LIMIT ?`)
// stmt, err := db.Conn.Prepare(`SELECT
// p.hostname,
// p.path,
// COUNT(*) AS pageviews,
// COUNT(DISTINCT(pv.visitor_id)) AS pageviews_unique
// FROM pageviews pv
// LEFT JOIN pages p ON pv.page_id = p.id
// WHERE UNIX_TIMESTAMP(pv.timestamp) <= ? AND UNIX_TIMESTAMP(pv.timestamp) >= ?
// GROUP BY p.path, p.hostname
// ORDER BY pageviews DESC
// LIMIT ?`)
checkError(err)
defer stmt.Close()

View File

@ -45,6 +45,7 @@ class Pageviews extends Component {
<td class="muted">{i+1}</td>
<td><a href={"//" + p.Hostname + p.Path}>{p.Path.substring(0, 50)}{p.Path.length > 50 ? '..' : ''}</a></td>
<td>{numbers.formatWithComma(p.Count)}</td>
<td>{numbers.formatWithComma(p.CountUnique)}</td>
</tr>
));
@ -58,6 +59,7 @@ class Pageviews extends Component {
<th>#</th>
<th>URL</th>
<th>Pageviews</th>
<th>Unique</th>
</tr>
</thead>
<tbody>{tableRows}</tbody>

View File

@ -10,9 +10,8 @@ func seedData() {
}
func archiveData() {
count.CreatePageviewArchives()
count.CreateVisitorArchives()
count.CreatePageviewArchivesPerPage()
count.CreatePageviewArchives()
count.CreateScreenArchives()
count.CreateLanguageArchives()
count.CreateBrowserArchives()

View File

@ -9,11 +9,11 @@ func Browsers(before int64, after int64, limit int, total float64) []Point {
// TODO: Calculate total instead of requiring it as a parameter.
stmt, err := db.Conn.Prepare(`
SELECT
a.value,
SUM(a.count) AS count
FROM archive a
WHERE a.metric = 'browsers' AND UNIX_TIMESTAMP(a.date) <= ? AND UNIX_TIMESTAMP(a.date) >= ?
GROUP BY a.value
t.value,
SUM(t.count_unique) AS count
FROM total_browser_names t
WHERE UNIX_TIMESTAMP(t.date) <= ? AND UNIX_TIMESTAMP(t.date) >= ?
GROUP BY t.value
ORDER BY count DESC
LIMIT ?`)
checkError(err)
@ -27,34 +27,20 @@ func Browsers(before int64, after int64, limit int, total float64) []Point {
// CreateBrowserArchives aggregates screen data into daily totals
func CreateBrowserArchives() {
stmt, err := db.Conn.Prepare(`
rows := queryTotalRows(`
SELECT
v.browser_name,
COUNT(DISTINCT(pv.visitor_id)) AS count,
v.browser_name,
COUNT(*) AS count,
COUNT(DISTINCT(pv.visitor_id)) AS count_unique,
DATE_FORMAT(pv.timestamp, "%Y-%m-%d") AS date_group
FROM pageviews pv
LEFT JOIN visitors v ON v.id = pv.visitor_id
WHERE NOT EXISTS(
SELECT a.id
FROM archive a
WHERE a.metric = 'browsers' AND a.date = DATE_FORMAT(pv.timestamp, "%Y-%m-%d")
SELECT t.id
FROM total_browser_names t
WHERE t.date = DATE_FORMAT(pv.timestamp, "%Y-%m-%d")
)
GROUP BY date_group, v.browser_name`)
checkError(err)
defer stmt.Close()
rows, err := stmt.Query()
checkError(err)
defer rows.Close()
db.Conn.Exec("START TRANSACTION")
for rows.Next() {
a := Archive{
Metric: "browsers",
}
err = rows.Scan(&a.Value, &a.Count, &a.Date)
checkError(err)
a.Save(db.Conn)
}
db.Conn.Exec("COMMIT")
processTotalRows(rows, "total_browser_names")
}

View File

@ -8,13 +8,14 @@ import (
"github.com/dannyvankooten/ana/db"
)
// The Archive model contains data for a daily metric total
type Archive struct {
ID int64
Metric string
Value string
Count int64
Date string
// Total represents a daily aggregated total for a metric
type Total struct {
ID int64
PageID int64
Value string
Count int64
CountUnique int64
Date string
}
// Point represents a data point, will always have a Label and Value
@ -24,12 +25,12 @@ type Point struct {
PercentageValue float64
}
// Save the Archive in the given database connection
func (a *Archive) Save(Conn *sql.DB) error {
stmt, err := db.Conn.Prepare(`INSERT INTO archive(
metric,
// Save the Total in the given database connection + table
func (t *Total) Save(Conn *sql.DB, table string) error {
stmt, err := db.Conn.Prepare(`INSERT INTO ` + table + `(
value,
count,
count_unique,
date
) VALUES( ?, ?, ?, ? )`)
if err != nil {
@ -38,12 +39,12 @@ func (a *Archive) Save(Conn *sql.DB) error {
defer stmt.Close()
result, err := stmt.Exec(
a.Metric,
a.Value,
a.Count,
a.Date,
t.Value,
t.Count,
t.CountUnique,
t.Date,
)
a.ID, _ = result.LastInsertId()
t.ID, _ = result.LastInsertId()
return err
}
@ -116,3 +117,26 @@ func fill(start int64, end int64, points []Point) []Point {
return newPoints
}
func queryTotalRows(sql string) *sql.Rows {
stmt, err := db.Conn.Prepare(sql)
checkError(err)
defer stmt.Close()
rows, err := stmt.Query()
checkError(err)
return rows
}
func processTotalRows(rows *sql.Rows, table string) {
db.Conn.Exec("START TRANSACTION")
for rows.Next() {
var t Total
err := rows.Scan(&t.Value, &t.Count, &t.CountUnique, &t.Date)
checkError(err)
t.Save(db.Conn, table)
}
db.Conn.Exec("COMMIT")
rows.Close()
}

View File

@ -9,11 +9,11 @@ func Languages(before int64, after int64, limit int, total float64) []Point {
// TODO: Calculate total instead of requiring it as a parameter.
stmt, err := db.Conn.Prepare(`
SELECT
a.value,
SUM(a.count) AS count
FROM archive a
WHERE a.metric = 'languages' AND UNIX_TIMESTAMP(a.date) <= ? AND UNIX_TIMESTAMP(a.date) >= ?
GROUP BY a.value
t.value,
SUM(t.count_unique) AS count
FROM total_browser_languages t
WHERE UNIX_TIMESTAMP(t.date) <= ? AND UNIX_TIMESTAMP(t.date) >= ?
GROUP BY t.value
ORDER BY count DESC
LIMIT ?`)
checkError(err)
@ -27,34 +27,20 @@ func Languages(before int64, after int64, limit int, total float64) []Point {
// CreateLanguageArchives aggregates screen data into daily totals
func CreateLanguageArchives() {
stmt, err := db.Conn.Prepare(`
rows := queryTotalRows(`
SELECT
v.browser_language,
COUNT(DISTINCT(pv.visitor_id)) AS count,
COUNT(*) AS count,
COUNT(DISTINCT(pv.visitor_id)) AS count_unique,
DATE_FORMAT(pv.timestamp, "%Y-%m-%d") AS date_group
FROM pageviews pv
LEFT JOIN visitors v ON v.id = pv.visitor_id
WHERE NOT EXISTS(
SELECT a.id
FROM archive a
WHERE a.metric = 'languages' AND a.date = DATE_FORMAT(pv.timestamp, "%Y-%m-%d")
SELECT t.id
FROM total_browser_languages t
WHERE t.date = DATE_FORMAT(pv.timestamp, "%Y-%m-%d")
)
GROUP BY date_group, v.browser_language`)
checkError(err)
defer stmt.Close()
rows, err := stmt.Query()
checkError(err)
defer rows.Close()
db.Conn.Exec("START TRANSACTION")
for rows.Next() {
a := Archive{
Metric: "languages",
}
err = rows.Scan(&a.Value, &a.Count, &a.Date)
checkError(err)
a.Save(db.Conn)
}
db.Conn.Exec("COMMIT")
processTotalRows(rows, "total_browser_languages")
}

View File

@ -9,9 +9,9 @@ func Pageviews(before int64, after int64) float64 {
// get total
stmt, err := db.Conn.Prepare(`
SELECT
SUM(a.count)
FROM archive a
WHERE a.metric = 'pageviews' AND UNIX_TIMESTAMP(a.date) <= ? AND UNIX_TIMESTAMP(a.date) >= ?`)
SUM(t.count)
FROM total_pageviews t
WHERE UNIX_TIMESTAMP(t.date) <= ? AND UNIX_TIMESTAMP(t.date) >= ?`)
checkError(err)
defer stmt.Close()
var total float64
@ -22,10 +22,10 @@ func Pageviews(before int64, after int64) float64 {
// PageviewsPerDay returns a slice of data points representing the number of pageviews per day
func PageviewsPerDay(before int64, after int64) []Point {
stmt, err := db.Conn.Prepare(`SELECT
SUM(a.count) AS count,
DATE_FORMAT(a.date, '%Y-%m-%d') AS date_group
FROM archive a
WHERE a.metric = 'pageviews' AND UNIX_TIMESTAMP(a.date) <= ? AND UNIX_TIMESTAMP(a.date) >= ?
SUM(t.count) AS count,
DATE_FORMAT(t.date, '%Y-%m-%d') AS date_group
FROM total_pageviews t
WHERE UNIX_TIMESTAMP(t.date) <= ? AND UNIX_TIMESTAMP(t.date) >= ?
GROUP BY date_group`)
checkError(err)
defer stmt.Close()
@ -47,50 +47,18 @@ func PageviewsPerDay(before int64, after int64) []Point {
return results
}
// CreatePageviewArchives aggregates pageview data into daily totals
// CreatePageviewArchives aggregates pageview data for each page into daily totals
func CreatePageviewArchives() {
stmt, err := db.Conn.Prepare(`
SELECT
COUNT(*) AS count,
DATE_FORMAT(pv.timestamp, "%Y-%m-%d") AS date_group
FROM pageviews pv
WHERE NOT EXISTS(
SELECT a.id
FROM archive a
WHERE a.metric = 'pageviews' AND a.date = DATE_FORMAT(pv.timestamp, "%Y-%m-%d")
)
GROUP BY date_group`)
checkError(err)
defer stmt.Close()
rows, err := stmt.Query()
checkError(err)
defer rows.Close()
db.Conn.Exec("START TRANSACTION")
for rows.Next() {
a := Archive{
Metric: "pageviews",
Value: "",
}
err = rows.Scan(&a.Count, &a.Date)
checkError(err)
a.Save(db.Conn)
}
db.Conn.Exec("COMMIT")
}
// CreatePageviewArchivesPerPage aggregates pageview data for each page into daily totals
func CreatePageviewArchivesPerPage() {
stmt, err := db.Conn.Prepare(`SELECT
pv.page_id,
COUNT(*) AS count,
COUNT(DISTINCT(pv.visitor_id)) AS count_unique,
DATE_FORMAT(pv.timestamp, "%Y-%m-%d") AS date_group
FROM pageviews pv
WHERE NOT EXISTS (
SELECT a.id
FROM archive a
WHERE a.metric = 'pageviews.page' AND a.date = DATE_FORMAT(pv.timestamp, "%Y-%m-%d") AND a.value = pv.page_id
SELECT t.id
FROM total_pageviews t
WHERE t.date = DATE_FORMAT(pv.timestamp, "%Y-%m-%d") AND t.page_id = pv.page_id
)
GROUP BY pv.page_id, date_group`)
checkError(err)
@ -100,12 +68,28 @@ func CreatePageviewArchivesPerPage() {
checkError(err)
defer rows.Close()
db.Conn.Exec("START TRANSACTION")
for rows.Next() {
a := Archive{
Metric: "pageviews.page",
}
err = rows.Scan(&a.Value, &a.Count, &a.Date)
var t Total
err = rows.Scan(&t.PageID, &t.Count, &t.CountUnique, &t.Date)
checkError(err)
stmt, err := db.Conn.Prepare(`INSERT INTO total_pageviews(
page_id,
count,
count_unique,
date
) VALUES( ?, ?, ?, ? )`)
checkError(err)
defer stmt.Close()
_, err = stmt.Exec(
t.PageID,
t.Count,
t.CountUnique,
t.Date,
)
checkError(err)
a.Save(db.Conn)
}
db.Conn.Exec("COMMIT")
}

View File

@ -9,11 +9,11 @@ func Referrers(before int64, after int64, limit int, total float64) []Point {
// TODO: Calculate total instead of requiring it as a parameter.
stmt, err := db.Conn.Prepare(`
SELECT
a.value,
SUM(a.count) AS count
FROM archive a
WHERE a.metric = 'referrers' AND UNIX_TIMESTAMP(a.date) <= ? AND UNIX_TIMESTAMP(a.date) >= ?
GROUP BY a.value
t.value,
SUM(t.count_unique) AS count
FROM total_referrers t
WHERE UNIX_TIMESTAMP(t.date) <= ? AND UNIX_TIMESTAMP(t.date) >= ?
GROUP BY t.value
ORDER BY count DESC
LIMIT ?`)
checkError(err)
@ -27,35 +27,21 @@ func Referrers(before int64, after int64, limit int, total float64) []Point {
// CreateReferrerArchives aggregates screen data into daily totals
func CreateReferrerArchives() {
stmt, err := db.Conn.Prepare(`
rows := queryTotalRows(`
SELECT
pv.referrer_url,
COUNT(DISTINCT(pv.visitor_id)) AS count,
COUNT(*) AS count,
COUNT(DISTINCT(pv.visitor_id)) AS count_unique,
DATE_FORMAT(pv.timestamp, "%Y-%m-%d") AS date_group
FROM pageviews pv
WHERE pv.referrer_url IS NOT NULL
AND pv.referrer_url != ''
AND pv.referrer_url != ''
AND NOT EXISTS(
SELECT a.id
FROM archive a
WHERE a.metric = 'referrers' AND a.date = DATE_FORMAT(pv.timestamp, "%Y-%m-%d")
SELECT t.id
FROM total_referrers t
WHERE t.date = DATE_FORMAT(pv.timestamp, "%Y-%m-%d")
)
GROUP BY date_group, pv.referrer_url`)
checkError(err)
defer stmt.Close()
rows, err := stmt.Query()
checkError(err)
defer rows.Close()
db.Conn.Exec("START TRANSACTION")
for rows.Next() {
a := Archive{
Metric: "referrers",
}
err = rows.Scan(&a.Value, &a.Count, &a.Date)
checkError(err)
a.Save(db.Conn)
}
db.Conn.Exec("COMMIT")
processTotalRows(rows, "total_referrers")
}

View File

@ -9,11 +9,11 @@ func Screens(before int64, after int64, limit int, total float64) []Point {
// TODO: Calculate total instead of requiring it as a parameter.
stmt, err := db.Conn.Prepare(`
SELECT
a.value,
SUM(a.count) AS count
FROM archive a
WHERE a.metric = 'screens' AND UNIX_TIMESTAMP(a.date) <= ? AND UNIX_TIMESTAMP(a.date) >= ?
GROUP BY a.value
t.value,
SUM(t.count_unique) AS count
FROM total_screens t
WHERE UNIX_TIMESTAMP(t.date) <= ? AND UNIX_TIMESTAMP(t.date) >= ?
GROUP BY t.value
ORDER BY count DESC
LIMIT ?`)
checkError(err)
@ -27,34 +27,20 @@ func Screens(before int64, after int64, limit int, total float64) []Point {
// CreateScreenArchives aggregates screen data into daily totals
func CreateScreenArchives() {
stmt, err := db.Conn.Prepare(`
rows := queryTotalRows(`
SELECT
v.screen_resolution,
COUNT(DISTINCT(pv.visitor_id)) AS count,
COUNT(*) AS count,
COUNT(DISTINCT(pv.visitor_id)) AS count_unique,
DATE_FORMAT(pv.timestamp, "%Y-%m-%d") AS date_group
FROM pageviews pv
LEFT JOIN visitors v ON v.id = pv.visitor_id
WHERE NOT EXISTS(
SELECT a.id
FROM archive a
WHERE a.metric = 'screens' AND a.date = DATE_FORMAT(pv.timestamp, "%Y-%m-%d")
SELECT t.id
FROM total_screens t
WHERE t.date = DATE_FORMAT(pv.timestamp, "%Y-%m-%d")
)
GROUP BY date_group, v.screen_resolution`)
checkError(err)
defer stmt.Close()
rows, err := stmt.Query()
checkError(err)
defer rows.Close()
db.Conn.Exec("START TRANSACTION")
for rows.Next() {
a := Archive{
Metric: "screens",
}
err = rows.Scan(&a.Value, &a.Count, &a.Date)
checkError(err)
a.Save(db.Conn)
}
db.Conn.Exec("COMMIT")
processTotalRows(rows, "total_screens")
}

View File

@ -4,14 +4,16 @@ import (
"github.com/dannyvankooten/ana/db"
)
// TODO: Convert to total_visitors table.
// Visitors returns the number of total visitors between the given timestamps
func Visitors(before int64, after int64) float64 {
// get total
stmt, err := db.Conn.Prepare(`
SELECT
SUM(a.count)
FROM archive a
WHERE a.metric = 'visitors' AND UNIX_TIMESTAMP(a.date) <= ? AND UNIX_TIMESTAMP(a.date) >= ?`)
SUM(t.count)
FROM total_visitors t
WHERE UNIX_TIMESTAMP(t.date) <= ? AND UNIX_TIMESTAMP(t.date) >= ?`)
checkError(err)
defer stmt.Close()
var total float64
@ -22,10 +24,10 @@ func Visitors(before int64, after int64) float64 {
// VisitorsPerDay returns a point slice containing visitor data per day
func VisitorsPerDay(before int64, after int64) []Point {
stmt, err := db.Conn.Prepare(`SELECT
SUM(a.count) AS count,
DATE_FORMAT(a.date, '%Y-%m-%d') AS date_group
FROM archive a
WHERE a.metric = 'visitors' AND UNIX_TIMESTAMP(a.date) <= ? AND UNIX_TIMESTAMP(a.date) >= ?
SUM(t.count) AS count,
DATE_FORMAT(t.date, '%Y-%m-%d') AS date_group
FROM total_visitors t
WHERE UNIX_TIMESTAMP(t.date) <= ? AND UNIX_TIMESTAMP(t.date) >= ?
GROUP BY date_group`)
checkError(err)
defer stmt.Close()
@ -55,9 +57,9 @@ func CreateVisitorArchives() {
DATE_FORMAT(pv.timestamp, "%Y-%m-%d") AS date_group
FROM pageviews pv
WHERE NOT EXISTS(
SELECT a.id
FROM archive a
WHERE a.metric = 'visitors' AND a.date = DATE_FORMAT(pv.timestamp, "%Y-%m-%d")
SELECT t.id
FROM total_visitors t
WHERE t.date = DATE_FORMAT(pv.timestamp, "%Y-%m-%d")
)
GROUP BY date_group`)
checkError(err)
@ -69,13 +71,22 @@ func CreateVisitorArchives() {
db.Conn.Exec("START TRANSACTION")
for rows.Next() {
a := Archive{
Metric: "visitors",
Value: "",
}
err = rows.Scan(&a.Count, &a.Date)
var t Total
err = rows.Scan(&t.Count, &t.Date)
checkError(err)
stmt, err := db.Conn.Prepare(`INSERT INTO total_visitors(
count,
date
) VALUES( ?, ? )`)
checkError(err)
defer stmt.Close()
_, err = stmt.Exec(
t.Count,
t.Date,
)
checkError(err)
a.Save(db.Conn)
}
db.Conn.Exec("COMMIT")
}

View File

@ -36,14 +36,55 @@ CREATE TABLE users (
`password` VARCHAR(255) NOT NULL
);
CREATE TABLE `archive` (
CREATE TABLE `total_pageviews` (
`id` INTEGER UNSIGNED AUTO_INCREMENT PRIMARY KEY NOT NULL,
`metric` VARCHAR(31) NOT NULL,
`value` VARCHAR(31) NULL,
`count` INTEGER UNSIGNED NOT NULL,
`page_id` INTEGER UNSIGNED NOT NULL,
`count` INTEGER UNSIGNED NOT NULL DEFAULT 0,
`count_unique` INTEGER UNSIGNED NOT NULL DEFAULT 0,
`date` DATE NOT NULL
);
CREATE INDEX archive_metric ON archive(`metric`);
CREATE INDEX archive_metric_count ON archive(`metric`, `count`);
CREATE INDEX archive_metric_date ON archive(`metric`, `date`);
CREATE TABLE `total_visitors` (
`id` INTEGER UNSIGNED AUTO_INCREMENT PRIMARY KEY NOT NULL,
`count` INTEGER UNSIGNED NOT NULL DEFAULT 0,
`date` DATE NOT NULL
);
CREATE TABLE `total_screens` (
`id` INTEGER UNSIGNED AUTO_INCREMENT PRIMARY KEY NOT NULL,
`value` VARCHAR(12) NOT NULL,
`count` INTEGER UNSIGNED NOT NULL DEFAULT 0,
`count_unique` INTEGER UNSIGNED NOT NULL DEFAULT 0,
`date` DATE NOT NULL
);
CREATE TABLE `total_browser_names` (
`id` INTEGER UNSIGNED AUTO_INCREMENT PRIMARY KEY NOT NULL,
`value` VARCHAR(50) NOT NULL,
`count` INTEGER UNSIGNED NOT NULL DEFAULT 0,
`count_unique` INTEGER UNSIGNED NOT NULL DEFAULT 0,
`date` DATE NOT NULL
);
CREATE TABLE `total_browser_languages` (
`id` INTEGER UNSIGNED AUTO_INCREMENT PRIMARY KEY NOT NULL,
`value` VARCHAR(12) NOT NULL,
`count` INTEGER UNSIGNED NOT NULL DEFAULT 0,
`count_unique` INTEGER UNSIGNED NOT NULL DEFAULT 0,
`date` DATE NOT NULL
);
CREATE TABLE `total_referrers` (
`id` INTEGER UNSIGNED AUTO_INCREMENT PRIMARY KEY NOT NULL,
`value` TEXT NOT NULL,
`count` INTEGER UNSIGNED NOT NULL DEFAULT 0,
`count_unique` INTEGER UNSIGNED NOT NULL DEFAULT 0,
`date` DATE NOT NULL
);
CREATE INDEX total_referrers_date ON total_referrers(`date`);
CREATE INDEX total_pageviews_date ON total_pageviews(`date`);
CREATE INDEX total_screens_date ON total_screens(`date`);
CREATE INDEX total_browser_names_date ON total_browser_names(`date`);
CREATE INDEX total_browser_languages_date ON total_browser_languages(`date`);
CREATE INDEX total_visitors_date ON total_visitors(`date`);