Store a `last_archived` option after saving the aggregated daily totals, and use it as the starting point when the `-archive_data` command is re-run. This speeds up the archive command considerably and allows it to run multiple times a day.

This commit is contained in:
Danny van Kooten 2016-12-24 15:14:25 +02:00
parent 4454b044d4
commit 3a99939800
11 changed files with 100 additions and 77 deletions

View File

@ -2,6 +2,9 @@ package commands
import ( import (
"flag" "flag"
"github.com/dannyvankooten/ana/count"
"github.com/dannyvankooten/ana/db"
) )
var runCreateUserCommand bool var runCreateUserCommand bool
@ -22,7 +25,7 @@ func Parse() {
flag.BoolVar(&runDeleteUserCommand, "delete_user", false, "Deletes a user") flag.BoolVar(&runDeleteUserCommand, "delete_user", false, "Deletes a user")
flag.BoolVar(&runStartServerCommand, "start_server", true, "Start the API web server, listen on -port") flag.BoolVar(&runStartServerCommand, "start_server", true, "Start the API web server, listen on -port")
flag.BoolVar(&runSeedDataCommand, "seed_data", false, "Seed the database -n times") flag.BoolVar(&runSeedDataCommand, "seed_data", false, "Seed the database -n times")
flag.BoolVar(&runArchiveDataCommand, "archive_data", false, "Archives data into daily aggregated totals") flag.BoolVar(&runArchiveDataCommand, "archive_data", false, "Aggregates data into daily totals")
flag.StringVar(&emailArg, "email", "", "Email address") flag.StringVar(&emailArg, "email", "", "Email address")
flag.StringVar(&passwordArg, "password", "", "Password") flag.StringVar(&passwordArg, "password", "", "Password")
flag.IntVar(&idArg, "id", 0, "Object ID") flag.IntVar(&idArg, "id", 0, "Object ID")
@ -38,9 +41,9 @@ func Run() {
} else if runDeleteUserCommand { } else if runDeleteUserCommand {
deleteUser() deleteUser()
} else if runSeedDataCommand { } else if runSeedDataCommand {
seedData() db.Seed(nArg)
} else if runArchiveDataCommand { } else if runArchiveDataCommand {
archiveData() count.Archive()
} else if runStartServerCommand { } else if runStartServerCommand {
startServer(portArg) startServer(portArg)
} }

View File

@ -1,19 +0,0 @@
package commands
import (
"github.com/dannyvankooten/ana/count"
"github.com/dannyvankooten/ana/db"
)
// seedData seeds the database by passing nArg to db.Seed.
// NOTE(review): nArg is presumably the value of the -n flag; confirm in Parse.
func seedData() {
db.Seed(nArg)
}
// archiveData runs every archive builder of the count package, one after the
// other, in a fixed order: visitors, pageviews, screens, languages, browsers
// and referrers.
func archiveData() {
	builders := []func(){
		count.CreateVisitorArchives,
		count.CreatePageviewArchives,
		count.CreateScreenArchives,
		count.CreateLanguageArchives,
		count.CreateBrowserArchives,
		count.CreateReferrerArchives,
	}
	for _, build := range builders {
		build()
	}
}

View File

@ -25,8 +25,8 @@ func Browsers(before int64, after int64, limit int) []Point {
return newPointSlice(rows, total) return newPointSlice(rows, total)
} }
// CreateBrowserArchives aggregates browser data into daily totals // CreateBrowserTotals aggregates browser data into daily totals
func CreateBrowserArchives() { func CreateBrowserTotals(since int64) {
rows := queryTotalRows(` rows := queryTotalRows(`
SELECT SELECT
v.browser_name, v.browser_name,
@ -35,12 +35,8 @@ func CreateBrowserArchives() {
DATE_FORMAT(pv.timestamp, "%Y-%m-%d") AS date_group DATE_FORMAT(pv.timestamp, "%Y-%m-%d") AS date_group
FROM pageviews pv FROM pageviews pv
LEFT JOIN visitors v ON v.id = pv.visitor_id LEFT JOIN visitors v ON v.id = pv.visitor_id
WHERE NOT EXISTS( WHERE UNIX_TIMESTAMP(pv.timestamp) > ?
SELECT t.id GROUP BY date_group, v.browser_name`, since)
FROM total_browser_names t
WHERE t.date = DATE_FORMAT(pv.timestamp, "%Y-%m-%d")
)
GROUP BY date_group, v.browser_name`)
processTotalRows(rows, "total_browser_names") processTotalRows(rows, "total_browser_names")
} }

View File

@ -3,9 +3,12 @@ package count
import ( import (
"database/sql" "database/sql"
"log" "log"
"strconv"
"time" "time"
"fmt"
"github.com/dannyvankooten/ana/db" "github.com/dannyvankooten/ana/db"
"github.com/dannyvankooten/ana/options"
) )
// Total represents a daily aggregated total for a metric // Total represents a daily aggregated total for a metric
@ -25,6 +28,27 @@ type Point struct {
PercentageValue float64 PercentageValue float64
} }
// getLastArchivedTime returns the Unix timestamp stored in the "last_archived"
// option. It returns 0 when the option is empty or cannot be parsed as a
// base-10 int64, so that callers fall back to processing everything.
func getLastArchivedTime() int64 {
	value := options.Get("last_archived")

	ts, err := strconv.ParseInt(value, 10, 64)
	if err != nil {
		// On a range error ParseInt returns the nearest int64 bound rather than 0;
		// using that as the "since" timestamp would filter out every row.
		return 0
	}
	return ts
}
// Archive aggregates data into daily totals.
//
// It reads the "last_archived" timestamp, runs each Create*Totals step with
// that timestamp, and finally stores the current Unix time as the new
// "last_archived" option value.
func Archive() {
	since := getLastArchivedTime()

	steps := []func(int64){
		CreateVisitorTotals,
		CreatePageviewTotals,
		CreateScreenTotals,
		CreateLanguageTotals,
		CreateBrowserTotals,
		CreateReferrerTotals,
	}
	for _, step := range steps {
		step(since)
	}

	// NOTE(review): the new timestamp is taken after all steps have run, so rows
	// recorded while a run is in progress may not be picked up by the next
	// run; confirm whether that is acceptable.
	now := time.Now().Unix()
	checkError(options.Set("last_archived", fmt.Sprintf("%d", now)))
}
// Save the Total in the given database connection + table // Save the Total in the given database connection + table
func (t *Total) Save(Conn *sql.DB, table string) error { func (t *Total) Save(Conn *sql.DB, table string) error {
stmt, err := db.Conn.Prepare(`INSERT INTO ` + table + `( stmt, err := db.Conn.Prepare(`INSERT INTO ` + table + `(
@ -118,12 +142,12 @@ func fill(start int64, end int64, points []Point) []Point {
return newPoints return newPoints
} }
func queryTotalRows(sql string) *sql.Rows { func queryTotalRows(sql string, lastArchived int64) *sql.Rows {
stmt, err := db.Conn.Prepare(sql) stmt, err := db.Conn.Prepare(sql)
checkError(err) checkError(err)
defer stmt.Close() defer stmt.Close()
rows, err := stmt.Query() rows, err := stmt.Query(lastArchived)
checkError(err) checkError(err)
return rows return rows
} }

View File

@ -27,8 +27,8 @@ func Languages(before int64, after int64, limit int) []Point {
return newPointSlice(rows, total) return newPointSlice(rows, total)
} }
// CreateLanguageArchives aggregates language data into daily totals // CreateLanguageTotals aggregates language data into daily totals
func CreateLanguageArchives() { func CreateLanguageTotals(since int64) {
rows := queryTotalRows(` rows := queryTotalRows(`
SELECT SELECT
v.browser_language, v.browser_language,
@ -37,12 +37,8 @@ func CreateLanguageArchives() {
DATE_FORMAT(pv.timestamp, "%Y-%m-%d") AS date_group DATE_FORMAT(pv.timestamp, "%Y-%m-%d") AS date_group
FROM pageviews pv FROM pageviews pv
LEFT JOIN visitors v ON v.id = pv.visitor_id LEFT JOIN visitors v ON v.id = pv.visitor_id
WHERE NOT EXISTS( WHERE UNIX_TIMESTAMP(pv.timestamp) > ?
SELECT t.id GROUP BY date_group, v.browser_language`, since)
FROM total_browser_languages t
WHERE t.date = DATE_FORMAT(pv.timestamp, "%Y-%m-%d")
)
GROUP BY date_group, v.browser_language`)
processTotalRows(rows, "total_browser_languages") processTotalRows(rows, "total_browser_languages")
} }

View File

@ -1,8 +1,6 @@
package count package count
import ( import "github.com/dannyvankooten/ana/db"
"github.com/dannyvankooten/ana/db"
)
// Pageviews returns the total number of pageviews between the given timestamps // Pageviews returns the total number of pageviews between the given timestamps
func Pageviews(before int64, after int64) float64 { func Pageviews(before int64, after int64) float64 {
@ -47,24 +45,20 @@ func PageviewsPerDay(before int64, after int64) []Point {
return results return results
} }
// CreatePageviewArchives aggregates pageview data for each page into daily totals // CreatePageviewTotals aggregates pageview data for each page into daily totals
func CreatePageviewArchives() { func CreatePageviewTotals(since int64) {
stmt, err := db.Conn.Prepare(`SELECT stmt, err := db.Conn.Prepare(`SELECT
pv.page_id, pv.page_id,
COUNT(*) AS count, COUNT(*) AS count,
COUNT(DISTINCT(pv.visitor_id)) AS count_unique, COUNT(DISTINCT(pv.visitor_id)) AS count_unique,
DATE_FORMAT(pv.timestamp, "%Y-%m-%d") AS date_group DATE_FORMAT(pv.timestamp, "%Y-%m-%d") AS date_group
FROM pageviews pv FROM pageviews pv
WHERE NOT EXISTS ( WHERE UNIX_TIMESTAMP(pv.timestamp) > ?
SELECT t.id
FROM total_pageviews t
WHERE t.date = DATE_FORMAT(pv.timestamp, "%Y-%m-%d") AND t.page_id = pv.page_id
)
GROUP BY pv.page_id, date_group`) GROUP BY pv.page_id, date_group`)
checkError(err) checkError(err)
defer stmt.Close() defer stmt.Close()
rows, err := stmt.Query() rows, err := stmt.Query(since)
checkError(err) checkError(err)
defer rows.Close() defer rows.Close()

View File

@ -5,8 +5,7 @@ import (
) )
// Referrers returns a point slice containing referrer data per referrer URL // Referrers returns a point slice containing referrer data per referrer URL
func Referrers(before int64, after int64, limit int, total float64) []Point { func Referrers(before int64, after int64, limit int) []Point {
// TODO: Calculate total instead of requiring it as a parameter.
stmt, err := db.Conn.Prepare(` stmt, err := db.Conn.Prepare(`
SELECT SELECT
t.value, t.value,
@ -27,8 +26,8 @@ func Referrers(before int64, after int64, limit int, total float64) []Point {
return newPointSlice(rows, total) return newPointSlice(rows, total)
} }
// CreateReferrerArchives aggregates referrer data into daily totals // CreateReferrerTotals aggregates referrer data into daily totals
func CreateReferrerArchives() { func CreateReferrerTotals(since int64) {
rows := queryTotalRows(` rows := queryTotalRows(`
SELECT SELECT
pv.referrer_url, pv.referrer_url,
@ -38,12 +37,8 @@ func CreateReferrerArchives() {
FROM pageviews pv FROM pageviews pv
WHERE pv.referrer_url IS NOT NULL WHERE pv.referrer_url IS NOT NULL
AND pv.referrer_url != '' AND pv.referrer_url != ''
AND NOT EXISTS( AND UNIX_TIMESTAMP(pv.timestamp) > ?
SELECT t.id GROUP BY date_group, pv.referrer_url`, since)
FROM total_referrers t
WHERE t.date = DATE_FORMAT(pv.timestamp, "%Y-%m-%d")
)
GROUP BY date_group, pv.referrer_url`)
processTotalRows(rows, "total_referrers") processTotalRows(rows, "total_referrers")
} }

View File

@ -27,8 +27,8 @@ func Screens(before int64, after int64, limit int) []Point {
return newPointSlice(rows, total) return newPointSlice(rows, total)
} }
// CreateScreenArchives aggregates screen data into daily totals // CreateScreenTotals aggregates screen data into daily totals
func CreateScreenArchives() { func CreateScreenTotals(since int64) {
rows := queryTotalRows(` rows := queryTotalRows(`
SELECT SELECT
v.screen_resolution, v.screen_resolution,
@ -37,12 +37,8 @@ func CreateScreenArchives() {
DATE_FORMAT(pv.timestamp, "%Y-%m-%d") AS date_group DATE_FORMAT(pv.timestamp, "%Y-%m-%d") AS date_group
FROM pageviews pv FROM pageviews pv
LEFT JOIN visitors v ON v.id = pv.visitor_id LEFT JOIN visitors v ON v.id = pv.visitor_id
WHERE NOT EXISTS( WHERE UNIX_TIMESTAMP(pv.timestamp) > ?
SELECT t.id GROUP BY date_group, v.screen_resolution`, since)
FROM total_screens t
WHERE t.date = DATE_FORMAT(pv.timestamp, "%Y-%m-%d")
)
GROUP BY date_group, v.screen_resolution`)
processTotalRows(rows, "total_screens") processTotalRows(rows, "total_screens")
} }

View File

@ -57,23 +57,19 @@ func VisitorsPerDay(before int64, after int64) []Point {
return results return results
} }
// CreateVisitorArchives aggregates visitor data into daily totals // CreateVisitorTotals aggregates visitor data into daily totals
func CreateVisitorArchives() { func CreateVisitorTotals(since int64) {
stmt, err := db.Conn.Prepare(` stmt, err := db.Conn.Prepare(`
SELECT SELECT
COUNT(DISTINCT(pv.visitor_id)) AS count, COUNT(DISTINCT(pv.visitor_id)) AS count,
DATE_FORMAT(pv.timestamp, "%Y-%m-%d") AS date_group DATE_FORMAT(pv.timestamp, "%Y-%m-%d") AS date_group
FROM pageviews pv FROM pageviews pv
WHERE NOT EXISTS( WHERE UNIX_TIMESTAMP(pv.timestamp) > ?
SELECT t.id
FROM total_visitors t
WHERE t.date = DATE_FORMAT(pv.timestamp, "%Y-%m-%d")
)
GROUP BY date_group`) GROUP BY date_group`)
checkError(err) checkError(err)
defer stmt.Close() defer stmt.Close()
rows, err := stmt.Query() rows, err := stmt.Query(since)
checkError(err) checkError(err)
defer rows.Close() defer rows.Close()

29
options/options.go Normal file
View File

@ -0,0 +1,29 @@
package options
import (
"github.com/dannyvankooten/ana/db"
)
// Get returns an option value by its name.
//
// It returns an empty string when no option with that name exists or when the
// lookup fails for any other reason.
func Get(name string) string {
	stmt, err := db.Conn.Prepare(`SELECT o.value FROM options o WHERE o.name = ?`)
	if err != nil {
		// Without this check the nil statement would be dereferenced below.
		return ""
	}
	defer stmt.Close()

	var value string
	if err := stmt.QueryRow(name).Scan(&value); err != nil {
		// Covers both "no such option" and genuine query errors.
		return ""
	}
	return value
}
// Set stores value under the option called name. The statement inserts a new
// row and, on a duplicate key, updates the value of the existing row instead.
// It returns any error from preparing or executing the statement.
func Set(name string, value string) error {
	const upsert = `INSERT INTO options(name, value) VALUES(?, ?) ON DUPLICATE KEY UPDATE value = ?`

	stmt, prepErr := db.Conn.Prepare(upsert)
	if prepErr != nil {
		return prepErr
	}
	defer stmt.Close()

	// value is bound twice: once for the INSERT and once for the UPDATE clause.
	if _, execErr := stmt.Exec(name, value, value); execErr != nil {
		return execErr
	}
	return nil
}

13
options/options_test.go Normal file
View File

@ -0,0 +1,13 @@
package options
import (
"testing"
)
// TestGet is a placeholder for tests of Get; it currently makes no assertions.
func TestGet(t *testing.T) {
// TODO: add assertions for Get.
}
// TestSet is a placeholder for tests of Set; it currently makes no assertions.
func TestSet(t *testing.T) {
// TODO: add assertions for Set.
}