Store `last_archived` option after storing aggregated daily totals & use that when re-running `-archive_data` command. This speeds up the archive command considerably & allows for multiple runs a day.

This commit is contained in:
Danny van Kooten 2016-12-24 15:14:25 +02:00
parent 4454b044d4
commit 3a99939800
11 changed files with 100 additions and 77 deletions

View File

@ -2,6 +2,9 @@ package commands
import (
"flag"
"github.com/dannyvankooten/ana/count"
"github.com/dannyvankooten/ana/db"
)
var runCreateUserCommand bool
@ -22,7 +25,7 @@ func Parse() {
flag.BoolVar(&runDeleteUserCommand, "delete_user", false, "Deletes a user")
flag.BoolVar(&runStartServerCommand, "start_server", true, "Start the API web server, listen on -port")
flag.BoolVar(&runSeedDataCommand, "seed_data", false, "Seed the database -n times")
flag.BoolVar(&runArchiveDataCommand, "archive_data", false, "Archives data into daily aggregated totals")
flag.BoolVar(&runArchiveDataCommand, "archive_data", false, "Aggregates data into daily totals")
flag.StringVar(&emailArg, "email", "", "Email address")
flag.StringVar(&passwordArg, "password", "", "Password")
flag.IntVar(&idArg, "id", 0, "Object ID")
@ -38,9 +41,9 @@ func Run() {
} else if runDeleteUserCommand {
deleteUser()
} else if runSeedDataCommand {
seedData()
db.Seed(nArg)
} else if runArchiveDataCommand {
archiveData()
count.Archive()
} else if runStartServerCommand {
startServer(portArg)
}

View File

@ -1,19 +0,0 @@
package commands
import (
"github.com/dannyvankooten/ana/count"
"github.com/dannyvankooten/ana/db"
)
// seedData runs the seed_data command by handing the package-level nArg
// value to db.Seed.
// NOTE(review): nArg is presumably the value of the -n flag; it is declared
// outside this file, so confirm against the flag definitions.
func seedData() {
db.Seed(nArg)
}
// archiveData runs the archive_data command by calling each of the count
// package's Create*Archives functions in turn: visitors, pageviews, screens,
// languages, browsers and referrers.
func archiveData() {
count.CreateVisitorArchives()
count.CreatePageviewArchives()
count.CreateScreenArchives()
count.CreateLanguageArchives()
count.CreateBrowserArchives()
count.CreateReferrerArchives()
}

View File

@ -25,8 +25,8 @@ func Browsers(before int64, after int64, limit int) []Point {
return newPointSlice(rows, total)
}
// CreateBrowserArchives aggregates screen data into daily totals
func CreateBrowserArchives() {
// CreateBrowserTotals aggregates browser data into daily totals
func CreateBrowserTotals(since int64) {
rows := queryTotalRows(`
SELECT
v.browser_name,
@ -35,12 +35,8 @@ func CreateBrowserArchives() {
DATE_FORMAT(pv.timestamp, "%Y-%m-%d") AS date_group
FROM pageviews pv
LEFT JOIN visitors v ON v.id = pv.visitor_id
WHERE NOT EXISTS(
SELECT t.id
FROM total_browser_names t
WHERE t.date = DATE_FORMAT(pv.timestamp, "%Y-%m-%d")
)
GROUP BY date_group, v.browser_name`)
WHERE UNIX_TIMESTAMP(pv.timestamp) > ?
GROUP BY date_group, v.browser_name`, since)
processTotalRows(rows, "total_browser_names")
}

View File

@ -3,9 +3,12 @@ package count
import (
"database/sql"
"log"
"strconv"
"time"
"fmt"
"github.com/dannyvankooten/ana/db"
"github.com/dannyvankooten/ana/options"
)
// Total represents a daily aggregated total for a metric
@ -25,6 +28,27 @@ type Point struct {
PercentageValue float64
}
// getLastArchivedTime returns the Unix timestamp stored in the
// "last_archived" option.
//
// It returns 0 when the stored value is not a valid base-10 int64, which
// includes the empty string.
func getLastArchivedTime() int64 {
	value := options.Get("last_archived")

	lastArchived, err := strconv.ParseInt(value, 10, 64)
	if err != nil {
		// On a range error ParseInt returns the clamped max/min int64 alongside
		// the error. Using that as the cut-off would push it to the far future
		// and silently exclude every pageview, so fall back to 0 instead.
		return 0
	}
	return lastArchived
}
// Archive aggregates data into daily totals.
//
// Every Create*Totals function is called with the timestamp read from the
// "last_archived" option; afterwards the option is overwritten with the
// current Unix time.
func Archive() {
	since := getLastArchivedTime()

	// Order is kept as-is: visitors, pageviews, screens, languages, browsers,
	// referrers.
	aggregators := []func(int64){
		CreateVisitorTotals,
		CreatePageviewTotals,
		CreateScreenTotals,
		CreateLanguageTotals,
		CreateBrowserTotals,
		CreateReferrerTotals,
	}
	for _, aggregate := range aggregators {
		aggregate(since)
	}

	// NOTE(review): the timestamp is taken after the aggregators have run, so
	// pageviews recorded while they were running may end up older than the
	// stored cut-off without having been aggregated. Confirm whether that
	// window matters.
	archivedAt := fmt.Sprintf("%d", time.Now().Unix())
	checkError(options.Set("last_archived", archivedAt))
}
// Save the Total in the given database connection + table
func (t *Total) Save(Conn *sql.DB, table string) error {
stmt, err := db.Conn.Prepare(`INSERT INTO ` + table + `(
@ -118,12 +142,12 @@ func fill(start int64, end int64, points []Point) []Point {
return newPoints
}
func queryTotalRows(sql string) *sql.Rows {
func queryTotalRows(sql string, lastArchived int64) *sql.Rows {
stmt, err := db.Conn.Prepare(sql)
checkError(err)
defer stmt.Close()
rows, err := stmt.Query()
rows, err := stmt.Query(lastArchived)
checkError(err)
return rows
}

View File

@ -27,8 +27,8 @@ func Languages(before int64, after int64, limit int) []Point {
return newPointSlice(rows, total)
}
// CreateLanguageArchives aggregates screen data into daily totals
func CreateLanguageArchives() {
// CreateLanguageTotals aggregates browser language data into daily totals
func CreateLanguageTotals(since int64) {
rows := queryTotalRows(`
SELECT
v.browser_language,
@ -37,12 +37,8 @@ func CreateLanguageArchives() {
DATE_FORMAT(pv.timestamp, "%Y-%m-%d") AS date_group
FROM pageviews pv
LEFT JOIN visitors v ON v.id = pv.visitor_id
WHERE NOT EXISTS(
SELECT t.id
FROM total_browser_languages t
WHERE t.date = DATE_FORMAT(pv.timestamp, "%Y-%m-%d")
)
GROUP BY date_group, v.browser_language`)
WHERE UNIX_TIMESTAMP(pv.timestamp) > ?
GROUP BY date_group, v.browser_language`, since)
processTotalRows(rows, "total_browser_languages")
}

View File

@ -1,8 +1,6 @@
package count
import (
"github.com/dannyvankooten/ana/db"
)
import "github.com/dannyvankooten/ana/db"
// Pageviews returns the total number of pageviews between the given timestamps
func Pageviews(before int64, after int64) float64 {
@ -47,24 +45,20 @@ func PageviewsPerDay(before int64, after int64) []Point {
return results
}
// CreatePageviewArchives aggregates pageview data for each page into daily totals
func CreatePageviewArchives() {
// CreatePageviewTotals aggregates pageview data for each page into daily totals
func CreatePageviewTotals(since int64) {
stmt, err := db.Conn.Prepare(`SELECT
pv.page_id,
COUNT(*) AS count,
COUNT(DISTINCT(pv.visitor_id)) AS count_unique,
DATE_FORMAT(pv.timestamp, "%Y-%m-%d") AS date_group
FROM pageviews pv
WHERE NOT EXISTS (
SELECT t.id
FROM total_pageviews t
WHERE t.date = DATE_FORMAT(pv.timestamp, "%Y-%m-%d") AND t.page_id = pv.page_id
)
WHERE UNIX_TIMESTAMP(pv.timestamp) > ?
GROUP BY pv.page_id, date_group`)
checkError(err)
defer stmt.Close()
rows, err := stmt.Query()
rows, err := stmt.Query(since)
checkError(err)
defer rows.Close()

View File

@ -5,8 +5,7 @@ import (
)
// Referrers returns a point slice containing referrer data per referrer
func Referrers(before int64, after int64, limit int, total float64) []Point {
// TODO: Calculate total instead of requiring it as a parameter.
func Referrers(before int64, after int64, limit int) []Point {
stmt, err := db.Conn.Prepare(`
SELECT
t.value,
@ -27,8 +26,8 @@ func Referrers(before int64, after int64, limit int, total float64) []Point {
return newPointSlice(rows, total)
}
// CreateReferrerArchives aggregates screen data into daily totals
func CreateReferrerArchives() {
// CreateReferrerTotals aggregates referrer data into daily totals
func CreateReferrerTotals(since int64) {
rows := queryTotalRows(`
SELECT
pv.referrer_url,
@ -38,12 +37,8 @@ func CreateReferrerArchives() {
FROM pageviews pv
WHERE pv.referrer_url IS NOT NULL
AND pv.referrer_url != ''
AND NOT EXISTS(
SELECT t.id
FROM total_referrers t
WHERE t.date = DATE_FORMAT(pv.timestamp, "%Y-%m-%d")
)
GROUP BY date_group, pv.referrer_url`)
AND UNIX_TIMESTAMP(pv.timestamp) > ?
GROUP BY date_group, pv.referrer_url`, since)
processTotalRows(rows, "total_referrers")
}

View File

@ -27,8 +27,8 @@ func Screens(before int64, after int64, limit int) []Point {
return newPointSlice(rows, total)
}
// CreateScreenArchives aggregates screen data into daily totals
func CreateScreenArchives() {
// CreateScreenTotals aggregates screen data into daily totals
func CreateScreenTotals(since int64) {
rows := queryTotalRows(`
SELECT
v.screen_resolution,
@ -37,12 +37,8 @@ func CreateScreenArchives() {
DATE_FORMAT(pv.timestamp, "%Y-%m-%d") AS date_group
FROM pageviews pv
LEFT JOIN visitors v ON v.id = pv.visitor_id
WHERE NOT EXISTS(
SELECT t.id
FROM total_screens t
WHERE t.date = DATE_FORMAT(pv.timestamp, "%Y-%m-%d")
)
GROUP BY date_group, v.screen_resolution`)
WHERE UNIX_TIMESTAMP(pv.timestamp) > ?
GROUP BY date_group, v.screen_resolution`, since)
processTotalRows(rows, "total_screens")
}

View File

@ -57,23 +57,19 @@ func VisitorsPerDay(before int64, after int64) []Point {
return results
}
// CreateVisitorArchives aggregates visitor data into daily totals
func CreateVisitorArchives() {
// CreateVisitorTotals aggregates visitor data into daily totals
func CreateVisitorTotals(since int64) {
stmt, err := db.Conn.Prepare(`
SELECT
COUNT(DISTINCT(pv.visitor_id)) AS count,
DATE_FORMAT(pv.timestamp, "%Y-%m-%d") AS date_group
FROM pageviews pv
WHERE NOT EXISTS(
SELECT t.id
FROM total_visitors t
WHERE t.date = DATE_FORMAT(pv.timestamp, "%Y-%m-%d")
)
WHERE UNIX_TIMESTAMP(pv.timestamp) > ?
GROUP BY date_group`)
checkError(err)
defer stmt.Close()
rows, err := stmt.Query()
rows, err := stmt.Query(since)
checkError(err)
defer rows.Close()

29
options/options.go Normal file
View File

@ -0,0 +1,29 @@
package options
import (
"github.com/dannyvankooten/ana/db"
)
// Get returns an option value by its name.
//
// It returns the empty string when the value cannot be read, either because
// the statement could not be prepared or because scanning the row failed
// (for example when no row matches name).
func Get(name string) string {
	stmt, err := db.Conn.Prepare(`SELECT o.value FROM options o WHERE o.name = ?`)
	if err != nil {
		// The statement is not usable after a failed Prepare. Bail out before
		// the deferred Close and the QueryRow call below can touch it.
		return ""
	}
	defer stmt.Close()

	var value string
	if err := stmt.QueryRow(name).Scan(&value); err != nil {
		// The signature has no error return, so a failed lookup is reported
		// as an empty value.
		return ""
	}
	return value
}
// Set stores value under the given option name. The statement inserts a new
// row and, on a duplicate key, updates the existing row's value instead.
// It returns the error from preparing or executing the statement, if any.
func Set(name string, value string) error {
	const upsert = `INSERT INTO options(name, value) VALUES(?, ?) ON DUPLICATE KEY UPDATE value = ?`

	stmt, prepErr := db.Conn.Prepare(upsert)
	if prepErr != nil {
		return prepErr
	}
	defer stmt.Close()

	// value is bound twice: once for the INSERT and once for the UPDATE clause.
	if _, execErr := stmt.Exec(name, value, value); execErr != nil {
		return execErr
	}
	return nil
}

13
options/options_test.go Normal file
View File

@ -0,0 +1,13 @@
package options
import (
"testing"
)
// TestGet is a placeholder for the Get tests.
func TestGet(t *testing.T) {
	// Skip explicitly so the missing coverage is visible in the test output
	// instead of being reported as a passing test.
	t.Skip("TODO: not implemented yet")
}
// TestSet is a placeholder for the Set tests.
func TestSet(t *testing.T) {
	// Skip explicitly so the missing coverage is visible in the test output
	// instead of being reported as a passing test.
	t.Skip("TODO: not implemented yet")
}