Use buffered INSERT for collecting pageviews & misc improvements to datastore retrieval funcs

This commit is contained in:
Danny van Kooten 2017-01-25 22:48:24 +01:00
parent 6b5cccd147
commit ce070d485c
11 changed files with 177 additions and 113 deletions

View File

@ -4,6 +4,7 @@ import (
"crypto/md5"
"encoding/base64"
"encoding/hex"
"log"
"net/http"
"strings"
"time"
@ -13,6 +14,10 @@ import (
"github.com/mssola/user_agent"
)
// State used to batch pageviews before they are written to the datastore.
var (
	// buffer holds the pageviews received by processBuffer that have not
	// been persisted yet; persistPageviews saves and then empties it.
	buffer []*models.Pageview

	// bufferSize is the number of buffered pageviews at which processBuffer
	// persists the buffer right away.
	bufferSize = 250

	// timeout is how long processBuffer waits for a new pageview before it
	// persists whatever is currently buffered.
	timeout = 100 * time.Millisecond
)
func getRequestIp(r *http.Request) string {
ipAddress := r.RemoteAddr
@ -24,77 +29,109 @@ func getRequestIp(r *http.Request) string {
return ipAddress
}
func CollectHandler(w http.ResponseWriter, r *http.Request) {
ua := user_agent.New(r.UserAgent())
// abort if this is a bot.
if ua.Bot() {
return
// persistPageviews writes every buffered pageview to the datastore in one
// call and then empties the buffer. It does nothing when the buffer is empty.
func persistPageviews() {
	pending := len(buffer)
	if pending == 0 {
		return
	}

	log.Printf("Persisting %d pageviews\n", pending)
	saveErr := datastore.SavePageviews(buffer)

	// The buffer is truncated before the error is handed to checkError, so
	// it is emptied whether or not the save succeeded.
	buffer = buffer[:0]
	checkError(saveErr)
}
q := r.URL.Query()
// find or insert page
page, err := datastore.GetPageByHostnameAndPath(q.Get("h"), q.Get("p"))
if page.ID == 0 {
page = &models.Page{
Hostname: q.Get("h"),
Path: q.Get("p"),
Title: q.Get("t"),
func processBuffer(pv chan *models.Pageview) {
for {
select {
case pageview := <-pv:
buffer = append(buffer, pageview)
if len(buffer) >= bufferSize {
persistPageviews()
}
case <-time.After(timeout):
persistPageviews()
}
err = datastore.SavePage(page)
}
checkError(err)
}
// find or insert visitor.
now := time.Now()
ipAddress := getRequestIp(r)
visitorKey := generateVisitorKey(now.Format("2006-01-02"), ipAddress, r.UserAgent())
/* middleware */
func NewCollectHandler() http.Handler {
pageviews := make(chan *models.Pageview, 100)
go processBuffer(pageviews)
visitor, err := datastore.GetVisitorByKey(visitorKey)
if visitor.ID == 0 {
visitor = &models.Visitor{
IpAddress: ipAddress,
BrowserLanguage: q.Get("l"),
ScreenResolution: q.Get("sr"),
DeviceOS: ua.OS(),
Country: "",
Key: visitorKey,
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
ua := user_agent.New(r.UserAgent())
// abort if this is a bot.
if ua.Bot() {
return
}
// add browser details
visitor.BrowserName, visitor.BrowserVersion = ua.Browser()
visitor.BrowserName = parseMajorMinor(visitor.BrowserName)
err = datastore.SaveVisitor(visitor)
}
checkError(err)
q := r.URL.Query()
pageview := &models.Pageview{
PageID: page.ID,
VisitorID: visitor.ID,
ReferrerUrl: q.Get("ru"),
ReferrerKeyword: q.Get("rk"),
Timestamp: now.Format("2006-01-02 15:04:05"),
}
// find or insert page
page, err := datastore.GetPageByHostnameAndPath(q.Get("h"), q.Get("p"))
// only store referrer URL if not coming from own site
if strings.Contains(pageview.ReferrerUrl, page.Hostname) {
pageview.ReferrerUrl = ""
}
if err != nil {
page = &models.Page{
Hostname: q.Get("h"),
Path: q.Get("p"),
Title: q.Get("t"),
}
err = datastore.SavePageview(pageview)
checkError(err)
err = datastore.SavePage(page)
checkError(err)
}
// don't you cache this
w.Header().Set("Content-Type", "image/gif")
w.Header().Set("Expires", "Mon, 01 Jan 1990 00:00:00 GMT")
w.Header().Set("Cache-Control", "no-cache, no-store, must-revalidate")
w.Header().Set("Pragma", "no-cache")
w.WriteHeader(http.StatusOK)
// find or insert visitor.
now := time.Now()
ipAddress := getRequestIp(r)
visitorKey := generateVisitorKey(now.Format("2006-01-02"), ipAddress, r.UserAgent())
// 1x1 px transparent GIF
b, _ := base64.StdEncoding.DecodeString("R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7")
w.Write(b)
visitor, err := datastore.GetVisitorByKey(visitorKey)
if err != nil {
log.Println(err)
visitor = &models.Visitor{
IpAddress: ipAddress,
BrowserLanguage: q.Get("l"),
ScreenResolution: q.Get("sr"),
DeviceOS: ua.OS(),
Country: "",
Key: visitorKey,
}
// add browser details
visitor.BrowserName, visitor.BrowserVersion = ua.Browser()
visitor.BrowserName = parseMajorMinor(visitor.BrowserName)
err = datastore.SaveVisitor(visitor)
checkError(err)
}
pageview := &models.Pageview{
PageID: page.ID,
VisitorID: visitor.ID,
ReferrerUrl: q.Get("ru"),
ReferrerKeyword: q.Get("rk"),
Timestamp: now.Format("2006-01-02 15:04:05"),
}
// only store referrer URL if not coming from own site
if strings.Contains(pageview.ReferrerUrl, page.Hostname) {
pageview.ReferrerUrl = ""
}
// push onto channel
pageviews <- pageview
// don't you cache this
w.Header().Set("Content-Type", "image/gif")
w.Header().Set("Expires", "Mon, 01 Jan 1990 00:00:00 GMT")
w.Header().Set("Cache-Control", "no-cache, no-store, must-revalidate")
w.Header().Set("Pragma", "no-cache")
w.WriteHeader(http.StatusOK)
// 1x1 px transparent GIF
b, _ := base64.StdEncoding.DecodeString("R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7")
w.Write(b)
})
}
// generateVisitorKey generates the "unique" visitor key from date, IP address + user agent

View File

@ -15,7 +15,7 @@ func Server(port int) {
// register routes
r := mux.NewRouter()
r.HandleFunc("/collect", api.CollectHandler).Methods("GET")
r.Handle("/collect", api.NewCollectHandler()).Methods("GET")
r.Handle("/api/session", api.LoginHandler).Methods("POST")
r.Handle("/api/session", api.LogoutHandler).Methods("DELETE")
r.Handle("/api/visitors/count", api.Authorize(api.GetVisitorsCountHandler)).Methods("GET")

View File

@ -26,7 +26,7 @@ type Point struct {
}
func getLastArchivedDate() string {
value := datastore.GetOption("last_archived")
value, _ := datastore.GetOption("last_archived")
return value
}

View File

@ -1,14 +1,18 @@
package datastore
// GetOption returns an option value by its name
func GetOption(name string) string {
func GetOption(name string) (string, error) {
var value string
stmt, _ := DB.Prepare(`SELECT o.value FROM options o WHERE o.name = ? LIMIT 1`)
stmt, err := DB.Prepare(`SELECT o.value FROM options o WHERE o.name = ? LIMIT 1`)
if err != nil {
return "", err
}
defer stmt.Close()
stmt.QueryRow(name).Scan(&value)
return value
return value, nil
}
// SetOption updates an option by its name

View File

@ -8,12 +8,16 @@ var p models.Page
// GetPage ...
func GetPage(id int64) (*models.Page, error) {
return &p, err
return &p, nil
}
// GetPageByHostnameAndPath ...
func GetPageByHostnameAndPath(hostname, path string) (*models.Page, error) {
stmt, err = DB.Prepare("SELECT p.id, p.hostname, p.path FROM pages p WHERE p.hostname = ? AND p.path = ? LIMIT 1")
stmt, err := DB.Prepare("SELECT p.id, p.hostname, p.path FROM pages p WHERE p.hostname = ? AND p.path = ? LIMIT 1")
if err != nil {
return nil, err
}
defer stmt.Close()
err = stmt.QueryRow(hostname, path).Scan(&p.ID, &p.Hostname, &p.Path)
return &p, err
@ -22,7 +26,7 @@ func GetPageByHostnameAndPath(hostname, path string) (*models.Page, error) {
// SavePage ...
func SavePage(p *models.Page) error {
// prepare statement for inserting data
stmt, err = DB.Prepare(`INSERT INTO pages(
stmt, err := DB.Prepare(`INSERT INTO pages(
hostname,
path,
title
@ -32,7 +36,7 @@ func SavePage(p *models.Page) error {
return err
}
result, err = stmt.Exec(p.Hostname, p.Path, p.Title)
result, err := stmt.Exec(p.Hostname, p.Path, p.Title)
if err != nil {
return err
}

View File

@ -2,6 +2,7 @@ package datastore
import (
"github.com/dannyvankooten/ana/models"
"log"
)
//var pv models.Pageview
@ -9,7 +10,7 @@ import (
// SavePageview ...
func SavePageview(pv *models.Pageview) error {
// prepare statement for inserting data
stmt, err = DB.Prepare(`INSERT INTO pageviews (
stmt, err := DB.Prepare(`INSERT INTO pageviews (
page_id,
visitor_id,
referrer_url,
@ -21,7 +22,7 @@ func SavePageview(pv *models.Pageview) error {
return err
}
result, err = stmt.Exec(
result, err := stmt.Exec(
pv.PageID,
pv.VisitorID,
pv.ReferrerUrl,
@ -36,3 +37,40 @@ func SavePageview(pv *models.Pageview) error {
pv.ID, err = result.LastInsertId()
return err
}
// SavePageviews inserts every pageview in pvs through one prepared statement
// inside a single transaction and stores each generated row ID on the
// corresponding pageview.
//
// Nothing is written when pvs is empty. If starting the transaction,
// preparing the statement, executing an insert or reading a generated ID
// fails, the error is logged and returned and the transaction is rolled back,
// so a batch is stored either completely or not at all.
func SavePageviews(pvs []*models.Pageview) error {
	if len(pvs) == 0 {
		return nil
	}

	tx, err := DB.Begin()
	if err != nil {
		log.Println(err)
		return err
	}
	// Undo the batch on every early return. After a successful Commit this
	// call only reports sql.ErrTxDone, which is safe to ignore.
	defer tx.Rollback()

	stmt, err := tx.Prepare(`INSERT INTO pageviews(
page_id,
visitor_id,
referrer_url,
referrer_keyword,
timestamp
) VALUES( ?, ?, ?, ?, ? )`)
	if err != nil {
		log.Println(err)
		return err
	}
	// Registered only after the error check: stmt is nil when Prepare fails.
	defer stmt.Close()

	for _, pv := range pvs {
		result, err := stmt.Exec(
			pv.PageID,
			pv.VisitorID,
			pv.ReferrerUrl,
			pv.ReferrerKeyword,
			pv.Timestamp,
		)
		if err != nil {
			log.Println(err)
			return err
		}

		pv.ID, err = result.LastInsertId()
		if err != nil {
			log.Println(err)
			return err
		}
	}

	return tx.Commit()
}

View File

@ -59,28 +59,28 @@ func seedPages() []models.Page {
Path: "/",
Title: "Homepage",
}
homepage.Save(DB)
SavePage(&homepage)
contactPage := models.Page{
Hostname: "wordpress.dev",
Path: "/contact/",
Title: "Contact",
}
contactPage.Save(DB)
SavePage(&contactPage)
aboutPage := models.Page{
Hostname: "wordpress.dev",
Path: "/about/",
Title: "About Me",
}
aboutPage.Save(DB)
SavePage(&aboutPage)
portfolioPage := models.Page{
Hostname: "wordpress.dev",
Path: "/portfolio/",
Title: "Portfolio",
}
portfolioPage.Save(DB)
SavePage(&portfolioPage)
pages = append(pages, homepage)
pages = append(pages, homepage)
@ -94,9 +94,6 @@ func seedPages() []models.Page {
func Seed(n int) {
pages := seedPages()
stmtVisitor, _ := DB.Prepare("SELECT v.id FROM visitors v WHERE v.visitor_key = ? LIMIT 1")
defer stmtVisitor.Close()
// insert X random hits
for i := 0; i < n; i++ {
@ -110,10 +107,9 @@ func Seed(n int) {
dummyUserAgent := browserName + browserVersion + deviceOS
visitorKey := generateVisitorKey(date.Format("2006-01-02"), ipAddress, dummyUserAgent)
visitor, err := GetVisitorByKey(visitorKey)
var visitor *models.Visitor
visitor, err = GetVisitorByKey(visitorKey)
if visitor == nil {
if err != nil {
// create or find visitor
visitor := models.Visitor{
IpAddress: ipAddress,

View File

@ -12,9 +12,6 @@ import (
// DB ...
var DB *sql.DB
var err error
var stmt *sql.Stmt
var result sql.Result
// Init creates a database connection pool
func Init() *sql.DB {
@ -33,8 +30,7 @@ func Init() *sql.DB {
// New creates a new database pool
func New(driver string, config string) *sql.DB {
var db *sql.DB
db, err = sql.Open(driver, config)
db, err := sql.Open(driver, config)
if err != nil {
log.Fatal(err)
}
@ -72,8 +68,7 @@ func runMigrations(driver string) {
migrate.SetTable("migrations")
var n int
n, err = migrate.Exec(DB, driver, migrations, migrate.Up)
n, err := migrate.Exec(DB, driver, migrations, migrate.Up)
if err != nil {
log.Fatal("Database migrations failed: ", err)

View File

@ -8,14 +8,22 @@ var u models.User
// GetUser retrieves user from datastore by its ID
func GetUser(id int64) (*models.User, error) {
stmt, err = DB.Prepare("SELECT id, email FROM users WHERE id = ? LIMIT 1")
stmt, err := DB.Prepare("SELECT id, email FROM users WHERE id = ? LIMIT 1")
if err != nil {
return nil, err
}
err = stmt.QueryRow(id).Scan(&u.ID, &u.Email)
return &u, err
}
// GetUserByEmail retrieves user from datastore by its email
func GetUserByEmail(email string) (*models.User, error) {
stmt, err = DB.Prepare("SELECT id, email, password FROM users WHERE email = ? LIMIT 1")
stmt, err := DB.Prepare("SELECT id, email, password FROM users WHERE email = ? LIMIT 1")
if err != nil {
return nil, err
}
err = stmt.QueryRow(email).Scan(&u.ID, &u.Email, &u.HashedPassword)
return &u, err
}

View File

@ -9,7 +9,11 @@ var v models.Visitor
// GetVisitorByKey ...
func GetVisitorByKey(key string) (*models.Visitor, error) {
// query by unique visitor key
stmt, err = DB.Prepare("SELECT v.id FROM visitors v WHERE v.visitor_key = ? LIMIT 1")
stmt, err := DB.Prepare("SELECT v.id FROM visitors v WHERE v.visitor_key = ? LIMIT 1")
if err != nil {
return nil, err
}
defer stmt.Close()
err = stmt.QueryRow(key).Scan(&v.ID)
return &v, err
@ -18,7 +22,7 @@ func GetVisitorByKey(key string) (*models.Visitor, error) {
// SaveVisitor ...
func SaveVisitor(v *models.Visitor) error {
// prepare statement for inserting data
stmt, err = DB.Prepare(`INSERT INTO visitors (
stmt, err := DB.Prepare(`INSERT INTO visitors (
visitor_key,
ip_address,
device_os,
@ -33,7 +37,7 @@ func SaveVisitor(v *models.Visitor) error {
return err
}
result, err = stmt.Exec(
result, err := stmt.Exec(
v.Key,
v.IpAddress,
v.DeviceOS,

View File

@ -1,30 +1,8 @@
package models
import (
"database/sql"
)
// Page is a single tracked page, identified by its hostname and path.
type Page struct {
	ID       int64  // row ID; set by Save after a successful insert
	Hostname string // value stored in the hostname column
	Path     string // value stored in the path column
	Title    string // value stored in the title column
}

// Save inserts the page into the pages table using conn and, on success,
// stores the generated row ID in p.ID.
//
// It returns the first error encountered while preparing the statement,
// executing the insert, or reading the generated ID. p.ID is left untouched
// whenever an error is returned.
func (p *Page) Save(conn *sql.DB) error {
	// prepare statement for inserting data
	stmt, err := conn.Prepare(`INSERT INTO pages(
hostname,
path,
title
) VALUES( ?, ?, ? )`)
	if err != nil {
		return err
	}
	defer stmt.Close()

	result, err := stmt.Exec(p.Hostname, p.Path, p.Title)
	if err != nil {
		// result is nil when Exec fails, so it must not be used here.
		return err
	}

	id, err := result.LastInsertId()
	if err != nil {
		return err
	}
	p.ID = id
	return nil
}