package api

import (
	"encoding/base64"
	"net/http"
	"net/url"
	"strings"
	"time"

	"github.com/mssola/user_agent"
	log "github.com/sirupsen/logrus"
	"github.com/usefathom/fathom/pkg/aggregator"
	"github.com/usefathom/fathom/pkg/datastore"
	"github.com/usefathom/fathom/pkg/models"
)

// shouldCollect reports whether the request should be counted at all.
func shouldCollect(r *http.Request) bool {
	// abort if the DNT header is set to "1" (these should already have been filtered out client-side)
	if r.Header.Get("DNT") == "1" {
		return false
	}

	// don't track prerendered pages, see https://github.com/usefathom/fathom/issues/13
	if r.Header.Get("X-Moz") == "prefetch" || r.Header.Get("X-Purpose") == "preview" {
		return false
	}

	// abort if this is a bot
	ua := user_agent.New(r.UserAgent())
	if ua.Bot() {
		return false
	}

	return true
}
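
// A minimal sketch (hypothetical, not part of the original file) of
// exercising shouldCollect with a Do-Not-Track request:
//
//	req, _ := http.NewRequest("GET", "/collect", nil)
//	req.Header.Set("DNT", "1")
//	shouldCollect(req) // false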

// parsePathname normalizes a pathname so that it always carries exactly one leading slash.
func parsePathname(p string) string {
	return "/" + strings.TrimLeft(p, "/")
}
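
// For illustration (hypothetical inputs):
//
//	parsePathname("about")   // -> "/about"
//	parsePathname("//about") // -> "/about"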

// parseReferrer strips tracking noise from a referrer URL: AMP & UTM
// query parameters and a trailing /amp/ path segment.
// TODO: Move this to the aggregator, as we need this endpoint to be as fast as possible.
func parseReferrer(r string) string {
	u, err := url.Parse(r)
	if err != nil {
		return ""
	}

	// remove AMP & UTM query parameters
	q := u.Query()
	keys := []string{"amp", "utm_campaign", "utm_medium", "utm_source"}
	for _, k := range keys {
		q.Del(k)
	}
	u.RawQuery = q.Encode()

	// remove the /amp/ suffix from the path
	u.Path = strings.TrimSuffix(u.Path, "/amp/")

	return u.String()
}
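
// For illustration (hypothetical input):
//
//	parseReferrer("https://example.com/blog/amp/?utm_source=news&ref=1")
//	// -> "https://example.com/blog?ref=1"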

// parseHostname reduces a URL to its scheme and host, dropping path and
// query. Unparseable or host-less values yield an empty string rather
// than a dangling "://".
func parseHostname(r string) string {
	u, err := url.Parse(r)
	if err != nil || u.Host == "" {
		return ""
	}

	return u.Scheme + "://" + u.Host
}
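
// For illustration (hypothetical input):
//
//	parseHostname("https://example.com/some/page?x=1") // -> "https://example.com"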

// NewCollectHandler returns the HTTP handler for the /collect endpoint.
// It spawns two background goroutines: one that periodically aggregates
// raw pageviews and one that batches pageview writes to the datastore.
func (api *API) NewCollectHandler() http.Handler {
	pageviews := make(chan *models.Pageview, 10)
	go aggregate(api.database)
	go collect(api.database, pageviews)

	return HandlerFunc(func(w http.ResponseWriter, r *http.Request) error {
		if !shouldCollect(r) {
			return nil
		}

		q := r.URL.Query()
		now := time.Now()

		// get pageview details
		pageview := &models.Pageview{
			ID:           q.Get("id"),
			Hostname:     parseHostname(q.Get("h")),
			Pathname:     parsePathname(q.Get("p")),
			IsNewVisitor: q.Get("nv") == "1",
			IsNewSession: q.Get("ns") == "1",
			IsUnique:     q.Get("u") == "1",
			IsBounce:     q.Get("b") != "0",
			Referrer:     parseReferrer(q.Get("r")),
			Duration:     0,
			Timestamp:    now,
		}

		// find the previous pageview by the same visitor
		previousPageviewID := q.Get("pid")
		if !pageview.IsNewSession && previousPageviewID != "" {
			previousPageview, err := api.database.GetPageview(previousPageviewID)
			if err != nil && err != datastore.ErrNoResults {
				return err
			}

			// if we have a recent pageview that is less than 30 minutes old
			if previousPageview != nil && previousPageview.Timestamp.After(now.Add(-30*time.Minute)) {
				previousPageview.Duration = now.Unix() - previousPageview.Timestamp.Unix()
				previousPageview.IsBounce = false

				// push onto the channel to be updated (in batch) later
				pageviews <- previousPageview
			}
		}

		// push pageview onto the channel to be inserted (in batch) later
		pageviews <- pageview

		// indicate that we're not tracking user data, see https://github.com/usefathom/fathom/issues/65
		w.Header().Set("Tk", "N")

		// headers to prevent caching
		w.Header().Set("Content-Type", "image/gif")
		w.Header().Set("Expires", "Mon, 01 Jan 1990 00:00:00 GMT")
		w.Header().Set("Cache-Control", "no-cache, no-store, must-revalidate")
		w.Header().Set("Pragma", "no-cache")

		// respond with a 1x1 px transparent GIF
		w.WriteHeader(http.StatusOK)
		b, _ := base64.StdEncoding.DecodeString("R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7")
		w.Write(b)
		return nil
	})
}
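
// A minimal usage sketch (hypothetical router and variable names, not
// part of this file):
//
//	mux := http.NewServeMux()
//	mux.Handle("/collect", api.NewCollectHandler())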

// aggregate runs the aggregator once at startup and then once every minute.
func aggregate(db datastore.Datastore) {
	agg := aggregator.New(db)
	agg.Run()

	// a ticker avoids allocating a fresh timer every minute
	ticker := time.NewTicker(1 * time.Minute)
	for range ticker.C {
		agg.Run()
	}
}

// collect batches incoming pageviews and persists them to the datastore,
// either when the buffer is full or when no new pageview has arrived for
// a short while.
func collect(db datastore.Datastore, pageviews chan *models.Pageview) {
	var size = 800
	var timeout = 600 * time.Millisecond
	var buffer = make([]*models.Pageview, 0)

	for {
		select {
		case pageview := <-pageviews:
			buffer = append(buffer, pageview)
			if len(buffer) >= size {
				persist(db, buffer)
				buffer = make([]*models.Pageview, 0)
			}
		case <-time.After(timeout):
			if len(buffer) > 0 {
				persist(db, buffer)
				buffer = make([]*models.Pageview, 0)
			}
		}
	}
}
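
// Note (illustrative): because time.After is re-armed on every loop
// iteration, the timeout only fires after 600ms without a new pageview;
// under steady traffic the buffer instead flushes once it reaches 800
// entries.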

// persist splits a batch of pageviews into inserts and updates and writes
// both to the datastore. Fresh pageviews come off the channel with their
// bounce flag still set; pageviews whose bounce flag was cleared by a
// follow-up request already exist in the datastore and need an update.
func persist(db datastore.Datastore, pageviews []*models.Pageview) {
	n := len(pageviews)
	updates := make([]*models.Pageview, 0, n)
	inserts := make([]*models.Pageview, 0, n)

	for i := range pageviews {
		if !pageviews[i].IsBounce {
			updates = append(updates, pageviews[i])
		} else {
			inserts = append(inserts, pageviews[i])
		}
	}

	log.Debugf("persisting %d pageviews (%d inserts, %d updates)", n, len(inserts), len(updates))

	if err := db.InsertPageviews(inserts); err != nil {
		log.Errorf("error inserting pageviews: %s", err)
	}

	if err := db.UpdatePageviews(updates); err != nil {
		log.Errorf("error updating pageviews: %s", err)
	}
}
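
// Illustrative example (hypothetical, not part of the original file):
// a batch holding one fresh pageview (IsBounce == true) and one
// revisited pageview (IsBounce == false) results in one call to
// InsertPageviews with the former and one call to UpdatePageviews
// with the latter.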