2016-11-21 12:24:50 +00:00
|
|
|
package api
|
2016-11-19 21:35:23 +00:00
|
|
|
|
|
|
|
import (
|
2017-01-25 19:01:02 +00:00
|
|
|
"crypto/md5"
|
2016-12-11 13:50:01 +00:00
|
|
|
"encoding/base64"
|
2017-01-25 19:01:02 +00:00
|
|
|
"encoding/hex"
|
2016-12-11 13:50:01 +00:00
|
|
|
"net/http"
|
|
|
|
"strings"
|
2016-12-24 13:57:04 +00:00
|
|
|
"time"
|
2016-12-25 15:37:45 +00:00
|
|
|
|
2016-12-11 13:50:01 +00:00
|
|
|
"github.com/mssola/user_agent"
|
2018-04-24 08:28:23 +00:00
|
|
|
"github.com/usefathom/fathom/pkg/datastore"
|
|
|
|
"github.com/usefathom/fathom/pkg/models"
|
2018-04-25 09:59:30 +00:00
|
|
|
|
|
|
|
log "github.com/sirupsen/logrus"
|
2016-11-19 21:35:23 +00:00
|
|
|
)
|
|
|
|
|
2017-01-25 21:48:24 +00:00
|
|
|
var buffer []*models.Pageview
|
|
|
|
var bufferSize = 250
|
2018-05-02 13:33:01 +00:00
|
|
|
var timeout = 200 * time.Millisecond
|
2017-01-25 21:48:24 +00:00
|
|
|
|
|
|
|
func persistPageviews() {
|
|
|
|
if len(buffer) > 0 {
|
|
|
|
err := datastore.SavePageviews(buffer)
|
2018-04-25 09:59:30 +00:00
|
|
|
if err != nil {
|
|
|
|
log.Errorf("error saving pageviews: %s", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// clear buffer regardless of error... this means data loss, but better than filling the buffer for now
|
2017-01-25 21:48:24 +00:00
|
|
|
buffer = buffer[:0]
|
|
|
|
}
|
|
|
|
}
|
2016-12-11 13:50:01 +00:00
|
|
|
|
2017-01-25 21:48:24 +00:00
|
|
|
func processBuffer(pv chan *models.Pageview) {
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case pageview := <-pv:
|
|
|
|
buffer = append(buffer, pageview)
|
|
|
|
if len(buffer) >= bufferSize {
|
|
|
|
persistPageviews()
|
|
|
|
}
|
|
|
|
case <-time.After(timeout):
|
|
|
|
persistPageviews()
|
|
|
|
}
|
2016-12-11 13:50:01 +00:00
|
|
|
}
|
2017-01-25 21:48:24 +00:00
|
|
|
}
|
2016-12-11 13:50:01 +00:00
|
|
|
|
2017-01-25 21:48:24 +00:00
|
|
|
/* middleware */
|
|
|
|
func NewCollectHandler() http.Handler {
|
2018-05-04 10:20:37 +00:00
|
|
|
pageviews := make(chan *models.Pageview, bufferSize)
|
2017-01-25 21:48:24 +00:00
|
|
|
go processBuffer(pageviews)
|
2016-12-11 13:50:01 +00:00
|
|
|
|
2018-04-25 09:59:30 +00:00
|
|
|
return HandlerFunc(func(w http.ResponseWriter, r *http.Request) error {
|
2017-01-25 21:48:24 +00:00
|
|
|
|
|
|
|
// abort if this is a bot.
|
2018-05-02 13:33:01 +00:00
|
|
|
userAgent := r.UserAgent()
|
|
|
|
ua := user_agent.New(userAgent)
|
2017-01-25 21:48:24 +00:00
|
|
|
if ua.Bot() {
|
2018-04-25 09:59:30 +00:00
|
|
|
return nil
|
2017-01-13 15:45:17 +00:00
|
|
|
}
|
2017-01-25 21:48:24 +00:00
|
|
|
|
|
|
|
q := r.URL.Query()
|
|
|
|
|
2018-05-02 13:33:01 +00:00
|
|
|
// find page
|
2017-01-25 21:48:24 +00:00
|
|
|
page, err := datastore.GetPageByHostnameAndPath(q.Get("h"), q.Get("p"))
|
2018-05-02 13:33:01 +00:00
|
|
|
if err != nil && err != datastore.ErrNoResults {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// page does not exist yet, get details & save it
|
|
|
|
if page == nil {
|
2017-01-25 21:48:24 +00:00
|
|
|
page = &models.Page{
|
2018-05-02 13:33:01 +00:00
|
|
|
Scheme: "http",
|
2017-01-25 21:48:24 +00:00
|
|
|
Hostname: q.Get("h"),
|
|
|
|
Path: q.Get("p"),
|
|
|
|
Title: q.Get("t"),
|
|
|
|
}
|
|
|
|
|
2018-05-02 13:33:01 +00:00
|
|
|
if scheme := q.Get("scheme"); scheme != "" {
|
|
|
|
page.Scheme = scheme
|
|
|
|
}
|
|
|
|
|
2017-01-25 21:48:24 +00:00
|
|
|
err = datastore.SavePage(page)
|
2018-04-25 09:59:30 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2017-01-13 15:45:17 +00:00
|
|
|
}
|
2017-01-25 19:01:02 +00:00
|
|
|
|
2018-05-02 13:33:01 +00:00
|
|
|
// find visitor by anonymized key from query params
|
2017-01-25 21:48:24 +00:00
|
|
|
now := time.Now()
|
2018-05-02 13:33:01 +00:00
|
|
|
visitorKey := q.Get("vk")
|
|
|
|
visitorKey = enhanceVisitorKey(visitorKey, now.Format("2006-01-02"), userAgent, q.Get("l"), q.Get("sr"))
|
2017-01-25 21:48:24 +00:00
|
|
|
visitor, err := datastore.GetVisitorByKey(visitorKey)
|
2018-05-02 13:33:01 +00:00
|
|
|
if err != nil && err != datastore.ErrNoResults {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// visitor is new, save it
|
|
|
|
if visitor == nil {
|
2017-01-25 21:48:24 +00:00
|
|
|
visitor = &models.Visitor{
|
|
|
|
BrowserLanguage: q.Get("l"),
|
|
|
|
ScreenResolution: q.Get("sr"),
|
|
|
|
DeviceOS: ua.OS(),
|
|
|
|
Country: "",
|
|
|
|
Key: visitorKey,
|
|
|
|
}
|
|
|
|
|
|
|
|
// add browser details
|
|
|
|
visitor.BrowserName, visitor.BrowserVersion = ua.Browser()
|
2018-05-02 13:33:01 +00:00
|
|
|
|
|
|
|
// get rid of exact browser versions
|
|
|
|
visitor.BrowserVersion = parseMajorMinor(visitor.BrowserVersion)
|
2017-01-25 21:48:24 +00:00
|
|
|
err = datastore.SaveVisitor(visitor)
|
2018-04-25 09:59:30 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2018-05-04 10:20:37 +00:00
|
|
|
} else {
|
|
|
|
lastPageview, err := datastore.GetLastPageviewForVisitor(visitor.ID)
|
|
|
|
if err != nil && err != datastore.ErrNoResults {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2018-05-04 13:43:05 +00:00
|
|
|
if lastPageview != nil && lastPageview.Timestamp.After(now.Add(-30*time.Minute)) {
|
2018-05-04 10:20:37 +00:00
|
|
|
lastPageview.Bounced = false
|
2018-05-04 13:43:05 +00:00
|
|
|
lastPageview.TimeOnPage = now.Unix() - lastPageview.Timestamp.Unix()
|
|
|
|
|
|
|
|
// TODO: Delay storage until in buffer?
|
2018-05-04 10:20:37 +00:00
|
|
|
err := datastore.UpdatePageview(lastPageview)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
2017-01-25 21:48:24 +00:00
|
|
|
}
|
2016-12-11 13:50:01 +00:00
|
|
|
|
2018-05-02 13:33:01 +00:00
|
|
|
// get pageview details
|
2017-01-25 21:48:24 +00:00
|
|
|
pageview := &models.Pageview{
|
|
|
|
PageID: page.ID,
|
|
|
|
VisitorID: visitor.ID,
|
|
|
|
ReferrerUrl: q.Get("ru"),
|
|
|
|
ReferrerKeyword: q.Get("rk"),
|
2018-05-04 13:43:05 +00:00
|
|
|
TimeOnPage: 0,
|
|
|
|
Bounced: true, // TODO: Only mark as bounced if no other pageviews in this session
|
|
|
|
Timestamp: now,
|
2017-01-25 21:48:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// only store referrer URL if not coming from own site
|
|
|
|
if strings.Contains(pageview.ReferrerUrl, page.Hostname) {
|
|
|
|
pageview.ReferrerUrl = ""
|
|
|
|
}
|
2016-12-11 13:50:01 +00:00
|
|
|
|
2017-01-25 21:48:24 +00:00
|
|
|
// push onto channel
|
|
|
|
pageviews <- pageview
|
2016-12-11 13:50:01 +00:00
|
|
|
|
2017-01-25 21:48:24 +00:00
|
|
|
// don't you cache this
|
|
|
|
w.Header().Set("Content-Type", "image/gif")
|
|
|
|
w.Header().Set("Expires", "Mon, 01 Jan 1990 00:00:00 GMT")
|
|
|
|
w.Header().Set("Cache-Control", "no-cache, no-store, must-revalidate")
|
|
|
|
w.Header().Set("Pragma", "no-cache")
|
|
|
|
w.WriteHeader(http.StatusOK)
|
2016-12-11 13:50:01 +00:00
|
|
|
|
2017-01-25 21:48:24 +00:00
|
|
|
// 1x1 px transparent GIF
|
|
|
|
b, _ := base64.StdEncoding.DecodeString("R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7")
|
|
|
|
w.Write(b)
|
2018-04-25 09:59:30 +00:00
|
|
|
return nil
|
2017-01-25 21:48:24 +00:00
|
|
|
})
|
2016-11-19 21:35:23 +00:00
|
|
|
}
|
2017-01-25 19:01:02 +00:00
|
|
|
|
|
|
|
// enhanceVisitorKey derives the visitor key that is stored from the key sent
// by the client combined with the date, user agent, browser language and
// screen resolution.
//
// All five values are concatenated and hashed with MD5; the digest is returned
// as a 32-character lowercase hexadecimal string. The client-supplied key must
// be part of the hash: without it, every visitor sharing the same user agent,
// language and screen resolution on a given date would end up with the same
// key.
func enhanceVisitorKey(key string, date string, userAgent string, lang string, screenRes string) string {
	byteKey := md5.Sum([]byte(key + date + userAgent + lang + screenRes))
	return hex.EncodeToString(byteKey[:])
}
|