mirror of https://github.com/status-im/fathom.git
improved referrer parsing. match hostname against blacklist using community-maintained blacklist file graciously provided by Matomo (https://github.com/matomo-org/referrer-spam-blacklist). closes #170 relates to #154
This commit is contained in:
parent
9589072e42
commit
bca066b614
4
Makefile
4
Makefile
|
@ -64,3 +64,7 @@ lint:
|
|||
test:
|
||||
for PKG in $(PACKAGES); do go test -cover -coverprofile $$GOPATH/src/$$PKG/coverage.out $$PKG || exit 1; done;
|
||||
|
||||
.PHONY: referrer-spam-blacklist
|
||||
referrer-spam-blacklist:
|
||||
wget https://raw.githubusercontent.com/matomo-org/referrer-spam-blacklist/master/spammers.txt -O pkg/aggregator/data/blacklist.txt
|
||||
go-bindata -prefix "pkg/aggregator/data/" -o pkg/aggregator/bindata.go -pkg aggregator pkg/aggregator/data/
|
|
@ -1,7 +1,9 @@
|
|||
package aggregator
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"net/url"
|
||||
"strings"
|
||||
|
||||
"github.com/usefathom/fathom/pkg/datastore"
|
||||
"github.com/usefathom/fathom/pkg/models"
|
||||
|
@ -64,16 +66,46 @@ func (agg *Aggregator) Run() int {
|
|||
// if no explicit site ID was given in the tracking request, default to site with ID 1
|
||||
trackingIDMap[""] = 1
|
||||
|
||||
// setup referrer spam blacklist
|
||||
blacklist, err := newBlacklist()
|
||||
if err != nil {
|
||||
log.Error(err)
|
||||
return 0
|
||||
}
|
||||
|
||||
// add each pageview to the various statistics we gather
|
||||
for _, p := range pageviews {
|
||||
|
||||
// discard pageview if site tracking ID is unknown
|
||||
siteID, ok := trackingIDMap[p.SiteTrackingID]
|
||||
if !ok {
|
||||
log.Debugf("discarding pageview because of unrecognized site tracking ID %s", p.SiteTrackingID)
|
||||
log.Debugf("Skipping pageview because of unrecognized site tracking ID %s", p.SiteTrackingID)
|
||||
continue
|
||||
}
|
||||
|
||||
// start with referrer because we may want to skip this pageview altogether if it is referrer spam
|
||||
if p.Referrer != "" {
|
||||
ref, err := parseReferrer(p.Referrer)
|
||||
if err != nil {
|
||||
log.Debugf("Skipping pageview from referrer %s because of malformed referrer URL", p.Referrer)
|
||||
continue
|
||||
}
|
||||
|
||||
// filter out pageviews from blacklisted referrers
|
||||
// we use Hostname() here to discard port numbers
|
||||
if blacklist.Has(ref.Hostname()) {
|
||||
log.Debugf("Skipping pageview from referrer %s because of blacklist", p.Referrer)
|
||||
continue
|
||||
}
|
||||
|
||||
hostname := ref.Scheme + "://" + ref.Host
|
||||
referrerStats, err := agg.getReferrerStats(results, siteID, p.Timestamp, hostname, ref.Path)
|
||||
if err != nil {
|
||||
log.Error(err)
|
||||
continue
|
||||
}
|
||||
referrerStats.HandlePageview(p)
|
||||
}
|
||||
|
||||
// get existing site stats so we can add this pageview to it
|
||||
site, err := agg.getSiteStats(results, siteID, p.Timestamp)
|
||||
if err != nil {
|
||||
|
@ -88,23 +120,6 @@ func (agg *Aggregator) Run() int {
|
|||
continue
|
||||
}
|
||||
pageStats.HandlePageview(p)
|
||||
|
||||
// referrer stats
|
||||
if p.Referrer != "" {
|
||||
hostname, pathname, err := parseUrlParts(p.Referrer)
|
||||
if err != nil {
|
||||
log.Error(err)
|
||||
continue
|
||||
}
|
||||
|
||||
referrerStats, err := agg.getReferrerStats(results, siteID, p.Timestamp, hostname, pathname)
|
||||
if err != nil {
|
||||
log.Error(err)
|
||||
continue
|
||||
}
|
||||
referrerStats.HandlePageview(p)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// update stats
|
||||
|
@ -134,11 +149,33 @@ func (agg *Aggregator) Run() int {
|
|||
return n
|
||||
}
|
||||
|
||||
func parseUrlParts(s string) (string, string, error) {
|
||||
u, err := url.Parse(s)
|
||||
// parseReferrer parses the referrer string r and normalizes it.
//
// Normalization steps:
//   - the host is lower-cased, because host names are case-insensitive and
//     the result is used both as a statistics key and for blacklist lookups;
//   - the "amp", "utm_campaign", "utm_medium" and "utm_source" query
//     parameters are removed;
//   - a trailing "amp/" path segment is removed (the trailing slash is kept).
//
// It returns an error if r cannot be parsed as a URL or has no host part.
// On error the returned *url.URL is nil.
func parseReferrer(r string) (*url.URL, error) {
	u, err := url.Parse(r)
	if err != nil {
		return nil, err
	}

	// always require a hostname
	if u.Host == "" {
		return nil, errors.New("malformed URL, empty host")
	}

	// url.Parse lower-cases the scheme but leaves the host untouched
	u.Host = strings.ToLower(u.Host)

	// remove AMP & UTM vars
	if u.RawQuery != "" {
		q := u.Query()
		for _, k := range []string{"amp", "utm_campaign", "utm_medium", "utm_source"} {
			q.Del(k)
		}
		u.RawQuery = q.Encode()
	}

	// remove amp/ suffix (but keep trailing slash)
	if strings.HasSuffix(u.Path, "/amp/") {
		u.Path = strings.TrimSuffix(u.Path, "amp/")
	}

	// re-parse our normalized string into a new URL struct
	return url.Parse(u.String())
}
|
||||
|
|
|
@ -1,9 +1,54 @@
|
|||
package aggregator
|
||||
|
||||
import (
|
||||
"net/url"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestProcess(t *testing.T) {
|
||||
func TestParseReferrer(t *testing.T) {
|
||||
testsValid := map[string]*url.URL{
|
||||
"https://www.usefathom.com/?utm_source=github": &url.URL{
|
||||
Scheme: "https",
|
||||
Host: "www.usefathom.com",
|
||||
Path: "/",
|
||||
},
|
||||
"https://www.usefathom.com/privacy/amp/?utm_source=github": &url.URL{
|
||||
Scheme: "https",
|
||||
Host: "www.usefathom.com",
|
||||
Path: "/privacy/",
|
||||
},
|
||||
}
|
||||
testsErr := []string{
|
||||
"mysite.com",
|
||||
"foobar",
|
||||
"",
|
||||
}
|
||||
|
||||
for r, e := range testsValid {
|
||||
v, err := parseReferrer(r)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if v.Host != e.Host {
|
||||
t.Errorf("Invalid Host: expected %s, got %s", e.Host, v.Host)
|
||||
}
|
||||
|
||||
if v.Scheme != e.Scheme {
|
||||
t.Errorf("Invalid Scheme: expected %s, got %s", e.Scheme, v.Scheme)
|
||||
}
|
||||
|
||||
if v.Path != e.Path {
|
||||
t.Errorf("Invalid Path: expected %s, got %s", e.Path, v.Path)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
for _, r := range testsErr {
|
||||
v, err := parseReferrer(r)
|
||||
if err == nil {
|
||||
t.Errorf("Expected err, got %#v", v)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,42 @@
|
|||
package aggregator
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type blacklist struct {
|
||||
data []byte
|
||||
}
|
||||
|
||||
func newBlacklist() (*blacklist, error) {
|
||||
var err error
|
||||
b := &blacklist{}
|
||||
b.data, err = Asset("blacklist.txt")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return b, nil
|
||||
}
|
||||
|
||||
// Has returns true if the given domain appears on the blacklist
|
||||
// Uses sub-string matching, so if usesfathom.com is blacklisted then this function will also return true for danny.usesfathom.com
|
||||
func (b *blacklist) Has(r string) bool {
|
||||
if r == "" {
|
||||
return false
|
||||
}
|
||||
|
||||
scanner := bufio.NewScanner(bytes.NewReader(b.data))
|
||||
domain := ""
|
||||
|
||||
for scanner.Scan() {
|
||||
domain = scanner.Text()
|
||||
if strings.HasSuffix(r, domain) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
package aggregator
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestBlacklistHas(t *testing.T) {
|
||||
b, err := newBlacklist()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
table := map[string]bool{
|
||||
"03e.info": true,
|
||||
"zvetki.ru": true,
|
||||
"usefathom.com": false,
|
||||
"foo.03e.info": true, // sub-string match
|
||||
}
|
||||
|
||||
for r, e := range table {
|
||||
if v := b.Has(r); v != e {
|
||||
t.Errorf("Expected %v, got %v", e, v)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,947 @@
|
|||
03e.info
|
||||
0n-line.tv
|
||||
1-99seo.com
|
||||
1-free-share-buttons.com
|
||||
100dollars-seo.com
|
||||
100searchengines.com
|
||||
12masterov.com
|
||||
12u.info
|
||||
1pamm.ru
|
||||
1webmaster.ml
|
||||
24x7-server-support.site
|
||||
2your.site
|
||||
3-letter-domains.net
|
||||
3waynetworks.com
|
||||
4inn.ru
|
||||
4istoshop.com
|
||||
4webmasters.org
|
||||
5-steps-to-start-business.com
|
||||
5forex.ru
|
||||
6hopping.com
|
||||
7kop.ru
|
||||
7makemoneyonline.com
|
||||
7zap.com
|
||||
abcdefh.xyz
|
||||
abcdeg.xyz
|
||||
abclauncher.com
|
||||
acads.net
|
||||
acarreo.ru
|
||||
acunetix-referrer.com
|
||||
adanih.com
|
||||
adcash.com
|
||||
adf.ly
|
||||
adspart.com
|
||||
adtiger.tk
|
||||
adventureparkcostarica.com
|
||||
adviceforum.info
|
||||
advokateg.xyz
|
||||
aerodizain.com
|
||||
affordablewebsitesandmobileapps.com
|
||||
afora.ru
|
||||
aibolita.com
|
||||
aidarmebel.kz
|
||||
akuhni.by
|
||||
alfabot.xyz
|
||||
alibestsale.com
|
||||
aliexsale.ru
|
||||
alinabaniecka.pl
|
||||
alkanfarma.org
|
||||
allergick.com
|
||||
allergija.com
|
||||
allknow.info
|
||||
allmarketsnewdayli.gdn
|
||||
allnews.md
|
||||
allnews24.in
|
||||
allwomen.info
|
||||
allwrighter.ru
|
||||
alpharma.net
|
||||
altermix.ua
|
||||
amazon-seo-service.com
|
||||
amt-k.ru
|
||||
amtel-vredestein.com
|
||||
anal-acrobats.hol.es
|
||||
analytics-ads.xyz
|
||||
anapa-inns.ru
|
||||
android-style.com
|
||||
animalphotos.xyz
|
||||
animenime.ru
|
||||
anticrawler.org
|
||||
antiguabarbuda.ru
|
||||
apteka-pharm.ru
|
||||
arendakvartir.kz
|
||||
arendovalka.xyz
|
||||
arkkivoltti.net
|
||||
artdeko.info
|
||||
artpaint-market.ru
|
||||
artparquet.ru
|
||||
aruplighting.com
|
||||
ask-yug.com
|
||||
atleticpharm.org
|
||||
atyks.ru
|
||||
auto-complex.by
|
||||
auto-kia-fulldrive.ru
|
||||
auto-seo-service.org
|
||||
autoblog.org.ua
|
||||
autoseo-service.org
|
||||
autoseo-traffic.com
|
||||
autovideobroadcast.com
|
||||
aviva-limoux.com
|
||||
avkzarabotok.info
|
||||
avtointeres.ru
|
||||
avtovykup.kz
|
||||
azartclub.org
|
||||
azbukafree.com
|
||||
azlex.uz
|
||||
baixar-musicas-gratis.com
|
||||
baladur.ru
|
||||
balitouroffice.com
|
||||
balkanfarma.org
|
||||
bard-real.com.ua
|
||||
batut-fun.ru
|
||||
bavariagid.de
|
||||
beachtoday.ru
|
||||
bedroomlighting.us
|
||||
beremenyashka.com
|
||||
best-deal-hdd.pro
|
||||
best-ping-service-usa.blue
|
||||
best-seo-offer.com
|
||||
best-seo-software.xyz
|
||||
best-seo-solution.com
|
||||
bestmobilityscooterstoday.com
|
||||
bestofferhddbyt.info
|
||||
bestofferhddeed.info
|
||||
bestwebsitesawards.com
|
||||
betterhealthbeauty.com
|
||||
bezprostatita.com
|
||||
bif-ru.info
|
||||
biglistofwebsites.com
|
||||
billiard-classic.com.ua
|
||||
bio-market.kz
|
||||
biplanecentre.ru
|
||||
bird1.ru
|
||||
biteg.xyz
|
||||
bizru.info
|
||||
black-friday.ga
|
||||
blackhatworth.com
|
||||
blog100.org
|
||||
blog4u.top
|
||||
blogstar.fun
|
||||
blogtotal.de
|
||||
blue-square.biz
|
||||
bluerobot.info
|
||||
boltalko.xyz
|
||||
boostmyppc.com
|
||||
bpro1.top
|
||||
brakehawk.com
|
||||
brateg.xyz
|
||||
break-the-chains.com
|
||||
brillianty.info
|
||||
brk-rti.ru
|
||||
brothers-smaller.ru
|
||||
brusilov.ru
|
||||
bsell.ru
|
||||
budilneg.xyz
|
||||
budmavtomatika.com.ua
|
||||
bufetout.ru
|
||||
buketeg.xyz
|
||||
bukleteg.xyz
|
||||
burger-imperia.com
|
||||
burn-fat.ga
|
||||
buttons-for-website.com
|
||||
buttons-for-your-website.com
|
||||
buy-cheap-online.info
|
||||
buy-cheap-pills-order-online.com
|
||||
buy-forum.ru
|
||||
buy-meds24.com
|
||||
call-of-duty.info
|
||||
cardiosport.com.ua
|
||||
cartechnic.ru
|
||||
cenokos.ru
|
||||
cenoval.ru
|
||||
cezartabac.ro
|
||||
chcu.net
|
||||
cheap-trusted-backlinks.com
|
||||
chelyabinsk.dienai.ru
|
||||
chinese-amezon.com
|
||||
chizhik-2.ru
|
||||
ci.ua
|
||||
cityadspix.com
|
||||
civilwartheater.com
|
||||
cleaningservices.kiev.ua
|
||||
clicksor.com
|
||||
climate.by
|
||||
club-lukojl.ru
|
||||
coderstate.com
|
||||
codysbbq.com
|
||||
coffeemashiny.ru
|
||||
columb.net.ua
|
||||
commerage.ru
|
||||
comp-pomosch.ru
|
||||
compliance-alex.xyz
|
||||
compliance-alexa.xyz
|
||||
compliance-andrew.xyz
|
||||
compliance-barak.xyz
|
||||
compliance-brian.xyz
|
||||
compliance-don.xyz
|
||||
compliance-donald.xyz
|
||||
compliance-elena.xyz
|
||||
compliance-fred.xyz
|
||||
compliance-george.xyz
|
||||
compliance-irvin.xyz
|
||||
compliance-ivan.xyz
|
||||
compliance-john.top
|
||||
compliance-julianna.top
|
||||
computer-remont.ru
|
||||
conciergegroup.org
|
||||
connectikastudio.com
|
||||
cookie-law-enforcement-aa.xyz
|
||||
cookie-law-enforcement-bb.xyz
|
||||
cookie-law-enforcement-cc.xyz
|
||||
cookie-law-enforcement-dd.xyz
|
||||
cookie-law-enforcement-ee.xyz
|
||||
cookie-law-enforcement-ff.xyz
|
||||
cookie-law-enforcement-gg.xyz
|
||||
cookie-law-enforcement-hh.xyz
|
||||
cookie-law-enforcement-ii.xyz
|
||||
cookie-law-enforcement-jj.xyz
|
||||
cookie-law-enforcement-kk.xyz
|
||||
cookie-law-enforcement-ll.xyz
|
||||
cookie-law-enforcement-mm.xyz
|
||||
cookie-law-enforcement-nn.xyz
|
||||
cookie-law-enforcement-oo.xyz
|
||||
cookie-law-enforcement-pp.xyz
|
||||
cookie-law-enforcement-qq.xyz
|
||||
cookie-law-enforcement-rr.xyz
|
||||
cookie-law-enforcement-ss.xyz
|
||||
cookie-law-enforcement-tt.xyz
|
||||
cookie-law-enforcement-uu.xyz
|
||||
cookie-law-enforcement-vv.xyz
|
||||
cookie-law-enforcement-ww.xyz
|
||||
cookie-law-enforcement-xx.xyz
|
||||
cookie-law-enforcement-yy.xyz
|
||||
cookie-law-enforcement-zz.xyz
|
||||
copyrightclaims.org
|
||||
copyrightinstitute.org
|
||||
covadhosting.biz
|
||||
cp24.com.ua
|
||||
cubook.supernew.org
|
||||
customsua.com.ua
|
||||
cyber-monday.ga
|
||||
dailyrank.net
|
||||
darodar.com
|
||||
dawlenie.com
|
||||
dbutton.net
|
||||
dcdcapital.com
|
||||
deart-13.ru
|
||||
delfin-aqua.com.ua
|
||||
demenageur.com
|
||||
dengi-v-kredit.in.ua
|
||||
dermatovenerologiya.com
|
||||
descargar-musica-gratis.net
|
||||
detskie-konstruktory.ru
|
||||
dev-seo.blog
|
||||
dienai.ru
|
||||
diplomas-ru.com
|
||||
dipstar.org
|
||||
distonija.com
|
||||
dividendo.ru
|
||||
djekxa.ru
|
||||
djonwatch.ru
|
||||
dktr.ru
|
||||
docs4all.com
|
||||
docsarchive.net
|
||||
docsportal.net
|
||||
documentbase.net
|
||||
documentserver.net
|
||||
documentsite.net
|
||||
dogsrun.net
|
||||
dojki-hd.com
|
||||
domain-tracker.com
|
||||
domashniy-hotel.ru
|
||||
dominateforex.ml
|
||||
domination.ml
|
||||
doska-vsem.ru
|
||||
dostavka-v-krym.com
|
||||
dosugrostov.site
|
||||
drupa.com
|
||||
dvr.biz.ua
|
||||
e-buyeasy.com
|
||||
e-commerce-seo.com
|
||||
e-commerce-seo1.com
|
||||
earn-from-articles.com
|
||||
earnian-money.info
|
||||
easycommerce.cf
|
||||
ecommerce-seo.org
|
||||
ecomp3.ru
|
||||
econom.co
|
||||
edakgfvwql.ru
|
||||
edudocs.net
|
||||
eduinfosite.com
|
||||
eduserver.net
|
||||
egovaleo.it
|
||||
ek-invest.ru
|
||||
ekatalog.xyz
|
||||
eko-gazon.ru
|
||||
ekoproekt-kr.ru
|
||||
ekto.ee
|
||||
elektrikovich.ru
|
||||
elementspluss.ru
|
||||
elentur.com.ua
|
||||
elmifarhangi.com
|
||||
elvel.com.ua
|
||||
emerson-rus.ru
|
||||
eric-artem.com
|
||||
erot.co
|
||||
escort-russian.com
|
||||
este-line.com.ua
|
||||
etairikavideo.gr
|
||||
etehnika.com.ua
|
||||
eu-cookie-law-enforcement2.xyz
|
||||
euromasterclass.ru
|
||||
europages.com.ru
|
||||
eurosamodelki.ru
|
||||
event-tracking.com
|
||||
exdocsfiles.com
|
||||
express-vyvoz.ru
|
||||
eyes-on-you.ga
|
||||
f1nder.org
|
||||
fanoboi.com
|
||||
fast-wordpress-start.com
|
||||
fbdownloader.com
|
||||
feminist.org.ua
|
||||
fidalsa.de
|
||||
filesclub.net
|
||||
filesdatabase.net
|
||||
filter-ot-zheleza.ru
|
||||
financial-simulation.com
|
||||
finansov.info
|
||||
findercarphotos.com
|
||||
fix-website-errors.com
|
||||
floating-share-buttons.com
|
||||
flowertherapy.ru
|
||||
for-your.website
|
||||
forex-procto.ru
|
||||
forsex.info
|
||||
fortwosmartcar.pw
|
||||
forum69.info
|
||||
foxweber.com
|
||||
frauplus.ru
|
||||
free-fb-traffic.com
|
||||
free-fbook-traffic.com
|
||||
free-floating-buttons.com
|
||||
free-share-buttons.com
|
||||
free-social-buttons.com
|
||||
free-social-buttons.xyz
|
||||
free-social-buttons7.xyz
|
||||
free-traffic.xyz
|
||||
free-video-tool.com
|
||||
free-website-traffic.com
|
||||
freenode.info
|
||||
freewhatsappload.com
|
||||
freewlan.info
|
||||
freshnails.com.ua
|
||||
fsalas.com
|
||||
game300.ru
|
||||
gandikapper.ru
|
||||
gearcraft.us
|
||||
gearsadspromo.club
|
||||
generalporn.org
|
||||
gepatit-info.top
|
||||
germes-trans.com
|
||||
get-clickize.info
|
||||
get-free-social-traffic.com
|
||||
get-free-traffic-now.com
|
||||
get-more-freeer-visitors.info
|
||||
get-more-freeish-visitors.info
|
||||
get-seo-help.com
|
||||
get-your-social-buttons.info
|
||||
getaadsincome.info
|
||||
getadsincomely.info
|
||||
getlamborghini.ga
|
||||
getpy-click.info
|
||||
getrichquick.ml
|
||||
getrichquickly.info
|
||||
ghazel.ru
|
||||
ghostvisitor.com
|
||||
giftbig.ru
|
||||
girlporn.ru
|
||||
gkvector.ru
|
||||
glavprofit.ru
|
||||
global-smm.ru
|
||||
gobongo.info
|
||||
goodhumor24.com
|
||||
goodprotein.ru
|
||||
google-liar.ru
|
||||
googlemare.com
|
||||
googlsucks.com
|
||||
gorgaz.info
|
||||
grafaman.ru
|
||||
guardlink.org
|
||||
guidetopetersburg.com
|
||||
handicapvantoday.com
|
||||
happysong.ru
|
||||
hard-porn.mobi
|
||||
havepussy.com
|
||||
hawaiisurf.com
|
||||
hdmoviecamera.net
|
||||
hdmoviecams.com
|
||||
healbio.ru
|
||||
healgastro.com
|
||||
homeafrikalike.tk
|
||||
homemypicture.tk
|
||||
hongfanji.com
|
||||
hosting-tracker.com
|
||||
hottour.com
|
||||
housediz.com
|
||||
housemilan.ru
|
||||
howopen.ru
|
||||
howtostopreferralspam.eu
|
||||
hoztorg-opt.ru
|
||||
hseipaa.kz
|
||||
hulfingtonpost.com
|
||||
humanorightswatch.org
|
||||
hundejo.com
|
||||
hvd-store.com
|
||||
hyip-zanoza.me
|
||||
ico.re
|
||||
igadgetsworld.com
|
||||
igru-xbox.net
|
||||
ilikevitaly.com
|
||||
iloveitaly.ro
|
||||
iloveitaly.ru
|
||||
ilovevitaly.co
|
||||
ilovevitaly.com
|
||||
ilovevitaly.info
|
||||
ilovevitaly.org
|
||||
ilovevitaly.ru
|
||||
ilovevitaly.xyz
|
||||
iminent.com
|
||||
imperiafilm.ru
|
||||
impotentik.com
|
||||
incitystroy.ru
|
||||
incomekey.net
|
||||
increasewwwtraffic.info
|
||||
inet-shop.su
|
||||
infektsii.com
|
||||
infodocsportal.com
|
||||
inform-ua.info
|
||||
insider.pro
|
||||
interferencer.ru
|
||||
intex-air.ru
|
||||
investpamm.ru
|
||||
iskalko.ru
|
||||
isotoner.com
|
||||
ispaniya-costa-blanca.ru
|
||||
it-max.com.ua
|
||||
izhstrelok.ru
|
||||
jjbabskoe.ru
|
||||
jobius.com.ua
|
||||
jumkite.com
|
||||
justkillingti.me
|
||||
justprofit.xyz
|
||||
kabbalah-red-bracelets.com
|
||||
kabinet-binbank.ru
|
||||
kabinet-card-5ka.ru
|
||||
kabinet-click-alfabank.ru
|
||||
kabinet-lk-megafon.ru
|
||||
kabinet-login-mts.ru
|
||||
kabinet-mil.ru
|
||||
kabinet-mos.ru
|
||||
kabinet-my-beeline.ru
|
||||
kabinet-my-pochtabank.ru
|
||||
kabinet-online-vtb.ru
|
||||
kabinet-tinkoff.ru
|
||||
kabinet-ttk.ru
|
||||
kakablog.net
|
||||
kambasoft.com
|
||||
kamin-sam.ru
|
||||
karapuz.org.ua
|
||||
kazka.ru
|
||||
kazrent.com
|
||||
kerch.site
|
||||
kevblog.top
|
||||
keywords-monitoring-success.com
|
||||
keywords-monitoring-your-success.com
|
||||
kharkov.ua
|
||||
kino-fun.ru
|
||||
kino-key.info
|
||||
kino2018.cc
|
||||
kinobum.org
|
||||
kinopolet.net
|
||||
kinosed.net
|
||||
knigonosha.net
|
||||
komp-pomosch.ru
|
||||
komputers-best.ru
|
||||
komukc.com.ua
|
||||
konkursov.net
|
||||
kozhasobak.com
|
||||
krasnodar-avtolombard.ru
|
||||
kredytbank.com.ua
|
||||
laminat.com.ua
|
||||
landliver.org
|
||||
landoftracking.com
|
||||
laptop-4-less.com
|
||||
law-check-two.xyz
|
||||
law-enforcement-bot-ff.xyz
|
||||
law-enforcement-check-three.xyz
|
||||
law-enforcement-ee.xyz
|
||||
law-six.xyz
|
||||
laxdrills.com
|
||||
leeboyrussia.com
|
||||
legalrc.biz
|
||||
lerporn.info
|
||||
leto-dacha.ru
|
||||
lider82.ru
|
||||
lipidofobia.com.br
|
||||
littleberry.ru
|
||||
livefixer.com
|
||||
livia-pache.ru
|
||||
livingroomdecoratingideas.website
|
||||
lk-gosuslugi.ru
|
||||
login-tinkoff.ru
|
||||
loveorganic.ch
|
||||
lsex.xyz
|
||||
luckybull.io
|
||||
lukoilcard.ru
|
||||
lumb.co
|
||||
luton-invest.ru
|
||||
luxup.ru
|
||||
magicdiet.gq
|
||||
magnetic-bracelets.ru
|
||||
makemoneyonline.com
|
||||
makeprogress.ga
|
||||
manimpotence.com
|
||||
manualterap.roleforum.ru
|
||||
marblestyle.ru
|
||||
maridan.com.ua
|
||||
marketland.ml
|
||||
masterseek.com
|
||||
matras.space
|
||||
mattgibson.us
|
||||
max-apprais.com
|
||||
maxxximoda.ru
|
||||
mebel-iz-dereva.kiev.ua
|
||||
mebelcomplekt.ru
|
||||
mebeldekor.com.ua
|
||||
med-dopomoga.com
|
||||
med-zdorovie.com.ua
|
||||
medicineseasybuy.com
|
||||
meds-online24.com
|
||||
meduza-consult.ru
|
||||
megapolis-96.ru
|
||||
metallo-konstruktsii.ru
|
||||
metallosajding.ru
|
||||
mifepriston.net
|
||||
mikozstop.com
|
||||
mikrocement.com.ua
|
||||
mikrozaym2you.ru
|
||||
minegam.com
|
||||
mirobuvi.com.ua
|
||||
mirtorrent.net
|
||||
mksport.ru
|
||||
mobilemedia.md
|
||||
mockupui.com
|
||||
modforwot.ru
|
||||
modnie-futbolki.net
|
||||
moinozhki.com
|
||||
monetizationking.net
|
||||
money-for-placing-articles.com
|
||||
money7777.info
|
||||
moneytop.ru
|
||||
moneyzzz.ru
|
||||
mosrif.ru
|
||||
mostorgnerud.ru
|
||||
moy-dokument.com
|
||||
moyakuhnia.ru
|
||||
muscle-factory.com.ua
|
||||
musichallaudio.ru
|
||||
mybuh.kz
|
||||
myftpupload.com
|
||||
myplaycity.com
|
||||
nachalka21.ru
|
||||
nanochskazki.ru
|
||||
needtosellmyhousefast.com
|
||||
net-profits.xyz
|
||||
nevapotolok.ru
|
||||
newsrosprom.ru
|
||||
newstaffadsshop.club
|
||||
niki-mlt.ru
|
||||
nizniynovgorod.dienai.ru
|
||||
novosti-hi-tech.ru
|
||||
nubuilderian.info
|
||||
nufaq.com
|
||||
o-o-11-o-o.com
|
||||
o-o-6-o-o.com
|
||||
o-o-6-o-o.ru
|
||||
o-o-8-o-o.com
|
||||
o-o-8-o-o.ru
|
||||
obsessionphrases.com
|
||||
odiabetikah.com
|
||||
odsadsmobile.biz
|
||||
ofermerah.com
|
||||
office2web.com
|
||||
officedocuments.net
|
||||
ogorodnic.com
|
||||
online-binbank.ru
|
||||
online-hit.info
|
||||
online-intim.com
|
||||
online-mkb.ru
|
||||
online-templatestore.com
|
||||
online-vtb.ru
|
||||
onlinetvseries.me
|
||||
onlywoman.org
|
||||
ooo-olni.ru
|
||||
optsol.ru
|
||||
orakul.spb.ru
|
||||
osteochondrosis.ru
|
||||
ownshop.cf
|
||||
ozas.net
|
||||
paidonlinesites.com
|
||||
palvira.com.ua
|
||||
pc-services.ru
|
||||
perm.dienai.ru
|
||||
perper.ru
|
||||
petrovka-online.com
|
||||
photo-clip.ru
|
||||
photokitchendesign.com
|
||||
picturesmania.com
|
||||
pills24h.com
|
||||
piulatte.cz
|
||||
pizza-imperia.com
|
||||
pizza-tycoon.com
|
||||
pk-pomosch.ru
|
||||
pk-services.ru
|
||||
podarkilove.ru
|
||||
podemnik.pro
|
||||
podseka1.ru
|
||||
poiskzakona.ru
|
||||
pokupaylegko.ru
|
||||
popads.net
|
||||
pops.foundation
|
||||
popugaychiki.com
|
||||
pornhub-forum.ga
|
||||
pornhub-forum.uni.me
|
||||
pornhub-ru.com
|
||||
porno-chaman.info
|
||||
pornoelita.info
|
||||
pornoforadult.com
|
||||
pornogig.com
|
||||
pornohd1080.online
|
||||
pornoklad.ru
|
||||
pornonik.com
|
||||
pornoplen.com
|
||||
portnoff.od.ua
|
||||
pozdravleniya-c.ru
|
||||
priceg.com
|
||||
pricheski-video.com
|
||||
prlog.ru
|
||||
procrafts.ru
|
||||
prodaemdveri.com
|
||||
producm.ru
|
||||
prodvigator.ua
|
||||
professionalsolutions.eu
|
||||
prointer.net.ua
|
||||
promoforum.ru
|
||||
pron.pro
|
||||
prosmibank.ru
|
||||
prostitutki-rostova.ru.com
|
||||
psa48.ru
|
||||
punch.media
|
||||
purchasepillsnorx.com
|
||||
qualitymarketzone.com
|
||||
quit-smoking.ga
|
||||
qwesa.ru
|
||||
rank-checker.online
|
||||
rankings-analytics.com
|
||||
ranksonic.info
|
||||
ranksonic.net
|
||||
ranksonic.org
|
||||
rapidgator-porn.ga
|
||||
rapidsites.pro
|
||||
razborka-skoda.org.ua
|
||||
rcb101.ru
|
||||
realresultslist.com
|
||||
rednise.com
|
||||
regionshop.biz
|
||||
releshop.ru
|
||||
remkompov.ru
|
||||
remont-kvartirspb.com
|
||||
rent2spb.ru
|
||||
replica-watch.ru
|
||||
research.ifmo.ru
|
||||
resell-seo-services.com
|
||||
resellerclub.com
|
||||
responsive-test.net
|
||||
reversing.cc
|
||||
rfavon.ru
|
||||
rightenergysolutions.com.au
|
||||
roof-city.ru
|
||||
rospromtest.ru
|
||||
ru-lk-rt.ru
|
||||
ruinfocomp.ru
|
||||
rulate.ru
|
||||
rumamba.com
|
||||
rupolitshow.ru
|
||||
rusexy.xyz
|
||||
ruspoety.ru
|
||||
russian-postindex.ru
|
||||
russian-translator.com
|
||||
rybalka-opt.ru
|
||||
sad-torg.com.ua
|
||||
sady-urala.ru
|
||||
saltspray.ru
|
||||
sanjosestartups.com
|
||||
santaren.by
|
||||
santasgift.ml
|
||||
santehnovich.ru
|
||||
savetubevideo.com
|
||||
savetubevideo.info
|
||||
scansafe.net
|
||||
scat.porn
|
||||
screentoolkit.com
|
||||
scripted.com
|
||||
search-error.com
|
||||
searchencrypt.com
|
||||
security-corporation.com.ua
|
||||
sell-fb-group-here.com
|
||||
semalt.com
|
||||
semaltmedia.com
|
||||
seo-2-0.com
|
||||
seo-platform.com
|
||||
seo-smm.kz
|
||||
seoanalyses.com
|
||||
seocheckupx.com
|
||||
seocheckupx.net
|
||||
seoexperimenty.ru
|
||||
seojokes.net
|
||||
seopub.net
|
||||
seoservices2018.com
|
||||
sexsaoy.com
|
||||
sexyali.com
|
||||
sexyteens.hol.es
|
||||
shagtomsk.ru
|
||||
share-buttons-for-free.com
|
||||
share-buttons.xyz
|
||||
sharebutton.io
|
||||
sharebutton.net
|
||||
sharebutton.to
|
||||
shnyagi.net
|
||||
shoppingmiracles.co.uk
|
||||
shops-ru.ru
|
||||
sibecoprom.ru
|
||||
sim-dealer.ru
|
||||
simple-share-buttons.com
|
||||
sinhronperevod.ru
|
||||
site-auditor.online
|
||||
site5.com
|
||||
siteripz.net
|
||||
sitevaluation.org
|
||||
skinali.com
|
||||
sladkoevideo.com
|
||||
sledstvie-veli.net
|
||||
slftsdybbg.ru
|
||||
slkrm.ru
|
||||
slomm.ru
|
||||
slow-website.xyz
|
||||
smailik.org
|
||||
smartphonediscount.info
|
||||
snabs.kz
|
||||
snegozaderzhatel.ru
|
||||
snip.to
|
||||
snip.tw
|
||||
soaksoak.ru
|
||||
sochi-3d.ru
|
||||
social-button.xyz
|
||||
social-buttons-ii.xyz
|
||||
social-buttons.com
|
||||
social-traffic-1.xyz
|
||||
social-traffic-2.xyz
|
||||
social-traffic-3.xyz
|
||||
social-traffic-4.xyz
|
||||
social-traffic-5.xyz
|
||||
social-traffic-7.xyz
|
||||
social-widget.xyz
|
||||
socialbuttons.xyz
|
||||
socialseet.ru
|
||||
socialtrade.biz
|
||||
sohoindia.net
|
||||
solitaire-game.ru
|
||||
solnplast.ru
|
||||
sosdepotdebilan.com
|
||||
souvenirua.com
|
||||
sovetskie-plakaty.ru
|
||||
soyuzexpedition.ru
|
||||
sp-laptop.ru
|
||||
sp-zakupki.ru
|
||||
spb-plitka.ru
|
||||
spb-scenar.ru
|
||||
speedup-my.site
|
||||
spin2016.cf
|
||||
sportwizard.ru
|
||||
spravka130.ru
|
||||
spravkavspb.net
|
||||
sribno.net
|
||||
stavimdveri.ru
|
||||
steame.ru
|
||||
stiralkovich.ru
|
||||
stocktwists.com
|
||||
store-rx.com
|
||||
stream-tds.com
|
||||
stroyka47.ru
|
||||
studentguide.ru
|
||||
success-seo.com
|
||||
sundrugstore.com
|
||||
superiends.org
|
||||
supermama.top
|
||||
supervesti.ru
|
||||
svetka.info
|
||||
svetoch.moscow
|
||||
t-machinery.ru
|
||||
t-rec.su
|
||||
taihouse.ru
|
||||
tattoo-stickers.ru
|
||||
tattooha.com
|
||||
td-perimetr.ru
|
||||
technika-remont.ru
|
||||
tedxrj.com
|
||||
tentcomplekt.ru
|
||||
teplohod-gnezdo.ru
|
||||
texnika.com.ua
|
||||
tgtclick.com
|
||||
thaoduoctoc.com
|
||||
theautoprofit.ml
|
||||
theguardlan.com
|
||||
thesmartsearch.net
|
||||
tokshow.online
|
||||
tomck.com
|
||||
top-gan.ru
|
||||
top-l2.com
|
||||
top1-seo-service.com
|
||||
top10-way.com
|
||||
topquality.cf
|
||||
topseoservices.co
|
||||
track-rankings.online
|
||||
tracker24-gps.ru
|
||||
traffic-cash.xyz
|
||||
traffic2cash.org
|
||||
traffic2cash.xyz
|
||||
traffic2money.com
|
||||
trafficgenius.xyz
|
||||
trafficmonetize.org
|
||||
trafficmonetizer.org
|
||||
traphouselatino.net
|
||||
trion.od.ua
|
||||
tsatu.edu.ua
|
||||
tsc-koleso.ru
|
||||
tuningdom.ru
|
||||
twsufa.ru
|
||||
ua.tc
|
||||
uasb.ru
|
||||
ucoz.ru
|
||||
udav.net
|
||||
ufa.dienai.ru
|
||||
ukrainian-poetry.com
|
||||
ul-potolki.ru
|
||||
undergroundcityphoto.com
|
||||
unibus.su
|
||||
univerfiles.com
|
||||
unlimitdocs.net
|
||||
unpredictable.ga
|
||||
uptime-as.net
|
||||
uptime-eu.net
|
||||
uptime-us.net
|
||||
uptime.com
|
||||
uptimechecker.com
|
||||
uzpaket.com
|
||||
uzungil.com
|
||||
vaderenergy.ru
|
||||
validus.pro
|
||||
varikozdok.ru
|
||||
veloland.in.ua
|
||||
ventopt.by
|
||||
veselokloun.ru
|
||||
vesnatehno.com
|
||||
viagra-soft.ru
|
||||
video--production.com
|
||||
video-woman.com
|
||||
videos-for-your-business.com
|
||||
viel.su
|
||||
viktoria-center.ru
|
||||
vodaodessa.com
|
||||
vodkoved.ru
|
||||
vzheludke.com
|
||||
vzubkah.com
|
||||
w3javascript.com
|
||||
wallpaperdesk.info
|
||||
wdss.com.ua
|
||||
we-ping-for-youic.info
|
||||
web-revenue.xyz
|
||||
webmaster-traffic.com
|
||||
webmonetizer.net
|
||||
website-analytics.online
|
||||
website-analyzer.info
|
||||
website-speed-check.site
|
||||
website-speed-checker.site
|
||||
websites-reviews.com
|
||||
websocial.me
|
||||
weburlopener.com
|
||||
wmasterlead.com
|
||||
woman-orgasm.ru
|
||||
wordpress-crew.net
|
||||
wordpresscore.com
|
||||
workius.ru
|
||||
works.if.ua
|
||||
worldmed.info
|
||||
wufak.com
|
||||
ww2awards.info
|
||||
www-lk-rt.ru
|
||||
x5market.ru
|
||||
xkaz.org
|
||||
xn-------53dbcapga5atlplfdm6ag1ab1bvehl0b7toa0k.xn--p1ai
|
||||
xn-----6kcamwewcd9bayelq.xn--p1ai
|
||||
xn-----7kcaaxchbbmgncr7chzy0k0hk.xn--p1ai
|
||||
xn-----clckdac3bsfgdft3aebjp5etek.xn--p1ai
|
||||
xn----7sbabhjc3ccc5aggbzfmfi.xn--p1ai
|
||||
xn----7sbabm1ahc4b2aqff.su
|
||||
xn----7sbabn5abjehfwi8bj.xn--p1ai
|
||||
xn----7sbbpe3afguye.xn--p1ai
|
||||
xn----7sbho2agebbhlivy.xn--p1ai
|
||||
xn----8sbaki4azawu5b.xn--p1ai
|
||||
xn----8sbarihbihxpxqgaf0g1e.xn--80adxhks
|
||||
xn----8sbhefaln6acifdaon5c6f4axh.xn--p1ai
|
||||
xn----8sblgmbj1a1bk8l.xn----161-4vemb6cjl7anbaea3afninj.xn--p1ai
|
||||
xn----ctbbcjd3dbsehgi.xn--p1ai
|
||||
xn----ctbfcdjl8baejhfb1oh.xn--p1ai
|
||||
xn----ctbigni3aj4h.xn--p1ai
|
||||
xn----ftbeoaiyg1ak1cb7d.xn--p1ai
|
||||
xn----itbbudqejbfpg3l.com
|
||||
xn--80aaajkrncdlqdh6ane8t.xn--p1ai
|
||||
xn--80aanaardaperhcem4a6i.com
|
||||
xn--80adaggc5bdhlfamsfdij4p7b.xn--p1ai
|
||||
xn--80adgcaax6acohn6r.xn--p1ai
|
||||
xn--90acenikpebbdd4f6d.xn--p1ai
|
||||
xn--90acjmaltae3acm.xn--p1acf
|
||||
xn--c1acygb.xn--p1ai
|
||||
xn--d1abj0abs9d.in.ua
|
||||
xn--d1aifoe0a9a.top
|
||||
xn--e1aaajzchnkg.ru.com
|
||||
xn--e1agf4c.xn--80adxhks
|
||||
xtrafficplus.com
|
||||
xz618.com
|
||||
yaderenergy.ru
|
||||
yes-com.com
|
||||
yhirurga.ru
|
||||
ykecwqlixx.ru
|
||||
yodse.io
|
||||
youporn-forum.ga
|
||||
youporn-forum.uni.me
|
||||
youporn-ru.com
|
||||
yourserverisdown.com
|
||||
zahvat.ru
|
||||
zastroyka.org
|
||||
zavod-gm.ru
|
||||
zdm-auto.com
|
||||
zdorovie-nogi.info
|
||||
zelena-mriya.com.ua
|
||||
zoominfo.com
|
||||
zvetki.ru
|
|
@ -59,7 +59,7 @@ func (c *Collector) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
|||
IsNewVisitor: q.Get("nv") == "1",
|
||||
IsNewSession: q.Get("ns") == "1",
|
||||
IsUnique: q.Get("u") == "1",
|
||||
Referrer: parseReferrer(q.Get("r")),
|
||||
Referrer: q.Get("r"),
|
||||
IsFinished: false,
|
||||
IsBounce: true,
|
||||
Duration: 0,
|
||||
|
@ -193,29 +193,6 @@ func parsePathname(p string) string {
|
|||
return "/" + strings.TrimLeft(p, "/")
|
||||
}
|
||||
|
||||
// parseReferrer normalizes the raw referrer URL r and returns it as a string.
//
// The "amp", "utm_campaign", "utm_medium" and "utm_source" query parameters
// are removed, and a trailing "amp/" path segment is stripped while keeping
// the trailing slash (so "/privacy/amp/" becomes "/privacy/" and "/amp/"
// becomes "/"). If r cannot be parsed as a URL, the empty string is returned.
//
// TODO: Move this to aggregator, as we need this endpoint to be as fast as possible
func parseReferrer(r string) string {
	u, err := url.Parse(r)
	if err != nil {
		return ""
	}

	// remove AMP & UTM vars
	q := u.Query()
	for _, k := range []string{"amp", "utm_campaign", "utm_medium", "utm_source"} {
		q.Del(k)
	}
	u.RawQuery = q.Encode()

	// remove amp/ suffix (but keep trailing slash)
	if strings.HasSuffix(u.Path, "/amp/") {
		u.Path = strings.TrimSuffix(u.Path, "amp/")
	}

	return u.String()
}
|
||||
|
||||
func parseHostname(r string) string {
|
||||
u, err := url.Parse(r)
|
||||
if err != nil {
|
||||
|
|
Loading…
Reference in New Issue