improved referrer parsing. match hostname against blacklist using community-maintained blacklist file graciously provided by Matomo (https://github.com/matomo-org/referrer-spam-blacklist). closes #170 relates to #154

This commit is contained in:
Danny van Kooten 2018-11-09 10:39:14 +01:00
parent 9589072e42
commit bca066b614
8 changed files with 1360 additions and 48 deletions

View File

@ -64,3 +64,7 @@ lint:
test:
for PKG in $(PACKAGES); do go test -cover -coverprofile $$GOPATH/src/$$PKG/coverage.out $$PKG || exit 1; done;
# referrer-spam-blacklist refreshes the embedded referrer spam blacklist:
# it downloads spammers.txt from the matomo-org/referrer-spam-blacklist
# repository into pkg/aggregator/data/blacklist.txt and then regenerates
# pkg/aggregator/bindata.go from that data directory.
# Requires wget and go-bindata to be available on PATH.
.PHONY: referrer-spam-blacklist
referrer-spam-blacklist:
wget https://raw.githubusercontent.com/matomo-org/referrer-spam-blacklist/master/spammers.txt -O pkg/aggregator/data/blacklist.txt
go-bindata -prefix "pkg/aggregator/data/" -o pkg/aggregator/bindata.go -pkg aggregator pkg/aggregator/data/

View File

@ -1,7 +1,9 @@
package aggregator
import (
"errors"
"net/url"
"strings"
"github.com/usefathom/fathom/pkg/datastore"
"github.com/usefathom/fathom/pkg/models"
@ -64,16 +66,46 @@ func (agg *Aggregator) Run() int {
// if no explicit site ID was given in the tracking request, default to site with ID 1
trackingIDMap[""] = 1
// setup referrer spam blacklist
blacklist, err := newBlacklist()
if err != nil {
log.Error(err)
return 0
}
// add each pageview to the various statistics we gather
for _, p := range pageviews {
// discard pageview if site tracking ID is unknown
siteID, ok := trackingIDMap[p.SiteTrackingID]
if !ok {
log.Debugf("discarding pageview because of unrecognized site tracking ID %s", p.SiteTrackingID)
log.Debugf("Skipping pageview because of unrecognized site tracking ID %s", p.SiteTrackingID)
continue
}
// start with referrer because we may want to skip this pageview altogether if it is referrer spam
if p.Referrer != "" {
ref, err := parseReferrer(p.Referrer)
if err != nil {
log.Debugf("Skipping pageview from referrer %s because of malformed referrer URL", p.Referrer)
continue
}
// filter out pageviews from blacklisted referrers
// we use Hostname() here to discard port numbers
if blacklist.Has(ref.Hostname()) {
log.Debugf("Skipping pageview from referrer %s because of blacklist", p.Referrer)
continue
}
hostname := ref.Scheme + "://" + ref.Host
referrerStats, err := agg.getReferrerStats(results, siteID, p.Timestamp, hostname, ref.Path)
if err != nil {
log.Error(err)
continue
}
referrerStats.HandlePageview(p)
}
// get existing site stats so we can add this pageview to it
site, err := agg.getSiteStats(results, siteID, p.Timestamp)
if err != nil {
@ -88,23 +120,6 @@ func (agg *Aggregator) Run() int {
continue
}
pageStats.HandlePageview(p)
// referrer stats
if p.Referrer != "" {
hostname, pathname, err := parseUrlParts(p.Referrer)
if err != nil {
log.Error(err)
continue
}
referrerStats, err := agg.getReferrerStats(results, siteID, p.Timestamp, hostname, pathname)
if err != nil {
log.Error(err)
continue
}
referrerStats.HandlePageview(p)
}
}
// update stats
@ -134,11 +149,33 @@ func (agg *Aggregator) Run() int {
return n
}
// parseUrlParts splits the URL in s into its origin ("scheme://host") and
// its path. It performs no validation beyond what url.Parse does, so a
// string without a scheme or host yields the origin "://".
func parseUrlParts(s string) (string, string, error) {
	u, err := url.Parse(s)
	if err != nil {
		return "", "", err
	}

	return u.Scheme + "://" + u.Host, u.Path, nil
}

// parseReferrer parses the referrer string r and normalizes it.
//
// Normalization removes the "amp", "utm_campaign", "utm_medium" and
// "utm_source" query parameters and drops a trailing "amp/" path segment
// (the slash before it is kept, so "/privacy/amp/" becomes "/privacy/").
//
// An error is returned when r cannot be parsed or when it has no hostname.
// On error the returned URL is nil.
func parseReferrer(r string) (*url.URL, error) {
	u, err := url.Parse(r)
	if err != nil {
		return nil, err
	}

	// always require a hostname; Hostname() strips the port, so a
	// port-only authority such as "http://:8080/" is rejected as well
	if u.Hostname() == "" {
		return nil, errors.New("malformed URL, empty host")
	}

	// remove AMP & UTM vars
	if u.RawQuery != "" {
		q := u.Query()
		for _, k := range []string{"amp", "utm_campaign", "utm_medium", "utm_source"} {
			q.Del(k)
		}
		u.RawQuery = q.Encode()
	}

	// remove amp/ suffix (but keep trailing slash)
	if strings.HasSuffix(u.Path, "/amp/") {
		u.Path = strings.TrimSuffix(u.Path, "amp/")
	}

	// re-parse our normalized string into a new URL struct so that all
	// derived fields agree with the edited path and query
	return url.Parse(u.String())
}

View File

@ -1,9 +1,54 @@
package aggregator
import (
"net/url"
"testing"
)
func TestProcess(t *testing.T) {
// TestParseReferrer checks that parseReferrer normalizes valid referrer URLs
// (tracking query parameters and a trailing amp/ path segment removed) and
// returns an error for strings that have no host.
func TestParseReferrer(t *testing.T) {
	testsValid := map[string]*url.URL{
		"https://www.usefathom.com/?utm_source=github": {
			Scheme: "https",
			Host:   "www.usefathom.com",
			Path:   "/",
		},
		"https://www.usefathom.com/privacy/amp/?utm_source=github": {
			Scheme: "https",
			Host:   "www.usefathom.com",
			Path:   "/privacy/",
		},
	}
	testsErr := []string{
		"mysite.com",
		"foobar",
		"",
	}

	for r, e := range testsValid {
		v, err := parseReferrer(r)
		if err != nil {
			t.Errorf("%q: unexpected error: %v", r, err)
			// v is nil when an error is returned; skip the field checks
			// instead of dereferencing it
			continue
		}

		if v.Host != e.Host {
			t.Errorf("%q: Invalid Host: expected %s, got %s", r, e.Host, v.Host)
		}
		if v.Scheme != e.Scheme {
			t.Errorf("%q: Invalid Scheme: expected %s, got %s", r, e.Scheme, v.Scheme)
		}
		if v.Path != e.Path {
			t.Errorf("%q: Invalid Path: expected %s, got %s", r, e.Path, v.Path)
		}
		// the tracking parameters must be gone from the query string
		if v.RawQuery != e.RawQuery {
			t.Errorf("%q: Invalid RawQuery: expected %q, got %q", r, e.RawQuery, v.RawQuery)
		}
	}

	for _, r := range testsErr {
		v, err := parseReferrer(r)
		if err == nil {
			t.Errorf("%q: Expected err, got %#v", r, v)
		}
	}
}

235
pkg/aggregator/bindata.go Normal file

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,42 @@
package aggregator
import (
"bufio"
"bytes"
"strings"
)
// blacklist holds the raw contents of the referrer spam blacklist file.
type blacklist struct {
// data is the blacklist content as returned by Asset("blacklist.txt");
// Has scans it line by line, treating each line as one domain.
data []byte
}
// newBlacklist builds a blacklist from the embedded "blacklist.txt" asset.
// If the asset cannot be loaded, the error from Asset is returned together
// with a nil blacklist.
func newBlacklist() (*blacklist, error) {
	contents, err := Asset("blacklist.txt")
	if err != nil {
		return nil, err
	}

	return &blacklist{data: contents}, nil
}
// Has reports whether the hostname r is on the blacklist.
//
// A hostname matches when it equals a blacklisted domain or is a subdomain
// of one: if example.com is blacklisted, Has returns true for example.com
// and danny.example.com, but not for notexample.com. The comparison is
// case-insensitive with respect to r, and blank lines in the blacklist are
// ignored. An empty r never matches.
func (b *blacklist) Has(r string) bool {
	if r == "" {
		return false
	}

	// hostnames are case-insensitive; blacklist entries are lower-case
	host := strings.ToLower(r)

	scanner := bufio.NewScanner(bytes.NewReader(b.data))
	for scanner.Scan() {
		domain := strings.TrimSpace(scanner.Text())

		// an empty entry is a suffix of every string and would otherwise
		// blacklist every hostname
		if domain == "" {
			continue
		}

		// only match on whole DNS labels: the hostname must either be the
		// entry itself or end in "." followed by the entry
		if host == domain || strings.HasSuffix(host, "."+domain) {
			return true
		}
	}

	return false
}

View File

@ -0,0 +1,25 @@
package aggregator
import (
"testing"
)
// TestBlacklistHas checks blacklist.Has against the embedded blacklist:
// listed domains and their subdomains must match, unlisted domains must not.
func TestBlacklistHas(t *testing.T) {
	b, err := newBlacklist()
	if err != nil {
		// b is nil when loading fails, so calling Has on it would panic;
		// stop the test here
		t.Fatal(err)
	}

	table := map[string]bool{
		"03e.info":      true,
		"zvetki.ru":     true,
		"usefathom.com": false,
		"foo.03e.info":  true, // subdomain of a blacklisted domain
	}

	for r, e := range table {
		if v := b.Has(r); v != e {
			// map iteration order is random, so name the hostname that failed
			t.Errorf("Has(%q): expected %v, got %v", r, e, v)
		}
	}
}

View File

@ -0,0 +1,947 @@
03e.info
0n-line.tv
1-99seo.com
1-free-share-buttons.com
100dollars-seo.com
100searchengines.com
12masterov.com
12u.info
1pamm.ru
1webmaster.ml
24x7-server-support.site
2your.site
3-letter-domains.net
3waynetworks.com
4inn.ru
4istoshop.com
4webmasters.org
5-steps-to-start-business.com
5forex.ru
6hopping.com
7kop.ru
7makemoneyonline.com
7zap.com
abcdefh.xyz
abcdeg.xyz
abclauncher.com
acads.net
acarreo.ru
acunetix-referrer.com
adanih.com
adcash.com
adf.ly
adspart.com
adtiger.tk
adventureparkcostarica.com
adviceforum.info
advokateg.xyz
aerodizain.com
affordablewebsitesandmobileapps.com
afora.ru
aibolita.com
aidarmebel.kz
akuhni.by
alfabot.xyz
alibestsale.com
aliexsale.ru
alinabaniecka.pl
alkanfarma.org
allergick.com
allergija.com
allknow.info
allmarketsnewdayli.gdn
allnews.md
allnews24.in
allwomen.info
allwrighter.ru
alpharma.net
altermix.ua
amazon-seo-service.com
amt-k.ru
amtel-vredestein.com
anal-acrobats.hol.es
analytics-ads.xyz
anapa-inns.ru
android-style.com
animalphotos.xyz
animenime.ru
anticrawler.org
antiguabarbuda.ru
apteka-pharm.ru
arendakvartir.kz
arendovalka.xyz
arkkivoltti.net
artdeko.info
artpaint-market.ru
artparquet.ru
aruplighting.com
ask-yug.com
atleticpharm.org
atyks.ru
auto-complex.by
auto-kia-fulldrive.ru
auto-seo-service.org
autoblog.org.ua
autoseo-service.org
autoseo-traffic.com
autovideobroadcast.com
aviva-limoux.com
avkzarabotok.info
avtointeres.ru
avtovykup.kz
azartclub.org
azbukafree.com
azlex.uz
baixar-musicas-gratis.com
baladur.ru
balitouroffice.com
balkanfarma.org
bard-real.com.ua
batut-fun.ru
bavariagid.de
beachtoday.ru
bedroomlighting.us
beremenyashka.com
best-deal-hdd.pro
best-ping-service-usa.blue
best-seo-offer.com
best-seo-software.xyz
best-seo-solution.com
bestmobilityscooterstoday.com
bestofferhddbyt.info
bestofferhddeed.info
bestwebsitesawards.com
betterhealthbeauty.com
bezprostatita.com
bif-ru.info
biglistofwebsites.com
billiard-classic.com.ua
bio-market.kz
biplanecentre.ru
bird1.ru
biteg.xyz
bizru.info
black-friday.ga
blackhatworth.com
blog100.org
blog4u.top
blogstar.fun
blogtotal.de
blue-square.biz
bluerobot.info
boltalko.xyz
boostmyppc.com
bpro1.top
brakehawk.com
brateg.xyz
break-the-chains.com
brillianty.info
brk-rti.ru
brothers-smaller.ru
brusilov.ru
bsell.ru
budilneg.xyz
budmavtomatika.com.ua
bufetout.ru
buketeg.xyz
bukleteg.xyz
burger-imperia.com
burn-fat.ga
buttons-for-website.com
buttons-for-your-website.com
buy-cheap-online.info
buy-cheap-pills-order-online.com
buy-forum.ru
buy-meds24.com
call-of-duty.info
cardiosport.com.ua
cartechnic.ru
cenokos.ru
cenoval.ru
cezartabac.ro
chcu.net
cheap-trusted-backlinks.com
chelyabinsk.dienai.ru
chinese-amezon.com
chizhik-2.ru
ci.ua
cityadspix.com
civilwartheater.com
cleaningservices.kiev.ua
clicksor.com
climate.by
club-lukojl.ru
coderstate.com
codysbbq.com
coffeemashiny.ru
columb.net.ua
commerage.ru
comp-pomosch.ru
compliance-alex.xyz
compliance-alexa.xyz
compliance-andrew.xyz
compliance-barak.xyz
compliance-brian.xyz
compliance-don.xyz
compliance-donald.xyz
compliance-elena.xyz
compliance-fred.xyz
compliance-george.xyz
compliance-irvin.xyz
compliance-ivan.xyz
compliance-john.top
compliance-julianna.top
computer-remont.ru
conciergegroup.org
connectikastudio.com
cookie-law-enforcement-aa.xyz
cookie-law-enforcement-bb.xyz
cookie-law-enforcement-cc.xyz
cookie-law-enforcement-dd.xyz
cookie-law-enforcement-ee.xyz
cookie-law-enforcement-ff.xyz
cookie-law-enforcement-gg.xyz
cookie-law-enforcement-hh.xyz
cookie-law-enforcement-ii.xyz
cookie-law-enforcement-jj.xyz
cookie-law-enforcement-kk.xyz
cookie-law-enforcement-ll.xyz
cookie-law-enforcement-mm.xyz
cookie-law-enforcement-nn.xyz
cookie-law-enforcement-oo.xyz
cookie-law-enforcement-pp.xyz
cookie-law-enforcement-qq.xyz
cookie-law-enforcement-rr.xyz
cookie-law-enforcement-ss.xyz
cookie-law-enforcement-tt.xyz
cookie-law-enforcement-uu.xyz
cookie-law-enforcement-vv.xyz
cookie-law-enforcement-ww.xyz
cookie-law-enforcement-xx.xyz
cookie-law-enforcement-yy.xyz
cookie-law-enforcement-zz.xyz
copyrightclaims.org
copyrightinstitute.org
covadhosting.biz
cp24.com.ua
cubook.supernew.org
customsua.com.ua
cyber-monday.ga
dailyrank.net
darodar.com
dawlenie.com
dbutton.net
dcdcapital.com
deart-13.ru
delfin-aqua.com.ua
demenageur.com
dengi-v-kredit.in.ua
dermatovenerologiya.com
descargar-musica-gratis.net
detskie-konstruktory.ru
dev-seo.blog
dienai.ru
diplomas-ru.com
dipstar.org
distonija.com
dividendo.ru
djekxa.ru
djonwatch.ru
dktr.ru
docs4all.com
docsarchive.net
docsportal.net
documentbase.net
documentserver.net
documentsite.net
dogsrun.net
dojki-hd.com
domain-tracker.com
domashniy-hotel.ru
dominateforex.ml
domination.ml
doska-vsem.ru
dostavka-v-krym.com
dosugrostov.site
drupa.com
dvr.biz.ua
e-buyeasy.com
e-commerce-seo.com
e-commerce-seo1.com
earn-from-articles.com
earnian-money.info
easycommerce.cf
ecommerce-seo.org
ecomp3.ru
econom.co
edakgfvwql.ru
edudocs.net
eduinfosite.com
eduserver.net
egovaleo.it
ek-invest.ru
ekatalog.xyz
eko-gazon.ru
ekoproekt-kr.ru
ekto.ee
elektrikovich.ru
elementspluss.ru
elentur.com.ua
elmifarhangi.com
elvel.com.ua
emerson-rus.ru
eric-artem.com
erot.co
escort-russian.com
este-line.com.ua
etairikavideo.gr
etehnika.com.ua
eu-cookie-law-enforcement2.xyz
euromasterclass.ru
europages.com.ru
eurosamodelki.ru
event-tracking.com
exdocsfiles.com
express-vyvoz.ru
eyes-on-you.ga
f1nder.org
fanoboi.com
fast-wordpress-start.com
fbdownloader.com
feminist.org.ua
fidalsa.de
filesclub.net
filesdatabase.net
filter-ot-zheleza.ru
financial-simulation.com
finansov.info
findercarphotos.com
fix-website-errors.com
floating-share-buttons.com
flowertherapy.ru
for-your.website
forex-procto.ru
forsex.info
fortwosmartcar.pw
forum69.info
foxweber.com
frauplus.ru
free-fb-traffic.com
free-fbook-traffic.com
free-floating-buttons.com
free-share-buttons.com
free-social-buttons.com
free-social-buttons.xyz
free-social-buttons7.xyz
free-traffic.xyz
free-video-tool.com
free-website-traffic.com
freenode.info
freewhatsappload.com
freewlan.info
freshnails.com.ua
fsalas.com
game300.ru
gandikapper.ru
gearcraft.us
gearsadspromo.club
generalporn.org
gepatit-info.top
germes-trans.com
get-clickize.info
get-free-social-traffic.com
get-free-traffic-now.com
get-more-freeer-visitors.info
get-more-freeish-visitors.info
get-seo-help.com
get-your-social-buttons.info
getaadsincome.info
getadsincomely.info
getlamborghini.ga
getpy-click.info
getrichquick.ml
getrichquickly.info
ghazel.ru
ghostvisitor.com
giftbig.ru
girlporn.ru
gkvector.ru
glavprofit.ru
global-smm.ru
gobongo.info
goodhumor24.com
goodprotein.ru
google-liar.ru
googlemare.com
googlsucks.com
gorgaz.info
grafaman.ru
guardlink.org
guidetopetersburg.com
handicapvantoday.com
happysong.ru
hard-porn.mobi
havepussy.com
hawaiisurf.com
hdmoviecamera.net
hdmoviecams.com
healbio.ru
healgastro.com
homeafrikalike.tk
homemypicture.tk
hongfanji.com
hosting-tracker.com
hottour.com
housediz.com
housemilan.ru
howopen.ru
howtostopreferralspam.eu
hoztorg-opt.ru
hseipaa.kz
hulfingtonpost.com
humanorightswatch.org
hundejo.com
hvd-store.com
hyip-zanoza.me
ico.re
igadgetsworld.com
igru-xbox.net
ilikevitaly.com
iloveitaly.ro
iloveitaly.ru
ilovevitaly.co
ilovevitaly.com
ilovevitaly.info
ilovevitaly.org
ilovevitaly.ru
ilovevitaly.xyz
iminent.com
imperiafilm.ru
impotentik.com
incitystroy.ru
incomekey.net
increasewwwtraffic.info
inet-shop.su
infektsii.com
infodocsportal.com
inform-ua.info
insider.pro
interferencer.ru
intex-air.ru
investpamm.ru
iskalko.ru
isotoner.com
ispaniya-costa-blanca.ru
it-max.com.ua
izhstrelok.ru
jjbabskoe.ru
jobius.com.ua
jumkite.com
justkillingti.me
justprofit.xyz
kabbalah-red-bracelets.com
kabinet-binbank.ru
kabinet-card-5ka.ru
kabinet-click-alfabank.ru
kabinet-lk-megafon.ru
kabinet-login-mts.ru
kabinet-mil.ru
kabinet-mos.ru
kabinet-my-beeline.ru
kabinet-my-pochtabank.ru
kabinet-online-vtb.ru
kabinet-tinkoff.ru
kabinet-ttk.ru
kakablog.net
kambasoft.com
kamin-sam.ru
karapuz.org.ua
kazka.ru
kazrent.com
kerch.site
kevblog.top
keywords-monitoring-success.com
keywords-monitoring-your-success.com
kharkov.ua
kino-fun.ru
kino-key.info
kino2018.cc
kinobum.org
kinopolet.net
kinosed.net
knigonosha.net
komp-pomosch.ru
komputers-best.ru
komukc.com.ua
konkursov.net
kozhasobak.com
krasnodar-avtolombard.ru
kredytbank.com.ua
laminat.com.ua
landliver.org
landoftracking.com
laptop-4-less.com
law-check-two.xyz
law-enforcement-bot-ff.xyz
law-enforcement-check-three.xyz
law-enforcement-ee.xyz
law-six.xyz
laxdrills.com
leeboyrussia.com
legalrc.biz
lerporn.info
leto-dacha.ru
lider82.ru
lipidofobia.com.br
littleberry.ru
livefixer.com
livia-pache.ru
livingroomdecoratingideas.website
lk-gosuslugi.ru
login-tinkoff.ru
loveorganic.ch
lsex.xyz
luckybull.io
lukoilcard.ru
lumb.co
luton-invest.ru
luxup.ru
magicdiet.gq
magnetic-bracelets.ru
makemoneyonline.com
makeprogress.ga
manimpotence.com
manualterap.roleforum.ru
marblestyle.ru
maridan.com.ua
marketland.ml
masterseek.com
matras.space
mattgibson.us
max-apprais.com
maxxximoda.ru
mebel-iz-dereva.kiev.ua
mebelcomplekt.ru
mebeldekor.com.ua
med-dopomoga.com
med-zdorovie.com.ua
medicineseasybuy.com
meds-online24.com
meduza-consult.ru
megapolis-96.ru
metallo-konstruktsii.ru
metallosajding.ru
mifepriston.net
mikozstop.com
mikrocement.com.ua
mikrozaym2you.ru
minegam.com
mirobuvi.com.ua
mirtorrent.net
mksport.ru
mobilemedia.md
mockupui.com
modforwot.ru
modnie-futbolki.net
moinozhki.com
monetizationking.net
money-for-placing-articles.com
money7777.info
moneytop.ru
moneyzzz.ru
mosrif.ru
mostorgnerud.ru
moy-dokument.com
moyakuhnia.ru
muscle-factory.com.ua
musichallaudio.ru
mybuh.kz
myftpupload.com
myplaycity.com
nachalka21.ru
nanochskazki.ru
needtosellmyhousefast.com
net-profits.xyz
nevapotolok.ru
newsrosprom.ru
newstaffadsshop.club
niki-mlt.ru
nizniynovgorod.dienai.ru
novosti-hi-tech.ru
nubuilderian.info
nufaq.com
o-o-11-o-o.com
o-o-6-o-o.com
o-o-6-o-o.ru
o-o-8-o-o.com
o-o-8-o-o.ru
obsessionphrases.com
odiabetikah.com
odsadsmobile.biz
ofermerah.com
office2web.com
officedocuments.net
ogorodnic.com
online-binbank.ru
online-hit.info
online-intim.com
online-mkb.ru
online-templatestore.com
online-vtb.ru
onlinetvseries.me
onlywoman.org
ooo-olni.ru
optsol.ru
orakul.spb.ru
osteochondrosis.ru
ownshop.cf
ozas.net
paidonlinesites.com
palvira.com.ua
pc-services.ru
perm.dienai.ru
perper.ru
petrovka-online.com
photo-clip.ru
photokitchendesign.com
picturesmania.com
pills24h.com
piulatte.cz
pizza-imperia.com
pizza-tycoon.com
pk-pomosch.ru
pk-services.ru
podarkilove.ru
podemnik.pro
podseka1.ru
poiskzakona.ru
pokupaylegko.ru
popads.net
pops.foundation
popugaychiki.com
pornhub-forum.ga
pornhub-forum.uni.me
pornhub-ru.com
porno-chaman.info
pornoelita.info
pornoforadult.com
pornogig.com
pornohd1080.online
pornoklad.ru
pornonik.com
pornoplen.com
portnoff.od.ua
pozdravleniya-c.ru
priceg.com
pricheski-video.com
prlog.ru
procrafts.ru
prodaemdveri.com
producm.ru
prodvigator.ua
professionalsolutions.eu
prointer.net.ua
promoforum.ru
pron.pro
prosmibank.ru
prostitutki-rostova.ru.com
psa48.ru
punch.media
purchasepillsnorx.com
qualitymarketzone.com
quit-smoking.ga
qwesa.ru
rank-checker.online
rankings-analytics.com
ranksonic.info
ranksonic.net
ranksonic.org
rapidgator-porn.ga
rapidsites.pro
razborka-skoda.org.ua
rcb101.ru
realresultslist.com
rednise.com
regionshop.biz
releshop.ru
remkompov.ru
remont-kvartirspb.com
rent2spb.ru
replica-watch.ru
research.ifmo.ru
resell-seo-services.com
resellerclub.com
responsive-test.net
reversing.cc
rfavon.ru
rightenergysolutions.com.au
roof-city.ru
rospromtest.ru
ru-lk-rt.ru
ruinfocomp.ru
rulate.ru
rumamba.com
rupolitshow.ru
rusexy.xyz
ruspoety.ru
russian-postindex.ru
russian-translator.com
rybalka-opt.ru
sad-torg.com.ua
sady-urala.ru
saltspray.ru
sanjosestartups.com
santaren.by
santasgift.ml
santehnovich.ru
savetubevideo.com
savetubevideo.info
scansafe.net
scat.porn
screentoolkit.com
scripted.com
search-error.com
searchencrypt.com
security-corporation.com.ua
sell-fb-group-here.com
semalt.com
semaltmedia.com
seo-2-0.com
seo-platform.com
seo-smm.kz
seoanalyses.com
seocheckupx.com
seocheckupx.net
seoexperimenty.ru
seojokes.net
seopub.net
seoservices2018.com
sexsaoy.com
sexyali.com
sexyteens.hol.es
shagtomsk.ru
share-buttons-for-free.com
share-buttons.xyz
sharebutton.io
sharebutton.net
sharebutton.to
shnyagi.net
shoppingmiracles.co.uk
shops-ru.ru
sibecoprom.ru
sim-dealer.ru
simple-share-buttons.com
sinhronperevod.ru
site-auditor.online
site5.com
siteripz.net
sitevaluation.org
skinali.com
sladkoevideo.com
sledstvie-veli.net
slftsdybbg.ru
slkrm.ru
slomm.ru
slow-website.xyz
smailik.org
smartphonediscount.info
snabs.kz
snegozaderzhatel.ru
snip.to
snip.tw
soaksoak.ru
sochi-3d.ru
social-button.xyz
social-buttons-ii.xyz
social-buttons.com
social-traffic-1.xyz
social-traffic-2.xyz
social-traffic-3.xyz
social-traffic-4.xyz
social-traffic-5.xyz
social-traffic-7.xyz
social-widget.xyz
socialbuttons.xyz
socialseet.ru
socialtrade.biz
sohoindia.net
solitaire-game.ru
solnplast.ru
sosdepotdebilan.com
souvenirua.com
sovetskie-plakaty.ru
soyuzexpedition.ru
sp-laptop.ru
sp-zakupki.ru
spb-plitka.ru
spb-scenar.ru
speedup-my.site
spin2016.cf
sportwizard.ru
spravka130.ru
spravkavspb.net
sribno.net
stavimdveri.ru
steame.ru
stiralkovich.ru
stocktwists.com
store-rx.com
stream-tds.com
stroyka47.ru
studentguide.ru
success-seo.com
sundrugstore.com
superiends.org
supermama.top
supervesti.ru
svetka.info
svetoch.moscow
t-machinery.ru
t-rec.su
taihouse.ru
tattoo-stickers.ru
tattooha.com
td-perimetr.ru
technika-remont.ru
tedxrj.com
tentcomplekt.ru
teplohod-gnezdo.ru
texnika.com.ua
tgtclick.com
thaoduoctoc.com
theautoprofit.ml
theguardlan.com
thesmartsearch.net
tokshow.online
tomck.com
top-gan.ru
top-l2.com
top1-seo-service.com
top10-way.com
topquality.cf
topseoservices.co
track-rankings.online
tracker24-gps.ru
traffic-cash.xyz
traffic2cash.org
traffic2cash.xyz
traffic2money.com
trafficgenius.xyz
trafficmonetize.org
trafficmonetizer.org
traphouselatino.net
trion.od.ua
tsatu.edu.ua
tsc-koleso.ru
tuningdom.ru
twsufa.ru
ua.tc
uasb.ru
ucoz.ru
udav.net
ufa.dienai.ru
ukrainian-poetry.com
ul-potolki.ru
undergroundcityphoto.com
unibus.su
univerfiles.com
unlimitdocs.net
unpredictable.ga
uptime-as.net
uptime-eu.net
uptime-us.net
uptime.com
uptimechecker.com
uzpaket.com
uzungil.com
vaderenergy.ru
validus.pro
varikozdok.ru
veloland.in.ua
ventopt.by
veselokloun.ru
vesnatehno.com
viagra-soft.ru
video--production.com
video-woman.com
videos-for-your-business.com
viel.su
viktoria-center.ru
vodaodessa.com
vodkoved.ru
vzheludke.com
vzubkah.com
w3javascript.com
wallpaperdesk.info
wdss.com.ua
we-ping-for-youic.info
web-revenue.xyz
webmaster-traffic.com
webmonetizer.net
website-analytics.online
website-analyzer.info
website-speed-check.site
website-speed-checker.site
websites-reviews.com
websocial.me
weburlopener.com
wmasterlead.com
woman-orgasm.ru
wordpress-crew.net
wordpresscore.com
workius.ru
works.if.ua
worldmed.info
wufak.com
ww2awards.info
www-lk-rt.ru
x5market.ru
xkaz.org
xn-------53dbcapga5atlplfdm6ag1ab1bvehl0b7toa0k.xn--p1ai
xn-----6kcamwewcd9bayelq.xn--p1ai
xn-----7kcaaxchbbmgncr7chzy0k0hk.xn--p1ai
xn-----clckdac3bsfgdft3aebjp5etek.xn--p1ai
xn----7sbabhjc3ccc5aggbzfmfi.xn--p1ai
xn----7sbabm1ahc4b2aqff.su
xn----7sbabn5abjehfwi8bj.xn--p1ai
xn----7sbbpe3afguye.xn--p1ai
xn----7sbho2agebbhlivy.xn--p1ai
xn----8sbaki4azawu5b.xn--p1ai
xn----8sbarihbihxpxqgaf0g1e.xn--80adxhks
xn----8sbhefaln6acifdaon5c6f4axh.xn--p1ai
xn----8sblgmbj1a1bk8l.xn----161-4vemb6cjl7anbaea3afninj.xn--p1ai
xn----ctbbcjd3dbsehgi.xn--p1ai
xn----ctbfcdjl8baejhfb1oh.xn--p1ai
xn----ctbigni3aj4h.xn--p1ai
xn----ftbeoaiyg1ak1cb7d.xn--p1ai
xn----itbbudqejbfpg3l.com
xn--80aaajkrncdlqdh6ane8t.xn--p1ai
xn--80aanaardaperhcem4a6i.com
xn--80adaggc5bdhlfamsfdij4p7b.xn--p1ai
xn--80adgcaax6acohn6r.xn--p1ai
xn--90acenikpebbdd4f6d.xn--p1ai
xn--90acjmaltae3acm.xn--p1acf
xn--c1acygb.xn--p1ai
xn--d1abj0abs9d.in.ua
xn--d1aifoe0a9a.top
xn--e1aaajzchnkg.ru.com
xn--e1agf4c.xn--80adxhks
xtrafficplus.com
xz618.com
yaderenergy.ru
yes-com.com
yhirurga.ru
ykecwqlixx.ru
yodse.io
youporn-forum.ga
youporn-forum.uni.me
youporn-ru.com
yourserverisdown.com
zahvat.ru
zastroyka.org
zavod-gm.ru
zdm-auto.com
zdorovie-nogi.info
zelena-mriya.com.ua
zoominfo.com
zvetki.ru

View File

@ -59,7 +59,7 @@ func (c *Collector) ServeHTTP(w http.ResponseWriter, r *http.Request) {
IsNewVisitor: q.Get("nv") == "1",
IsNewSession: q.Get("ns") == "1",
IsUnique: q.Get("u") == "1",
Referrer: parseReferrer(q.Get("r")),
Referrer: q.Get("r"),
IsFinished: false,
IsBounce: true,
Duration: 0,
@ -193,29 +193,6 @@ func parsePathname(p string) string {
return "/" + strings.TrimLeft(p, "/")
}
// parseReferrer returns a cleaned-up form of the referrer URL r: the "amp",
// "utm_campaign", "utm_medium" and "utm_source" query parameters are dropped,
// the remaining query is re-encoded, and a trailing "/amp/" is cut from the
// path. An unparsable r yields the empty string.
//
// TODO: Move this to aggregator, as we need this endpoint to be as fast as possible
func parseReferrer(r string) string {
	parsed, err := url.Parse(r)
	if err != nil {
		return ""
	}

	// strip AMP & UTM parameters
	params := parsed.Query()
	for _, name := range [...]string{"amp", "utm_campaign", "utm_medium", "utm_source"} {
		params.Del(name)
	}
	parsed.RawQuery = params.Encode()

	// strip a trailing /amp/ (including its leading slash)
	parsed.Path = strings.TrimSuffix(parsed.Path, "/amp/")

	return parsed.String()
}
func parseHostname(r string) string {
u, err := url.Parse(r)
if err != nil {