Skip to content

Commit 2b125c8

Browse files
committed
[submodule:extensions] Add new bots: AI2Bot, aiHitBot, anthropic-ai, cohere-ai, Diffbot, ImagesiftBot, magpie-crawler, Omgilibot, Screaming Frog SEO Spider, Seznambot, Teoma, Timpibot, VelenPublicWebCrawler, Vercelbot, Webzio-Extended, YouBot
1 parent 2181559 commit 2b125c8

File tree

3 files changed

+141
-8
lines changed

3 files changed

+141
-8
lines changed

src/extensions/ua-parser-extensions.js

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,8 @@ const Crawlers = Object.freeze({
5353
// OpenAI's SearchGPT - https://platform.openai.com/docs/bots
5454
// PerplexityBot - https://perplexity.ai/perplexitybot
5555
// SemrushBot - http://www.semrush.com/bot.html
56-
/((?:ahrefs|amazon|bing|cc|dot|duckduck|exa|facebook|gpt|mj12|mojeek|oai-search|perplexity|semrush)bot)\/([\w\.]+)/i,
56+
// SeznamBot - http://napoveda.seznam.cz/seznambot-intro
57+
/((?:ahrefs|amazon|bing|cc|dot|duckduck|exa|facebook|gpt|mj12|mojeek|oai-search|perplexity|semrush|seznam)bot)\/([\w\.-]+)/i,
5758

5859
// Applebot - http://apple.com/go/applebot
5960
/(applebot(?:-extended)?)\/([\w\.]+)/i,
@@ -62,7 +63,7 @@ const Crawlers = Object.freeze({
6263
/(baiduspider)[-imagevdonsfcpr]{0,6}\/([\w\.]+)/i,
6364

6465
// ClaudeBot (Anthropic)
65-
/(claude(?:bot|-web))\/([\w\.]+)/i,
66+
/(claude(?:bot|-web)|anthropic-ai)\/?([\w\.]*)/i,
6667

6768
// Coc Coc Bot - https://help.coccoc.com/en/search-engine
6869
/(coccocbot-(?:image|web))\/([\w\.]+)/i,
@@ -89,8 +90,8 @@ const Crawlers = Object.freeze({
8990
// Yeti (Naver)
9091
/(yeti)\/([\w\.]+)/i,
9192

92-
// YisouSpider
93-
/(yisouspider)\/?([\w\.]*)/i
93+
// aiHitBot / Cohere-AI / Diffbot / Magpie-Crawler / Omgili / Omgilibot / Screaming Frog SEO Spider / Timpibot / VelenPublicWebCrawler / Webzio-Extended / YisouSpider / YouBot
94+
/((?:aihit|diff|timpi|you)bot|cohere-ai|omgili(?:bot)?|(?:magpie-|velenpublicweb)crawler|webzio-extended|(?:screaming frog seo |yisou)spider)\/?([\w\.]*)/i
9495
],
9596

9697
[NAME, VERSION, [TYPE, CRAWLER]],
@@ -99,13 +100,15 @@ const Crawlers = Object.freeze({
99100
// Google Bots
100101
/((?:adsbot|apis|mediapartners)-google(?:-mobile)?|google-?(?:other|cloudvertexbot|extended|safety))/i,
101102

103+
// AI2Bot - https://allenai.org/crawler
102104
// Bytespider
103105
// DataForSeoBot - https://dataforseo.com/dataforseo-bot
104106
// Huawei AspiegelBot / PetalBot - https://aspiegel.com/petalbot
107+
// ImagesiftBot - https://imagesift.com/about
105108
// Qihoo 360Spider
106109
// TurnitinBot - https://www.turnitin.com/robot/crawlerinfo.html
107110
// Yahoo! Slurp - http://help.yahoo.com/help/us/ysearch/slurp
108-
/(360spider-?(?:image|video)?|bytespider|(?:aspiegel|dataforseo|petal|turnitin)bot|(?=yahoo! )slurp)/i
111+
/\b(360spider-?(?:image|video)?|bytespider|(?:ai2|aspiegel|dataforseo|imagesift|petal|turnitin)bot|teoma|(?=yahoo! )slurp)/i
109112
],
110113
[NAME, [TYPE, CRAWLER]]
111114
]
@@ -238,8 +241,8 @@ const Fetchers = Object.freeze({
238241
],
239242
[NAME, VERSION, [TYPE, FETCHER]],
240243

241-
// Google Bots / Snapchat
242-
[/(feedfetcher-google|google(?:-read-aloud|producer)|(?=bot; )snapchat)/i],
244+
// Google Bots / Snapchat / Vercelbot
245+
[/(vercelbot|feedfetcher-google|google(?:-read-aloud|producer)|(?=bot; )snapchat)/i],
243246
[NAME, [TYPE, FETCHER]],
244247
]
245248
});

test/specs/browser-crawlers.json

Lines changed: 121 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,26 @@
4949
"type" : "crawler"
5050
}
5151
},
52+
{
53+
"desc" : "AI2Bot",
54+
"ua" : "Mozilla/5.0 (compatible) AI2Bot (+https://www.allenai.org/crawler)",
55+
"expect" :
56+
{
57+
"name" : "AI2Bot",
58+
"version" : "undefined",
59+
"type" : "crawler"
60+
}
61+
},
62+
{
63+
"desc" : "aiHitBot",
64+
"ua" : "Mozilla/5.0 (compatible; aiHitBot/2.9; +https://www.aihitdata.com/about)",
65+
"expect" :
66+
{
67+
"name" : "aiHitBot",
68+
"version" : "2.9",
69+
"type" : "crawler"
70+
}
71+
},
5272
{
5373
"desc" : "Applebot",
5474
"ua" : "Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B410 Safari/600.1.4 (Applebot/0.1;+http://www.apple.com/go/applebot)",
@@ -131,14 +151,24 @@
131151
},
132152
{
133153
"desc" : "DataForSEO",
134-
"ua" : "Mozilla/5.0 (compatible; DataForSeoBot; +https://dataforseo.com/dataforseo-bot) ",
154+
"ua" : "Mozilla/5.0 (compatible; DataForSeoBot; +https://dataforseo.com/dataforseo-bot)",
135155
"expect" :
136156
{
137157
"name" : "DataForSeoBot",
138158
"version" : "undefined",
139159
"type" : "crawler"
140160
}
141161
},
162+
{
163+
"desc" : "Diffbot",
164+
"ua" : "Diffbot/0.1",
165+
"expect" :
166+
{
167+
"name" : "Diffbot",
168+
"version" : "0.1",
169+
"type" : "crawler"
170+
}
171+
},
142172
{
143173
"desc" : "Dotbot",
144174
"ua" : "Mozilla/5.0 (compatible; DotBot/1.2; +https://opensiteexplorer.org/dotbot; help@moz.com)",
@@ -329,6 +359,26 @@
329359
"type" : "crawler"
330360
}
331361
},
362+
{
363+
"desc" : "ImagesiftBot",
364+
"ua" : "Mozilla/5.0 (compatible; ImagesiftBot; +imagesift.com)",
365+
"expect" :
366+
{
367+
"name" : "ImagesiftBot",
368+
"version" : "undefined",
369+
"type" : "crawler"
370+
}
371+
},
372+
{
373+
"desc" : "magpie-crawler",
374+
"ua" : "magpie-crawler/1.1 (robots-txt-checker; +http://www.brandwatch.net)",
375+
"expect" :
376+
{
377+
"name" : "magpie-crawler",
378+
"version" : "1.1",
379+
"type" : "crawler"
380+
}
381+
},
332382
{
333383
"desc" : "Meta-ExternalAgent",
334384
"ua" : "meta-externalagent/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler)",
@@ -360,6 +410,26 @@
360410
"type" : "crawler"
361411
}
362412
},
413+
{
414+
"desc" : "Omgili",
415+
"ua" : "omgili/0.5 +https://omgili.com",
416+
"expect" :
417+
{
418+
"name" : "omgili",
419+
"version" : "0.5",
420+
"type" : "crawler"
421+
}
422+
},
423+
{
424+
"desc" : "Omgilibot",
425+
"ua" : "omgilibot/0.3 +http://www.omgili.com/Crawler.html",
426+
"expect" :
427+
{
428+
"name" : "omgilibot",
429+
"version" : "0.3",
430+
"type" : "crawler"
431+
}
432+
},
363433
{
364434
"desc" : "OpenAI Search",
365435
"ua" : "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; OAI-SearchBot/1.0; +https://openai.com/searchbot",
@@ -410,6 +480,36 @@
410480
"type" : "crawler"
411481
}
412482
},
483+
{
484+
"desc" : "SeznamBot",
485+
"ua" : "Mozilla/5.0 (compatible; SeznamBot/4.0-RC1; +http://napoveda.seznam.cz/seznambot-intro/)",
486+
"expect" :
487+
{
488+
"name" : "SeznamBot",
489+
"version" : "4.0-RC1",
490+
"type" : "crawler"
491+
}
492+
},
493+
{
494+
"desc" : "Teoma",
495+
"ua" : "Mozilla/2.0 (compatible; Ask Jeeves/Teoma; +http://sp.ask.com/docs/about/tech_crawling.html)",
496+
"expect" :
497+
{
498+
"name" : "Teoma",
499+
"version" : "undefined",
500+
"type" : "crawler"
501+
}
502+
},
503+
{
504+
"desc" : "Timpibot",
505+
"ua" : "Timpibot/0.8 (+http://www.timpi.io)",
506+
"expect" :
507+
{
508+
"name" : "Timpibot",
509+
"version" : "0.8",
510+
"type" : "crawler"
511+
}
512+
},
413513
{
414514
"desc" : "TurnitinBot",
415515
"ua" : "TurnitinBot (https://turnitin.com/robot/crawlerinfo.html)",
@@ -420,6 +520,16 @@
420520
"type" : "crawler"
421521
}
422522
},
523+
{
524+
"desc" : "VelenPublicWebCrawler",
525+
"ua" : "Mozilla/5.0 (compatible; VelenPublicWebCrawler/1.0; +https://velen.io)",
526+
"expect" :
527+
{
528+
"name" : "VelenPublicWebCrawler",
529+
"version" : "1.0",
530+
"type" : "crawler"
531+
}
532+
},
423533
{
424534
"desc" : "Yahoo! Japan",
425535
"ua" : "Y!J-BRW/1.0 (https://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716)",
@@ -469,5 +579,15 @@
469579
"version" : "undefined",
470580
"type" : "crawler"
471581
}
582+
},
583+
{
584+
"desc" : "YouBot",
585+
"ua" : "YouBot (+http://www.you.com)",
586+
"expect" :
587+
{
588+
"name" : "YouBot",
589+
"version" : "undefined",
590+
"type" : "crawler"
591+
}
472592
}
473593
]

test/specs/browser-fetchers.json

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,5 +118,15 @@
118118
"version" : "2.0",
119119
"type" : "fetcher"
120120
}
121+
},
122+
{
123+
"desc" : "Vercelbot",
124+
"ua" : "Vercelbot (+https://vercel.com)",
125+
"expect" :
126+
{
127+
"name" : "Vercelbot",
128+
"version" : "undefined",
129+
"type" : "fetcher"
130+
}
121131
}
122132
]

0 commit comments

Comments
 (0)