Skip to content

Commit a9b31c0

Browse files
committed
[extensions] Add bots: AdIdxBot, Linespider, LinkedInBot, MicrosoftPreview, OpenAI Image Downloader
1 parent 8a05328 commit a9b31c0

File tree

3 files changed: 202 additions (+202) and 21 deletions (-21)

src/extensions/ua-parser-extensions.js

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -42,24 +42,25 @@ const Crawlers = Object.freeze({
4242
[
4343
// AhrefsBot - https://ahrefs.com/robot
4444
// Amazonbot - https://developer.amazon.com/amazonbot
45-
// Bingbot - http://www.bing.com/bingbot.htm
45+
// Bingbot / AdIdxBot - https://www.bing.com/webmasters/help/which-crawlers-does-bing-use-8c184ec0
4646
// CCBot - https://commoncrawl.org/faq
4747
// Dotbot - https://moz.com/help/moz-procedures/crawlers/dotbot
4848
// DuckDuckBot - http://duckduckgo.com/duckduckbot.html
4949
// FacebookBot - https://developers.facebook.com/docs/sharing/bot/
5050
// GPTBot - https://platform.openai.com/docs/gptbot
51+
// LinkedInBot - http://www.linkedin.com
5152
// MJ12bot - https://mj12bot.com/
5253
// MojeekBot - https://www.mojeek.com/bot.html
5354
// OpenAI's SearchGPT - https://platform.openai.com/docs/bots
5455
// PerplexityBot - https://perplexity.ai/perplexitybot
5556
// SeznamBot - http://napoveda.seznam.cz/seznambot-intro
56-
/((?:ahrefs|amazon|bing|cc|dot|duckduck|exa|facebook|gpt|mj12|mojeek|oai-search|perplexity|semrush|seznam)bot)\/([\w\.-]+)/i,
57+
/((?:adidx|ahrefs|amazon|bing|cc|dot|duckduck|exa|facebook|gpt|linkedin|mj12|mojeek|oai-search|perplexity|semrush|seznam)bot)\/([\w\.-]+)/i,
5758

5859
// Applebot - http://apple.com/go/applebot
59-
/(applebot(?:-extended)?)\/([\w\.]+)/i,
60+
/(applebot(?:-extended)?)\/?([\w\.]*)/i,
6061

6162
// Baiduspider - https://help.baidu.com/question?prod_id=99&class=0&id=3001
62-
/(baiduspider)[-imagevdonsfcpr]{0,6}\/([\w\.]+)/i,
63+
/(baiduspider[-imagevdonwsfcpr]{0,7})\/?([\w\.]*)/i,
6364

6465
// ClaudeBot (Anthropic)
6566
/(claude(?:bot|-web)|anthropic-ai)\/?([\w\.]*)/i,
@@ -92,8 +93,8 @@ const Crawlers = Object.freeze({
9293
// Yeti (Naver)
9394
/(yeti)\/([\w\.]+)/i,
9495

95-
// aiHitBot / Diffbot / Magpie-Crawler / Omgilibot / Webzio-Extended / Screaming Frog SEO Spider / Timpibot / VelenPublicWebCrawler / YisouSpider / YouBot
96-
/((?:aihit|diff|timpi|you)bot|omgili(?:bot)?|(?:magpie-|velenpublicweb)crawler|webzio-extended|(?:screaming frog seo |yisou)spider)\/?([\w\.]*)/i
96+
// aiHitBot / Diffbot / Linespider / Magpie-Crawler / Omgilibot / OpenAI Image Downloader / Webzio-Extended / Screaming Frog SEO Spider / Timpibot / VelenPublicWebCrawler / YisouSpider / YouBot
97+
/((?:aihit|diff|timpi|you)bot|omgili(?:bot)?|openai image downloader|(?:magpie-|velenpublicweb)crawler|webzio-extended|(?:screaming frog seo |line|yisou)spider)\/?([\w\.]*)/i
9798
],
9899

99100
[NAME, VERSION, [TYPE, CRAWLER]],
@@ -219,10 +220,10 @@ const Fetchers = Object.freeze({
219220
// AhrefsSiteAudit - https://ahrefs.com/robot/site-audit
220221
// ChatGPT-User - https://platform.openai.com/docs/plugins/bot
221222
// DuckAssistBot - https://duckduckgo.com/duckassistbot/
222-
// Better Uptime / BingPreview / Mastodon / Pinterestbot / Redditbot / Rogerbot / SiteAuditBot / Telegrambot / Twitterbot / UptimeRobot
223+
// Better Uptime / BingPreview / Mastodon / MicrosoftPreview / Pinterestbot / Redditbot / Rogerbot / SiteAuditBot / Telegrambot / Twitterbot / UptimeRobot
223224
// Google Site Verifier / Meta / Yahoo! Japan
224225
// Yandex Bots - https://yandex.com/bots
225-
/(ahrefssiteaudit|bingpreview|chatgpt-user|mastodon|(?:discord|duckassist|linkedin|pinterest|reddit|roger|siteaudit|twitter|uptimero)bot|google-site-verification|meta-externalfetcher|y!?j-dlc|yandex(?:calendar|direct(?:dyn)?|searchshop)|yadirectfetcher)\/([\w\.]+)/i,
226+
/(ahrefssiteaudit|(?:bing|microsoft)preview|chatgpt-user|mastodon|(?:discord|duckassist|linkedin|pinterest|reddit|roger|siteaudit|twitter|uptimero)bot|google-site-verification|meta-externalfetcher|y!?j-dlc|yandex(?:calendar|direct(?:dyn)?|searchshop)|yadirectfetcher)\/([\w\.]+)/i,
226227

227228
// Bluesky
228229
/(bluesky) cardyb\/([\w\.]+)/i,

test/data/ua/extension/crawler.json

Lines changed: 173 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,16 @@
99
"type" : "crawler"
1010
}
1111
},
12+
{
13+
"desc" : "AdIdxBot",
14+
"ua" : "Mozilla/5.0 (compatible; adidxbot/2.0; +http://www.bing.com/bingbot.htm)",
15+
"expect" :
16+
{
17+
"name" : "adidxbot",
18+
"version" : "2.0",
19+
"type" : "crawler"
20+
}
21+
},
1222
{
1323
"desc" : "AdsBot Mobile Web",
1424
"ua" : "AdsBot-Google (+http://www.google.com/adsbot.html)",
@@ -79,6 +89,16 @@
7989
"type" : "crawler"
8090
}
8191
},
92+
{
93+
"desc" : "Applebot-Extended",
94+
"ua" : "Applebot-Extended",
95+
"expect" :
96+
{
97+
"name" : "Applebot-Extended",
98+
"version" : "undefined",
99+
"type" : "crawler"
100+
}
101+
},
82102
{
83103
"desc" : "Amazonbot",
84104
"ua" : "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5 (Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot)",
@@ -89,6 +109,126 @@
89109
"type" : "crawler"
90110
}
91111
},
112+
{
113+
"desc" : "Anthropic AI",
114+
"ua" : "anthropic-ai",
115+
"expect" :
116+
{
117+
"name" : "anthropic-ai",
118+
"version" : "undefined",
119+
"type" : "crawler"
120+
}
121+
},
122+
{
123+
"desc" : "Archive.org Bot",
124+
"ua" : "ia_archiver/8.1 (Windows 2000 1.9; en-US;)",
125+
"expect" :
126+
{
127+
"name" : "ia_archiver",
128+
"version" : "8.1",
129+
"type" : "crawler"
130+
}
131+
},
132+
{
133+
"desc" : "Archive.org Bot",
134+
"ua" : "Mozilla/5.0 (compatible; archive.org_bot/3.3.0 +https://archive.org/details/archive.org_bot)",
135+
"expect" :
136+
{
137+
"name" : "archive.org_bot",
138+
"version" : "3.3.0",
139+
"type" : "crawler"
140+
}
141+
},
142+
{
143+
"desc" : "Baiduspider",
144+
"ua" : "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)",
145+
"expect" :
146+
{
147+
"name" : "Baiduspider",
148+
"version" : "2.0",
149+
"type" : "crawler"
150+
}
151+
},
152+
{
153+
"desc" : "Baiduspider-ads",
154+
"ua" : "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:17.0; Baiduspider-ads) Gecko/17.0 Firefox/17.0",
155+
"expect" :
156+
{
157+
"name" : "Baiduspider-ads",
158+
"version" : "undefined",
159+
"type" : "crawler"
160+
}
161+
},
162+
{
163+
"desc" : "Baiduspider-cpro",
164+
"ua" : "Mozilla/5.0 (compatible; Baiduspider-cpro; +http://www.baidu.com/search/spider.html)",
165+
"expect" :
166+
{
167+
"name" : "Baiduspider-cpro",
168+
"version" : "undefined",
169+
"type" : "crawler"
170+
}
171+
},
172+
{
173+
"desc" : "Baiduspider-favo",
174+
"ua" : "Baiduspider-favo",
175+
"expect" :
176+
{
177+
"name" : "Baiduspider-favo",
178+
"version" : "undefined",
179+
"type" : "crawler"
180+
}
181+
},
182+
{
183+
"desc" : "Baiduspider-image",
184+
"ua" : "Baiduspider-image+(+http://www.baidu.com/search/spider.htm)",
185+
"expect" :
186+
{
187+
"name" : "Baiduspider-image",
188+
"version" : "undefined",
189+
"type" : "crawler"
190+
}
191+
},
192+
{
193+
"desc" : "Baiduspider-news",
194+
"ua" : "Baiduspider-news",
195+
"expect" :
196+
{
197+
"name" : "Baiduspider-news",
198+
"version" : "undefined",
199+
"type" : "crawler"
200+
}
201+
},
202+
{
203+
"desc" : "Baiduspider-render",
204+
"ua" : "Mozilla/5.0 (compatible; Baiduspider-render/2.0; +http://www.baidu.com/search/spider.html)",
205+
"expect" :
206+
{
207+
"name" : "Baiduspider-render",
208+
"version" : "2.0",
209+
"type" : "crawler"
210+
}
211+
},
212+
{
213+
"desc" : "Baiduspider-video",
214+
"ua" : "Baiduspider-video",
215+
"expect" :
216+
{
217+
"name" : "Baiduspider-video",
218+
"version" : "undefined",
219+
"type" : "crawler"
220+
}
221+
},
222+
{
223+
"desc" : "Bingbot",
224+
"ua" : "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm) Chrome/",
225+
"expect" :
226+
{
227+
"name" : "bingbot",
228+
"version" : "2.0",
229+
"type" : "crawler"
230+
}
231+
},
92232
{
93233
"desc" : "Bytespider",
94234
"ua" : "Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.1511.1269 Mobile Safari/537.36; Bytespider",
@@ -179,6 +319,16 @@
179319
"type" : "crawler"
180320
}
181321
},
322+
{
323+
"desc" : "DuckDuckBot",
324+
"ua" : "DuckDuckBot/1.1; ( http://duckduckgo.com/duckduckbot.html)",
325+
"expect" :
326+
{
327+
"name" : "DuckDuckBot",
328+
"version" : "1.1",
329+
"type" : "crawler"
330+
}
331+
},
182332
{
183333
"desc" : "Exabot",
184334
"ua" : "Mozilla/5.0 (compatible; Exabot/3.0; +http://www.exabot.com/go/robot)",
@@ -340,32 +490,32 @@
340490
}
341491
},
342492
{
343-
"desc" : "Archive.org Bot",
344-
"ua" : "ia_archiver/8.1 (Windows 2000 1.9; en-US;)",
493+
"desc" : "ImagesiftBot",
494+
"ua" : "Mozilla/5.0 (compatible; ImagesiftBot; +imagesift.com)",
345495
"expect" :
346496
{
347-
"name" : "ia_archiver",
348-
"version" : "8.1",
497+
"name" : "ImagesiftBot",
498+
"version" : "undefined",
349499
"type" : "crawler"
350500
}
351501
},
352502
{
353-
"desc" : "Archive.org Bot",
354-
"ua" : "Mozilla/5.0 (compatible; archive.org_bot/3.3.0 +https://archive.org/details/archive.org_bot)",
503+
"desc" : "Linespider",
504+
"ua" : "Mozilla/5.0 (compatible; Linespider/1.1; +https://lin.ee/4dwXkTH)",
355505
"expect" :
356506
{
357-
"name" : "archive.org_bot",
358-
"version" : "3.3.0",
507+
"name" : "Linespider",
508+
"version" : "1.1",
359509
"type" : "crawler"
360510
}
361511
},
362512
{
363-
"desc" : "ImagesiftBot",
364-
"ua" : "Mozilla/5.0 (compatible; ImagesiftBot; +imagesift.com)",
513+
"desc" : "LinkedInBot",
514+
"ua" : "LinkedInBot/1.0 (compatible; Mozilla/5.0; Apache-HttpClient +http://www.linkedin.com)",
365515
"expect" :
366516
{
367-
"name" : "ImagesiftBot",
368-
"version" : "undefined",
517+
"name" : "LinkedInBot",
518+
"version" : "1.0",
369519
"type" : "crawler"
370520
}
371521
},
@@ -462,7 +612,7 @@
462612
},
463613
{
464614
"desc" : "PetalBot",
465-
"ua" : "Mozilla/5.0 (Linux; Android 7.0;) AppleWebKit/537.36 (KHTML, like Gecko) Mobile Safari/537.36 (compatible; PetalBot;+https://webmaster.petalsearch.com/site/petalbot) ",
615+
"ua" : "Mozilla/5.0 (Linux; Android 7.0;) AppleWebKit/537.36 (KHTML, like Gecko) Mobile Safari/537.36 (compatible; PetalBot;+https://webmaster.petalsearch.com/site/petalbot)",
466616
"expect" :
467617
{
468618
"name" : "PetalBot",
@@ -520,6 +670,16 @@
520670
"type" : "crawler"
521671
}
522672
},
673+
{
674+
"desc" : "Sogou",
675+
"ua" : "Sogou web spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)",
676+
"expect" :
677+
{
678+
"name" : "Sogou web spider",
679+
"version" : "4.0",
680+
"type" : "crawler"
681+
}
682+
},
523683
{
524684
"desc" : "Teoma",
525685
"ua" : "Mozilla/2.0 (compatible; Ask Jeeves/Teoma; +http://sp.ask.com/docs/about/tech_crawling.html)",

test/data/ua/extension/fetcher.json

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,26 @@
139139
"type" : "fetcher"
140140
}
141141
},
142+
{
143+
"desc" : "MicrosoftPreview",
144+
"ua" : "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; MicrosoftPreview/2.0; +https://aka.ms/MicrosoftPreview) Chrome/W.X.Y.Z Safari/537.36",
145+
"expect" :
146+
{
147+
"name" : "MicrosoftPreview",
148+
"version" : "2.0",
149+
"type" : "fetcher"
150+
}
151+
},
152+
{
153+
"desc" : "Pinterestbot",
154+
"ua" : "Mozilla/5.0 (compatible; Pinterestbot/1.0; +http://www.pinterest.com/bot.html)",
155+
"expect" :
156+
{
157+
"name" : "Pinterestbot",
158+
"version" : "1.0",
159+
"type" : "fetcher"
160+
}
161+
},
142162
{
143163
"desc" : "Rogerbot",
144164
"ua" : "Mozilla/5.0 (compatible; rogerBot/1.0; UrlCrawler; http://www.seomoz.org/dp/rogerbot)",

0 commit comments

Comments
 (0)