Skip to content

Commit 70b3003

Browse files
committed
[submodule:helpers] Add new method isAIBot(): detect AI bots
1 parent 5b375b9 commit 70b3003

File tree

5 files changed: +102 -6 lines changed

5 files changed: +102 -6 lines changed

README.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,15 @@ see what's new & breaking.
9999
<td>✅</td>
100100
</tr>
101101
<tr>
102-
<td>Extras (Apps, Libs, Emails, Media Players, etc)</td>
102+
<td>AI Bot detection</td>
103+
<td>❌</td>
104+
<td>✅</td>
105+
<td>✅</td>
106+
<td>✅</td>
107+
<td>✅</td>
108+
</tr>
109+
<tr>
110+
<td>Extras (Apps, Libs, Emails, Media Players, etc) detection</td>
103111
<td>❌</td>
104112
<td>✅</td>
105113
<td>✅</td>

src/extensions/ua-parser-extensions.js

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,8 @@ const Crawlers = Object.freeze({
9090
// Yeti (Naver)
9191
/(yeti)\/([\w\.]+)/i,
9292

93-
// aiHitBot / Cohere-AI / Diffbot / Magpie-Crawler / Omgilibot / Webzio-Extended / Screaming Frog SEO Spider / Timpibot / VelenPublicWebCrawler / YisouSpider / YouBot
94-
/((?:aihit|diff|timpi|you)bot|cohere-ai|omgili(?:bot)?|(?:magpie-|velenpublicweb)crawler|webzio-extended|(?:screaming frog seo |yisou)spider)\/?([\w\.]*)/i
93+
// aiHitBot / Diffbot / Magpie-Crawler / Omgilibot / Webzio-Extended / Screaming Frog SEO Spider / Timpibot / VelenPublicWebCrawler / YisouSpider / YouBot
94+
/((?:aihit|diff|timpi|you)bot|omgili(?:bot)?|(?:magpie-|velenpublicweb)crawler|webzio-extended|(?:screaming frog seo |yisou)spider)\/?([\w\.]*)/i
9595
],
9696

9797
[NAME, VERSION, [TYPE, CRAWLER]],
@@ -241,8 +241,8 @@ const Fetchers = Object.freeze({
241241
],
242242
[NAME, VERSION, [TYPE, FETCHER]],
243243

244-
// Google Bots / Snapchat / Vercelbot
245-
[/(vercelbot|feedfetcher-google|google(?:-read-aloud|producer)|(?=bot; )snapchat)/i],
244+
// Google Bots / Cohere / Snapchat / Vercelbot
245+
[/(cohere-ai|vercelbot|feedfetcher-google|google(?:-read-aloud|producer)|(?=bot; )snapchat)/i],
246246
[NAME, [TYPE, FETCHER]],
247247
]
248248
});

src/helpers/ua-parser-helpers.d.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import { IResult } from "../main/ua-parser";
66

77
declare function getDeviceVendor(model: string): string | undefined;
88
declare function isAppleSilicon(resultOrUA: IResult | string): boolean;
9+
declare function isAIBot(resultOrUA: IResult | string): boolean;
910
declare function isBot(resultOrUA: IResult | string): boolean;
1011
declare function isChromeFamily(resultOrUA: IResult | string): boolean;
1112
declare function isElectron(): boolean;
@@ -16,6 +17,7 @@ declare function isStandalonePWA(): boolean;
1617
export {
1718
getDeviceVendor,
1819
isAppleSilicon,
20+
isAIBot,
1921
isBot,
2022
isChromeFamily,
2123
isElectron,

src/helpers/ua-parser-helpers.js

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,77 @@ const isAppleSilicon = (resultOrUA) => {
4141
return false;
4242
}
4343

44+
const isAIBot = (resultOrUA) => [
45+
46+
// AI2
47+
'ai2bot',
48+
49+
// Amazon
50+
'amazonbot',
51+
52+
// Anthropic
53+
'anthropic-ai',
54+
'claude-web',
55+
'claudebot',
56+
57+
// Apple
58+
'applebot',
59+
'applebot-extended',
60+
61+
// ByteDance
62+
'bytespider',
63+
64+
// Common Crawl
65+
'ccbot',
66+
67+
// DataForSeo
68+
'dataforseobot',
69+
70+
// Diffbot
71+
'diffbot',
72+
73+
// Google
74+
'googleother',
75+
'googleother-image',
76+
'googleother-video',
77+
'google-extended',
78+
79+
// Hive AI
80+
'imagesiftbot',
81+
82+
// Huawei
83+
'petalbot',
84+
85+
// Meta
86+
'facebookbot',
87+
'meta-externalagent',
88+
89+
// OpenAI
90+
'gptbot',
91+
'oai-searchbot',
92+
93+
// Perplexity
94+
'perplexitybot',
95+
96+
// Timpi
97+
'timpibot',
98+
99+
// Velen.io
100+
'velenpublicwebcrawler',
101+
102+
// Webz.io
103+
'omgili',
104+
'omgilibot',
105+
'webzio-extended',
106+
107+
// You.com
108+
'youbot',
109+
110+
// Zyte
111+
'scrapy'
112+
113+
].includes(String(toResult(resultOrUA, Bots).browser.name).toLowerCase());
114+
44115
const isBot = (resultOrUA) => [
45116
'cli',
46117
'crawler',
@@ -56,6 +127,7 @@ const isElectron = () => !!(process?.versions?.hasOwnProperty('electron') ||
56127
module.exports = {
57128
getDeviceVendor,
58129
isAppleSilicon,
130+
isAIBot,
59131
isBot,
60132
isChromeFamily,
61133
isElectron,

test/mocha-test-helpers.js

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
const assert = require('assert');
22
const { UAParser } = require('../src/main/ua-parser');
3-
const { getDeviceVendor, isAppleSilicon, isBot, isChromeFamily } = require('../src/helpers/ua-parser-helpers');
3+
const { getDeviceVendor, isAppleSilicon, isAIBot, isBot, isChromeFamily } = require('../src/helpers/ua-parser-helpers');
44
const { Bots, Emails } = require('../src/extensions/ua-parser-extensions');
55

66
describe('getDeviceVendor', () => {
@@ -34,6 +34,20 @@ describe('isAppleSilicon', () => {
3434
});
3535
});
3636

37+
describe('isAIBot', () => {
38+
it('Can detect AI Bots', () => {
39+
40+
const claudeBot = 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; ClaudeBot/1.0; +claudebot@anthropic.com)';
41+
const firefox = 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/111.0';
42+
const searchGPT = 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; OAI-SearchBot/1.0; +https://openai.com/searchbot';
43+
44+
assert.equal(isAIBot(UAParser(claudeBot, Bots)), true);
45+
assert.equal(isAIBot(claudeBot), true);
46+
assert.equal(isAIBot(firefox), false);
47+
assert.equal(isAIBot(searchGPT), true);
48+
});
49+
});
50+
3751
describe('isBot', () => {
3852
it('Can detect Bots', () => {
3953

0 commit comments

Comments
 (0)