{"last_updated":"2025-03-01","total_known_bots":47,"bots":[{"name":"Applebot","company":"Apple","user_agent":"Applebot/0.1 (+http://www.apple.com/go/applebot)","respects_robots_txt":true,"compliance_rate":0.991,"estimated_daily_requests":82000000,"purpose":"Search index, Siri, Apple Intelligence training"},{"name":"GPTBot","company":"OpenAI","user_agent":"GPTBot/1.1 (+https://openai.com/gptbot)","respects_robots_txt":true,"compliance_rate":0.982,"estimated_daily_requests":65000000,"purpose":"GPT model training data collection"},{"name":"ClaudeBot","company":"Anthropic","user_agent":"ClaudeBot/0.1 (+https://anthropic.com/)","respects_robots_txt":true,"compliance_rate":0.979,"estimated_daily_requests":41000000,"purpose":"Claude AI training data"},{"name":"Google-Extended","company":"Google","user_agent":"Google-Extended/1.0","respects_robots_txt":true,"compliance_rate":0.997,"estimated_daily_requests":120000000,"purpose":"Gemini AI training data"},{"name":"CCBot","company":"Common Crawl","user_agent":"CCBot/2.0 (+https://commoncrawl.org/faq/)","respects_robots_txt":true,"compliance_rate":0.945,"estimated_daily_requests":200000000,"purpose":"Open web archive, widely used for LLM training"},{"name":"Bytespider","company":"ByteDance","user_agent":"Bytespider","respects_robots_txt":false,"compliance_rate":0.621,"estimated_daily_requests":180000000,"purpose":"TikTok AI, LLM training"},{"name":"meta-externalagent","company":"Meta","user_agent":"meta-externalagent/1.1","respects_robots_txt":true,"compliance_rate":0.971,"estimated_daily_requests":55000000,"purpose":"Meta AI (Llama) training data"},{"name":"PerplexityBot","company":"Perplexity","user_agent":"PerplexityBot/1.0","respects_robots_txt":true,"compliance_rate":0.988,"estimated_daily_requests":12000000,"purpose":"Perplexity AI search index"}]}