# robots.txt for wal.sh
# https://www.robotstxt.org/
#
# Layout notes:
#   * One record per line; a group is one or more User-agent lines followed
#     by its rules.
#   * Each user agent has exactly ONE group. RFC 9309 parsers merge duplicate
#     groups, but first-match parsers only read the first one, so rules split
#     across two groups for the same agent can be silently dropped.
#   * Crawl-delay is a non-standard extension; only some crawlers honor it.

# Sitemap (file-level record, independent of any group)
Sitemap: https://wal.sh/sitemap.xml

# ── Default group (crawlers with no named group of their own) ──
# Webring parametric URLs (w=, t=, d=) are allowed — the honeypot works
# via torus topology, not robots.txt blocking.
# Also denies the dogfood "walsh-only" canary for generic bots: the named
# Walsh-Research group below Allows it -- the bidirectional RFC 9309
# group-selection fixture (walsh-research-test-fixtures).
# NOTE(review): a crawler that matches any named group in this file uses only
# that group, so this Disallow does not reach those crawlers. Confirm whether
# the walsh-only canary should also be listed in each named group.
# The specific Disallow is listed before the catch-all Allow so that
# order-sensitive parsers reach the same answer as longest-match parsers.
User-agent: *
Disallow: /research/bots/dogfood-walsh-only
Allow: /

# ── Crawl-delay for aggressive bots ──
# Where one of these bots also has a lab canary, it lives in the same group.
User-agent: SemrushBot
Disallow: /research/bots/lab/disallow-SemrushBot/
Crawl-delay: 10

User-agent: AhrefsBot
Disallow: /research/bots/lab/disallow-AhrefsBot/
Crawl-delay: 10

User-agent: MJ12bot
Disallow: /*?C=
Crawl-delay: 60

User-agent: Amazonbot
Disallow: /research/bots/lab/disallow-Amazonbot/
Crawl-delay: 60

User-agent: Bytespider
Disallow: /research/bots/lab/disallow-Bytespider/
Crawl-delay: 30

# ── Our own bot ──
# Our own bot honors robots.txt — sites can block us with:
#   User-agent: Walsh-Research
#   Disallow: /
# See: https://wal.sh/bot/
User-agent: Walsh-Research
Disallow: /research/bots/dogfood-disallow
Allow: /research/bots/dogfood-allow
Allow: /research/bots/dogfood-walsh-only
Crawl-delay: 2

# ── Bot compliance lab canaries ──
# Each bot has a disallowed page. If it fetches the page, it violated exclusion.
# Monitor: grep "lab/disallow" in access logs.
# (SemrushBot, AhrefsBot, Amazonbot and Bytespider canaries are in their
# crawl-delay groups above.)
User-agent: GPTBot
Disallow: /research/bots/lab/disallow-GPTBot/

User-agent: OAI-SearchBot
Disallow: /research/bots/lab/disallow-OAI-SearchBot/

User-agent: ChatGPT-User
Disallow: /research/bots/lab/disallow-ChatGPT-User/

User-agent: ClaudeBot
Disallow: /research/bots/lab/disallow-ClaudeBot/

User-agent: anthropic-ai
Disallow: /research/bots/lab/disallow-anthropic-ai/

User-agent: PerplexityBot
Disallow: /research/bots/lab/disallow-PerplexityBot/

User-agent: Perplexity-User
Disallow: /research/bots/lab/disallow-Perplexity-User/

User-agent: Google-Extended
Disallow: /research/bots/lab/disallow-Google-Extended/

User-agent: Applebot
Disallow: /research/bots/lab/disallow-Applebot/

User-agent: Applebot-Extended
Disallow: /research/bots/lab/disallow-Applebot-Extended/

User-agent: DataForSeoBot
Disallow: /research/bots/lab/disallow-DataForSeoBot/

User-agent: meta-externalagent
Disallow: /research/bots/lab/disallow-meta-externalagent/

User-agent: DuckAssistBot
Disallow: /research/bots/lab/disallow-DuckAssistBot/

User-agent: cohere-ai
Disallow: /research/bots/lab/disallow-cohere-ai/

User-agent: CCBot
Disallow: /research/bots/lab/disallow-CCBot/

User-agent: AI2Bot
Disallow: /research/bots/lab/disallow-AI2Bot/

User-agent: Diffbot
Disallow: /research/bots/lab/disallow-Diffbot/

User-agent: YouBot
Disallow: /research/bots/lab/disallow-YouBot/

User-agent: MistralAI-User
Disallow: /research/bots/lab/disallow-MistralAI-User/

User-agent: LinerBot
Disallow: /research/bots/lab/disallow-LinerBot/

User-agent: PetalBot
Disallow: /research/bots/lab/disallow-PetalBot/

User-agent: newsai
Disallow: /research/bots/lab/disallow-newsai/

User-agent: Qwantbot
Disallow: /research/bots/lab/disallow-Qwantbot/

User-agent: SERankingBacklinksBot
Disallow: /research/bots/lab/disallow-SERankingBacklinksBot/

User-agent: SeznamBot
Disallow: /research/bots/lab/disallow-SeznamBot/

User-agent: YandexBot
Disallow: /research/bots/lab/disallow-YandexBot/