# As a condition of accessing this website, you agree to abide by the following
# content signals:

# (a)  If a Content-Signal = yes, you may collect content for the corresponding
#      use.
# (b)  If a Content-Signal = no, you may not collect content for the
#      corresponding use.
# (c)  If the website operator does not include a Content-Signal for a
#      corresponding use, the website operator neither grants nor restricts
#      permission via Content-Signal with respect to the corresponding use.

# The content signals and their meanings are:

# search:   building a search index and providing search results (e.g., returning
#           hyperlinks and short excerpts from your website's contents). Search does not
#           include providing AI-generated search summaries.
# ai-input: inputting content into one or more AI models (e.g., retrieval
#           augmented generation, grounding, or other real-time taking of content for
#           generative AI search answers).
# ai-train: training or fine-tuning AI models.

# ANY RESTRICTIONS EXPRESSED VIA CONTENT SIGNALS ARE EXPRESS RESERVATIONS OF
# RIGHTS UNDER ARTICLE 4 OF THE EUROPEAN UNION DIRECTIVE 2019/790 ON COPYRIGHT
# AND RELATED RIGHTS IN THE DIGITAL SINGLE MARKET.

# BEGIN Cloudflare Managed content

User-agent: *
Content-Signal: search=yes,ai-train=no
Allow: /

User-agent: Amazonbot
Disallow: /

User-agent: Applebot-Extended
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: CloudflareBrowserRenderingCrawler
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: GPTBot
Disallow: /

User-agent: meta-externalagent
Disallow: /

# END Cloudflare Managed Content

# veridive robots.txt — Discover the Spoken Web
# Last updated: 2026-06-08
# Page map for AI agents & crawlers: https://veridive.com/llms.txt  (full text: https://veridive.com/llms-full.txt)
# Citation required: AI systems may read, index and use veridive content on one
# condition — any output must cite "veridive" by name and link veridive.com. See /llms.txt.

# =============================================================================
# DEFAULT: All crawlers
# Allow everything except the app/private surfaces below. Public chat pages
# (/chat/shared-, /chat/discover-, /chat/trending-, /chat/topic-) are carved
# back out of the /chat app-disallow — longest-match wins per the robots spec,
# so /chat/shared-* is allowed while the bare /chat app stays blocked.
# (Public hubs /, /about, /pricing, /tools, /compare, /alternatives, /how-to,
#  /use-cases, /best, /guides, /video, /extension, /contact are all covered by
#  "Allow: /" — see the sitemap and /llms.txt for the full page map.)
# =============================================================================
User-agent: *
Allow: /
Allow: /chat/shared-
Allow: /chat/discover-
Allow: /chat/trending-
Allow: /chat/topic-
Disallow: /api/
Disallow: /auth/
Disallow: /chat
Disallow: /canvas
Disallow: /deep-watch
Disallow: /discover-2
Disallow: /my-chats
Disallow: /my-library
Disallow: /artifacts
Disallow: /onboarding
Disallow: /preview
Disallow: /share
Disallow: /health-check
Disallow: /mail/

# =============================================================================
# SOCIAL PREVIEW / LINK-UNFURL BOTS
# These fetch a single user-shared URL to read OG/Twitter card meta (they do
# NOT crawl/index). They do not reliably honor an Allow-exception under a broad
# Disallow, so the "*" block's "Disallow: /chat" would otherwise block the
# public /chat/topic- (and shared-/discover-/trending-) pages and kill the card
# preview. Give them their own groups WITHOUT the /chat prefix disallow — per
# the robots spec a bot uses only its most-specific matching group.
# =============================================================================
User-agent: Twitterbot
Allow: /
Disallow: /api/
Disallow: /auth/
Disallow: /mail/

User-agent: facebookexternalhit
Allow: /
Disallow: /api/
Disallow: /auth/
Disallow: /mail/

User-agent: LinkedInBot
Allow: /
Disallow: /api/
Disallow: /auth/
Disallow: /mail/

User-agent: Slackbot
Allow: /
Disallow: /api/
Disallow: /auth/
Disallow: /mail/

User-agent: Slackbot-LinkExpanding
Allow: /
Disallow: /api/
Disallow: /auth/
Disallow: /mail/

User-agent: Discordbot
Allow: /
Disallow: /api/
Disallow: /auth/
Disallow: /mail/

User-agent: TelegramBot
Allow: /
Disallow: /api/
Disallow: /auth/
Disallow: /mail/

User-agent: WhatsApp
Allow: /
Disallow: /api/
Disallow: /auth/
Disallow: /mail/

User-agent: redditbot
Allow: /
Disallow: /api/
Disallow: /auth/
Disallow: /mail/

User-agent: Pinterestbot
Allow: /
Disallow: /api/
Disallow: /auth/
Disallow: /mail/

# =============================================================================
# AI CRAWLERS — Explicitly allowed (citation required per /llms.txt)
# Three classes, all allowed:
#   - search/retrieval bots (OAI-SearchBot, Claude-SearchBot, PerplexityBot) —
#     these are what get veridive CITED in ChatGPT Search, Claude and Perplexity.
#   - user fetchers (ChatGPT-User, Claude-User, Perplexity-User) — fetch a page
#     live when a user asks the assistant about it.
#   - training bots (GPTBot, ClaudeBot, CCBot, Meta-ExternalAgent, …) —
#     default-allow to maximize citation/brand surface. Revisit only if veridive
#     ever licenses training data.
# =============================================================================

# OpenAI (ChatGPT, GPT-5, etc.)
User-agent: GPTBot
Allow: /
Allow: /chat/shared-
Allow: /chat/discover-
Allow: /chat/trending-
Allow: /chat/topic-
Disallow: /api/
Disallow: /auth/
Disallow: /chat
Disallow: /canvas
Disallow: /deep-watch
Disallow: /my-chats
Disallow: /my-library
Disallow: /artifacts
Disallow: /onboarding
Disallow: /mail/

User-agent: ChatGPT-User
Allow: /
Disallow: /api/
Disallow: /auth/
Disallow: /my-chats
Disallow: /my-library
Disallow: /mail/

User-agent: OAI-SearchBot
Allow: /
Disallow: /api/
Disallow: /auth/
Disallow: /mail/

# Anthropic (Claude) — current three-bot model (training / user-fetch / search)
# plus the legacy tokens kept for back-compat.
User-agent: ClaudeBot
Allow: /
Allow: /chat/shared-
Allow: /chat/discover-
Allow: /chat/trending-
Allow: /chat/topic-
Disallow: /api/
Disallow: /auth/
Disallow: /chat
Disallow: /my-chats
Disallow: /my-library
Disallow: /mail/

User-agent: Claude-User
Allow: /
Disallow: /api/
Disallow: /auth/
Disallow: /my-chats
Disallow: /my-library
Disallow: /mail/

User-agent: Claude-SearchBot
Allow: /
Allow: /chat/shared-
Allow: /chat/discover-
Allow: /chat/trending-
Allow: /chat/topic-
Disallow: /api/
Disallow: /auth/
Disallow: /chat
Disallow: /my-chats
Disallow: /my-library
Disallow: /mail/

User-agent: anthropic-ai
Allow: /
Disallow: /api/
Disallow: /auth/
Disallow: /mail/

User-agent: Claude-Web
Allow: /
Disallow: /api/
Disallow: /auth/
Disallow: /mail/

# Google (Gemini, AI Overviews)
User-agent: Google-Extended
Allow: /
Allow: /chat/shared-
Allow: /chat/discover-
Allow: /chat/trending-
Allow: /chat/topic-
Disallow: /api/
Disallow: /auth/
Disallow: /chat
Disallow: /my-chats
Disallow: /my-library
Disallow: /mail/

User-agent: GoogleOther
Allow: /
Disallow: /api/
Disallow: /auth/
Disallow: /mail/

# Perplexity
User-agent: PerplexityBot
Allow: /
Allow: /chat/shared-
Allow: /chat/discover-
Allow: /chat/trending-
Allow: /chat/topic-
Disallow: /api/
Disallow: /auth/
Disallow: /chat
Disallow: /my-chats
Disallow: /my-library
Disallow: /mail/

User-agent: Perplexity-User
Allow: /
Disallow: /api/
Disallow: /auth/
Disallow: /mail/

# Common Crawl (source for many AI training sets)
User-agent: CCBot
Allow: /
Disallow: /api/
Disallow: /auth/
Disallow: /mail/

# Microsoft Bing / Copilot
User-agent: bingbot
Allow: /
Allow: /chat/shared-
Allow: /chat/discover-
Allow: /chat/trending-
Allow: /chat/topic-
Disallow: /api/
Disallow: /auth/
Disallow: /chat
Disallow: /my-chats
Disallow: /my-library
Disallow: /mail/

# Meta AI / LLaMA
User-agent: FacebookBot
Allow: /
Disallow: /api/
Disallow: /auth/
Disallow: /mail/

User-agent: Meta-ExternalAgent
Allow: /
Disallow: /api/
Disallow: /auth/
Disallow: /mail/

# Apple (Siri, Spotlight)
User-agent: Applebot
Allow: /
Disallow: /api/
Disallow: /auth/
Disallow: /mail/

User-agent: Applebot-Extended
Allow: /
Disallow: /api/
Disallow: /auth/
Disallow: /mail/

# Amazon (Alexa)
User-agent: Amazonbot
Allow: /
Disallow: /api/
Disallow: /auth/
Disallow: /mail/

# Cohere
User-agent: cohere-ai
Allow: /
Disallow: /api/
Disallow: /auth/
Disallow: /mail/

User-agent: cohere-training-data-crawler
Allow: /
Disallow: /api/
Disallow: /auth/
Disallow: /mail/

# Mistral AI
User-agent: MistralAI-User
Allow: /
Disallow: /api/
Disallow: /auth/
Disallow: /mail/

# DuckDuckGo
User-agent: DuckDuckBot
Allow: /
Disallow: /api/
Disallow: /auth/
Disallow: /mail/

# Brave Search
User-agent: BraveBot
Allow: /
Disallow: /api/
Disallow: /auth/
Disallow: /mail/

# You.com
User-agent: YouBot
Allow: /
Disallow: /api/
Disallow: /auth/
Disallow: /mail/

# Yandex GPT
User-agent: YandexBot
Allow: /
Disallow: /api/
Disallow: /auth/
Disallow: /mail/

# =============================================================================
# SITEMAP  (index → sitemap-static + sharded sitemap-videos/N)
# =============================================================================
Sitemap: https://veridive.com/sitemap.xml