# Robots.txt for QuickAnalytics - Data Analytics Solutions UAE
# This file provides directives for search engine crawlers
# Learn more: https://developers.google.com/search/docs/crawling-indexing/robots/robots_txt

# Default rules for all web crawlers
User-agent: *
Allow: /

# Sitemap locations - help search engines discover all pages
Sitemap: https://quickanalytics.ae/sitemap.xml
Sitemap: https://quickanalytics.ae/sitemap-images.xml
Sitemap: https://quickanalytics.ae/sitemap-news.xml

# Crawl delay to be respectful of server resources (1 second)
# Note: Crawl-delay is not part of the robots.txt standard; Google ignores it,
# while some other crawlers (e.g. Bing) respect it
Crawl-delay: 1

# Disallow private and administrative areas
Disallow: /admin/
Disallow: /api/
Disallow: /crm/
Disallow: /_*
Disallow: /*.json$
Disallow: /404
Disallow: /500
Disallow: /auth/
Disallow: /callback
Disallow: /login
Disallow: /logout

# Disallow URL parameters that create duplicate content
# e.g. /services/?utm_source=newsletter is blocked, while /services/ itself stays crawlable
Disallow: /*?*utm_source=*
Disallow: /*?*utm_medium=*
Disallow: /*?*utm_campaign=*
Disallow: /*?*utm_content=*
Disallow: /*?*utm_term=*
Disallow: /*?*ref=*
Disallow: /*?*source=*
Disallow: /*?*fbclid=*
Disallow: /*?*gclid=*

# Explicitly allow important public directories and pages
Allow: /services/
Allow: /industries/
Allow: /resources/
Allow: /portfolio/
Allow: /about
Allow: /contact
Allow: /privacy
Allow: /terms
Allow: /cookies
Allow: /sitemap

# Allow CSS, JS, and media files for proper rendering
Allow: /*.css$
Allow: /*.js$
Allow: /*.png$
Allow: /*.jpg$
Allow: /*.jpeg$
Allow: /*.gif$
Allow: /*.webp$
Allow: /*.svg$
Allow: /*.ico$
Allow: /*.pdf$

# Specific directives for major search engines
# Note: a crawler that matches one of the named groups below follows only that
# group; the default rules above are not merged into it
User-agent: Googlebot
Allow: /
Crawl-delay: 1

User-agent: Bingbot
Allow: /
Crawl-delay: 2

User-agent: Slurp
Allow: /
Crawl-delay: 2

User-agent: DuckDuckBot
Allow: /
Crawl-delay: 1

User-agent: Baiduspider
Allow: /
Crawl-delay: 3

User-agent: YandexBot
Allow: /
Crawl-delay: 2

# Social media crawlers for link previews
User-agent: facebookexternalhit
Allow: /

User-agent: Twitterbot
Allow: /

User-agent: LinkedInBot
Allow: /

User-agent: WhatsApp
Allow: /

# SEO and analysis tools
User-agent: SemrushBot
Allow: /
Crawl-delay: 5

User-agent: AhrefsBot
Allow: /
Crawl-delay: 10

User-agent: MJ12bot
Allow: /
Crawl-delay: 10

# Block aggressive or unwanted crawlers
User-agent: SemrushBot-SA
Disallow: /

User-agent: MegaIndex
Disallow: /

User-agent: DotBot
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: ChatGPT-User
Disallow: /

User-agent: GPTBot
Disallow: /

User-agent: Google-Extended
Disallow: /

# Archive crawlers
User-agent: ia_archiver
Allow: /

User-agent: archive.org_bot
Allow: /

# Additional notes:
# - This robots.txt follows best practices for SEO and crawler management
# - All important content pages are explicitly allowed for search engine indexing
# - Private areas and duplicate-content URLs are blocked in the default crawler rules
# - Crawl delays are set to balance SEO discovery with server performance
# - Major search engines and social media crawlers are accommodated
# - AI training bots are blocked to protect content
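
# Optional example (commented out): other AI / LLM data crawlers that are
# sometimes blocked in the same way as GPTBot and CCBot above. These are
# widely used user-agent tokens, but they change over time - verify each
# vendor's current crawler documentation before uncommenting.
#
# User-agent: ClaudeBot
# Disallow: /
#
# User-agent: PerplexityBot
# Disallow: /
#
# User-agent: Bytespider
# Disallow: /
#
# User-agent: Applebot-Extended
# Disallow: /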