Eddie.surf API

AI-powered web crawling API that extracts structured data from websites

Base URL: https://api.eddie.surf

Quick Start

Get started in 2 minutes with basic company data extraction:

cURL

# Submit crawl job
curl -X POST https://api.eddie.surf/crawl \
  -H "X-API-Key: your-api-key-here" \
  -H "Content-Type: application/json" \
  -d '{
  "urls": ["https://data-surfer.com"],
  "context": {"purpose": "Company research"},
  "json": {
    "company_name": {
      "type": "string",
      "description": "Company name"
    }
  }
}'

JavaScript

const response = await fetch('https://api.eddie.surf/crawl', {
  method: 'POST',
  headers: { 
    'X-API-Key': 'your-api-key-here',
    'Content-Type': 'application/json'
  },
  body: JSON.stringify({
    urls: ['https://data-surfer.com'],
    context: {purpose: 'Company research'},
    json: {
      company_name: {
        type: 'string',
        description: 'Company name'
      }
    }
  })
});

const data = await response.json();
console.log(data);

Python

import requests

response = requests.post(
    'https://api.eddie.surf/crawl',
    headers={'X-API-Key': 'your-api-key-here'},
    json={
        'urls': ['https://data-surfer.com'],
        'context': {'purpose': 'Company research'},
        'json': {
            'company_name': {
                'type': 'string',
                'description': 'Company name'
            }
        }
    }
)

data = response.json()
print(data)

Ruby

require 'net/http'
require 'json'
require 'uri'

uri = URI('https://api.eddie.surf/crawl')
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true

request = Net::HTTP::Post.new(uri)
request['X-API-Key'] = 'your-api-key-here'
request['Content-Type'] = 'application/json'
request.body = {
  urls: ['https://data-surfer.com'],
  context: { purpose: 'Company research' },
  json: {
    company_name: {
      type: 'string',
      description: 'Company name'
    }
  }
}.to_json

response = http.request(request)
data = JSON.parse(response.body)
puts data

PHP

<?php
$url = 'https://api.eddie.surf/crawl';
$data = [
    'urls' => ['https://data-surfer.com'],
    'context' => ['purpose' => 'Company research'],
    'json' => [
        'company_name' => [
            'type' => 'string',
            'description' => 'Company name'
        ]
    ]
];

$options = [
    'http' => [
        'header' => "X-API-Key: your-api-key-here\r\n" .
                   "Content-Type: application/json\r\n",
        'method' => 'POST',
        'content' => json_encode($data)
    ]
];

$context = stream_context_create($options);
$response = file_get_contents($url, false, $context);
$result = json_decode($response, true);
print_r($result);
?>

Go

package main

import (
    "bytes"
    "encoding/json"
    "fmt"
    "net/http"
)

func main() {
    url := "https://api.eddie.surf/crawl"
    
    payload := map[string]interface{}{
        "urls":    []string{"https://data-surfer.com"},
        "context": map[string]string{"purpose": "Company research"},
        "json": map[string]interface{}{
            "company_name": map[string]string{
                "type":        "string",
                "description": "Company name",
            },
        },
    }
    
    jsonData, _ := json.Marshal(payload)
    
    req, _ := http.NewRequest("POST", url, bytes.NewBuffer(jsonData))
    req.Header.Set("X-API-Key", "your-api-key-here")
    req.Header.Set("Content-Type", "application/json")
    
    client := &http.Client{}
    resp, err := client.Do(req)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()
    
    var result map[string]interface{}
    json.NewDecoder(resp.Body).Decode(&result)
    fmt.Println(result)
}

Authentication

All API endpoints require authentication using API keys. Include your API key in the X-API-Key header with every request.

API Key Usage

HTTP Header
X-API-Key: your-api-key-here

Rate Limits

  • Rate Limit: 60 requests per minute (default)
Getting API Keys: API keys are currently managed manually. Contact support for access.
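
The default limit works out to one request per second on average. A minimal client-side throttle, assuming the default 60 requests per minute shown above, could look like this Python sketch:

import time

class Throttle:
    """Naive client-side throttle for the default 60 requests/minute limit."""

    def __init__(self, requests_per_minute=60):
        self.min_interval = 60.0 / requests_per_minute  # seconds between requests
        self.last_request = 0.0

    def wait(self):
        # Sleep just long enough to keep the average rate under the limit
        elapsed = time.time() - self.last_request
        if elapsed < self.min_interval:
            time.sleep(self.min_interval - elapsed)
        self.last_request = time.time()

# Usage: call throttle.wait() before each API request
throttle = Throttle()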

API Workflow

Eddie.surf uses a simple two-step async process for web crawling jobs.

Choose Your Endpoint

  • /crawl: For 1-199 URLs with standard processing
  • /crawl-batch: For 200+ URLs with optimized batch processing

Two Simple Steps

  1. Submit: POST to /crawl or /crawl-batch → Get job ID instantly
  2. Poll or Wait: Either poll /crawl/{job_id} or wait for callback notification
/crawl/{job_id} returns:
  • Processing status and progress while running
  • Complete results when finished

Monitoring Options

  • Callback (Recommended): Provide a callback_url and receive notifications when complete (per site or as grouped array)
  • Polling: Repeatedly check /crawl/{job_id} for status updates
  • Both: Use callbacks for automatic notifications AND polling for real-time progress monitoring
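
Putting the two steps together, here is a minimal Python sketch that submits a /crawl job and polls /crawl/{job_id} until it finishes (the schema and the fixed 10-second interval are illustrative; see the Polling Guide for the recommended backoff schedule):

import time
import requests

API_KEY = "your-api-key-here"
BASE = "https://api.eddie.surf"

# Step 1: submit the job; the immediate response carries the job ID
submitted = requests.post(
    f"{BASE}/crawl",
    headers={"X-API-Key": API_KEY},
    json={
        "urls": ["https://data-surfer.com"],
        "context": {"purpose": "Company research"},
        "json": {"company_name": {"type": "string", "description": "Company name"}},
    },
).json()
job_id = submitted["job_id"]

# Step 2: poll until the job reports "completed" or "failed"
while True:
    job = requests.get(f"{BASE}/crawl/{job_id}", headers={"X-API-Key": API_KEY}).json()
    if job["status"] in ("completed", "failed"):
        break
    time.sleep(10)  # fixed interval for brevity

print(job["status"], job.get("sites"))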

Automatic Processing

  • HTML extraction with advanced scraping
  • AI link discovery and prioritization
  • Content analysis with Claude Sonnet 4
  • Structured data extraction
  • Notifications on completion

POST /crawl

Submit a crawling job for 1-199 URLs. Returns immediately with job ID.

💰 Credit Cost: 1.5 credits per page processed (e.g., 100 pages = 150 credits)

Request Body

Parameter Type Required Description
urls string[] Required Array of URLs to crawl (1-199)
context object Required Background information to guide extraction

Purpose: Provides background context about your perspective and why you're extracting this data.

Examples:

{ "user_role": "Head of Sales", "purpose": "Researching prospects for outbound campaigns", "target_market": "B2B SaaS companies with 10-500 employees", "focus": "Finding contact info and company size" }
{ "purpose": "Academic research on pricing models", "research_focus": "SaaS subscription tiers and pricing", "institution": "Stanford Business School", "use_case": "MBA thesis on freemium strategies" }

Impact: Better context = AI prioritizes the right pages and extracts more relevant data for your specific use case.

json object Required Schema defining what data to extract

Purpose: Defines the structure and fields you want extracted from crawled pages.

Basic Example:

{ "company_name": { "type": "string", "description": "Full legal company name" }, "employee_count": { "type": "number", "description": "Number of employees as integer" }, "has_pricing_page": { "type": "boolean", "description": "Whether the company has a dedicated pricing page" } }

Advanced Example:

{ "pricing_tiers": { "type": "array", "description": "List of subscription plans and prices", "priority": "high", "additional_guidelines": [ "Include both monthly and annual pricing if available", "Extract feature differences between tiers" ] }, "website_quality_score": { "type": "score", "description": "Overall quality and professionalism of the website", "additional_guidelines": [ "Score 1-10 based on: has pricing page, contact info available, recent blog posts, professional design", "Higher scores for clear pricing, multiple contact methods, and recent content", "Lower scores for broken links, missing contact info, or outdated content" ] }, "uses_modern_stack": { "type": "boolean", "description": "Whether company uses modern development technologies" }, "founded_year": { "type": "number", "description": "Year the company was founded as integer" } }

Data Types: string, number, boolean, array, score

Field Properties: Each field can have type, description, priority, and additional_guidelines.

max_depth integer Optional Link levels to follow (1-10), default: 3
max_pages integer Optional Maximum pages to crawl (1-1000), default: 15
callback_url string Optional Callback URL for notifications
callback_mode string Optional "once" (default) = one callback when all complete, "multi" = callback per site
timeout_per_page integer Optional Timeout seconds per page (1-180), default: 30
rules string[] Optional Custom processing instructions

Purpose: Provide specific instructions to guide how the AI processes and extracts data.

Examples:

{ "rules": [ "Focus on recent content from the last 2 years", "Skip footer and sidebar content", "Prioritize content from /about, /team, and /contact pages", "For pricing, extract both listed prices and any promotional discounts" ] }

Output Formatting Rules:

{ "rules": [ "Translate all extracted text to English in the output", "Convert all prices to USD format", "Format dates as YYYY-MM-DD", "Standardize company names to include legal entity (Inc, LLC, etc.)" ] }

Content Filtering Rules:

{ "rules": [ "Ignore job board and career page links", "Skip cookie policies and legal disclaimers", "Extract technology mentions from blog posts and case studies", "Look for partnership and integration information" ] }

Impact: Rules help the AI avoid irrelevant content and focus on what matters most for your specific extraction goals.

mock boolean Optional Test mode - generates fake data without using credits
include_technical boolean Optional Collect technical data (DNS, headers) for the main URL - costs 1 credit

Purpose: Collects comprehensive technical information about the website's infrastructure and domain registration.

Data Collected:

  • DNS Records: A, AAAA, MX, TXT, NS records for the domain
  • HTTP Headers: Response headers from the homepage request via ScrapingFish

Cost: 1 credit per site (charged before crawling the first page)

Use Cases: Technical due diligence, security analysis, infrastructure research
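
To show how the optional parameters above combine with the required ones, here is a Python sketch of a fuller /crawl request; every value is illustrative, and the callback URL is a placeholder:

import requests

payload = {
    "urls": ["https://data-surfer.com"],
    "context": {"purpose": "Company research"},
    "json": {"company_name": {"type": "string", "description": "Company name"}},
    # Optional parameters described above; all values here are illustrative
    "max_depth": 2,                     # follow links up to 2 levels deep
    "max_pages": 25,                    # stop after 25 pages per site
    "callback_url": "https://yoursite.com/webhooks/crawl-complete",
    "callback_mode": "once",            # one callback when the whole job finishes
    "timeout_per_page": 60,             # seconds per page
    "rules": ["Skip footer and sidebar content"],
    "mock": False,                      # set True to test without spending credits
    "include_technical": True,          # collect DNS records and HTTP headers (+1 credit per site)
}

response = requests.post(
    "https://api.eddie.surf/crawl",
    headers={"X-API-Key": "your-api-key-here"},
    json=payload,
)
print(response.json()["job_id"])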

JSON Schema Fields

Each field in the json object supports these properties:

Property Type Required Description
type string Optional "string", "number", "boolean", "array", "score"
description string Optional What this field represents

Purpose: Provides context to the AI about exactly what information to extract.

Best Practices:

  • Be specific: "Primary business email address" vs "email"
  • Include format preferences: "Founded year as 4-digit number"
  • Specify source preference: "Company description from About page"
  • Define scope: "Number of full-time employees, excluding contractors"

Examples:

"description": "Primary contact email address for business inquiries" "description": "Annual recurring revenue in USD millions" "description": "List of software development technologies used"
priority string Optional "high", "medium", "low"
additional_guidelines string[] Optional Specific extraction instructions

Purpose: Provide field-specific instructions to refine extraction quality and consistency.

Common Use Cases:

  • Format standardization: "Extract as 4-digit year only"
  • Source preferences: "Prefer About page over footer contact info"
  • Filtering rules: "Exclude job postings from employee count"
  • Scoring criteria: "Score based on design quality, content freshness, and ease of navigation"

Examples by Field Type:

"email": { "additional_guidelines": [ "Prefer general business emails over personal ones", "Look for contact@, info@, or hello@ addresses first" ] } "technologies": { "additional_guidelines": [ "Include both programming languages and frameworks", "Extract from job postings and technical blog posts", "Focus on current tech stack, not legacy mentions" ] } "company_quality_score": { "additional_guidelines": [ "Factor in: website design, contact information availability, recent content updates", "Higher scores for clear pricing and professional presentation", "Lower scores for broken links or outdated copyright years" ] }

Impact: Helps achieve consistent, high-quality extractions that match your specific requirements.

Initial Response

{
  "status": "success",
  "job_id": 123,
  "total_sites": 2,
  "sites": [
    {
      "site_id": 456,
      "url": "https://data-surfer.com"
    },
    {
      "site_id": 457,
      "url": "https://eddie.surf"
    }
  ],
  "message": "Created crawl job 123 with 2 sites",
  "credits_remaining": 855,
  "credits_used": 0
}

POST /crawl-batch

Process large numbers of domains efficiently with batch processing and optimized AI inference.

💰 Credit Cost: 1.0 credits per page processed (e.g., 100 pages = 100 credits)
Batch vs Individual Processing:
  • /crawl: Multiple URLs = 1 job with multiple sites = 1 callback with array of site results
  • /crawl-batch: 200+ URLs = 1 job with multiple sites = 1 callback with array of site results

Key Differences from Individual /crawl

  • Minimum 200 URLs required - Designed for large-scale data collection
  • Single job ID - All URLs processed as one job with individual sites
  • Optimized AI processing - Uses batch inference for cost efficiency
  • Site-organized results - Individual synthesis per site in response array
  • Single callback - One notification when all domains complete

Request Parameters

Same parameters as /crawl with these differences:

Parameter Type Required Batch Requirement Description
urls string[] Required Minimum 200 unique URLs Array of domains to process
max_pages integer Optional Default: 1000 (higher than individual) Maximum pages per domain

Use Cases

  • Lead Generation: Extract contact info from 500+ company websites
  • Market Research: Analyze industry data across hundreds of competitors
  • Data Migration: Extract structured data from legacy websites in bulk
  • Compliance Audits: Check privacy policies across large website portfolios
Important: Each domain gets individual synthesis - data is never mixed between domains. The "whereFound" citations only reference pages from that specific domain.
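
A minimal Python sketch of a batch submission, assuming your 200+ unique URLs sit in a local file (file name, context, and schema are illustrative):

import requests

# Load at least 200 unique URLs, one per line (file name is illustrative)
with open("domains.txt") as f:
    urls = list({line.strip() for line in f if line.strip()})

assert len(urls) >= 200, "/crawl-batch requires a minimum of 200 unique URLs"

response = requests.post(
    "https://api.eddie.surf/crawl-batch",
    headers={"X-API-Key": "your-api-key-here"},
    json={
        "urls": urls,
        "context": {"purpose": "Lead generation research"},
        "json": {
            "contact_email": {"type": "string", "description": "Primary business contact email"}
        },
        # Single callback fires once all domains finish (placeholder URL)
        "callback_url": "https://yoursite.com/webhooks/batch-complete",
    },
)
print(response.json())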

GET /crawl/{job_id}

Check job status and retrieve results from a single endpoint that adapts based on processing state.

# Check status
curl -H "X-API-Key: your-api-key-here" https://api.eddie.surf/crawl/123
This endpoint automatically returns:
  • While processing: Status and progress info
  • When complete: Full crawl results and data

Status Logic

  • "completed": Any data from your JSON schema was extracted successfully. If some pages failed during crawling, this is noted in the message field
  • "failed": No data from your schema was extracted, with error details explaining why
  • "processing": Still crawling or processing pages

Note: The API never returns "partial" status. Jobs with successful data extraction are always marked as "completed" with additional context about any failed pages.

Response Examples

{
  "status": "processing",
  "job_id": 123,
  "total_sites": 3,
  "completed_sites": 1,
  "processing_sites": 1,
  "failed_sites": 0,
  "credits_remaining": 847,
  "credits_used": 7.5
}
{
  "status": "completed",
  "job_id": 123,
  "total_sites": 3,
  "completed_sites": 2,
  "failed_sites": 1,
  "processing_sites": 0,
  "credits_remaining": 835,
  "credits_used": 19.5,
  "sites": [
    {
      "site_id": 456,
      "url": "https://data-surfer.com",
      "status": "completed",
      "completed_pages": 5,
      "failed_pages": 0,
      "pending_pages": 0,
      "results": {
        "company_name": {
          "value": "Data Surfer Inc.",
          "confidence": 5,
          "whereFound": "Found at [1]\\n\\nSources:\\n[1] https://data-surfer.com/"
        }
      },
      "created_at": "2025-01-20T10:30:00Z",
      "updated_at": "2025-01-20T10:35:00Z"
    },
    {
      "site_id": 457,
      "url": "https://eddie.surf",
      "status": "completed",
      "completed_pages": 4,
      "failed_pages": 0,
      "pending_pages": 0,
      "results": {
        "company_name": {
          "value": "Eddie.surf Inc.",
          "confidence": 5,
          "whereFound": "Found at [1]\\n\\nSources:\\n[1] https://eddie.surf/"
        }
      },
      "created_at": "2025-01-20T10:30:00Z",
      "updated_at": "2025-01-20T10:36:00Z"
    },
    {
      "site_id": 458,
      "url": "https://broken-site.com",
      "status": "failed",
      "completed_pages": 0,
      "failed_pages": 3,
      "pending_pages": 0,
      "message": "All pages failed during scraping",
      "created_at": "2025-01-20T10:30:00Z",
      "updated_at": "2025-01-20T10:31:00Z"
    }
  ],
  "created_at": "2025-01-20T10:30:00Z",
  "updated_at": "2025-01-20T10:36:00Z"
}
{
  "status": "failed",
  "job_id": 124,
  "total_sites": 1,
  "completed_sites": 0,
  "failed_sites": 1,
  "processing_sites": 0,
  "credits_remaining": 851,
  "credits_used": 4.5,
  "sites": [
    {
      "site_id": 459,
      "url": "https://broken-site.com",
      "status": "failed",
      "completed_pages": 0,
      "failed_pages": 3,
      "pending_pages": 0,
      "created_at": "2025-01-20T11:00:00Z",
      "updated_at": "2025-01-20T11:01:00Z"
    }
  ],
  "created_at": "2025-01-20T11:00:00Z",
  "updated_at": "2025-01-20T11:01:00Z"
}

GET /crawl/{job_id}/{site_id}

Get individual site status and results within a job.

# Check individual site status
curl -H "X-API-Key: your-api-key-here" https://api.eddie.surf/crawl/123/456

Returns detailed status and results for a single site within a job. Useful for monitoring progress on specific URLs within a larger job.

{
  "status": "processing",
  "job_id": 123,
  "site_id": 456,
  "url": "https://data-surfer.com",
  "completed_pages": 5,
  "failed_pages": 1,
  "pending_pages": 2,
  "progress": 75,
  "message": "5 pages completed, 2 pending, 1 failed"
}
{
  "status": "completed",
  "job_id": 123,
  "site_id": 456,
  "url": "https://data-surfer.com",
  "completed_pages": 7,
  "failed_pages": 0,
  "pending_pages": 0,
  "progress": 100,
  "results": {
    "company_name": {
      "value": "Data Surfer Inc.",
      "confidence": 5,
      "whereFound": "Found at [1]\\n\\nSources:\\n[1] https://data-surfer.com/"
    }
  },
  "message": "Data extracted successfully",
  "created_at": "2025-01-20T10:30:00Z",
  "updated_at": "2025-01-20T10:35:00Z",
  "credits_remaining": 844,
  "credits_used": 10.5
}
{
  "status": "failed",
  "job_id": 123,
  "site_id": 456,
  "url": "https://data-surfer.com",
  "completed_pages": 0,
  "failed_pages": 3,
  "pending_pages": 0,
  "progress": 0,
  "message": "All pages failed during scraping. Sample errors: HTTP 404 Not Found; Connection timeout",
  "credits_remaining": 851,
  "credits_used": 4.5
}
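
Once a site reports "completed", the extracted fields live under results, keyed by your schema field names. A small Python sketch for reading them out of the per-site response shown above:

import requests

resp = requests.get(
    "https://api.eddie.surf/crawl/123/456",
    headers={"X-API-Key": "your-api-key-here"},
).json()

if resp["status"] == "completed":
    # Each field carries the extracted value, a confidence score, and source citations
    for field, extraction in resp["results"].items():
        print(field, "=", extraction["value"])
        print("  confidence:", extraction["confidence"])
        print("  sources:", extraction["whereFound"])
elif resp["status"] == "failed":
    print("Site failed:", resp.get("message"))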

POST /smart-search

Find websites using AI-powered search with intelligent filtering and quality control.

💰 Credit Cost: 1 credit for first 10 results + 1 credit per additional 10 results (e.g., 30 results = 3 credits).
Credits calculated on final quality-controlled results.
curl -X POST https://api.eddie.surf/smart-search \
  -H "X-API-Key: your-api-key-here" \
  -H "Content-Type: application/json" \
  -d '{
    "query": "game development studios in San Francisco",
    "max_results": 30,
    "website_only": true,
    "skip_duplicate_domains": true,
    "context": {
      "intent": "find_businesses",
      "location": "San Francisco",
      "business_type": "game development studios"
    },
    "rules": [
      "Include gaming subsidiaries of larger companies",
      "Improve subpage URLs to root domains"
    ],
    "additional_guidelines": [
      "Focus on companies with actual SF presence",
      "Exclude educational institutions"
    ]
  }'

Request Parameters

Parameter Type Required Description
query string Required The search query. Can be simple ("project management software") or complex ("best CRM for startups under $50/month")
max_results integer Optional Maximum number of results to return (1-5000). Default: 10
website_only boolean Optional Only return results with valid website URLs. Default: false
skip_duplicate_domains boolean Optional Skip results from domains already seen. Highly recommended for product/service searches. Default: false
callback_url string Optional URL to receive a POST notification when the job completes. Callback includes all search results and job metadata.
context object Optional JSON object providing search context (e.g., {"intent": "find_businesses", "location": "San Francisco"})
rules string[] Optional Array of search rules (e.g., ["Include subsidiaries", "Improve URLs to root domains"])
additional_guidelines string[] Optional Array of additional guidelines for AI processing (e.g., ["Focus on companies with SF offices", "Exclude educational institutions"])

Key Features

  • Uses multiple search services
  • Smart filtering based on search intent
  • Quality control removes irrelevant results
  • Continues searching until target result count is met

Search Examples

Product/Service Search (with deduplication):
{
  "query": "project management software",
  "max_results": 25,
  "website_only": true,
  "skip_duplicate_domains": true
}

Returns actual PM tools like Asana, Monday.com, ClickUp - not articles about them

Local Business Search:
{
  "query": "italian restaurant chicago",
  "max_results": 20
}

Returns actual restaurants with addresses and phone numbers

Academic Research:
{
  "query": "climate change research papers 2025",
  "max_results": 15
}

Returns actual research papers with citations and publication info

With Callback Notification:
{
  "query": "email marketing platforms",
  "max_results": 25,
  "website_only": true,
  "callback_url": "https://yoursite.com/webhooks/search-complete"
}

Your callback URL will receive a POST request when the search completes with all results

Response Format

{
  "status": "success",
  "job_id": 123,
  "max_results": 30,
  "query": "crm software",
  "message": "Created smart search job 123",
  "credits_remaining": "1250.0",
  "credits_used": 1
}

GET /smart-search/{job_id}

Check search status and retrieve results.

curl -H "X-API-Key: your-api-key-here" \
  https://api.eddie.surf/smart-search/123

Response While Processing

{
  "status": "processing",
  "job_id": 123,
  "total_results": 15,
  "created_at": "2025-01-20T10:00:00.000Z",
  "updated_at": "2025-01-20T10:00:05.000Z"
}

Response When Complete

{
  "status": "complete",
  "job_id": 123,
  "total_results": 30,
  "results": [
    {
      "name": "Data Surfer",
      "link": "https://data-surfer.com",
      "snippet": "World's #1 Lead Generation system...",
      "address": null,
      "phone": null,
      "meta_data": {
        "rating": 4.5,
        "review_count": 1250
      }
    }
    // ... more results
  ],
  "created_at": "2025-01-20T10:00:00.000Z",
  "updated_at": "2025-01-20T10:00:25.000Z",
  "completed_at": "2025-01-20T10:00:25.000Z"
}
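
Smart search pairs naturally with /crawl: once a search job completes, the result links can be fed straight into a crawl job. A rough Python sketch (query, context, and schema are illustrative):

import time
import requests

API_KEY = "your-api-key-here"
BASE = "https://api.eddie.surf"
headers = {"X-API-Key": API_KEY}

# 1. Submit a smart search and wait for it to complete
search_job = requests.post(
    f"{BASE}/smart-search",
    headers=headers,
    json={
        "query": "crm software",
        "max_results": 20,
        "website_only": True,
        "skip_duplicate_domains": True,
    },
).json()["job_id"]

while True:
    search = requests.get(f"{BASE}/smart-search/{search_job}", headers=headers).json()
    if search["status"] != "processing":
        break
    time.sleep(10)

# 2. Feed the discovered websites into a crawl job
links = [r["link"] for r in search.get("results", []) if r.get("link")]
crawl = requests.post(
    f"{BASE}/crawl",
    headers=headers,
    json={
        "urls": links,
        "context": {"purpose": "Evaluate CRM vendors"},
        "json": {"company_name": {"type": "string", "description": "Company name"}},
    },
).json()
print("Crawl job:", crawl["job_id"])
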
Tips for Best Results:
  • Always use skip_duplicate_domains: true for product/service searches to avoid multiple results for the same company (e.g., a coffee shop search returning several Starbucks locations)
  • Use website_only: true to ensure all results have clickable links
  • Be specific in your query - "accounting software for small business" yields better results than just "accounting"
  • The AI automatically optimizes your query for the best results

Callbacks

Receive automatic notifications when your crawl jobs complete via callbacks.

Setup

Include a callback_url parameter in your POST /crawl request to receive notifications.

Callback Modes

Control how callbacks are delivered using the callback_mode parameter:

  • "once" (default): Single callback with all sites as an array when job completes
  • "multi": Individual callback per site as each completes
Output Differences:
  • Once Mode: 3 URLs = 1 callback with job-level data and sites array
  • Multi Mode: 3 URLs = 3 separate callbacks, each with individual site data
  • Batch Mode: Always uses "once" mode only

Callback Examples

// callback_mode: "once" → Single callback when all sites complete
// Matches GET /crawl/{job_id} response format
{
  "status": "completed",
  "job_id": 123,
  "total_sites": 3,
  "completed_sites": 2,
  "failed_sites": 1,
  "processing_sites": 0,
  "sites": [
    {
      "site_id": 456,
      "url": "https://data-surfer.com",
      "status": "completed",
      "completed_pages": 10,
      "failed_pages": 5,
      "pending_pages": 0,
      "results": {
        "company_name": {
          "value": "Data Surfer Inc.",
          "confidence": 5,
          "whereFound": "Found at [1]\\n\\nSources:\\n[1] https://data-surfer.com/"
        }
      },
      "created_at": "2025-01-20T10:30:00Z",
      "updated_at": "2025-01-20T10:35:00Z"
    },
    {
      "site_id": 457,
      "url": "https://eddie.surf",
      "status": "completed",
          "completed_pages": 8,
      "failed_pages": 0,
      "pending_pages": 0,
      "results": {
        "company_name": {
          "value": "Eddie.surf Inc.",
          "confidence": 5,
          "whereFound": "Found at [1]\\n\\nSources:\\n[1] https://eddie.surf/"
        }
      },
      "created_at": "2025-01-20T10:30:00Z",
      "updated_at": "2025-01-20T10:36:00Z"
    },
    {
      "site_id": 458,
      "url": "https://broken-site.com",
      "status": "failed",
      "completed_pages": 0,
      "failed_pages": 1,
      "pending_pages": 0,
      "results": null,
      "created_at": "2025-01-20T10:30:00Z",
      "updated_at": "2025-01-20T10:32:00Z"
    }
  ],
  "created_at": "2025-01-20T10:30:00Z",
  "updated_at": "2025-01-20T10:36:00Z",
  "credits_remaining": 826,
  "credits_used": 28.5
}
// callback_mode: "multi" → 3 separate callbacks as each site completes
// Matches GET /crawl/{job_id}/{site_id} response format
{
  "status": "completed",
  "job_id": 123,
  "site_id": 456,
  "url": "https://data-surfer.com",
  "completed_pages": 10,
  "failed_pages": 5,
  "pending_pages": 0,
  "progress": 67,
  "results": {
    "company_name": {
      "value": "Data Surfer Inc.",
      "confidence": 5,
      "whereFound": "Found at [1]\\n\\nSources:\\n[1] https://data-surfer.com/"
    }
  },
  "message": "Data extracted successfully with 5 pages failed to scrape",
  "created_at": "2025-01-20T10:30:00Z",
  "updated_at": "2025-01-20T10:35:00Z",
  "credits_remaining": 837,
  "credits_used": 18.0
}

Delivery Details

  • Method: HTTP POST
  • Content-Type: application/json
  • Timeout: 30 seconds
  • Retries: Single attempt (no automatic retries currently)
  • User-Agent: Eddie.surf/1.0

Testing Callbacks

Use webhook.site to generate a test URL and see your callback payloads in real-time.

Important: Your callback endpoint must respond with HTTP 2xx status code within 30 seconds to be considered successful.
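
Your callback endpoint only needs to accept a JSON POST and return a 2xx response quickly. A minimal receiver sketch using Flask (Flask is an assumption here, not something the API requires):

from flask import Flask, request

app = Flask(__name__)

@app.route("/webhooks/crawl-complete", methods=["POST"])
def crawl_complete():
    payload = request.get_json()
    # In "once" mode this is the job-level payload with a "sites" array;
    # in "multi" mode it is a single site payload with "site_id" and "results".
    if "sites" in payload:
        for site in payload["sites"]:
            print(site["url"], site["status"])
    else:
        print(payload["url"], payload["status"])
    # Respond quickly with 2xx; do heavy processing asynchronously
    return "", 200

if __name__ == "__main__":
    app.run(port=8000)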

Polling Guide

Efficient strategies for monitoring crawl progress in real-time. Use alone or alongside callbacks for comprehensive monitoring.

Recommended Intervals

Time Period Interval
First 2 minutes Every 10 seconds
Next 5 minutes Every 30 seconds
After 7 minutes Every 60 seconds

Polling Examples

JavaScript

async function pollQueue(job_id) {
  let complete = false;
  const startTime = Date.now();
  
  while (!complete) {
    const response = await fetch(`https://api.eddie.surf/crawl/${job_id}`, {
      headers: { 'X-API-Key': 'your-api-key-here' }
    });
    const data = await response.json();
    
    if (data.status === 'completed') {
      console.log(`✅ Complete! Processing ${data.total_sites} sites`);
      return data.sites || data.results;
    }
    
    if (data.status === 'failed') {
      console.log(`❌ Failed: ${data.message}`);
      return null;
    }
    
    console.log(`🔄 Progress: ${data.progress || 0}%`);
    
    // Dynamic polling interval
    const elapsed = Date.now() - startTime;
    const waitTime = elapsed < 120000 ? 10000 : // 10s first 2min
                     elapsed < 420000 ? 30000 : // 30s next 5min
                     60000; // 60s after that
    
    await new Promise(r => setTimeout(r, waitTime));
  }
}

Python

import time
import requests

def poll_queue(job_id):
    complete = False
    start_time = time.time()
    
    while not complete:
        response = requests.get(f'https://api.eddie.surf/crawl/{job_id}', 
                               headers={'X-API-Key': 'your-api-key-here'})
        data = response.json()
        
        if data['status'] == 'completed':
            print(f"✅ Complete! Processing {data['total_sites']} sites")
            return data.get('sites', data.get('results'))
        
        if data['status'] == 'failed':
            print(f"❌ Failed: {data['message']}")
            return None
        
        print(f"🔄 Progress: {data.get('progress', 0)}%")
        
        # Dynamic polling interval
        elapsed = time.time() - start_time
        if elapsed < 120:  # First 2 minutes
            wait_time = 10
        elif elapsed < 420:  # Next 5 minutes
            wait_time = 30
        else:  # After that
            wait_time = 60
        
        time.sleep(wait_time)

Ruby

require 'net/http'
require 'json'
require 'uri'

def poll_crawl_job(job_id)
  start_time = Time.now
  
  loop do
    begin
      uri = URI("https://api.eddie.surf/crawl/#{job_id}")
      http = Net::HTTP.new(uri.host, uri.port)
      http.use_ssl = true
      
      request = Net::HTTP::Get.new(uri)
      request['X-API-Key'] = 'your-api-key-here'
      request['Content-Type'] = 'application/json'
      
      response = http.request(request)
      raise "HTTP #{response.code}: #{response.message}" unless response.is_a?(Net::HTTPSuccess)
      
      data = JSON.parse(response.body)
      
      case data['status']
      when 'completed'
        completed = data['completed_sites'] || 0
        total = data['total_sites'] || 0
        puts "✅ Job completed! #{completed}/#{total} sites successful"
        return data['sites'] || []
      when 'failed'
        puts "❌ Job failed: #{data['message'] || 'Unknown error'}"
        return nil
      else
        # Processing status
        progress = data['progress'] || 0
        completed = data['completed_sites'] || 0
        total = data['total_sites'] || 0
        puts "🔄 Progress: #{progress}% (#{completed}/#{total} sites)"
      end
      
      # Dynamic polling interval
      elapsed = Time.now - start_time
      wait_time = if elapsed < 120        # First 2 minutes
                    10
                  elsif elapsed < 420     # Next 5 minutes
                    30
                  else                    # After that
                    60
                  end
      
      sleep(wait_time)
      
    rescue => e
      puts "Polling error: #{e}"
      sleep(10)  # Wait 10s on error
    end
  end
end

# Usage
sites = poll_crawl_job(123)
puts "Results: #{sites}" if sites

PHP

<?php
function pollCrawlJob($jobId) {
    $startTime = time();
    
    while (true) {
        try {
            $url = "https://api.eddie.surf/crawl/$jobId";
            $context = stream_context_create([
                'http' => [
                    'header' => "X-API-Key: your-api-key-here\r\n",
                    'method' => 'GET'
                ]
            ]);
            
            $response = file_get_contents($url, false, $context);
            if ($response === false) {
                throw new Exception('HTTP request failed');
            }
            
            $data = json_decode($response, true);
            if ($data === null) {
                throw new Exception('Invalid JSON response');
            }
            
            switch ($data['status']) {
                case 'completed':
                    $completed = $data['completed_sites'] ?? 0;
                    $total = $data['total_sites'] ?? 0;
                    echo "✅ Job completed! $completed/$total sites successful\n";
                    return $data['sites'] ?? [];
                    
                case 'failed':
                    $message = $data['message'] ?? 'Unknown error';
                    echo "❌ Job failed: $message\n";
                    return null;
                    
                default:
                    // Processing status
                    $progress = $data['progress'] ?? 0;
                    $completed = $data['completed_sites'] ?? 0;
                    $total = $data['total_sites'] ?? 0;
                    echo "🔄 Progress: {$progress}% ($completed/$total sites)\n";
            }
            
            // Dynamic polling interval
            $elapsed = time() - $startTime;
            if ($elapsed < 120) {        // First 2 minutes
                $waitTime = 10;
            } elseif ($elapsed < 420) {  // Next 5 minutes
                $waitTime = 30;
            } else {                     // After that
                $waitTime = 60;
            }
            
            sleep($waitTime);
            
        } catch (Exception $e) {
            echo "Polling error: " . $e->getMessage() . "\n";
            sleep(10);  // Wait 10s on error
        }
    }
}

// Usage
$sites = pollCrawlJob(123);
if ($sites) {
    print_r($sites);
}
?>

Go

package main

import (
    "encoding/json"
    "fmt"
    "net/http"
    "time"
)

type JobStatus struct {
    Status         string `json:"status"`
    JobID          int    `json:"job_id"`
    TotalSites     int    `json:"total_sites"`
    CompletedSites int    `json:"completed_sites"`
    Progress       int    `json:"progress"`
    Message        string `json:"message"`
    Sites          []interface{} `json:"sites"`
}

func pollCrawlJob(jobID int) ([]interface{}, error) {
    startTime := time.Now()
    
    for {
        url := fmt.Sprintf("https://api.eddie.surf/crawl/%d", jobID)
        
        client := &http.Client{Timeout: 30 * time.Second}
        // Check the request error before touching req (req is nil on failure)
        req, err := http.NewRequest("GET", url, nil)
        if err != nil {
            return nil, err
        }
        req.Header.Set("X-API-Key", "your-api-key-here")
        req.Header.Set("Content-Type", "application/json")
        
        resp, err := client.Do(req)
        if err != nil {
            fmt.Printf("Polling error: %v\n", err)
            time.Sleep(10 * time.Second)
            continue
        }
        var data JobStatus
        decodeErr := json.NewDecoder(resp.Body).Decode(&data)
        resp.Body.Close() // close explicitly; defer inside the loop would pile up until return
        if decodeErr != nil {
            fmt.Printf("JSON decode error: %v\n", decodeErr)
            time.Sleep(10 * time.Second)
            continue
        }
        
        switch data.Status {
        case "completed":
            fmt.Printf("✅ Job completed! %d/%d sites successful\n", 
                data.CompletedSites, data.TotalSites)
            return data.Sites, nil
            
        case "failed":
            fmt.Printf("❌ Job failed: %s\n", data.Message)
            return nil, fmt.Errorf("job failed: %s", data.Message)
            
        default:
            // Processing status
            fmt.Printf("🔄 Progress: %d%% (%d/%d sites)\n", 
                data.Progress, data.CompletedSites, data.TotalSites)
        }
        
        // Dynamic polling interval
        elapsed := time.Since(startTime)
        var waitTime time.Duration
        if elapsed < 2*time.Minute {        // First 2 minutes
            waitTime = 10 * time.Second
        } else if elapsed < 7*time.Minute { // Next 5 minutes
            waitTime = 30 * time.Second
        } else {                            // After that
            waitTime = 60 * time.Second
        }
        
        time.Sleep(waitTime)
    }
}

func main() {
    sites, err := pollCrawlJob(123)
    if err != nil {
        fmt.Printf("Error: %v\n", err)
        return
    }
    
    fmt.Printf("Results: %+v\n", sites)
}

Bash

#!/bin/bash

poll_queue() {
    local job_id=$1
    local start_time=$(date +%s)
    
    while true; do
        response=$(curl -s -H "X-API-Key: your-api-key-here" "https://api.eddie.surf/crawl/${job_id}")
        status=$(echo "$response" | jq -r '.status')
        
        if [ "$status" = "completed" ]; then
            total=$(echo "$response" | jq -r '.total_sites')
            echo "✅ Complete! Processing ${total} sites"
            echo "$response" | jq '.sites // .results'
            break
        fi
        
        if [ "$status" = "failed" ]; then
            message=$(echo "$response" | jq -r '.message')
            echo "❌ Failed: ${message}"
            break
        fi
        
        progress=$(echo "$response" | jq -r '.progress // 0')
        echo "🔄 Progress: ${progress}%"
        
        # Dynamic polling interval
        current_time=$(date +%s)
        elapsed=$((current_time - start_time))
        
        if [ $elapsed -lt 120 ]; then
            sleep 10
        elif [ $elapsed -lt 420 ]; then
            sleep 30
        else
            sleep 60
        fi
    done
}

poll_queue "YOUR_JOB_ID"
Rate Limiting: Don't poll faster than every 5 seconds to avoid rate limits.

Error Codes

Code Description
400 Bad Request - Invalid parameters
429 Rate Limited - Too many requests
500 Internal Error - Contact support
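
A small Python sketch for handling these codes on the client side, retrying only on 429 with a simple backoff (the retry count and delays are arbitrary choices, not API requirements):

import time
import requests

def get_with_retry(url, api_key, max_retries=5):
    """GET with basic retry on 429; other errors are raised to the caller."""
    for attempt in range(max_retries):
        resp = requests.get(url, headers={"X-API-Key": api_key})
        if resp.status_code == 429:
            # Rate limited: back off before trying again
            time.sleep(2 ** attempt)
            continue
        resp.raise_for_status()  # raises on 400 / 500 style errors
        return resp.json()
    raise RuntimeError("Still rate limited after retries")

# Usage
# data = get_with_retry("https://api.eddie.surf/crawl/123", "your-api-key-here")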