Eddie.surf API

AI-powered web crawling API that extracts structured data from websites

Base URL: https://api.eddie.surf

Quick Start

Get started in 2 minutes with basic company data extraction:

cURL

# Submit crawl job
curl -X POST https://api.eddie.surf/crawl \
  -H "X-API-Key: your-api-key-here" \
  -H "Content-Type: application/json" \
  -d '{
  "urls": ["https://data-surfer.com"],
  "context": {"purpose": "Company research"},
  "json": {
    "company_name": {
      "type": "string",
      "description": "Company name"
    }
  }
}'

JavaScript

const response = await fetch('https://api.eddie.surf/crawl', {
  method: 'POST',
  headers: { 
    'X-API-Key': 'your-api-key-here',
    'Content-Type': 'application/json'
  },
  body: JSON.stringify({
    urls: ['https://data-surfer.com'],
    context: {purpose: 'Company research'},
    json: {
      company_name: {
        type: 'string',
        description: 'Company name'
      }
    }
  })
});

const data = await response.json();
console.log(data);

Python

import requests

response = requests.post(
    'https://api.eddie.surf/crawl',
    headers={'X-API-Key': 'your-api-key-here'},
    json={
        'urls': ['https://data-surfer.com'],
        'context': {'purpose': 'Company research'},
        'json': {
            'company_name': {
                'type': 'string',
                'description': 'Company name'
            }
        }
    }
)

data = response.json()
print(data)

Ruby

require 'net/http'
require 'json'
require 'uri'

uri = URI('https://api.eddie.surf/crawl')
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true

request = Net::HTTP::Post.new(uri)
request['X-API-Key'] = 'your-api-key-here'
request['Content-Type'] = 'application/json'
request.body = {
  urls: ['https://data-surfer.com'],
  context: { purpose: 'Company research' },
  json: {
    company_name: {
      type: 'string',
      description: 'Company name'
    }
  }
}.to_json

response = http.request(request)
data = JSON.parse(response.body)
puts data

PHP

<?php
$url = 'https://api.eddie.surf/crawl';
$data = [
    'urls' => ['https://data-surfer.com'],
    'context' => ['purpose' => 'Company research'],
    'json' => [
        'company_name' => [
            'type' => 'string',
            'description' => 'Company name'
        ]
    ]
];

$options = [
    'http' => [
        'header' => "X-API-Key: your-api-key-here\r\n" .
                   "Content-Type: application/json\r\n",
        'method' => 'POST',
        'content' => json_encode($data)
    ]
];

$context = stream_context_create($options);
$response = file_get_contents($url, false, $context);
$result = json_decode($response, true);
print_r($result);
?>

Go

package main

import (
    "bytes"
    "encoding/json"
    "fmt"
    "net/http"
)

func main() {
    url := "https://api.eddie.surf/crawl"
    
    payload := map[string]interface{}{
        "urls":    []string{"https://data-surfer.com"},
        "context": map[string]string{"purpose": "Company research"},
        "json": map[string]interface{}{
            "company_name": map[string]string{
                "type":        "string",
                "description": "Company name",
            },
        },
    }
    
    jsonData, _ := json.Marshal(payload)
    
    req, _ := http.NewRequest("POST", url, bytes.NewBuffer(jsonData))
    req.Header.Set("X-API-Key", "your-api-key-here")
    req.Header.Set("Content-Type", "application/json")
    
    client := &http.Client{}
    resp, err := client.Do(req)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()
    
    var result map[string]interface{}
    json.NewDecoder(resp.Body).Decode(&result)
    fmt.Println(result)
}

Authentication

All API endpoints require authentication using API keys. Include your API key in the X-API-Key header with every request.

API Key Usage

HTTP Header
X-API-Key: your-api-key-here

Rate Limits

  • Rate Limit: 60 requests per minute (default)
Getting API Keys: API keys are currently managed manually. Contact support for access.
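
The default limit works out to one request per second on average. A minimal client-side throttle, assuming the default 60 requests per minute shown above, could look like this Python sketch:

import time

class Throttle:
    """Naive client-side throttle for the default 60 requests/minute limit."""

    def __init__(self, requests_per_minute=60):
        self.min_interval = 60.0 / requests_per_minute  # seconds between requests
        self.last_request = 0.0

    def wait(self):
        # Sleep just long enough to keep the average rate under the limit
        elapsed = time.time() - self.last_request
        if elapsed < self.min_interval:
            time.sleep(self.min_interval - elapsed)
        self.last_request = time.time()

# Usage: call throttle.wait() before each API request
throttle = Throttle()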

API Workflow

Eddie.surf uses a simple two-step async process for web crawling jobs.

Choose Your Endpoint

  • /crawl: For 1-199 URLs with standard processing
  • /crawl-batch: For 200+ URLs with optimized batch processing

Two Simple Steps

  1. Submit: POST to /crawl or /crawl-batch → Get job ID instantly
  2. Poll or Wait: Either poll /crawl/{job_id} or wait for callback notification
/crawl/{job_id} returns:
  • Processing status and progress while running
  • Complete results when finished

Monitoring Options

  • Callback (Recommended): Provide a callback_url and receive notifications when complete (per site or as grouped array)
  • Polling: Repeatedly check /crawl/{job_id} for status updates
  • Both: Use callbacks for automatic notifications AND polling for real-time progress monitoring
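
Putting the two steps together, here is a minimal Python sketch that submits a /crawl job and polls /crawl/{job_id} until it finishes (the schema and the fixed 10-second interval are illustrative; see the Polling Guide for the recommended backoff schedule):

import time
import requests

API_KEY = "your-api-key-here"
BASE = "https://api.eddie.surf"

# Step 1: submit the job; the immediate response carries the job ID
submitted = requests.post(
    f"{BASE}/crawl",
    headers={"X-API-Key": API_KEY},
    json={
        "urls": ["https://data-surfer.com"],
        "context": {"purpose": "Company research"},
        "json": {"company_name": {"type": "string", "description": "Company name"}},
    },
).json()
job_id = submitted["job_id"]

# Step 2: poll until the job reports "completed" or "failed"
while True:
    job = requests.get(f"{BASE}/crawl/{job_id}", headers={"X-API-Key": API_KEY}).json()
    if job["status"] in ("completed", "failed"):
        break
    time.sleep(10)  # fixed interval for brevity

print(job["status"], job.get("sites"))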

Automatic Processing

  • HTML extraction with advanced scraping
  • AI link discovery and prioritization
  • Content analysis with Claude Sonnet 4
  • Structured data extraction
  • Notifications on completion

POST /crawl

Submit a crawling job for 1-199 URLs. Returns immediately with job ID.

💰 Credit Cost: 1.5 credits per page processed (e.g., 100 pages = 150 credits)

Request Body

Parameter Type Required Description
urls string[] Required Array of URLs to crawl (1-199)
context object Required Background information to guide extraction

Purpose: Provides background context about your perspective and why you're extracting this data.

Examples:

{ "user_role": "Head of Sales", "purpose": "Researching prospects for outbound campaigns", "target_market": "B2B SaaS companies with 10-500 employees", "focus": "Finding contact info and company size" }
{ "purpose": "Academic research on pricing models", "research_focus": "SaaS subscription tiers and pricing", "institution": "Stanford Business School", "use_case": "MBA thesis on freemium strategies" }

Impact: Better context = AI prioritizes the right pages and extracts more relevant data for your specific use case.

json object Required Schema defining what data to extract

Purpose: Defines the structure and fields you want extracted from crawled pages.

Basic Example:

{ "company_name": { "type": "string", "description": "Full legal company name" }, "employee_count": { "type": "number", "description": "Number of employees as integer" }, "has_pricing_page": { "type": "boolean", "description": "Whether the company has a dedicated pricing page" } }

Advanced Example:

{ "pricing_tiers": { "type": "array", "description": "List of subscription plans and prices", "priority": "high", "additional_guidelines": [ "Include both monthly and annual pricing if available", "Extract feature differences between tiers" ] }, "website_quality_score": { "type": "score", "description": "Overall quality and professionalism of the website", "additional_guidelines": [ "Score 1-10 based on: has pricing page, contact info available, recent blog posts, professional design", "Higher scores for clear pricing, multiple contact methods, and recent content", "Lower scores for broken links, missing contact info, or outdated content" ] }, "uses_modern_stack": { "type": "boolean", "description": "Whether company uses modern development technologies" }, "founded_year": { "type": "number", "description": "Year the company was founded as integer" } }

Data Types: string, number, boolean, array, score

Field Properties: Each field can have type, description, priority, and additional_guidelines.

max_depth integer Optional Link levels to follow (1-10), default: 3
max_pages integer Optional Maximum pages to crawl (1-1000), default: 15
callback_url string Optional Callback URL for notifications
callback_mode string Optional "once" (default) = one callback when all complete, "multi" = callback per site
timeout_per_page integer Optional Timeout seconds per page (1-180), default: 30
rules string[] Optional Custom processing instructions

Purpose: Provide specific instructions to guide how the AI processes and extracts data.

Examples:

{ "rules": [ "Focus on recent content from the last 2 years", "Skip footer and sidebar content", "Prioritize content from /about, /team, and /contact pages", "For pricing, extract both listed prices and any promotional discounts" ] }

Output Formatting Rules:

{ "rules": [ "Translate all extracted text to English in the output", "Convert all prices to USD format", "Format dates as YYYY-MM-DD", "Standardize company names to include legal entity (Inc, LLC, etc.)" ] }

Content Filtering Rules:

{ "rules": [ "Ignore job board and career page links", "Skip cookie policies and legal disclaimers", "Extract technology mentions from blog posts and case studies", "Look for partnership and integration information" ] }

Impact: Rules help the AI avoid irrelevant content and focus on what matters most for your specific extraction goals.

mock boolean Optional Test mode - generates fake data without using credits
include_technical boolean Optional Collect technical data (DNS, headers) for the main URL - costs 1 credit

Purpose: Collects comprehensive technical information about the website's infrastructure and domain registration.

Data Collected:

  • DNS Records: A, AAAA, MX, TXT, NS records for the domain
  • HTTP Headers: Response headers from the homepage request via ScrapingFish

Cost: 1 credit per site (charged before crawling the first page)

Use Cases: Technical due diligence, security analysis, infrastructure research
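
To show how the optional parameters above combine with the required ones, here is a Python sketch of a fuller /crawl request; every value is illustrative, and the callback URL is a placeholder:

import requests

payload = {
    "urls": ["https://data-surfer.com"],
    "context": {"purpose": "Company research"},
    "json": {"company_name": {"type": "string", "description": "Company name"}},
    # Optional parameters described above; all values here are illustrative
    "max_depth": 2,                     # follow links up to 2 levels deep
    "max_pages": 25,                    # stop after 25 pages per site
    "callback_url": "https://yoursite.com/webhooks/crawl-complete",
    "callback_mode": "once",            # one callback when the whole job finishes
    "timeout_per_page": 60,             # seconds per page
    "rules": ["Skip footer and sidebar content"],
    "mock": False,                      # set True to test without spending credits
    "include_technical": True,          # collect DNS records and HTTP headers (+1 credit per site)
}

response = requests.post(
    "https://api.eddie.surf/crawl",
    headers={"X-API-Key": "your-api-key-here"},
    json=payload,
)
print(response.json()["job_id"])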

JSON Schema Fields

Each field in the json object supports these properties:

Property Type Required Description
type string Optional "string", "number", "boolean", "array", "score"
description string Optional What this field represents

Purpose: Provides context to the AI about exactly what information to extract.

Best Practices:

  • Be specific: "Primary business email address" vs "email"
  • Include format preferences: "Founded year as 4-digit number"
  • Specify source preference: "Company description from About page"
  • Define scope: "Number of full-time employees, excluding contractors"

Examples:

"description": "Primary contact email address for business inquiries" "description": "Annual recurring revenue in USD millions" "description": "List of software development technologies used"
priority string Optional "high", "medium", "low"
additional_guidelines string[] Optional Specific extraction instructions

Purpose: Provide field-specific instructions to refine extraction quality and consistency.

Common Use Cases:

  • Format standardization: "Extract as 4-digit year only"
  • Source preferences: "Prefer About page over footer contact info"
  • Filtering rules: "Exclude job postings from employee count"
  • Scoring criteria: "Score based on design quality, content freshness, and ease of navigation"

Examples by Field Type:

"email": { "additional_guidelines": [ "Prefer general business emails over personal ones", "Look for contact@, info@, or hello@ addresses first" ] } "technologies": { "additional_guidelines": [ "Include both programming languages and frameworks", "Extract from job postings and technical blog posts", "Focus on current tech stack, not legacy mentions" ] } "company_quality_score": { "additional_guidelines": [ "Factor in: website design, contact information availability, recent content updates", "Higher scores for clear pricing and professional presentation", "Lower scores for broken links or outdated copyright years" ] }

Impact: Helps achieve consistent, high-quality extractions that match your specific requirements.

Initial Response

{
  "status": "success",
  "job_id": 123,
  "total_sites": 2,
  "sites": [
    {
      "site_id": 456,
      "url": "https://data-surfer.com"
    },
    {
      "site_id": 457,
      "url": "https://eddie.surf"
    }
  ],
  "message": "Created crawl job 123 with 2 sites",
  "credits_remaining": 855,
  "credits_used": 0
}

POST /crawl-batch

Process large numbers of domains efficiently with batch processing and optimized AI inference.

💰 Credit Cost: 1.0 credits per page processed (e.g., 100 pages = 100 credits)
Batch vs Individual Processing:
  • /crawl: Multiple URLs = 1 job with multiple sites = 1 callback with array of site results
  • /crawl-batch: 200+ URLs = 1 job with multiple sites = 1 callback with array of site results

Key Differences from Individual /crawl

  • Minimum 200 URLs required - Designed for large-scale data collection
  • Single job ID - All URLs processed as one job with individual sites
  • Optimized AI processing - Uses batch inference for cost efficiency
  • Site-organized results - Individual synthesis per site in response array
  • Single callback - One notification when all domains complete

Request Parameters

Same parameters as /crawl with these differences:

Parameter Type Required Batch Requirement Description
urls string[] Required Minimum 200 unique URLs Array of domains to process
max_pages integer Optional Default: 1000 (higher than individual) Maximum pages per domain

Use Cases

  • Lead Generation: Extract contact info from 500+ company websites
  • Market Research: Analyze industry data across hundreds of competitors
  • Data Migration: Extract structured data from legacy websites in bulk
  • Compliance Audits: Check privacy policies across large website portfolios
Important: Each domain gets individual synthesis - data is never mixed between domains. The "whereFound" citations only reference pages from that specific domain.
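
A minimal Python sketch of a batch submission, assuming your 200+ unique URLs sit in a local file (file name, context, and schema are illustrative):

import requests

# Load at least 200 unique URLs, one per line (file name is illustrative)
with open("domains.txt") as f:
    urls = list({line.strip() for line in f if line.strip()})

assert len(urls) >= 200, "/crawl-batch requires a minimum of 200 unique URLs"

response = requests.post(
    "https://api.eddie.surf/crawl-batch",
    headers={"X-API-Key": "your-api-key-here"},
    json={
        "urls": urls,
        "context": {"purpose": "Lead generation research"},
        "json": {
            "contact_email": {"type": "string", "description": "Primary business contact email"}
        },
        # Single callback fires once all domains finish (placeholder URL)
        "callback_url": "https://yoursite.com/webhooks/batch-complete",
    },
)
print(response.json())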

GET /crawl/{job_id}

Check job status and retrieve results from a single endpoint that adapts based on processing state.

# Check status
curl -H "X-API-Key: your-api-key-here" https://api.eddie.surf/crawl/123
This endpoint automatically returns:
  • While processing: Status and progress info
  • When complete: Full crawl results and data

Status Logic

  • "completed": Any data from your JSON schema was extracted successfully. If some pages failed during crawling, this is noted in the message field
  • "failed": No data from your schema was extracted, with error details explaining why
  • "processing": Still crawling or processing pages

Note: The API never returns "partial" status. Jobs with successful data extraction are always marked as "completed" with additional context about any failed pages.

Response Examples

{
  "status": "processing",
  "job_id": 123,
  "total_sites": 3,
  "completed_sites": 1,
  "processing_sites": 1,
  "failed_sites": 0,
  "credits_remaining": 847,
  "credits_used": 7.5
}
{
  "status": "completed",
  "job_id": 123,
  "total_sites": 3,
  "completed_sites": 2,
  "failed_sites": 1,
  "processing_sites": 0,
  "credits_remaining": 835,
  "credits_used": 19.5,
  "sites": [
    {
      "site_id": 456,
      "url": "https://data-surfer.com",
      "status": "completed",
      "completed_pages": 5,
      "failed_pages": 0,
      "pending_pages": 0,
      "results": {
        "company_name": {
          "value": "Data Surfer Inc.",
          "confidence": 5,
          "whereFound": "Found at [1]\\n\\nSources:\\n[1] https://data-surfer.com/"
        }
      },
      "created_at": "2025-01-20T10:30:00Z",
      "updated_at": "2025-01-20T10:35:00Z"
    },
    {
      "site_id": 457,
      "url": "https://eddie.surf",
      "status": "completed",
      "completed_pages": 4,
      "failed_pages": 0,
      "pending_pages": 0,
      "results": {
        "company_name": {
          "value": "Eddie.surf Inc.",
          "confidence": 5,
          "whereFound": "Found at [1]\\n\\nSources:\\n[1] https://eddie.surf/"
        }
      },
      "created_at": "2025-01-20T10:30:00Z",
      "updated_at": "2025-01-20T10:36:00Z"
    },
    {
      "site_id": 458,
      "url": "https://broken-site.com",
      "status": "failed",
      "completed_pages": 0,
      "failed_pages": 3,
      "pending_pages": 0,
      "message": "All pages failed during scraping",
      "created_at": "2025-01-20T10:30:00Z",
      "updated_at": "2025-01-20T10:31:00Z"
    }
  ],
  "created_at": "2025-01-20T10:30:00Z",
  "updated_at": "2025-01-20T10:36:00Z"
}
{
  "status": "failed",
  "job_id": 124,
  "total_sites": 1,
  "completed_sites": 0,
  "failed_sites": 1,
  "processing_sites": 0,
  "credits_remaining": 851,
  "credits_used": 4.5,
  "sites": [
    {
      "site_id": 459,
      "url": "https://broken-site.com",
      "status": "failed",
      "completed_pages": 0,
      "failed_pages": 3,
      "pending_pages": 0,
      "created_at": "2025-01-20T11:00:00Z",
      "updated_at": "2025-01-20T11:01:00Z"
    }
  ],
  "created_at": "2025-01-20T11:00:00Z",
  "updated_at": "2025-01-20T11:01:00Z"
}

GET /crawl/{job_id}/{site_id}

Get individual site status and results within a job.

# Check individual site status
curl -H "X-API-Key: your-api-key-here" https://api.eddie.surf/crawl/123/456

Returns detailed status and results for a single site within a job. Useful for monitoring progress on specific URLs within a larger job.

{
  "status": "processing",
  "job_id": 123,
  "site_id": 456,
  "url": "https://data-surfer.com",
  "completed_pages": 5,
  "failed_pages": 1,
  "pending_pages": 2,
  "progress": 75,
  "message": "5 pages completed, 2 pending, 1 failed"
}
{
  "status": "completed",
  "job_id": 123,
  "site_id": 456,
  "url": "https://data-surfer.com",
  "completed_pages": 7,
  "failed_pages": 0,
  "pending_pages": 0,
  "progress": 100,
  "results": {
    "company_name": {
      "value": "Data Surfer Inc.",
      "confidence": 5,
      "whereFound": "Found at [1]\\n\\nSources:\\n[1] https://data-surfer.com/"
    }
  },
  "message": "Data extracted successfully",
  "created_at": "2025-01-20T10:30:00Z",
  "updated_at": "2025-01-20T10:35:00Z",
  "credits_remaining": 844,
  "credits_used": 10.5
}
{
  "status": "failed",
  "job_id": 123,
  "site_id": 456,
  "url": "https://data-surfer.com",
  "completed_pages": 0,
  "failed_pages": 3,
  "pending_pages": 0,
  "progress": 0,
  "message": "All pages failed during scraping. Sample errors: HTTP 404 Not Found; Connection timeout",
  "credits_remaining": 851,
  "credits_used": 4.5
}
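
Once a site reports "completed", the extracted fields live under results, keyed by your schema field names. A small Python sketch for reading them out of the per-site response shown above:

import requests

resp = requests.get(
    "https://api.eddie.surf/crawl/123/456",
    headers={"X-API-Key": "your-api-key-here"},
).json()

if resp["status"] == "completed":
    # Each field carries the extracted value, a confidence score, and source citations
    for field, extraction in resp["results"].items():
        print(field, "=", extraction["value"])
        print("  confidence:", extraction["confidence"])
        print("  sources:", extraction["whereFound"])
elif resp["status"] == "failed":
    print("Site failed:", resp.get("message"))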

POST /smart-search

Find websites using AI-powered search with intelligent filtering and quality control.

💰 Credit Cost: 1 credit for first 10 results + 1 credit per additional 10 results (e.g., 30 results = 3 credits).
Credits calculated on final quality-controlled results.
curl -X POST https://api.eddie.surf/smart-search \
  -H "X-API-Key: your-api-key-here" \
  -H "Content-Type: application/json" \
  -d '{
    "query": "game development studios in San Francisco",
    "max_results": 30,
    "website_only": true,
    "skip_duplicate_domains": true,
    "context": {
      "intent": "find_businesses",
      "location": "San Francisco",
      "business_type": "game development studios"
    },
    "rules": [
      "Include gaming subsidiaries of larger companies",
      "Improve subpage URLs to root domains"
    ],
    "additional_guidelines": [
      "Focus on companies with actual SF presence",
      "Exclude educational institutions"
    ]
  }'

Request Parameters

Parameter Type Required Description
query string Required The search query. Can be simple ("project management software") or complex ("best CRM for startups under $50/month")
max_results integer Optional Maximum number of results to return (1-5000). Default: 10
website_only boolean Optional Only return results with valid website URLs. Default: false
skip_duplicate_domains boolean Optional Skip results from domains already seen. Highly recommended for product/service searches. Default: false
callback_url string Optional URL to receive a POST notification when the job completes. Callback includes all search results and job metadata.
context object Optional JSON object providing search context (e.g., {"intent": "find_businesses", "location": "San Francisco"})
rules string[] Optional Array of search rules (e.g., ["Include subsidiaries", "Improve URLs to root domains"])
additional_guidelines string[] Optional Array of additional guidelines for AI processing (e.g., ["Focus on companies with SF offices", "Exclude educational institutions"])

Key Features

  • Uses multiple search services
  • Smart filtering based on search intent
  • Quality control removes irrelevant results
  • Continues searching until target result count is met

Search Examples

Product/Service Search (with deduplication):
{
  "query": "project management software",
  "max_results": 25,
  "website_only": true,
  "skip_duplicate_domains": true
}

Returns actual PM tools like Asana, Monday.com, ClickUp - not articles about them

Local Business Search:
{
  "query": "italian restaurant chicago",
  "max_results": 20
}

Returns actual restaurants with addresses and phone numbers

Academic Research:
{
  "query": "climate change research papers 2025",
  "max_results": 15
}

Returns actual research papers with citations and publication info

With Callback Notification:
{
  "query": "email marketing platforms",
  "max_results": 25,
  "website_only": true,
  "callback_url": "https://yoursite.com/webhooks/search-complete"
}

Your callback URL will receive a POST request when the search completes with all results

Response Format

{
  "status": "success",
  "job_id": 123,
  "max_results": 30,
  "query": "crm software",
  "message": "Created smart search job 123",
  "credits_remaining": "1250.0",
  "credits_used": 1
}

GET /smart-search/{job_id}

Check search status and retrieve results.

curl -H "X-API-Key: your-api-key-here" \
  https://api.eddie.surf/smart-search/123

Response While Processing

{
  "status": "processing",
  "job_id": 123,
  "total_results": 15,
  "created_at": "2025-01-20T10:00:00.000Z",
  "updated_at": "2025-01-20T10:00:05.000Z"
}

Response When Complete

{
  "status": "complete",
  "job_id": 123,
  "total_results": 30,
  "results": [
    {
      "name": "Data Surfer",
      "link": "https://data-surfer.com",
      "snippet": "World's #1 Lead Generation system...",
      "address": null,
      "phone": null,
      "meta_data": {
        "rating": 4.5,
        "review_count": 1250
      }
    }
    // ... more results
  ],
  "created_at": "2025-01-20T10:00:00.000Z",
  "updated_at": "2025-01-20T10:00:25.000Z",
  "completed_at": "2025-01-20T10:00:25.000Z"
}
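
Smart search pairs naturally with /crawl: once a search job completes, the result links can be fed straight into a crawl job. A rough Python sketch (query, context, and schema are illustrative):

import time
import requests

API_KEY = "your-api-key-here"
BASE = "https://api.eddie.surf"
headers = {"X-API-Key": API_KEY}

# 1. Submit a smart search and wait for it to complete
search_job = requests.post(
    f"{BASE}/smart-search",
    headers=headers,
    json={
        "query": "crm software",
        "max_results": 20,
        "website_only": True,
        "skip_duplicate_domains": True,
    },
).json()["job_id"]

while True:
    search = requests.get(f"{BASE}/smart-search/{search_job}", headers=headers).json()
    if search["status"] != "processing":
        break
    time.sleep(10)

# 2. Feed the discovered websites into a crawl job
links = [r["link"] for r in search.get("results", []) if r.get("link")]
crawl = requests.post(
    f"{BASE}/crawl",
    headers=headers,
    json={
        "urls": links,
        "context": {"purpose": "Evaluate CRM vendors"},
        "json": {"company_name": {"type": "string", "description": "Company name"}},
    },
).json()
print("Crawl job:", crawl["job_id"])
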
Tips for Best Results:
  • Always use skip_duplicate_domains: true for product/service searches to avoid multiple results for the same company (e.g., a coffee shop search returning several Starbucks locations)
  • Use website_only: true to ensure all results have clickable links
  • Be specific in your query - "accounting software for small business" yields better results than just "accounting"
  • The AI automatically optimizes your query for the best results

Callbacks

Receive automatic notifications when your crawl jobs complete via callbacks.

Setup

Include a callback_url parameter in your POST /crawl request to receive notifications.

Callback Modes

Control how callbacks are delivered using the callback_mode parameter:

  • "once" (default): Single callback with all sites as an array when job completes
  • "multi": Individual callback per site as each completes
Output Differences:
  • Once Mode: 3 URLs = 1 callback with job-level data and sites array
  • Multi Mode: 3 URLs = 3 separate callbacks, each with individual site data
  • Batch Mode: Always uses "once" mode only

Callback Examples

// callback_mode: "once" → Single callback when all sites complete
// Matches GET /crawl/{job_id} response format
{
  "status": "completed",
  "job_id": 123,
  "total_sites": 3,
  "completed_sites": 2,
  "failed_sites": 1,
  "processing_sites": 0,
  "sites": [
    {
      "site_id": 456,
      "url": "https://data-surfer.com",
      "status": "completed",
      "completed_pages": 10,
      "failed_pages": 5,
      "pending_pages": 0,
      "results": {
        "company_name": {
          "value": "Data Surfer Inc.",
          "confidence": 5,
          "whereFound": "Found at [1]\\n\\nSources:\\n[1] https://data-surfer.com/"
        }
      },
      "created_at": "2025-01-20T10:30:00Z",
      "updated_at": "2025-01-20T10:35:00Z"
    },
    {
      "site_id": 457,
      "url": "https://eddie.surf",
      "status": "completed",
          "completed_pages": 8,
      "failed_pages": 0,
      "pending_pages": 0,
      "results": {
        "company_name": {
          "value": "Eddie.surf Inc.",
          "confidence": 5,
          "whereFound": "Found at [1]\\n\\nSources:\\n[1] https://eddie.surf/"
        }
      },
      "created_at": "2025-01-20T10:30:00Z",
      "updated_at": "2025-01-20T10:36:00Z"
    },
    {
      "site_id": 458,
      "url": "https://broken-site.com",
      "status": "failed",
      "completed_pages": 0,
      "failed_pages": 1,
      "pending_pages": 0,
      "results": null,
      "created_at": "2025-01-20T10:30:00Z",
      "updated_at": "2025-01-20T10:32:00Z"
    }
  ],
  "created_at": "2025-01-20T10:30:00Z",
  "updated_at": "2025-01-20T10:36:00Z",
  "credits_remaining": 826,
  "credits_used": 28.5
}
// callback_mode: "multi" → 3 separate callbacks as each site completes
// Matches GET /crawl/{job_id}/{site_id} response format
{
  "status": "completed",
  "job_id": 123,
  "site_id": 456,
  "url": "https://data-surfer.com",
  "completed_pages": 10,
  "failed_pages": 5,
  "pending_pages": 0,
  "progress": 67,
  "results": {
    "company_name": {
      "value": "Data Surfer Inc.",
      "confidence": 5,
      "whereFound": "Found at [1]\\n\\nSources:\\n[1] https://data-surfer.com/"
    }
  },
  "message": "Data extracted successfully with 5 pages failed to scrape",
  "created_at": "2025-01-20T10:30:00Z",
  "updated_at": "2025-01-20T10:35:00Z",
  "credits_remaining": 837,
  "credits_used": 18.0
}

Delivery Details

  • Method: HTTP POST
  • Content-Type: application/json
  • Timeout: 30 seconds
  • Retries: Single attempt (no automatic retries currently)
  • User-Agent: Eddie.surf/1.0

Testing Callbacks

Use webhook.site to generate a test URL and see your callback payloads in real-time.

Important: Your callback endpoint must respond with HTTP 2xx status code within 30 seconds to be considered successful.
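
Your callback endpoint only needs to accept a JSON POST and return a 2xx response quickly. A minimal receiver sketch using Flask (Flask is an assumption here, not something the API requires):

from flask import Flask, request

app = Flask(__name__)

@app.route("/webhooks/crawl-complete", methods=["POST"])
def crawl_complete():
    payload = request.get_json()
    # In "once" mode this is the job-level payload with a "sites" array;
    # in "multi" mode it is a single site payload with "site_id" and "results".
    if "sites" in payload:
        for site in payload["sites"]:
            print(site["url"], site["status"])
    else:
        print(payload["url"], payload["status"])
    # Respond quickly with 2xx; do heavy processing asynchronously
    return "", 200

if __name__ == "__main__":
    app.run(port=8000)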

Polling Guide

Efficient strategies for monitoring crawl progress in real-time. Use alone or alongside callbacks for comprehensive monitoring.

Recommended Intervals

Time Period Interval
First 2 minutes Every 10 seconds
Next 5 minutes Every 30 seconds
After 7 minutes Every 60 seconds

Polling Examples

JavaScript

async function pollQueue(job_id) {
  let complete = false;
  const startTime = Date.now();
  
  while (!complete) {
    const response = await fetch(`https://api.eddie.surf/crawl/${job_id}`, {
      headers: { 'X-API-Key': 'your-api-key-here' }
    });
    const data = await response.json();
    
    if (data.status === 'completed') {
      console.log(`✅ Complete! Processing ${data.total_sites} sites`);
      return data.sites || data.results;
    }
    
    if (data.status === 'failed') {
      console.log(`❌ Failed: ${data.message}`);
      return null;
    }
    
    console.log(`🔄 Progress: ${data.progress || 0}%`);
    
    // Dynamic polling interval
    const elapsed = Date.now() - startTime;
    const waitTime = elapsed < 120000 ? 10000 : // 10s first 2min
                     elapsed < 420000 ? 30000 : // 30s next 5min
                     60000; // 60s after that
    
    await new Promise(r => setTimeout(r, waitTime));
  }
}

Python

import time
import requests

def poll_queue(job_id):
    complete = False
    start_time = time.time()
    
    while not complete:
        response = requests.get(f'https://api.eddie.surf/crawl/{job_id}', 
                               headers={'X-API-Key': 'your-api-key-here'})
        data = response.json()
        
        if data['status'] == 'completed':
            print(f"✅ Complete! Processing {data['total_sites']} sites")
            return data.get('sites', data.get('results'))
        
        if data['status'] == 'failed':
            print(f"❌ Failed: {data['message']}")
            return None
        
        print(f"🔄 Progress: {data.get('progress', 0)}%")
        
        # Dynamic polling interval
        elapsed = time.time() - start_time
        if elapsed < 120:  # First 2 minutes
            wait_time = 10
        elif elapsed < 420:  # Next 5 minutes
            wait_time = 30
        else:  # After that
            wait_time = 60
        
        time.sleep(wait_time)

Ruby

require 'net/http'
require 'json'
require 'uri'

def poll_crawl_job(job_id)
  start_time = Time.now
  
  loop do
    begin
      uri = URI("https://api.eddie.surf/crawl/#{job_id}")
      http = Net::HTTP.new(uri.host, uri.port)
      http.use_ssl = true
      
      request = Net::HTTP::Get.new(uri)
      request['X-API-Key'] = 'your-api-key-here'
      request['Content-Type'] = 'application/json'
      
      response = http.request(request)
      raise "HTTP #{response.code}: #{response.message}" unless response.is_a?(Net::HTTPSuccess)
      
      data = JSON.parse(response.body)
      
      case data['status']
      when 'completed'
        completed = data['completed_sites'] || 0
        total = data['total_sites'] || 0
        puts "✅ Job completed! #{completed}/#{total} sites successful"
        return data['sites'] || []
      when 'failed'
        puts "❌ Job failed: #{data['message'] || 'Unknown error'}"
        return nil
      else
        # Processing status
        progress = data['progress'] || 0
        completed = data['completed_sites'] || 0
        total = data['total_sites'] || 0
        puts "🔄 Progress: #{progress}% (#{completed}/#{total} sites)"
      end
      
      # Dynamic polling interval
      elapsed = Time.now - start_time
      wait_time = if elapsed < 120        # First 2 minutes
                    10
                  elsif elapsed < 420     # Next 5 minutes
                    30
                  else                    # After that
                    60
                  end
      
      sleep(wait_time)
      
    rescue => e
      puts "Polling error: #{e}"
      sleep(10)  # Wait 10s on error
    end
  end
end

# Usage
sites = poll_crawl_job(123)
puts "Results: #{sites}" if sites

PHP

<?php
function pollCrawlJob($jobId) {
    $startTime = time();
    
    while (true) {
        try {
            $url = "https://api.eddie.surf/crawl/$jobId";
            $context = stream_context_create([
                'http' => [
                    'header' => "X-API-Key: your-api-key-here\r\n",
                    'method' => 'GET'
                ]
            ]);
            
            $response = file_get_contents($url, false, $context);
            if ($response === false) {
                throw new Exception('HTTP request failed');
            }
            
            $data = json_decode($response, true);
            if ($data === null) {
                throw new Exception('Invalid JSON response');
            }
            
            switch ($data['status']) {
                case 'completed':
                    $completed = $data['completed_sites'] ?? 0;
                    $total = $data['total_sites'] ?? 0;
                    echo "✅ Job completed! $completed/$total sites successful\n";
                    return $data['sites'] ?? [];
                    
                case 'failed':
                    $message = $data['message'] ?? 'Unknown error';
                    echo "❌ Job failed: $message\n";
                    return null;
                    
                default:
                    // Processing status
                    $progress = $data['progress'] ?? 0;
                    $completed = $data['completed_sites'] ?? 0;
                    $total = $data['total_sites'] ?? 0;
                    echo "🔄 Progress: {$progress}% ($completed/$total sites)\n";
            }
            
            // Dynamic polling interval
            $elapsed = time() - $startTime;
            if ($elapsed < 120) {        // First 2 minutes
                $waitTime = 10;
            } elseif ($elapsed < 420) {  // Next 5 minutes
                $waitTime = 30;
            } else {                     // After that
                $waitTime = 60;
            }
            
            sleep($waitTime);
            
        } catch (Exception $e) {
            echo "Polling error: " . $e->getMessage() . "\n";
            sleep(10);  // Wait 10s on error
        }
    }
}

// Usage
$sites = pollCrawlJob(123);
if ($sites) {
    print_r($sites);
}
?>

Go

package main

import (
    "encoding/json"
    "fmt"
    "net/http"
    "time"
)

type JobStatus struct {
    Status         string `json:"status"`
    JobID          int    `json:"job_id"`
    TotalSites     int    `json:"total_sites"`
    CompletedSites int    `json:"completed_sites"`
    Progress       int    `json:"progress"`
    Message        string `json:"message"`
    Sites          []interface{} `json:"sites"`
}

func pollCrawlJob(jobID int) ([]interface{}, error) {
    startTime := time.Now()
    
    for {
        url := fmt.Sprintf("https://api.eddie.surf/crawl/%d", jobID)
        
        client := &http.Client{Timeout: 30 * time.Second}
        // Check the request error before touching req (req is nil on failure)
        req, err := http.NewRequest("GET", url, nil)
        if err != nil {
            return nil, err
        }
        req.Header.Set("X-API-Key", "your-api-key-here")
        req.Header.Set("Content-Type", "application/json")
        
        resp, err := client.Do(req)
        if err != nil {
            fmt.Printf("Polling error: %v\n", err)
            time.Sleep(10 * time.Second)
            continue
        }
        var data JobStatus
        decodeErr := json.NewDecoder(resp.Body).Decode(&data)
        resp.Body.Close() // close explicitly; defer inside the loop would pile up until return
        if decodeErr != nil {
            fmt.Printf("JSON decode error: %v\n", decodeErr)
            time.Sleep(10 * time.Second)
            continue
        }
        
        switch data.Status {
        case "completed":
            fmt.Printf("✅ Job completed! %d/%d sites successful\n", 
                data.CompletedSites, data.TotalSites)
            return data.Sites, nil
            
        case "failed":
            fmt.Printf("❌ Job failed: %s\n", data.Message)
            return nil, fmt.Errorf("job failed: %s", data.Message)
            
        default:
            // Processing status
            fmt.Printf("🔄 Progress: %d%% (%d/%d sites)\n", 
                data.Progress, data.CompletedSites, data.TotalSites)
        }
        
        // Dynamic polling interval
        elapsed := time.Since(startTime)
        var waitTime time.Duration
        if elapsed < 2*time.Minute {        // First 2 minutes
            waitTime = 10 * time.Second
        } else if elapsed < 7*time.Minute { // Next 5 minutes
            waitTime = 30 * time.Second
        } else {                            // After that
            waitTime = 60 * time.Second
        }
        
        time.Sleep(waitTime)
    }
}

func main() {
    sites, err := pollCrawlJob(123)
    if err != nil {
        fmt.Printf("Error: %v\n", err)
        return
    }
    
    fmt.Printf("Results: %+v\n", sites)
}

Bash

#!/bin/bash

poll_queue() {
    local job_id=$1
    local start_time=$(date +%s)
    
    while true; do
        response=$(curl -s -H "X-API-Key: your-api-key-here" "https://api.eddie.surf/crawl/${job_id}")
        status=$(echo "$response" | jq -r '.status')
        
        if [ "$status" = "completed" ]; then
            total=$(echo "$response" | jq -r '.total_sites')
            echo "✅ Complete! Processing ${total} sites"
            echo "$response" | jq '.sites // .results'
            break
        fi
        
        if [ "$status" = "failed" ]; then
            message=$(echo "$response" | jq -r '.message')
            echo "❌ Failed: ${message}"
            break
        fi
        
        progress=$(echo "$response" | jq -r '.progress // 0')
        echo "🔄 Progress: ${progress}%"
        
        # Dynamic polling interval
        current_time=$(date +%s)
        elapsed=$((current_time - start_time))
        
        if [ $elapsed -lt 120 ]; then
            sleep 10
        elif [ $elapsed -lt 420 ]; then
            sleep 30
        else
            sleep 60
        fi
    done
}

poll_queue "YOUR_JOB_ID"
Rate Limiting: Don't poll faster than every 5 seconds to avoid rate limits.

Error Codes

Code Description
400 Bad Request - Invalid parameters
429 Rate Limited - Too many requests
500 Internal Error - Contact support
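
A small Python sketch for handling these codes on the client side, retrying only on 429 with a simple backoff (the retry count and delays are arbitrary choices, not API requirements):

import time
import requests

def get_with_retry(url, api_key, max_retries=5):
    """GET with basic retry on 429; other errors are raised to the caller."""
    for attempt in range(max_retries):
        resp = requests.get(url, headers={"X-API-Key": api_key})
        if resp.status_code == 429:
            # Rate limited: back off before trying again
            time.sleep(2 ** attempt)
            continue
        resp.raise_for_status()  # raises on 400 / 500 style errors
        return resp.json()
    raise RuntimeError("Still rate limited after retries")

# Usage
# data = get_with_retry("https://api.eddie.surf/crawl/123", "your-api-key-here")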