Eddie.surf API
AI-powered web crawling API that extracts structured data from websites
https://api.eddie.surf
Quick Start
Get started in 2 minutes with basic company data extraction:
# Submit crawl job
curl -X POST https://api.eddie.surf/crawl \
  -H "X-API-Key: your-api-key-here" \
  -H "Content-Type: application/json" \
  -d '{
    "urls": ["https://data-surfer.com"],
    "context": {"purpose": "Company research"},
    "json": {
      "company_name": {
        "type": "string",
        "description": "Company name"
      }
    }
  }'
const response = await fetch('https://api.eddie.surf/crawl', {
  method: 'POST',
  headers: {
    'X-API-Key': 'your-api-key-here',
    'Content-Type': 'application/json'
  },
  body: JSON.stringify({
    urls: ['https://data-surfer.com'],
    context: {purpose: 'Company research'},
    json: {
      company_name: {
        type: 'string',
        description: 'Company name'
      }
    }
  })
});
const data = await response.json();
console.log(data);
import requests
response = requests.post(
    'https://api.eddie.surf/crawl',
    headers={'X-API-Key': 'your-api-key-here'},
    json={
        'urls': ['https://data-surfer.com'],
        'context': {'purpose': 'Company research'},
        'json': {
            'company_name': {
                'type': 'string',
                'description': 'Company name'
            }
        }
    })
data = response.json()
print(data)
require 'net/http'
require 'json'
require 'uri'
uri = URI('https://api.eddie.surf/crawl')
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
request = Net::HTTP::Post.new(uri)
request['X-API-Key'] = 'your-api-key-here'
request['Content-Type'] = 'application/json'
request.body = {
  urls: ['https://data-surfer.com'],
  context: { purpose: 'Company research' },
  json: {
    company_name: {
      type: 'string',
      description: 'Company name'
    }
  }
}.to_json
response = http.request(request)
data = JSON.parse(response.body)
puts data
<?php
$url = 'https://api.eddie.surf/crawl';
$data = [
    'urls' => ['https://data-surfer.com'],
    'context' => ['purpose' => 'Company research'],
    'json' => [
        'company_name' => [
            'type' => 'string',
            'description' => 'Company name'
        ]
    ]
];
$options = [
    'http' => [
        'header' => "X-API-Key: your-api-key-here\r\n" .
                    "Content-Type: application/json\r\n",
        'method' => 'POST',
        'content' => json_encode($data)
    ]
];
$context = stream_context_create($options);
$response = file_get_contents($url, false, $context);
$result = json_decode($response, true);
print_r($result);
?>
package main
import (
    "bytes"
    "encoding/json"
    "fmt"
    "net/http"
)
func main() {
    url := "https://api.eddie.surf/crawl"
    payload := map[string]interface{}{
        "urls":    []string{"https://data-surfer.com"},
        "context": map[string]string{"purpose": "Company research"},
        "json": map[string]interface{}{
            "company_name": map[string]string{
                "type":        "string",
                "description": "Company name",
            },
        },
    }
    jsonData, _ := json.Marshal(payload)
    req, _ := http.NewRequest("POST", url, bytes.NewBuffer(jsonData))
    req.Header.Set("X-API-Key", "your-api-key-here")
    req.Header.Set("Content-Type", "application/json")
    client := &http.Client{}
    resp, err := client.Do(req)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()
    var result map[string]interface{}
    json.NewDecoder(resp.Body).Decode(&result)
    fmt.Println(result)
}
Authentication
All API endpoints require authentication using API keys. Include your API key in the X-API-Key header with every request.
API Key Usage
X-API-Key: your-api-key-here
Rate Limits
- Rate Limit: 60 requests per minute (default)
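If you expect to approach this limit, a simple client-side guard is to back off and retry when a request comes back with HTTP 429. A minimal sketch in Python (the retry count and delays are illustrative choices, not API requirements):
import time
import requests
def post_with_backoff(url, headers, payload, max_attempts=5):
    # POST to the API, backing off and retrying when rate limited (HTTP 429)
    for attempt in range(max_attempts):
        response = requests.post(url, headers=headers, json=payload)
        if response.status_code != 429:
            return response
        time.sleep(2 ** attempt)  # exponential backoff: 1s, 2s, 4s, ...
    raise RuntimeError('Still rate limited after retries')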
API Workflow
Eddie.surf uses a simple two-step async process for web crawling jobs.
Choose Your Endpoint
- /crawl: For 1-199 URLs with standard processing
- /crawl-batch: For 200+ URLs with optimized batch processing
Two Simple Steps
- Submit: POST to /crawl or /crawl-batch → get a job ID instantly
- Poll or Wait: Either poll /crawl/{job_id} or wait for a callback notification
/crawl/{job_id} returns:
- Processing status and progress while running
- Complete results when finished
Monitoring Options
- Callback (Recommended): Provide a callback_url and receive notifications when complete (per site or as a grouped array)
- Polling: Repeatedly check /crawl/{job_id} for status updates
- Both: Use callbacks for automatic notifications AND polling for real-time progress monitoring (a minimal submit-and-poll sketch follows below)
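As a concrete illustration of the two-step flow, here is a minimal submit-and-poll sketch in Python, reusing the quick-start schema from above (the 10-second interval is an arbitrary choice; see the Polling Guide for recommended intervals):
import time
import requests
API = 'https://api.eddie.surf'
HEADERS = {'X-API-Key': 'your-api-key-here'}
# Step 1: submit the job and capture the job ID
submit = requests.post(f'{API}/crawl', headers=HEADERS, json={
    'urls': ['https://data-surfer.com'],
    'context': {'purpose': 'Company research'},
    'json': {'company_name': {'type': 'string', 'description': 'Company name'}}
}).json()
job_id = submit['job_id']
# Step 2: poll /crawl/{job_id} until the job leaves the processing state
while True:
    status = requests.get(f'{API}/crawl/{job_id}', headers=HEADERS).json()
    if status['status'] in ('completed', 'failed'):
        break
    time.sleep(10)
print(status)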
Automatic Processing
- HTML extraction with advanced scraping
- AI link discovery and prioritization
- Content analysis with Claude Sonnet 4
- Structured data extraction
- Notifications on completion
POST /crawl
Submit a crawling job for 1-199 URLs. Returns immediately with job ID.
Request Body
Parameter | Type | Required | Description |
---|---|---|---|
urls | string[] | Required | Array of URLs to crawl (1-199) |
context | object | Required | Background information to guide extraction |
json | object | Required | Schema defining what data to extract |
max_depth | integer | Optional | Link levels to follow (1-10), default: 3 |
max_pages | integer | Optional | Maximum pages to crawl (1-1000), default: 15 |
callback_url | string | Optional | Callback URL for notifications |
callback_mode | string | Optional | "once" (default) = one callback when all complete, "multi" = callback per site |
timeout_per_page | integer | Optional | Timeout seconds per page (1-180), default: 30 |
rules | string[] | Optional | Custom processing instructions |
mock | boolean | Optional | Test mode - generates fake data without using credits |
include_technical | boolean | Optional | Collect technical data (DNS, headers) for the main URL - costs 1 credit |
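For example, a request that also sets the optional tuning parameters might look like the sketch below (the values are illustrative, and the callback URL is a hypothetical receiver you would host yourself):
import requests
payload = {
    'urls': ['https://data-surfer.com'],
    'context': {'purpose': 'Company research'},
    'json': {'company_name': {'type': 'string', 'description': 'Company name'}},
    'max_depth': 2,                # follow links two levels deep
    'max_pages': 25,               # crawl at most 25 pages per site
    'timeout_per_page': 45,        # allow up to 45 seconds per page
    'callback_url': 'https://example.com/webhooks/eddie',  # hypothetical endpoint
    'callback_mode': 'multi',      # one callback per site as each completes
    'rules': ['Prefer English-language pages'],
    'mock': True                   # test mode: generates fake data, no credits used
}
response = requests.post('https://api.eddie.surf/crawl',
                         headers={'X-API-Key': 'your-api-key-here'},
                         json=payload)
print(response.json())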
JSON Schema Fields
Each field in the json object supports these properties:
Property | Type | Required | Description |
---|---|---|---|
type | string | Optional | "string", "number", "boolean", "array", "score" |
description | string | Optional | What this field represents |
priority | string | Optional | "high", "medium", "low" |
additional_guidelines | string[] | Optional | Specific extraction instructions |
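A hypothetical schema that exercises these properties might look like this (the field names and guidelines below are invented for illustration; only the property keys come from the table above):
json_schema = {
    'company_name': {
        'type': 'string',
        'description': 'Legal or trading name of the company',
        'priority': 'high'
    },
    'employee_count': {
        'type': 'number',
        'description': 'Approximate number of employees',
        'priority': 'medium',
        'additional_guidelines': ['Prefer figures from an About or Careers page']
    },
    'is_hiring': {
        'type': 'boolean',
        'description': 'Whether the company currently lists open roles'
    },
    'product_names': {
        'type': 'array',
        'description': 'Names of products or services offered'
    },
    'website_quality': {
        'type': 'score',
        'description': 'Overall quality and completeness of the website',
        'priority': 'low'
    }
}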
Initial Response
{
"status": "success",
"job_id": 123,
"total_sites": 2,
"sites": [
{
"site_id": 456,
"url": "https://data-surfer.com"
},
{
"site_id": 457,
"url": "https://eddie.surf"
}
],
"message": "Created crawl job 123 with 2 sites",
"credits_remaining": 855,
"credits_used": 0
}
POST /crawl-batch
Process large numbers of domains efficiently with batch processing and optimized AI inference.
- /crawl: Multiple URLs = 1 job with multiple sites = 1 callback with array of site results
- /crawl-batch: 200+ URLs = 1 job with multiple sites = 1 callback with array of site results
Key Differences from Individual /crawl
- Minimum 200 URLs required - Designed for large-scale data collection
- Single job ID - All URLs processed as one job with individual sites
- Optimized AI processing - Uses batch inference for cost efficiency
- Site-organized results - Individual synthesis per site in response array
- Single callback - One notification when all domains complete
Request Parameters
Same parameters as /crawl with these differences:
Parameter | Type | Required | Batch Requirement | Description |
---|---|---|---|---|
urls | string[] | Required | Minimum 200 unique URLs | Array of domains to process |
max_pages | integer | Optional | Default: 1000 (higher than individual) | Maximum pages per domain |
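A minimal batch submission sketch in Python, assuming you already have 200 or more unique URLs (the placeholder list below only illustrates the shape of the request):
import requests
urls = [f'https://example-{i}.com' for i in range(200)]  # placeholder: your own 200+ unique URLs
response = requests.post('https://api.eddie.surf/crawl-batch',
                         headers={'X-API-Key': 'your-api-key-here'},
                         json={
                             'urls': urls,
                             'context': {'purpose': 'Lead generation'},
                             'json': {'company_name': {'type': 'string', 'description': 'Company name'}},
                             'callback_url': 'https://example.com/webhooks/batch-complete'  # hypothetical receiver
                         })
print(response.json())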
Use Cases
- Lead Generation: Extract contact info from 500+ company websites
- Market Research: Analyze industry data across hundreds of competitors
- Data Migration: Extract structured data from legacy websites in bulk
- Compliance Audits: Check privacy policies across large website portfolios
GET /crawl/{job_id}
Check job status and retrieve results from a single endpoint that adapts based on processing state.
# Check status
curl -H "X-API-Key: your-api-key-here" https://api.eddie.surf/crawl/123
- While processing: Status and progress info
- When complete: Full crawl results and data
Status Logic
- "completed": Any data from your JSON schema was extracted successfully. If some pages failed during crawling, this is noted in the message field
- "failed": No data from your schema was extracted, with error details explaining why
- "processing": Still crawling or processing pages
Note: The API never returns "partial" status. Jobs with successful data extraction are always marked as "completed" with additional context about any failed pages.
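Because a "completed" job can still contain failed sites or pages, it is worth inspecting the per-site fields rather than the top-level status alone. A sketch of how a client might read the response (field names taken from the examples below):
import requests
status = requests.get('https://api.eddie.surf/crawl/123',
                      headers={'X-API-Key': 'your-api-key-here'}).json()
if status['status'] == 'processing':
    print(f"Still running: {status['completed_sites']}/{status['total_sites']} sites done")
elif status['status'] == 'failed':
    print('Job failed - no data from the schema was extracted')
else:  # "completed" - data was extracted, but individual sites or pages may still have failed
    for site in status.get('sites', []):
        if site['status'] == 'failed':
            print(f"{site['url']}: failed ({site.get('message', 'no details')})")
        else:
            print(f"{site['url']}: {site['completed_pages']} pages crawled, "
                  f"{site['failed_pages']} failed, results: {site['results']}")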
Response Examples
{
"status": "processing",
"job_id": 123,
"total_sites": 3,
"completed_sites": 1,
"processing_sites": 1,
"failed_sites": 0,
"credits_remaining": 847,
"credits_used": 7.5
}
{
"status": "completed",
"job_id": 123,
"total_sites": 3,
"completed_sites": 2,
"failed_sites": 1,
"processing_sites": 0,
"credits_remaining": 835,
"credits_used": 19.5,
"sites": [
{
"site_id": 456,
"url": "https://data-surfer.com",
"status": "completed",
"completed_pages": 5,
"failed_pages": 0,
"pending_pages": 0,
"results": {
"company_name": {
"value": "Data Surfer Inc.",
"confidence": 5,
"whereFound": "Found at [1]\\n\\nSources:\\n[1] https://data-surfer.com/"
}
},
"created_at": "2025-01-20T10:30:00Z",
"updated_at": "2025-01-20T10:35:00Z"
},
{
"site_id": 457,
"url": "https://eddie.surf",
"status": "completed",
"completed_pages": 4,
"failed_pages": 0,
"pending_pages": 0,
"results": {
"company_name": {
"value": "Eddie.surf Inc.",
"confidence": 5,
"whereFound": "Found at [1]\\n\\nSources:\\n[1] https://eddie.surf/"
}
},
"created_at": "2025-01-20T10:30:00Z",
"updated_at": "2025-01-20T10:36:00Z"
},
{
"site_id": 458,
"url": "https://broken-site.com",
"status": "failed",
"completed_pages": 0,
"failed_pages": 3,
"pending_pages": 0,
"message": "All pages failed during scraping",
"created_at": "2025-01-20T10:30:00Z",
"updated_at": "2025-01-20T10:31:00Z"
}
],
"created_at": "2025-01-20T10:30:00Z",
"updated_at": "2025-01-20T10:36:00Z"
}
{
"status": "failed",
"job_id": 124,
"total_sites": 1,
"completed_sites": 0,
"failed_sites": 1,
"processing_sites": 0,
"credits_remaining": 851,
"credits_used": 4.5,
"sites": [
{
"site_id": 459,
"url": "https://broken-site.com",
"status": "failed",
"completed_pages": 0,
"failed_pages": 3,
"pending_pages": 0,
"created_at": "2025-01-20T11:00:00Z",
"updated_at": "2025-01-20T11:01:00Z"
}
],
"created_at": "2025-01-20T11:00:00Z",
"updated_at": "2025-01-20T11:01:00Z"
}
GET /crawl/{job_id}/{site_id}
Get individual site status and results within a job.
# Check individual site status
curl -H "X-API-Key: your-api-key-here" https://api.eddie.surf/crawl/123/456
Returns detailed status and results for a single site within a job. Useful for monitoring progress on specific URLs within a larger job.
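For example, in Python (the job and site IDs come from the initial /crawl response):
import requests
job_id, site_id = 123, 456  # returned when the job was created
site = requests.get(f'https://api.eddie.surf/crawl/{job_id}/{site_id}',
                    headers={'X-API-Key': 'your-api-key-here'}).json()
print(site['status'], site.get('results'))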
POST /smart-search
Find websites using AI-powered search with intelligent filtering and quality control.
Credits are calculated on the final quality-controlled results.
curl -X POST https://api.eddie.surf/smart-search \
  -H "X-API-Key: your-api-key-here" \
  -H "Content-Type: application/json" \
  -d '{
    "query": "game development studios in San Francisco",
    "max_results": 30,
    "website_only": true,
    "skip_duplicate_domains": true,
    "context": {
      "intent": "find_businesses",
      "location": "San Francisco",
      "business_type": "game development studios"
    },
    "rules": [
      "Include gaming subsidiaries of larger companies",
      "Improve subpage URLs to root domains"
    ],
    "additional_guidelines": [
      "Focus on companies with actual SF presence",
      "Exclude educational institutions"
    ]
  }'
Request Parameters
Parameter | Type | Required | Description |
---|---|---|---|
query | string | Required | The search query. Can be simple ("project management software") or complex ("best CRM for startups under $50/month") |
max_results | integer | Optional | Maximum number of results to return (1-5000). Default: 10 |
website_only | boolean | Optional | Only return results with valid website URLs. Default: false |
skip_duplicate_domains | boolean | Optional | Skip results from domains already seen. Highly recommended for product/service searches. Default: false |
callback_url | string | Optional | URL to receive a POST notification when the job completes. Callback includes all search results and job metadata. |
context | object | Optional | JSON object providing search context (e.g., {"intent": "find_businesses", "location": "San Francisco"}) |
rules | string[] | Optional | Array of search rules (e.g., ["Include subsidiaries", "Improve URLs to root domains"]) |
additional_guidelines | string[] | Optional | Array of additional guidelines for AI processing (e.g., ["Focus on companies with SF offices", "Exclude educational institutions"]) |
Key Features
- Uses multiple search services
- Smart filtering based on search intent
- Quality control removes irrelevant results
- Continues searching until target result count is met
Search Examples
{
"query": "project management software",
"max_results": 25,
"website_only": true,
"skip_duplicate_domains": true
}
Returns actual PM tools like Asana, Monday.com, ClickUp - not articles about them
{
"query": "italian restaurant chicago",
"max_results": 20
}
Returns actual restaurants with addresses and phone numbers
{
"query": "climate change research papers 2025",
"max_results": 15
}
Returns actual research papers with citations and publication info
{
"query": "email marketing platforms",
"max_results": 25,
"website_only": true,
"callback_url": "https://yoursite.com/webhooks/search-complete"
}
Your callback URL will receive a POST request containing all results when the search completes
Response Format
{
"status": "success",
"job_id": 123,
"max_results": 30,
"query": "crm software",
"message": "Created smart search job 123",
"credits_remaining": "1250.0",
"credits_used": 1
}
GET /smart-search/{job_id}
Check search status and retrieve results.
curl -H "X-API-Key: your-api-key-here" \
  https://api.eddie.surf/smart-search/123
Response While Processing
{
"status": "processing",
"job_id": 123,
"total_results": 15,
"created_at": "2025-01-20T10:00:00.000Z",
"updated_at": "2025-01-20T10:00:05.000Z"
}
Response When Complete
{
"status": "complete",
"job_id": 123,
"total_results": 30,
"results": [
{
"name": "Data Surfer",
"link": "https://data-surfer.com",
"snippet": "World's #1 Lead Generation system...",
"address": null,
"phone": null,
"meta_data": {
"rating": 4.5,
"review_count": 1250
}
}
// ... more results
],
"created_at": "2025-01-20T10:00:00.000Z",
"updated_at": "2025-01-20T10:00:25.000Z",
"completed_at": "2025-01-20T10:00:25.000Z"
}
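A minimal polling sketch for a search job (the 5-second interval is an arbitrary choice; the example timestamps above suggest searches often finish within about half a minute):
import time
import requests
def wait_for_search(job_id):
    # Poll GET /smart-search/{job_id} until the job is no longer processing
    while True:
        data = requests.get(f'https://api.eddie.surf/smart-search/{job_id}',
                            headers={'X-API-Key': 'your-api-key-here'}).json()
        if data['status'] != 'processing':
            return data
        time.sleep(5)
for result in wait_for_search(123).get('results', []):
    print(result['name'], result['link'])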
- Always use skip_duplicate_domains: true for product/service searches to avoid multiple locations of the same company (e.g. a coffee shop search returning multiple Starbucks locations)
- Use website_only: true to ensure all results have clickable links
- Be specific in your query - "accounting software for small business" yields better results than just "accounting"
- The AI automatically optimizes your query for the best results
Callbacks
Receive automatic notifications when your crawl jobs complete via callbacks.
Setup
Include a callback_url parameter in your POST /crawl request to receive notifications.
Callback Modes
Control how callbacks are delivered using the callback_mode parameter:
- "once" (default): Single callback with all sites as an array when job completes
- "multi": Individual callback per site as each completes
- Once Mode: 3 URLs = 1 callback with job-level data and sites array
- Multi Mode: 3 URLs = 3 separate callbacks, each with individual site data
- Batch Mode: Always uses "once" mode only
Callback Examples
// callback_mode: "once" → Single callback when all sites complete
// Matches GET /crawl/{job_id} response format
{
"status": "completed",
"job_id": 123,
"total_sites": 3,
"completed_sites": 2,
"failed_sites": 1,
"processing_sites": 0,
"sites": [
{
"site_id": 456,
"url": "https://data-surfer.com",
"status": "completed",
"completed_pages": 10,
"failed_pages": 5,
"pending_pages": 0,
"results": {
"company_name": {
"value": "Data Surfer Inc.",
"confidence": 5,
"whereFound": "Found at [1]\\n\\nSources:\\n[1] https://data-surfer.com/"
}
},
"created_at": "2025-01-20T10:30:00Z",
"updated_at": "2025-01-20T10:35:00Z"
},
{
"site_id": 457,
"url": "https://eddie.surf",
"status": "completed",
"completed_pages": 8,
"failed_pages": 0,
"pending_pages": 0,
"results": {
"company_name": {
"value": "Eddie.surf Inc.",
"confidence": 5,
"whereFound": "Found at [1]\\n\\nSources:\\n[1] https://eddie.surf/"
}
},
"created_at": "2025-01-20T10:30:00Z",
"updated_at": "2025-01-20T10:36:00Z"
},
{
"site_id": 458,
"url": "https://broken-site.com",
"status": "failed",
"completed_pages": 0,
"failed_pages": 1,
"pending_pages": 0,
"results": null,
"created_at": "2025-01-20T10:30:00Z",
"updated_at": "2025-01-20T10:32:00Z"
}
],
"created_at": "2025-01-20T10:30:00Z",
"updated_at": "2025-01-20T10:36:00Z",
"credits_remaining": 826,
"credits_used": 28.5
}
// callback_mode: "multi" → 3 separate callbacks as each site completes
// Matches GET /crawl/{job_id}/{site_id} response format
{
"status": "completed",
"job_id": 123,
"site_id": 456,
"url": "https://data-surfer.com",
"completed_pages": 10,
"failed_pages": 5,
"pending_pages": 0,
"progress": 67,
"results": {
"company_name": {
"value": "Data Surfer Inc.",
"confidence": 5,
"whereFound": "Found at [1]\\n\\nSources:\\n[1] https://data-surfer.com/"
}
},
"message": "Data extracted successfully with 5 pages failed to scrape",
"created_at": "2025-01-20T10:30:00Z",
"updated_at": "2025-01-20T10:35:00Z",
"credits_remaining": 837,
"credits_used": 18.0
}
Delivery Details
- Method: HTTP POST
- Content-Type: application/json
- Timeout: 30 seconds
- Retries: Single attempt (no automatic retries currently)
- User-Agent: Eddie.surf/1.0
Testing Callbacks
Use webhook.site to generate a test URL and see your callback payloads in real-time.
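For local development you can also run a throwaway receiver yourself. A minimal sketch using Python's standard library (the port and print format are arbitrary; you would expose this URL as your callback_url, e.g. via a tunnel):
import json
from http.server import BaseHTTPRequestHandler, HTTPServer
class CallbackHandler(BaseHTTPRequestHandler):
    def do_POST(self):
        # Eddie.surf delivers callbacks as an HTTP POST with a JSON body
        length = int(self.headers.get('Content-Length', 0))
        payload = json.loads(self.rfile.read(length) or b'{}')
        print('Callback received:', payload.get('status'), 'job', payload.get('job_id'))
        self.send_response(200)  # respond quickly; delivery times out after 30 seconds
        self.end_headers()
HTTPServer(('', 8000), CallbackHandler).serve_forever()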
Polling Guide
Efficient strategies for monitoring crawl progress in real-time. Use alone or alongside callbacks for comprehensive monitoring.
Recommended Intervals
Time Period | Interval |
---|---|
First 2 minutes | Every 10 seconds |
Next 5 minutes | Every 30 seconds |
After 7 minutes | Every 60 seconds |
Polling Examples
async function pollQueue(job_id) {
  let complete = false;
  const startTime = Date.now();
  while (!complete) {
    const response = await fetch(`https://api.eddie.surf/crawl/${job_id}`, {
      headers: { 'X-API-Key': 'your-api-key-here' }
    });
    const data = await response.json();
    if (data.status === 'completed') {
      console.log(`✅ Complete! Processing ${data.total_sites} sites`);
      return data.sites || data.results;
    }
    if (data.status === 'failed') {
      console.log(`❌ Failed: ${data.message}`);
      return null;
    }
    console.log(`🔄 Progress: ${data.progress || 0}%`);
    // Dynamic polling interval
    const elapsed = Date.now() - startTime;
    const waitTime = elapsed < 120000 ? 10000 :  // 10s first 2min
                     elapsed < 420000 ? 30000 :  // 30s next 5min
                     60000;                      // 60s after that
    await new Promise(r => setTimeout(r, waitTime));
  }
}
import time
import requests
def poll_queue(job_id):
    complete = False
    start_time = time.time()
    while not complete:
        response = requests.get(f'https://api.eddie.surf/crawl/{job_id}',
                                headers={'X-API-Key': 'your-api-key-here'})
        data = response.json()
        if data['status'] == 'completed':
            print(f"✅ Complete! Processing {data['total_sites']} sites")
            return data.get('sites', data.get('results'))
        if data['status'] == 'failed':
            print(f"❌ Failed: {data['message']}")
            return None
        print(f"🔄 Progress: {data.get('progress', 0)}%")
        # Dynamic polling interval
        elapsed = time.time() - start_time
        if elapsed < 120:    # First 2 minutes
            wait_time = 10
        elif elapsed < 420:  # Next 5 minutes
            wait_time = 30
        else:                # After that
            wait_time = 60
        time.sleep(wait_time)
require 'net/http'
require 'json'
require 'uri'
def poll_crawl_job(job_id)
  start_time = Time.now
  loop do
    begin
      uri = URI("https://api.eddie.surf/crawl/#{job_id}")
      http = Net::HTTP.new(uri.host, uri.port)
      http.use_ssl = true
      request = Net::HTTP::Get.new(uri)
      request['X-API-Key'] = 'your-api-key-here'
      request['Content-Type'] = 'application/json'
      response = http.request(request)
      raise "HTTP #{response.code}: #{response.message}" unless response.is_a?(Net::HTTPSuccess)
      data = JSON.parse(response.body)
      case data['status']
      when 'completed'
        completed = data['completed_sites'] || 0
        total = data['total_sites'] || 0
        puts "✅ Job completed! #{completed}/#{total} sites successful"
        return data['sites'] || []
      when 'failed'
        puts "❌ Job failed: #{data['message'] || 'Unknown error'}"
        return nil
      else
        # Processing status
        progress = data['progress'] || 0
        completed = data['completed_sites'] || 0
        total = data['total_sites'] || 0
        puts "🔄 Progress: #{progress}% (#{completed}/#{total} sites)"
      end
      # Dynamic polling interval
      elapsed = Time.now - start_time
      wait_time = if elapsed < 120    # First 2 minutes
                    10
                  elsif elapsed < 420 # Next 5 minutes
                    30
                  else                # After that
                    60
                  end
      sleep(wait_time)
    rescue => e
      puts "Polling error: #{e}"
      sleep(10) # Wait 10s on error
    end
  end
end
# Usage
sites = poll_crawl_job(123)
puts "Results: #{sites}" if sites
<?php
function pollCrawlJob($jobId) {
    $startTime = time();
    while (true) {
        try {
            $url = "https://api.eddie.surf/crawl/$jobId";
            $context = stream_context_create([
                'http' => [
                    'header' => "X-API-Key: your-api-key-here\r\n",
                    'method' => 'GET'
                ]
            ]);
            $response = file_get_contents($url, false, $context);
            if ($response === false) {
                throw new Exception('HTTP request failed');
            }
            $data = json_decode($response, true);
            if ($data === null) {
                throw new Exception('Invalid JSON response');
            }
            switch ($data['status']) {
                case 'completed':
                    $completed = $data['completed_sites'] ?? 0;
                    $total = $data['total_sites'] ?? 0;
                    echo "✅ Job completed! $completed/$total sites successful\n";
                    return $data['sites'] ?? [];
                case 'failed':
                    $message = $data['message'] ?? 'Unknown error';
                    echo "❌ Job failed: $message\n";
                    return null;
                default:
                    // Processing status
                    $progress = $data['progress'] ?? 0;
                    $completed = $data['completed_sites'] ?? 0;
                    $total = $data['total_sites'] ?? 0;
                    echo "🔄 Progress: {$progress}% ($completed/$total sites)\n";
            }
            // Dynamic polling interval
            $elapsed = time() - $startTime;
            if ($elapsed < 120) { // First 2 minutes
                $waitTime = 10;
            } elseif ($elapsed < 420) { // Next 5 minutes
                $waitTime = 30;
            } else { // After that
                $waitTime = 60;
            }
            sleep($waitTime);
        } catch (Exception $e) {
            echo "Polling error: " . $e->getMessage() . "\n";
            sleep(10); // Wait 10s on error
        }
    }
}
// Usage
$sites = pollCrawlJob(123);
if ($sites) {
    print_r($sites);
}
?>
package main
import (
    "encoding/json"
    "fmt"
    "net/http"
    "time"
)
type JobStatus struct {
    Status         string        `json:"status"`
    JobID          int           `json:"job_id"`
    TotalSites     int           `json:"total_sites"`
    CompletedSites int           `json:"completed_sites"`
    Progress       int           `json:"progress"`
    Message        string        `json:"message"`
    Sites          []interface{} `json:"sites"`
}
func pollCrawlJob(jobID int) ([]interface{}, error) {
    startTime := time.Now()
    for {
        url := fmt.Sprintf("https://api.eddie.surf/crawl/%d", jobID)
        client := &http.Client{Timeout: 30 * time.Second}
        req, err := http.NewRequest("GET", url, nil)
        if err != nil {
            return nil, err
        }
        req.Header.Set("X-API-Key", "your-api-key-here")
        req.Header.Set("Content-Type", "application/json")
        resp, err := client.Do(req)
        if err != nil {
            fmt.Printf("Polling error: %v\n", err)
            time.Sleep(10 * time.Second)
            continue
        }
        var data JobStatus
        err = json.NewDecoder(resp.Body).Decode(&data)
        resp.Body.Close() // close inside the loop rather than defer, so bodies don't accumulate
        if err != nil {
            fmt.Printf("JSON decode error: %v\n", err)
            time.Sleep(10 * time.Second)
            continue
        }
        switch data.Status {
        case "completed":
            fmt.Printf("✅ Job completed! %d/%d sites successful\n",
                data.CompletedSites, data.TotalSites)
            return data.Sites, nil
        case "failed":
            fmt.Printf("❌ Job failed: %s\n", data.Message)
            return nil, fmt.Errorf("job failed: %s", data.Message)
        default:
            // Processing status
            fmt.Printf("🔄 Progress: %d%% (%d/%d sites)\n",
                data.Progress, data.CompletedSites, data.TotalSites)
        }
        // Dynamic polling interval
        elapsed := time.Since(startTime)
        var waitTime time.Duration
        if elapsed < 2*time.Minute { // First 2 minutes
            waitTime = 10 * time.Second
        } else if elapsed < 7*time.Minute { // Next 5 minutes
            waitTime = 30 * time.Second
        } else { // After that
            waitTime = 60 * time.Second
        }
        time.Sleep(waitTime)
    }
}
func main() {
    sites, err := pollCrawlJob(123)
    if err != nil {
        fmt.Printf("Error: %v\n", err)
        return
    }
    fmt.Printf("Results: %+v\n", sites)
}
#!/bin/bash
poll_queue() {
  local job_id=$1
  local start_time=$(date +%s)
  while true; do
    response=$(curl -s -H "X-API-Key: your-api-key-here" "https://api.eddie.surf/crawl/${job_id}")
    status=$(echo "$response" | jq -r '.status')
    if [ "$status" = "completed" ]; then
      total=$(echo "$response" | jq -r '.total_sites')
      echo "✅ Complete! Processing ${total} sites"
      echo "$response" | jq '.sites // .results'
      break
    fi
    if [ "$status" = "failed" ]; then
      message=$(echo "$response" | jq -r '.message')
      echo "❌ Failed: ${message}"
      break
    fi
    progress=$(echo "$response" | jq -r '.progress // 0')
    echo "🔄 Progress: ${progress}%"
    # Dynamic polling interval
    current_time=$(date +%s)
    elapsed=$((current_time - start_time))
    if [ $elapsed -lt 120 ]; then
      sleep 10
    elif [ $elapsed -lt 420 ]; then
      sleep 30
    else
      sleep 60
    fi
  done
}
poll_queue "YOUR_JOB_ID"
Error Codes
Code | Description |
---|---|
400 | Bad Request - Invalid parameters |
429 | Rate Limited - Too many requests |
500 | Internal Error - Contact support |
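A sketch of handling these codes on the client side (the single retry after 60 seconds is an illustrative policy, not something the API mandates):
import time
import requests
def call_api(method, url, **kwargs):
    # Call the API and react to the documented error codes
    kwargs.setdefault('headers', {})['X-API-Key'] = 'your-api-key-here'
    response = requests.request(method, url, **kwargs)
    if response.status_code == 429:
        time.sleep(60)  # rate limited: wait for the window to reset, then retry once
        response = requests.request(method, url, **kwargs)
    if response.status_code == 400:
        raise ValueError(f'Bad request - check your parameters: {response.text}')
    if response.status_code == 500:
        raise RuntimeError('Internal error - contact support')
    response.raise_for_status()
    return response.json()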