153 lines
4.1 KiB
Go
153 lines
4.1 KiB
Go
package apify
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"net/url"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// Endpoint and polling settings shared by every Client.
const (
	// apiBase is the root URL of the Apify v2 REST API; request paths are
	// appended to it.
	apiBase = "https://api.apify.com/v2"

	// pollEvery is the pause between two consecutive run-status polls.
	pollEvery = 3 * time.Second

	// pollTimeout bounds how long Run waits for a started run to reach a
	// terminal status.
	pollTimeout = 5 * time.Minute
)
|
|
|
|
// Client is a thin wrapper around the Apify run-and-fetch lifecycle:
// start an actor run, wait for it to finish, and download its dataset.
type Client struct {
	// APIKey is the Apify API token used to authenticate every request.
	// Run refuses to proceed when it is empty.
	APIKey string

	// HTTP is the client through which all API calls are issued.
	HTTP *http.Client
}
|
|
|
|
func New(apiKey string) *Client {
|
|
return &Client{
|
|
APIKey: apiKey,
|
|
HTTP: &http.Client{Timeout: 30 * time.Second},
|
|
}
|
|
}
|
|
|
|
// runResponse is the JSON envelope decoded from both the start-run and the
// poll-run responses; only the fields the client reads are declared.
type runResponse struct {
	Data struct {
		// ID identifies the run and is used to poll its status.
		ID string `json:"id"`
		// Status is the run's lifecycle state as reported by the API
		// (for example "SUCCEEDED" or "FAILED").
		Status string `json:"status"`
		// DefaultDatasetID names the dataset whose items are fetched
		// once the run has finished.
		DefaultDatasetID string `json:"defaultDatasetId"`
	} `json:"data"`
}
|
|
|
|
// Run starts an actor run, waits for SUCCEEDED, and returns dataset items as raw JSON.
|
|
func (c *Client) Run(ctx context.Context, actorID string, input any) ([]json.RawMessage, error) {
|
|
if c.APIKey == "" {
|
|
return nil, errors.New("apify api_key not configured")
|
|
}
|
|
if actorID == "" {
|
|
return nil, errors.New("apify actor id is empty")
|
|
}
|
|
|
|
body, err := json.Marshal(input)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Apify URLs use "~" to separate username and actor name, never "/".
|
|
// Accept either form in config and normalize before path-escaping.
|
|
urlActorID := strings.ReplaceAll(actorID, "/", "~")
|
|
startURL := fmt.Sprintf("%s/acts/%s/runs?token=%s", apiBase, url.PathEscape(urlActorID), url.QueryEscape(c.APIKey))
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, startURL, bytes.NewReader(body))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
req.Header.Set("Content-Type", "application/json")
|
|
resp, err := c.HTTP.Do(req)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("start run: %w", err)
|
|
}
|
|
var runResp runResponse
|
|
if err := decodeJSON(resp, &runResp); err != nil {
|
|
return nil, fmt.Errorf("start run: %w", err)
|
|
}
|
|
if runResp.Data.ID == "" {
|
|
return nil, errors.New("start run: missing run id")
|
|
}
|
|
|
|
deadline := time.Now().Add(pollTimeout)
|
|
pollCtx, cancel := context.WithDeadline(ctx, deadline)
|
|
defer cancel()
|
|
|
|
status, datasetID, err := c.waitForRun(pollCtx, runResp.Data.ID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if status != "SUCCEEDED" {
|
|
return nil, fmt.Errorf("apify run terminated with status %s", status)
|
|
}
|
|
|
|
return c.fetchDataset(ctx, datasetID)
|
|
}
|
|
|
|
func (c *Client) waitForRun(ctx context.Context, runID string) (string, string, error) {
|
|
pollURL := fmt.Sprintf("%s/actor-runs/%s?token=%s", apiBase, url.PathEscape(runID), url.QueryEscape(c.APIKey))
|
|
for {
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, pollURL, nil)
|
|
if err != nil {
|
|
return "", "", err
|
|
}
|
|
resp, err := c.HTTP.Do(req)
|
|
if err != nil {
|
|
return "", "", fmt.Errorf("poll run: %w", err)
|
|
}
|
|
var r runResponse
|
|
if err := decodeJSON(resp, &r); err != nil {
|
|
return "", "", fmt.Errorf("poll run: %w", err)
|
|
}
|
|
switch r.Data.Status {
|
|
case "SUCCEEDED", "FAILED", "ABORTED", "TIMED-OUT":
|
|
return r.Data.Status, r.Data.DefaultDatasetID, nil
|
|
}
|
|
select {
|
|
case <-ctx.Done():
|
|
return "", "", ctx.Err()
|
|
case <-time.After(pollEvery):
|
|
}
|
|
}
|
|
}
|
|
|
|
func (c *Client) fetchDataset(ctx context.Context, datasetID string) ([]json.RawMessage, error) {
|
|
if datasetID == "" {
|
|
return nil, errors.New("missing dataset id")
|
|
}
|
|
dsURL := fmt.Sprintf("%s/datasets/%s/items?clean=true&format=json&token=%s", apiBase, url.PathEscape(datasetID), url.QueryEscape(c.APIKey))
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, dsURL, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
resp, err := c.HTTP.Do(req)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("fetch dataset: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode >= 300 {
|
|
b, _ := io.ReadAll(io.LimitReader(resp.Body, 1024))
|
|
return nil, fmt.Errorf("dataset returned %d: %s", resp.StatusCode, string(b))
|
|
}
|
|
var items []json.RawMessage
|
|
if err := json.NewDecoder(resp.Body).Decode(&items); err != nil {
|
|
return nil, fmt.Errorf("decode dataset: %w", err)
|
|
}
|
|
return items, nil
|
|
}
|
|
|
|
// decodeJSON decodes the body of resp as JSON into dst and always closes the
// body before returning.
//
// A response with status 300 or above is not decoded; instead an error of the
// form "http <status>: <body>" is returned, where <body> is at most the first
// 1024 bytes of the response.
func decodeJSON(resp *http.Response, dst any) error {
	defer resp.Body.Close()

	if resp.StatusCode < 300 {
		dec := json.NewDecoder(resp.Body)
		return dec.Decode(dst)
	}

	// Best effort: a failed read just yields a shorter (or empty) snippet.
	snippet, _ := io.ReadAll(io.LimitReader(resp.Body, 1024))
	return fmt.Errorf("http %d: %s", resp.StatusCode, snippet)
}
|