// Package apify is a small client for the Apify actor run-and-fetch lifecycle:
// start an actor run, poll it until it reaches a terminal status, and download
// the items of the run's default dataset.
package apify

import (
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strings"
	"time"
)

const (
	apiBase     = "https://api.apify.com/v2"
	pollEvery   = 3 * time.Second
	pollTimeout = 5 * time.Minute

	// requestTimeout bounds a single HTTP round trip made by the clients this
	// package constructs itself.
	requestTimeout = 30 * time.Second

	// errBodyLimit caps how much of a non-2xx response body is copied into an
	// error message.
	errBodyLimit = 1024

	// drainLimit caps how many unread bytes are discarded before a response
	// body is closed.
	drainLimit = 64 << 10
)

// fallbackHTTP is used when a Client was built without an HTTP client (for
// example as a struct literal), so that Run does not dereference nil.
var fallbackHTTP = &http.Client{Timeout: requestTimeout}

// Client is a thin wrapper around the Apify run-and-fetch lifecycle.
type Client struct {
	// APIKey is the Apify API token. Run rejects an empty key.
	APIKey string
	// HTTP performs the requests. When nil, a shared client with a
	// 30-second timeout is used instead.
	HTTP *http.Client
}

// New returns a Client that authenticates with apiKey and uses an HTTP client
// with a 30-second per-request timeout.
func New(apiKey string) *Client {
	return &Client{
		APIKey: apiKey,
		HTTP:   &http.Client{Timeout: requestTimeout},
	}
}

// runResponse is the subset of the run object, wrapped in "data", that this
// package reads from the start-run and get-run responses.
type runResponse struct {
	Data struct {
		ID               string `json:"id"`
		Status           string `json:"status"`
		DefaultDatasetID string `json:"defaultDatasetId"`
	} `json:"data"`
}

// Run starts an actor run, waits for SUCCEEDED, and returns dataset items as raw JSON.
//
// actorID may be written as "user/actor" or "user~actor"; slashes are replaced
// with "~" before the ID is placed in the URL path. input is marshalled to
// JSON and sent as the request body.
//
// Polling is bounded by pollTimeout on top of any deadline already carried by
// ctx. The final dataset download uses ctx directly, so it is not cut short
// by the polling budget.
//
// An error is returned when the API key or actor ID is empty, when input
// cannot be marshalled, when any request fails or answers with a status of 300
// or above, when the start response has no run ID, or when the run ends in a
// status other than SUCCEEDED.
func (c *Client) Run(ctx context.Context, actorID string, input any) ([]json.RawMessage, error) {
	if c.APIKey == "" {
		return nil, errors.New("apify api_key not configured")
	}
	if actorID == "" {
		return nil, errors.New("apify actor id is empty")
	}

	payload, err := json.Marshal(input)
	if err != nil {
		return nil, err
	}

	// Apify URLs use "~" to separate username and actor name, never "/".
	// Accept either form in config and normalize before path-escaping.
	urlActorID := strings.ReplaceAll(actorID, "/", "~")
	startURL := apiBase + "/acts/" + url.PathEscape(urlActorID) + "/runs"

	resp, err := c.do(ctx, http.MethodPost, startURL, bytes.NewReader(payload))
	if err != nil {
		return nil, fmt.Errorf("start run: %w", err)
	}
	var started runResponse
	if err := decodeJSON(resp, &started); err != nil {
		return nil, fmt.Errorf("start run: %w", err)
	}
	if started.Data.ID == "" {
		return nil, errors.New("start run: missing run id")
	}

	pollCtx, cancel := context.WithTimeout(ctx, pollTimeout)
	defer cancel()

	status, datasetID, err := c.waitForRun(pollCtx, started.Data.ID)
	if err != nil {
		return nil, err
	}
	if status != "SUCCEEDED" {
		return nil, fmt.Errorf("apify run terminated with status %s", status)
	}
	return c.fetchDataset(ctx, datasetID)
}

// waitForRun polls the run every pollEvery until it reports a terminal status
// (SUCCEEDED, FAILED, ABORTED or TIMED-OUT) and returns that status together
// with the run's default dataset ID. It returns ctx.Err() if ctx ends while
// waiting between polls, and a wrapped error if a poll request fails.
func (c *Client) waitForRun(ctx context.Context, runID string) (string, string, error) {
	pollURL := apiBase + "/actor-runs/" + url.PathEscape(runID)
	for {
		resp, err := c.do(ctx, http.MethodGet, pollURL, nil)
		if err != nil {
			return "", "", fmt.Errorf("poll run: %w", err)
		}
		var r runResponse
		if err := decodeJSON(resp, &r); err != nil {
			return "", "", fmt.Errorf("poll run: %w", err)
		}

		switch r.Data.Status {
		case "SUCCEEDED", "FAILED", "ABORTED", "TIMED-OUT":
			return r.Data.Status, r.Data.DefaultDatasetID, nil
		}

		if err := sleepCtx(ctx, pollEvery); err != nil {
			return "", "", err
		}
	}
}

// fetchDataset downloads the items of datasetID (requested with clean=true and
// format=json) and returns them undecoded. It fails when datasetID is empty,
// when the request fails, when the response status is 300 or above, or when
// the body is not a JSON array.
func (c *Client) fetchDataset(ctx context.Context, datasetID string) ([]json.RawMessage, error) {
	if datasetID == "" {
		return nil, errors.New("missing dataset id")
	}
	dsURL := apiBase + "/datasets/" + url.PathEscape(datasetID) + "/items?clean=true&format=json"

	resp, err := c.do(ctx, http.MethodGet, dsURL, nil)
	if err != nil {
		return nil, fmt.Errorf("fetch dataset: %w", err)
	}
	defer closeBody(resp.Body)

	if resp.StatusCode >= 300 {
		// The body is only used to enrich the error; a read failure here
		// must not mask the status code, so its error is ignored.
		b, _ := io.ReadAll(io.LimitReader(resp.Body, errBodyLimit))
		return nil, fmt.Errorf("dataset returned %d: %s", resp.StatusCode, string(b))
	}

	var items []json.RawMessage
	if err := json.NewDecoder(resp.Body).Decode(&items); err != nil {
		return nil, fmt.Errorf("decode dataset: %w", err)
	}
	return items, nil
}

// do builds and sends one authenticated request. The API token travels in the
// Authorization header rather than in the query string: transport failures
// are reported as *url.Error values whose message includes the request URL, so
// a token placed in the URL would end up in every wrapped error.
func (c *Client) do(ctx context.Context, method, rawURL string, body io.Reader) (*http.Response, error) {
	req, err := http.NewRequestWithContext(ctx, method, rawURL, body)
	if err != nil {
		return nil, err
	}
	req.Header.Set("Authorization", "Bearer "+c.APIKey)
	if body != nil {
		req.Header.Set("Content-Type", "application/json")
	}

	hc := c.HTTP
	if hc == nil {
		hc = fallbackHTTP
	}
	return hc.Do(req)
}

// sleepCtx waits for d or until ctx is done, whichever comes first, and
// returns ctx.Err() in the latter case. The timer is stopped on early exit so
// it does not stay pending after cancellation.
func sleepCtx(ctx context.Context, d time.Duration) error {
	timer := time.NewTimer(d)
	defer timer.Stop()
	select {
	case <-ctx.Done():
		return ctx.Err()
	case <-timer.C:
		return nil
	}
}

// closeBody discards a bounded amount of unread data and closes body, which
// lets the transport reuse the underlying connection for the next poll.
func closeBody(body io.ReadCloser) {
	// Best-effort cleanup: there is nothing useful to do with a failure to
	// drain or close a body that has already been consumed or rejected.
	_, _ = io.Copy(io.Discard, io.LimitReader(body, drainLimit))
	_ = body.Close()
}

// decodeJSON consumes and closes resp. A status of 300 or above becomes an
// error carrying the status code and up to errBodyLimit bytes of the body;
// otherwise the body is decoded as JSON into dst.
func decodeJSON(resp *http.Response, dst any) error {
	defer closeBody(resp.Body)
	if resp.StatusCode >= 300 {
		// See fetchDataset: the read error is deliberately ignored so the
		// status code is always reported.
		b, _ := io.ReadAll(io.LimitReader(resp.Body, errBodyLimit))
		return fmt.Errorf("http %d: %s", resp.StatusCode, string(b))
	}
	return json.NewDecoder(resp.Body).Decode(dst)
}