Initial commit
This commit is contained in:
152
internal/apify/client.go
Normal file
152
internal/apify/client.go
Normal file
@@ -0,0 +1,152 @@
|
||||
package apify
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
const (
|
||||
apiBase = "https://api.apify.com/v2"
|
||||
pollEvery = 3 * time.Second
|
||||
pollTimeout = 5 * time.Minute
|
||||
)
|
||||
|
||||
// Client is a thin wrapper around the Apify run-and-fetch lifecycle.
|
||||
type Client struct {
|
||||
APIKey string
|
||||
HTTP *http.Client
|
||||
}
|
||||
|
||||
func New(apiKey string) *Client {
|
||||
return &Client{
|
||||
APIKey: apiKey,
|
||||
HTTP: &http.Client{Timeout: 30 * time.Second},
|
||||
}
|
||||
}
|
||||
|
||||
type runResponse struct {
|
||||
Data struct {
|
||||
ID string `json:"id"`
|
||||
Status string `json:"status"`
|
||||
DefaultDatasetID string `json:"defaultDatasetId"`
|
||||
} `json:"data"`
|
||||
}
|
||||
|
||||
// Run starts an actor run, waits for SUCCEEDED, and returns dataset items as raw JSON.
|
||||
func (c *Client) Run(ctx context.Context, actorID string, input any) ([]json.RawMessage, error) {
|
||||
if c.APIKey == "" {
|
||||
return nil, errors.New("apify api_key not configured")
|
||||
}
|
||||
if actorID == "" {
|
||||
return nil, errors.New("apify actor id is empty")
|
||||
}
|
||||
|
||||
body, err := json.Marshal(input)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Apify URLs use "~" to separate username and actor name, never "/".
|
||||
// Accept either form in config and normalize before path-escaping.
|
||||
urlActorID := strings.ReplaceAll(actorID, "/", "~")
|
||||
startURL := fmt.Sprintf("%s/acts/%s/runs?token=%s", apiBase, url.PathEscape(urlActorID), url.QueryEscape(c.APIKey))
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, startURL, bytes.NewReader(body))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
resp, err := c.HTTP.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("start run: %w", err)
|
||||
}
|
||||
var runResp runResponse
|
||||
if err := decodeJSON(resp, &runResp); err != nil {
|
||||
return nil, fmt.Errorf("start run: %w", err)
|
||||
}
|
||||
if runResp.Data.ID == "" {
|
||||
return nil, errors.New("start run: missing run id")
|
||||
}
|
||||
|
||||
deadline := time.Now().Add(pollTimeout)
|
||||
pollCtx, cancel := context.WithDeadline(ctx, deadline)
|
||||
defer cancel()
|
||||
|
||||
status, datasetID, err := c.waitForRun(pollCtx, runResp.Data.ID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if status != "SUCCEEDED" {
|
||||
return nil, fmt.Errorf("apify run terminated with status %s", status)
|
||||
}
|
||||
|
||||
return c.fetchDataset(ctx, datasetID)
|
||||
}
|
||||
|
||||
func (c *Client) waitForRun(ctx context.Context, runID string) (string, string, error) {
|
||||
pollURL := fmt.Sprintf("%s/actor-runs/%s?token=%s", apiBase, url.PathEscape(runID), url.QueryEscape(c.APIKey))
|
||||
for {
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, pollURL, nil)
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
resp, err := c.HTTP.Do(req)
|
||||
if err != nil {
|
||||
return "", "", fmt.Errorf("poll run: %w", err)
|
||||
}
|
||||
var r runResponse
|
||||
if err := decodeJSON(resp, &r); err != nil {
|
||||
return "", "", fmt.Errorf("poll run: %w", err)
|
||||
}
|
||||
switch r.Data.Status {
|
||||
case "SUCCEEDED", "FAILED", "ABORTED", "TIMED-OUT":
|
||||
return r.Data.Status, r.Data.DefaultDatasetID, nil
|
||||
}
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return "", "", ctx.Err()
|
||||
case <-time.After(pollEvery):
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (c *Client) fetchDataset(ctx context.Context, datasetID string) ([]json.RawMessage, error) {
|
||||
if datasetID == "" {
|
||||
return nil, errors.New("missing dataset id")
|
||||
}
|
||||
dsURL := fmt.Sprintf("%s/datasets/%s/items?clean=true&format=json&token=%s", apiBase, url.PathEscape(datasetID), url.QueryEscape(c.APIKey))
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, dsURL, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
resp, err := c.HTTP.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("fetch dataset: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode >= 300 {
|
||||
b, _ := io.ReadAll(io.LimitReader(resp.Body, 1024))
|
||||
return nil, fmt.Errorf("dataset returned %d: %s", resp.StatusCode, string(b))
|
||||
}
|
||||
var items []json.RawMessage
|
||||
if err := json.NewDecoder(resp.Body).Decode(&items); err != nil {
|
||||
return nil, fmt.Errorf("decode dataset: %w", err)
|
||||
}
|
||||
return items, nil
|
||||
}
|
||||
|
||||
func decodeJSON(resp *http.Response, dst any) error {
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode >= 300 {
|
||||
b, _ := io.ReadAll(io.LimitReader(resp.Body, 1024))
|
||||
return fmt.Errorf("http %d: %s", resp.StatusCode, string(b))
|
||||
}
|
||||
return json.NewDecoder(resp.Body).Decode(dst)
|
||||
}
|
||||
Reference in New Issue
Block a user