Initial commit

This commit is contained in:
2026-05-13 19:42:49 -07:00
commit cfa01bd4ef
54 changed files with 11718 additions and 0 deletions

313
internal/apify/types.go Normal file
View File

@@ -0,0 +1,313 @@
package apify
import (
"encoding/json"
"strconv"
"strings"
)
// ActiveListingInput is the input schema for `automation-lab/ebay-scraper`.
// The actor accepts keyword searches and standard filters; it targets
// ebay.com only (no per-marketplace routing in the actor itself), so
// non-US marketplaces won't return useful results with this actor.
//
// Every field except SearchQueries is tagged omitempty, so zero values are
// left out of the encoded JSON instead of being sent to the actor.
type ActiveListingInput struct {
// SearchQueries has no omitempty: it is always encoded, as null when nil.
SearchQueries []string `json:"searchQueries"`
MaxProductsPerSearch int `json:"maxProductsPerSearch,omitempty"`
MaxSearchPages int `json:"maxSearchPages,omitempty"`
Sort string `json:"sort,omitempty"`
ListingType string `json:"listingType,omitempty"`
Condition []string `json:"condition,omitempty"`
// MinPrice and MaxPrice are pointers so an explicit 0 bound is still
// encoded; only a nil pointer is omitted.
MinPrice *int `json:"minPrice,omitempty"`
MaxPrice *int `json:"maxPrice,omitempty"`
// ProxyConfiguration is omitted from the payload when nil.
ProxyConfiguration *ProxyConfiguration `json:"proxyConfiguration,omitempty"`
}
// ProxyConfiguration is the standard Apify input block for proxy routing.
// eBay (and most retail sites) return 403 to datacenter IPs; passing
// {"useApifyProxy": true, "apifyProxyGroups": ["RESIDENTIAL"]} works.
type ProxyConfiguration struct {
// UseApifyProxy has no omitempty, so an explicit false is still encoded.
UseApifyProxy bool `json:"useApifyProxy"`
// ApifyProxyGroups and ApifyProxyCountry are left out of the JSON when empty.
ApifyProxyGroups []string `json:"apifyProxyGroups,omitempty"`
ApifyProxyCountry string `json:"apifyProxyCountry,omitempty"`
}
// ActiveListingResult is decoded leniently to handle multiple eBay-scraper
// actors. delicious_zebu/ebay-product-listing-scraper returns productUrl /
// imageUrl / numeric price; harvestlab/ebay-scraper used url / price /
// currency. The decoder coalesces both shapes.
//
// Fields typed `any` accept whatever JSON value the actor sends. The other
// fields are strictly typed: when an actor sends a mismatched JSON type for
// one of them, json.Unmarshal reports an error and Decode skips the row.
type ActiveListingResult struct {
Title string `json:"title"`
// Price is converted to a float64 by coercePrice inside Decode.
Price any `json:"price"`
// OriginalPrice is decoded but not read by Decode.
OriginalPrice any `json:"originalPrice"`
// Currency defaults to "USD" in Decode when empty.
Currency string `json:"currency"`
// URL is preferred by Decode; ProductURL is the fallback when URL is empty.
URL string `json:"url"`
ProductURL string `json:"productUrl"`
// Store falls back to Marketplace in Decode, then to "ebay" for
// SourceActiveEbay rows.
Store string `json:"store"`
// Image candidates, tried by Decode in this order: ImageURL, Image,
// Thumbnail, then the first entry of Images.
ImageURL string `json:"imageUrl"`
Image string `json:"image"`
Thumbnail string `json:"thumbnail"`
Images []string `json:"images"`
// Condition, ListingType, ShippingCost, ShippingPrice and FreeShipping are
// decoded but not read by Decode.
Condition string `json:"condition"`
ListingType string `json:"listingType"`
ShippingCost any `json:"shippingCost"`
ShippingPrice any `json:"shippingPrice"`
FreeShipping bool `json:"freeShipping"`
// Marketplace is only used as the fallback for an empty Store.
Marketplace string `json:"marketplace"`
// MatchConfidence is copied unchanged into UnifiedResult.MatchConfidence.
MatchConfidence float64 `json:"matchConfidence"`
// Availability is mapped to UnifiedResult.OutOfStock through isOOS.
Availability string `json:"availability"`
// WatchersCount and QuantitySold are decoded but not read by Decode.
WatchersCount int `json:"watchersCount"`
QuantitySold int `json:"quantitySold"`
}
// SoldListingInput is the JSON input payload for a sold-listings search.
// NOTE(review): the actor this schema targets is not named in this file —
// confirm against the caller.
type SoldListingInput struct {
// Query has no omitempty and is always encoded, even when empty.
Query string `json:"query"`
// The remaining fields are left out of the JSON when they hold their zero
// value (nil for ProxyConfiguration).
Marketplace string `json:"marketplace,omitempty"`
MaxResults int `json:"maxResults,omitempty"`
// DaysBack is presumably the look-back window in days — TODO confirm
// against the actor's input schema.
DaysBack int `json:"daysBack,omitempty"`
ProxyConfiguration *ProxyConfiguration `json:"proxyConfiguration,omitempty"`
}
// SoldListingResult is the decoded shape of one sold-listing row. Decode in
// this file has no case that produces or consumes it.
//
// SoldPrice and ShippingPrice are strict float64 fields, so a row that
// carries either value as a JSON string fails json.Unmarshal with a type
// error.
type SoldListingResult struct {
Title string `json:"title"`
SoldPrice float64 `json:"soldPrice"`
// Currency is read from the "soldCurrency" key, not "currency".
Currency string `json:"soldCurrency"`
// SoldAt is read from the "endedAt" key and kept as the raw string; no
// timestamp parsing happens at decode time.
SoldAt string `json:"endedAt"`
Condition string `json:"condition"`
ListingType string `json:"listingType"`
ShippingPrice float64 `json:"shippingPrice"`
URL string `json:"url"`
}
// PriceComparisonInput is the JSON input payload for a price-comparison run.
// All fields are tagged omitempty, so only the ones that are set are sent.
// NOTE(review): presumably callers set either Query or URL — confirm whether
// sending both (or neither) is valid for the target actor.
type PriceComparisonInput struct {
Query string `json:"query,omitempty"`
URL string `json:"url,omitempty"`
MatchStrictness string `json:"matchStrictness,omitempty"`
// ProxyConfiguration is omitted from the payload when nil.
ProxyConfiguration *ProxyConfiguration `json:"proxyConfiguration,omitempty"`
}
// PriceComparisonResult is the strictly-typed shape of one price-comparison
// row. Note that Decode in this file does not use it: rows labelled
// SourcePriceCompare are decoded through ActiveListingResult instead.
//
// Price is a float64, so a row whose price arrives as a JSON string fails
// json.Unmarshal with a type error when decoded into this type.
type PriceComparisonResult struct {
Title string `json:"title"`
Price float64 `json:"price"`
Currency string `json:"currency"`
URL string `json:"url"`
Store string `json:"store"`
ImageURL string `json:"imageUrl"`
Availability string `json:"availability"`
MatchConfidence float64 `json:"matchConfidence"`
}
// YahooAuctionsJPInput targets meron1122/zenmarket-scraper. ZenMarket is a
// buyer-proxy for Yahoo Auctions JP; its scraper returns ZenMarket-proxied
// listing URLs and USD-converted prices.
type YahooAuctionsJPInput struct {
// SearchTerm has no omitempty and is always encoded, even when empty.
SearchTerm string `json:"searchTerm"`
// The filters below are left out of the JSON when they hold their zero
// value. Note the "categoryID" key keeps the capitalised "ID" suffix.
CategoryID string `json:"categoryID,omitempty"`
MaxPages int `json:"maxPages,omitempty"`
MaxRemainingHours int `json:"maxRemainingHours,omitempty"`
}
// MercariJPInput targets cloud9_ai/mercari-scraper. The actor manages its
// own proxy (Japan datacenter with residential fallback), so we do not send
// a proxyConfiguration block.
//
// Every field is tagged omitempty, so only the fields that are set are sent.
type MercariJPInput struct {
SearchKeywords []string `json:"searchKeywords,omitempty"`
ProductUrls []string `json:"productUrls,omitempty"`
Status string `json:"status,omitempty"`
SortBy string `json:"sortBy,omitempty"`
// PriceMin and PriceMax are pointers so an explicit 0 bound is still
// encoded; only a nil pointer is omitted.
PriceMin *int `json:"priceMin,omitempty"`
PriceMax *int `json:"priceMax,omitempty"`
ItemCondition string `json:"itemCondition,omitempty"`
MaxResults int `json:"maxResults,omitempty"`
}
// YahooAuctionsJPResult matches meron1122/zenmarket-scraper output. Prices
// are USD-converted at the ZenMarket-published rate.
//
// Unlike the other result types in this file, the JSON keys are snake_case.
type YahooAuctionsJPResult struct {
// Name becomes UnifiedResult.Title in Decode.
Name string `json:"name"`
// CurrentPrice accepts a number or a string; Decode runs it through
// coercePrice.
CurrentPrice any `json:"current_price"`
// Photos supplies the image: Decode uses the first entry, if any.
Photos []string `json:"photos"`
URL string `json:"url"`
// EndingDate is decoded but not read by Decode.
EndingDate string `json:"ending_date"`
}
// UnifiedResult is the common shape produced by ParseResults regardless of
// which actor type returned the data. The scheduler consumes this.
//
// Within this file the rows are built by Decode. The struct carries no JSON
// tags; it is an in-memory type rather than a wire format.
type UnifiedResult struct {
Title string
// Price is 0 when the upstream price was missing or could not be parsed.
Price float64
// Currency is filled with a per-source default by Decode when the actor
// omits it ("USD" for eBay/price-comparison and Yahoo rows, "JPY" for
// Mercari rows).
Currency string
URL string
Store string
ImageURL string
// Source is the source label that was passed to Decode.
Source string
// MatchConfidence is only populated by Decode for eBay and
// price-comparison rows; it stays 0 for the other sources.
MatchConfidence float64
// OutOfStock is derived from the actor's availability/status string via
// isOOS; Decode does not set it for Yahoo rows.
OutOfStock bool
// MatchedQuery records which alias from the item's query list produced
// this row. Empty for URL-only items or rows from non-search sources.
// Decode never sets it; it is presumably filled in by the caller.
MatchedQuery string
}
// Decode converts raw actor dataset items into UnifiedResult rows using the
// result shape that matches the given source label.
//
// Supported labels are SourceActiveEbay, SourcePriceCompare, SourceYahooJP and
// SourceMercariJP. Any other label matches no case and yields an empty,
// non-nil slice.
//
// Decoding is best-effort: an item that json.Unmarshal rejects (malformed
// JSON, or a strictly-typed field whose JSON type does not fit) is skipped
// instead of failing the batch. The returned error is therefore always nil at
// present; it is kept in the signature for callers.
//
// Prices from every source go through coercePrice, so both numeric and string
// prices are accepted and an unparsable price becomes 0.
func Decode(items []json.RawMessage, source string) ([]UnifiedResult, error) {
	out := make([]UnifiedResult, 0, len(items))
	switch source {
	case SourceActiveEbay, SourcePriceCompare:
		for _, raw := range items {
			var r ActiveListingResult
			if err := json.Unmarshal(raw, &r); err != nil {
				// Best-effort: skip rows this shape cannot decode.
				continue
			}
			// Different actors use different keys; take the first
			// non-empty candidate for each logical field.
			url := r.URL
			if url == "" {
				url = r.ProductURL
			}
			img := r.ImageURL
			if img == "" {
				img = r.Image
			}
			if img == "" {
				img = r.Thumbnail
			}
			if img == "" && len(r.Images) > 0 {
				img = r.Images[0]
			}
			store := r.Store
			if store == "" {
				store = r.Marketplace
			}
			// Only eBay rows get a default store; price-comparison rows
			// keep an empty store when the actor provides none.
			if store == "" && source == SourceActiveEbay {
				store = "ebay"
			}
			cur := r.Currency
			if cur == "" {
				cur = "USD"
			}
			out = append(out, UnifiedResult{
				Title:           r.Title,
				Price:           coercePrice(r.Price),
				Currency:        cur,
				URL:             url,
				Store:           store,
				ImageURL:        img,
				Source:          source,
				MatchConfidence: r.MatchConfidence,
				OutOfStock:      isOOS(r.Availability),
			})
		}
	case SourceYahooJP:
		for _, raw := range items {
			var r YahooAuctionsJPResult
			if err := json.Unmarshal(raw, &r); err != nil {
				// Best-effort: skip rows this shape cannot decode.
				continue
			}
			img := ""
			if len(r.Photos) > 0 {
				img = r.Photos[0]
			}
			out = append(out, UnifiedResult{
				Title:    r.Name,
				Price:    coercePrice(r.CurrentPrice),
				Currency: "USD",
				URL:      r.URL,
				Store:    "yahoo-auctions-jp (via zenmarket)",
				ImageURL: img,
				Source:   source,
			})
		}
	case SourceMercariJP:
		// Mercari actors vary in shape; accept either price/currentPrice and title/name.
		for _, raw := range items {
			var generic struct {
				Title string `json:"title"`
				Name  string `json:"name"`
				// Prices are decoded as `any` and coerced below, matching
				// the other sources. A strict float64 here would make a
				// string price fail json.Unmarshal and drop the whole row.
				Price        any    `json:"price"`
				CurrentPrice any    `json:"currentPrice"`
				Currency     string `json:"currency"`
				URL          string `json:"url"`
				ImageURL     string `json:"imageUrl"`
				Image        string `json:"image"`
				Status       string `json:"status"`
			}
			if err := json.Unmarshal(raw, &generic); err != nil {
				// Best-effort: skip rows this shape cannot decode.
				continue
			}
			title := generic.Title
			if title == "" {
				title = generic.Name
			}
			// A missing or unparsable price coerces to 0, which triggers
			// the currentPrice fallback.
			price := coercePrice(generic.Price)
			if price == 0 {
				price = coercePrice(generic.CurrentPrice)
			}
			img := generic.ImageURL
			if img == "" {
				img = generic.Image
			}
			cur := generic.Currency
			if cur == "" {
				cur = "JPY"
			}
			out = append(out, UnifiedResult{
				Title:      title,
				Price:      price,
				Currency:   cur,
				URL:        generic.URL,
				Store:      "mercari-jp",
				ImageURL:   img,
				Source:     source,
				OutOfStock: isOOS(generic.Status),
			})
		}
	}
	return out, nil
}
// Source labels identify which actor family produced a row; Decode copies the
// label into UnifiedResult.Source.
//
// Decode has cases for SourceActiveEbay, SourcePriceCompare, SourceYahooJP and
// SourceMercariJP. SourceSoldEbay and SourceSoldYahooJP match no case there,
// so passing them to Decode yields an empty result.
const (
SourceActiveEbay = "ebay"
SourcePriceCompare = "price-comparison"
SourceYahooJP = "yahoo-auctions-jp"
SourceMercariJP = "mercari-jp"
SourceSoldEbay = "ebay-sold"
SourceSoldYahooJP = "yahoo-auctions-jp-sold"
)
// coercePrice converts a loosely-typed price field into a float64.
//
// Numeric inputs (float64, float32, int, int64) are converted directly.
// String inputs are scanned for their first numeric token, so currency
// symbols and thousands separators are tolerated ("$24.99" -> 24.99,
// "1,299.00" -> 1299) and only the leading amount of a range or annotated
// price is used ("$10 to $20" -> 10, "$24.99 (2 bids)" -> 24.99). Gluing
// every digit in the string together would instead fabricate amounts such as
// 1020 or 24.992 for those inputs.
//
// A '-' counts as a sign only when no ASCII letter sits between it and the
// first digit, so "-$5.00" is -5 but "pre-owned 5" is 5.
//
// Limitations: commas are always treated as grouping separators, so a decimal
// comma ("1.299,00") is not understood, and space-grouped thousands ("1 299")
// yield only the leading group.
//
// Returns 0 for nil, unsupported types, and strings without a parsable number
// so FilterResults can drop the row cleanly.
func coercePrice(v any) float64 {
	switch x := v.(type) {
	case nil:
		return 0
	case float64:
		return x
	case float32:
		return float64(x)
	case int:
		return float64(x)
	case int64:
		return float64(x)
	case string:
		var num strings.Builder
		negative, started, seenDot := false, false, false
		// Byte-wise scan is safe: every byte of a multi-byte UTF-8 rune is
		// >= 0x80 and so never matches the ASCII characters tested below.
	scan:
		for i := 0; i < len(x); i++ {
			c := x[i]
			isDigit := c >= '0' && c <= '9'
			nextIsDigit := i+1 < len(x) && x[i+1] >= '0' && x[i+1] <= '9'
			switch {
			case isDigit:
				started = true
				num.WriteByte(c)
			case c == '.' && !seenDot && (started || nextIsDigit):
				// Decimal point: either inside a number or leading one (".99").
				started = true
				seenDot = true
				num.WriteByte(c)
			case started && c == ',' && nextIsDigit:
				// Grouping separator inside the number; drop it.
			case started:
				// First character after the number ends the token.
				break scan
			case c == '-':
				negative = true
			case (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'):
				// A word between '-' and the number means the '-' was a
				// hyphen, not a sign.
				negative = false
			}
		}
		if num.Len() == 0 {
			return 0
		}
		f, err := strconv.ParseFloat(num.String(), 64)
		if err != nil {
			return 0
		}
		if negative {
			return -f
		}
		return f
	}
	return 0
}
// isOOS reports whether an availability/status string marks a listing as no
// longer purchasable. The recognised values are "out_of_stock", "sold" and
// "ended".
//
// Matching ignores letter case and surrounding whitespace, so variants such
// as "Sold", "ENDED" or " out_of_stock " are treated the same as the
// canonical lowercase forms. Any other value, including the empty string,
// reports false.
func isOOS(s string) bool {
	switch strings.ToLower(strings.TrimSpace(s)) {
	case "out_of_stock", "sold", "ended":
		return true
	}
	return false
}