diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..f3df33d --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,75 @@ +name: Release + +# Triggers on git tag push `vX.Y.Z`. Builds the wheel + sdist and publishes +# to PyPI via TRUSTED PUBLISHING (OIDC) — no API token to manage. +# +# Setup once on the PyPI side: +# PyPI → Manage socialapis → Publishing → Add trusted publisher: +# Owner: SocialAPIsHub +# Repository: socialapis-python +# Workflow: release.yml +# Environment: pypi +# +# Then to ship: +# 1. Bump socialapis/_version.py +# 2. Add CHANGELOG.md entry +# 3. Commit + git tag v0.X.Y +# 4. git push origin main v0.X.Y (pushes the commit AND only this tag) +# 5. This workflow auto-publishes; PyPI page updates in ~30s + +on: + push: + tags: + - "v*.*.*" + +# Single in-flight release at a time (no race between two tag pushes) +concurrency: + group: release + cancel-in-progress: false + +jobs: + build: + name: Build wheel + sdist + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v5 + - uses: actions/setup-python@v6 + with: + python-version: "3.12" + - name: Install build + run: pip install --upgrade build + - name: Build distributions + run: python -m build + - name: Verify version matches tag + # Belt-and-suspenders — fail loudly if the tag and the package + # version disagree, instead of shipping a confusing release.
+ run: | + tag="${GITHUB_REF_NAME#v}" + file_version=$(ls dist/socialapis-*.tar.gz | sed -E 's|.*socialapis-([^-]+)\.tar\.gz|\1|') + if [ "$tag" != "$file_version" ]; then + echo "::error::Tag ${tag} does not match package version ${file_version}" + exit 1 + fi + - name: Upload built artifacts + uses: actions/upload-artifact@v5 + with: + name: dist + path: dist/ + + publish: + name: Publish to PyPI + needs: build + runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/project/socialapis/ + permissions: + id-token: write # OIDC token for PyPI Trusted Publishing + steps: + - name: Download built distributions + uses: actions/download-artifact@v5 + with: + name: dist + path: dist/ + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..87afe41 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,63 @@ +name: Test + +# Runs on every PR + every push to main. Three jobs: +# 1. lint — ruff (format + lint, one tool replaces black/isort/flake8) +# 2. types — mypy --strict (catches Any leaks, missing annotations) +# 3. test — pytest on a matrix of supported Python versions +# +# All three must pass before a PR can be merged (configure as required +# checks in the repo Settings → Branches → main branch protection). + +on: + push: + branches: [main] + pull_request: + branches: [main] + +# Cancel in-progress runs when a new commit lands on the same PR — saves CI minutes +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v5 + - uses: actions/setup-python@v6 + with: + python-version: "3.12" + - name: Install ruff + run: pip install "ruff>=0.6" + - name: Lint + run: ruff check . + - name: Format check + run: ruff format --check . 
+ + types: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v5 + - uses: actions/setup-python@v6 + with: + python-version: "3.12" + - name: Install package + dev deps + run: pip install -e ".[dev]" + - name: Mypy + run: mypy socialapis tests + + test: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.10", "3.11", "3.12", "3.13"] + steps: + - uses: actions/checkout@v5 + - uses: actions/setup-python@v6 + with: + python-version: ${{ matrix.python-version }} + - name: Install package + dev deps + run: pip install -e ".[dev]" + - name: Run pytest + run: pytest diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..84c8388 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,87 @@ +# Changelog + +All notable changes to this project will be documented here. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [0.1.0] — Unreleased + +First public release. Full coverage of the SocialAPIs.io public REST surface +in one shot — no v0.2/v0.3 follow-ups required for core endpoints. 
+ +### Added — Facebook namespace (`Facebook` / `AsyncFacebook`) + +**Pages**: `get_page_id`, `get_page_info`, `get_page_posts`, `get_page_reels`, +`get_page_videos` + +**Groups**: `get_group_id`, `get_group_details`, `get_group_metadata`, +`get_group_posts`, `get_group_videos` + +**Posts**: `get_post_id`, `get_post_details`, `get_post_details_extended`, +`get_post_comments`, `get_comment_replies`, `get_post_attachments`, +`get_video_post_details` + +**Search**: `search_pages`, `search_people`, `search_locations`, +`search_posts`, `search_videos` + +**Meta Ads Library**: `get_ads_countries`, `search_ads`, +`get_ads_page_details`, `get_ad_archive_details`, `search_ads_by_keywords` + +**Marketplace**: `search_marketplace`, `get_listing_details`, +`get_seller_details`, `get_marketplace_categories`, `get_city_coordinates`, +`search_vehicles`, `search_rentals` + +**Media**: `download_media` + +### Added — Instagram namespace (`Instagram` / `AsyncInstagram`) + +**Profiles**: `get_user_id`, `get_profile_details`, `get_profile_posts`, +`get_profile_reels`, `get_profile_highlights`, `get_highlight_details` + +**Posts**: `get_post_id`, `get_post_details` + +**Reels**: `get_reels_feed`, `get_reels_by_audio` + +**Search + Locations**: `search`, `get_location_posts`, +`get_nearby_locations` + +### Added — Account namespace (`Account` / `AsyncAccount`) + +`get_usage`, `get_top_ups`, `get_limits`. All free (don't consume credits). + +### Added — Infrastructure + +- Typed exception hierarchy (`SocialAPIsError`, `APIError`, + `AuthenticationError`, `InsufficientCreditsError`, `RateLimitError`, + `BadRequestError`, `APIServerError`, `APIConnectionError`) +- Pydantic v2 response models for headline endpoints (`PageInfo`, + `GroupInfo`, `ProfileInfo`). Niche endpoints return `dict[str, Any]` + with full data preserved. 
+- Sync + async context-manager support (`with` / `async with`) +- Identifier normalisation — pass either a slug or a full URL; the SDK + coerces to whatever shape the API expects +- `**kwargs` pass-through on every method — forward-compatible when the + API adds new filters; no client release needed to use them +- No `limit=N` parameters anywhere — the API decides page size; pagination + is cursor-driven via response body + kwargs + +### Added — Migration aliases (graveyard capture) + +- `FacebookScraper` / `AsyncFacebookScraper` — exact aliases of + `Facebook` / `AsyncFacebook`. Lets users of the abandoned + `kevinzg/facebook-scraper` library migrate by changing only the import. +- `InstagramScraper` / `AsyncInstagramScraper` — same for users of + `arc298/instagram-scraper`. +- `test_aliases.py` asserts the identity contract so accidental + decoupling fails CI. + +### Added — Tooling + +- `pyproject.toml` with hatchling, modern Python (3.10+), no `setup.py` +- Test suite using `respx` for HTTP mocking (no live API calls in CI) +- CI: lint (ruff), type check (mypy --strict), tests on Python 3.10–3.13 +- Release workflow: publishes to PyPI via Trusted Publishing on + `vX.Y.Z` tag (no API token to rotate) +- PEP 561 `py.typed` marker — distributed type hints +- Coverage gate at 70% in CI diff --git a/README.md b/README.md index b03b195..e6f19e8 100644 --- a/README.md +++ b/README.md @@ -1 +1,291 @@ -# socialapis-python \ No newline at end of file +# socialapis — Python SDK for Facebook + Instagram public data + +[![PyPI](https://img.shields.io/pypi/v/socialapis.svg)](https://pypi.org/project/socialapis/) +[![Python versions](https://img.shields.io/pypi/pyversions/socialapis.svg)](https://pypi.org/project/socialapis/) +[![License](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE) + +The modern alternative to [`kevinzg/facebook-scraper`](https://github.com/kevinzg/facebook-scraper) +and
[`arc298/instagram-scraper`](https://github.com/arc298/instagram-scraper) — +real-time Facebook + Instagram data via REST, **no OAuth dance, no app +review, no scraper maintenance**. Powered by hosted infrastructure at +[socialapis.io](https://socialapis.io). + +```bash +pip install socialapis +``` + +```python +from socialapis import Facebook, Instagram + +fb = Facebook(api_token="...") +page = fb.get_page_info("EngenSA") +print(page.name, page.likes, page.category) + +ig = Instagram(api_token="...") +profile = ig.get_profile_details("instagram") +print(profile.username, profile.followers) +``` + +**[Get a free API token →](https://socialapis.io/auth/signup)** — 200 calls/month, no credit card + +## One-line migration + +If your code currently uses [`kevinzg/facebook-scraper`](https://github.com/kevinzg/facebook-scraper) +or [`arc298/instagram-scraper`](https://github.com/arc298/instagram-scraper), the migration is +**literally one import line**: + +```python +# Before — kevinzg/facebook-scraper (abandoned since 2022) +from facebook_scraper import get_page_info, get_posts + +# After — socialapis (alias preserves the name) +from socialapis import FacebookScraper +fb = FacebookScraper(api_token="...") +fb.get_page_info("EngenSA") +fb.get_page_posts("EngenSA") + +# Same for Instagram +from socialapis import InstagramScraper +ig = InstagramScraper(api_token="...") +``` + +`FacebookScraper` and `InstagramScraper` are exact aliases of `Facebook` and `Instagram` — +identical behavior, identical type signatures. They exist purely to keep the import +line greppable during migration. + +--- + +## Why this exists + +`kevinzg/facebook-scraper` has 9.5k+ GitHub stars and was the default Python library for +scraping Facebook for years. It's been **abandoned since 2022**. `arc298/instagram-scraper` +(8.5k stars) is in similar shape. Every Meta DOM change breaks them; fixes pile up in +unmerged PRs; downloads drift to forks that fix one bug and break two. 
+ +This SDK is the **drop-in successor**: + +| | `kevinzg/facebook-scraper` (2018-era) | `socialapis` (2026) | +|---|---|---| +| **Maintenance** | Abandoned 2022 | Active; we run prod for 7M+ calls/mo | +| **Reliability** | Breaks on every Meta HTML change | Hosted backend; we absorb breakage | +| **Type hints** | None | Strict throughout | +| **Async support** | No | `Facebook` + `AsyncFacebook` classes | +| **HTTP client** | `requests` | `httpx` | +| **Validation** | Manual dict parsing | Pydantic v2 models | +| **Auth** | None (scrapes anonymously) | Single `x-api-token` header | +| **Pagination** | Generator with edge-case bugs | Cursor-based; API decides page size | +| **Error handling** | Generic exceptions | Typed hierarchy (`RateLimitError`, etc.) | +| **CI / tests** | Manual against live FB | Recorded HTTP fixtures, Python 3.10–3.13 | +| **Coverage** | Page posts, group posts only | **45+ endpoints** across FB + IG | + +The trade-off: instead of running a scraper yourself, you make a REST call to our hosted +API. **200 calls/month free**, no credit card. Paid plans start at $4.99/mo for 1,500 +calls. 
+ +## What's covered (v0.1) + +### Facebook (`Facebook` / `AsyncFacebook`) + +**Pages** +- `get_page_id(page)` — resolve a URL/slug to numeric ID +- `get_page_info(page)` → `PageInfo` — page metadata (typed model) +- `get_page_posts(page)` — recent posts +- `get_page_reels(page)` — short-form videos +- `get_page_videos(page)` — long-form videos + +**Groups** +- `get_group_id(group)` +- `get_group_details(group)` → `GroupInfo` (typed model) +- `get_group_metadata(group)` — lightweight metadata only +- `get_group_posts(group)` +- `get_group_videos(group_id)` + +**Posts** +- `get_post_id(post)` — extract numeric ID from URL +- `get_post_details(post)` — reactions, media, author +- `get_post_details_extended(post)` — + views, video URLs, author verification +- `get_post_comments(post)` — pass `include_reply_info="true"` for reply cursors +- `get_comment_replies(comment_feedback_id, expansion_token)` +- `get_post_attachments(post_id)` +- `get_video_post_details(video_id)` + +**Search** +- `search_pages(query)` — supports `location_id` for geo-filtering +- `search_people(query)` +- `search_locations(query)` — returns location IDs for use in other endpoints +- `search_posts(query)` — supports recency + location filters +- `search_videos(query)` + +**Meta Ads Library** +- `get_ads_countries()` — supported countries +- `search_ads(query)` — by keyword + country + activeStatus +- `get_ads_page_details(page_id)` +- `get_ad_archive_details(ad_archive_id, page_id)` +- `search_ads_by_keywords(query)` + +**Marketplace** +- `search_marketplace(query)` — supports lat/lng, price, condition filters +- `get_listing_details(listing_id)` +- `get_seller_details(seller_id)` +- `get_marketplace_categories()` +- `get_city_coordinates(city)` — for lat/lng filtering +- `search_vehicles()` — bedrooms-style filters; lat/lng required +- `search_rentals()` + +**Media** +- `download_media(url)` — resolve to direct downloadable URL + +### Instagram (`Instagram` / `AsyncInstagram`) + +**Profiles** 
+- `get_user_id(profile)` — username/URL → numeric user_id +- `get_profile_details(username)` → `ProfileInfo` (typed model) +- `get_profile_posts(username)` +- `get_profile_reels(user_id)` +- `get_profile_highlights(user_id)` +- `get_highlight_details(highlight_id)` + +**Posts** +- `get_post_id(post)` — extract shortcode from any post URL +- `get_post_details(shortcode)` + +**Reels** +- `get_reels_feed()` — trending feed +- `get_reels_by_audio(audio_id)` — all reels using a specific track + +**Search + Locations** +- `search(keyword)` — popular results (users / hashtags / places) +- `get_location_posts(location_id)` — top or recent +- `get_nearby_locations(location_id)` + +### Account (`Account` / `AsyncAccount`) + +Free calls — don't consume credits. + +- `get_usage()` — credit balance, plan, billing period +- `get_top_ups()` — auto top-up settings + history +- `get_limits()` — rate limit, concurrent-task cap, allowed packages + +## Pagination — no `limit=N`, just cursors + +Every endpoint that returns a list lets the API decide page size. To paginate, take the +cursor from the response body and pass it back as a kwarg on the next call: + +```python +fb = Facebook(api_token="...") + +# First page +result = fb.get_page_posts("EngenSA") +posts = result["posts"] +cursor = result.get("next_cursor") # actual key varies by endpoint — check docs + +# Next page +while cursor: + result = fb.get_page_posts("EngenSA", cursor=cursor) + posts.extend(result["posts"]) + cursor = result.get("next_cursor") +``` + +We deliberately don't impose a uniform `limit=N` parameter — it would drift from the +API's actual semantics. The API's response always tells you whether there's more. + +## Forward-compat via `**kwargs` + +Every method accepts arbitrary kwargs and forwards them as query params. 
If the API adds +a new filter tomorrow, you can use it today — no SDK release required: + +```python +fb.search_ads("fitness", country="US", activeStatus="Active", some_new_filter="x") +# Sends: ?query=fitness&country=US&activeStatus=Active&some_new_filter=x +``` + +## Error handling + +```python +import time +from socialapis import ( + Facebook, + AuthenticationError, # 401 — bad token + InsufficientCreditsError, # 402 — out of credits + RateLimitError, # 429 — slow down + BadRequestError, # 4xx — bad input + APIServerError, # 5xx — retry safely + APIConnectionError, # network — retry with backoff +) + +fb = Facebook(api_token="...") +try: + page = fb.get_page_info("EngenSA") +except RateLimitError as exc: + time.sleep(exc.retry_after_seconds or 5) + page = fb.get_page_info("EngenSA") +except InsufficientCreditsError: + print("Out of credits. Upgrade at https://socialapis.io/pricing") +except AuthenticationError: + print("Bad token. Get one at https://socialapis.io/auth/signup") +``` + +Every typed exception carries `.status_code`, `.request_id`, and `.body` for debugging. +The `request_id` is the same value our backend logs — paste it into a support email +and we can find the exact call. + +## Async + +Same method surface; methods are coroutines. + +```python +import asyncio +from socialapis import AsyncFacebook + +async def main(): + async with AsyncFacebook(api_token="...") as fb: + pages = await asyncio.gather(*[ + fb.get_page_info(slug) + for slug in ["EngenSA", "Microsoft", "GitHub"] + ]) + for page in pages: + print(page.name, page.followers) + +asyncio.run(main()) +``` + +## Pricing + +| Tier | Calls / month | Price | +|---|---|---| +| **Free** | 200 | $0 | +| Pro | 1,500 | $4.99 | +| Ultra | 30,000 | $49 | +| Mega | 120,000 | $179 | +| Enterprise | Custom | [Contact us](https://socialapis.io/contact-us) | + +One credit per successful response. Failed calls (4xx caused by bad input) don't +consume credits. 
+ +## Other languages + +- **JavaScript / TypeScript** — coming soon. [Notify me →](https://socialapis.io/api-sources) +- **PHP** — coming soon. [Notify me →](https://socialapis.io/api-sources) +- **Go** — coming soon. [Notify me →](https://socialapis.io/api-sources) +- Any language right now: hit the REST API directly with `curl` / `fetch` / `requests`. Docs at [docs.socialapis.io](https://docs.socialapis.io). + +## Support + +- Docs: [docs.socialapis.io](https://docs.socialapis.io) +- Issues: [github.com/SocialAPIsHub/socialapis-python/issues](https://github.com/SocialAPIsHub/socialapis-python/issues) +- Email: [support@socialapis.io](mailto:support@socialapis.io) +- Telegram (fastest): [t.me/socialapis](https://t.me/socialapis) + +## License + +MIT — see [LICENSE](LICENSE). + +--- + +Keywords: facebook scraper python, facebook scraper alternative, +facebook api python, facebook scraper not working, kevinzg facebook scraper +fork, instagram scraper python, arc298 instagram-scraper alternative, +instagram api python, facebook graph api alternative, facebook api without +oauth, meta api python sdk, facebook ads library api python, facebook +marketplace api python, instagram profile scraper, instagram reels api, +meta ads library python, social media api python. diff --git a/examples/migrate-from-kevinzg.py b/examples/migrate-from-kevinzg.py new file mode 100644 index 0000000..010ca2f --- /dev/null +++ b/examples/migrate-from-kevinzg.py @@ -0,0 +1,78 @@ +"""Side-by-side migration example: kevinzg/facebook-scraper → socialapis. + +This script demonstrates the one-line import change required to migrate +from the abandoned kevinzg/facebook-scraper library (9.5k stars, +broken since ~2022) to the modern hosted `socialapis` SDK. + +The shape stays familiar — the `FacebookScraper` alias exists for +exactly this purpose. Method names match kevinzg's where call shape +allows (`get_page_info`, `get_page_posts`, etc.) 
and return typed +Pydantic models you can autocomplete in your IDE. + +Run this: + 1. Sign up free at https://socialapis.io/auth/signup + 2. export SOCIALAPIS_TOKEN="" + 3. python examples/migrate-from-kevinzg.py +""" + +from __future__ import annotations + +import os + +# --------------------------------------------------------------------------- +# BEFORE — kevinzg/facebook-scraper (abandoned, breaks on every Meta change) +# --------------------------------------------------------------------------- +# +# from facebook_scraper import get_page_info, get_posts +# +# page = get_page_info("EngenSA") +# print(page["name"], page["likes"]) +# +# for post in get_posts("EngenSA", pages=5): +# print(post["time"], post["text"][:80]) + +# --------------------------------------------------------------------------- +# AFTER — socialapis (hosted, typed, maintained) +# --------------------------------------------------------------------------- + +from socialapis import FacebookScraper, InsufficientCreditsError, RateLimitError + + +def main() -> None: + token = os.environ.get("SOCIALAPIS_TOKEN") + if not token: + raise SystemExit( + "Set SOCIALAPIS_TOKEN — sign up free at " + "https://socialapis.io/auth/signup" + ) + + # `FacebookScraper` is an alias of `Facebook` — exact same class, + # different name so migrating imports from kevinzg/facebook-scraper + # stays a one-liner. + with FacebookScraper(api_token=token) as fb: + try: + page = fb.get_page_info("EngenSA") + except RateLimitError as exc: + raise SystemExit( + f"Rate-limited. Wait {exc.retry_after_seconds}s and retry." + ) from exc + except InsufficientCreditsError: + raise SystemExit( + "Out of credits. 
Upgrade at https://socialapis.io/pricing" + ) from None + + # Same fields kevinzg returned, but now typed (page.name not page["name"]) + print(f"Page: {page.name}") + print(f" Category: {page.category}") + print(f" Likes: {page.likes:,}" if page.likes else " Likes: n/a") + + # kevinzg's `for post in get_posts(...)` equivalent — paginate via cursors + result = fb.get_page_posts("EngenSA") + for post in result.get("posts", [])[:5]: + timestamp = post.get("time") or post.get("published_at", "?") + text = post.get("text") or post.get("message", "") + print(f" [{timestamp}] {text[:80]}") + + +if __name__ == "__main__": + main() diff --git a/examples/quickstart.py b/examples/quickstart.py new file mode 100644 index 0000000..bc2fdc2 --- /dev/null +++ b/examples/quickstart.py @@ -0,0 +1,68 @@ +"""Quick-start example for the SocialAPIs Python SDK. + +Run this: + 1. Sign up free at https://socialapis.io/auth/signup (200 calls/month, no card) + 2. Copy your API token from the dashboard + 3. Set it as an env var: export SOCIALAPIS_TOKEN="..." + 4. 
Run: python examples/quickstart.py +""" + +from __future__ import annotations + +import os + +from socialapis import ( + Account, + AuthenticationError, + Facebook, + InsufficientCreditsError, + Instagram, + RateLimitError, +) + + +def main() -> None: + token = os.environ.get("SOCIALAPIS_TOKEN") + if not token: + raise SystemExit( + "Set SOCIALAPIS_TOKEN — sign up free at https://socialapis.io/auth/signup" + ) + + # Account info first — confirms the token works + shows your budget + with Account(api_token=token) as acc: + try: + usage = acc.get_usage() + except AuthenticationError as exc: + raise SystemExit(f"Bad token: {exc}") from exc + print("Account:") + print(f" Credits: {usage}") + print() + + # Facebook + with Facebook(api_token=token) as fb: + try: + page = fb.get_page_info("EngenSA") + except (RateLimitError, InsufficientCreditsError) as exc: + raise SystemExit(f"Facebook call failed: {exc}") from exc + print(f"Facebook page: {page.name}") + print(f" Category: {page.category}") + print(f" Likes: {page.likes:,}" if page.likes else " Likes: n/a") + print(f" Followers: {page.followers:,}" if page.followers else " Followers: n/a") + print(f" Verified: {page.verified}") + print() + + # Instagram + with Instagram(api_token=token) as ig: + try: + profile = ig.get_profile_details("instagram") + except (RateLimitError, InsufficientCreditsError) as exc: + raise SystemExit(f"Instagram call failed: {exc}") from exc + print(f"Instagram profile: @{profile.username}") + print(f" Full name: {profile.full_name}") + print(f" Followers: {profile.followers:,}" if profile.followers else " Followers: n/a") + print(f" Posts: {profile.posts_count}") + print(f" Verified: {profile.is_verified}") + + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..97797c3 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,179 @@ +# pyproject.toml — single source of truth for build, deps, and dev tooling. 
+# +# Build backend: hatchling (modern, fast, PEP 517-compliant; replaces setup.py). +# Runtime deps kept deliberately minimal: httpx (HTTP) + pydantic (validation). +# No deprecated alternatives (no requests, no setuptools, no dataclasses). + +[build-system] +requires = ["hatchling>=1.21"] +build-backend = "hatchling.build" + +# ============================================================================= +# PACKAGE METADATA +# ============================================================================= +[project] +name = "socialapis" +dynamic = ["version"] +description = "Python SDK for Facebook and Instagram public data. Drop-in replacement for facebook-scraper. REST + MCP, 200 free API calls/month, no OAuth." +readme = "README.md" +requires-python = ">=3.10" +authors = [{ name = "SocialAPIs", email = "pypi@socialapis.io" }] +license = { text = "MIT" } +keywords = [ + "facebook", + "instagram", + "facebook-api", + "instagram-api", + "facebook-scraper", + "instagram-scraper", + "social-media", + "social-media-api", + "scraping", + "data-extraction", + "meta-api", + "facebook-graph-api-alternative", + "mcp", + "ai-agents", +] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: Implementation :: CPython", + "Topic :: Internet :: WWW/HTTP", + "Topic :: Software Development :: Libraries :: Python Modules", + "Typing :: Typed", +] +dependencies = [ + "httpx>=0.27", # Modern async-capable HTTP. Not `requests`. + "pydantic>=2.5", # v2 — Rust-backed validation + IDE autocomplete. 
+] + +[project.urls] +Homepage = "https://socialapis.io" +Documentation = "https://docs.socialapis.io" +Repository = "https://github.com/SocialAPIsHub/socialapis-python" +Issues = "https://github.com/SocialAPIsHub/socialapis-python/issues" +Changelog = "https://github.com/SocialAPIsHub/socialapis-python/blob/main/CHANGELOG.md" + +[project.optional-dependencies] +dev = [ + "ruff>=0.6", # Replaces black + isort + flake8 — one tool. + "mypy>=1.11", + "pytest>=8", + "pytest-asyncio>=0.24", + "pytest-cov>=5", + "respx>=0.21", # Mocks httpx for tests (no live API calls in CI). +] + +# ============================================================================= +# HATCHLING — dynamic version (read from socialapis/_version.py) +# ============================================================================= +[tool.hatch.version] +path = "socialapis/_version.py" + +[tool.hatch.build.targets.sdist] +include = [ + "/socialapis", + "/tests", + "/README.md", + "/CHANGELOG.md", + "/LICENSE", +] + +[tool.hatch.build.targets.wheel] +packages = ["socialapis"] + +# ============================================================================= +# RUFF — lint + format + import sort, all in one +# ============================================================================= +[tool.ruff] +target-version = "py310" +line-length = 100 +extend-exclude = ["docs/", "examples/"] + +[tool.ruff.lint] +select = [ + "E", "W", # pycodestyle errors + warnings + "F", # pyflakes + "I", # isort + "UP", # pyupgrade (modernize syntax) + "B", # bugbear + "SIM", # simplify + "PT", # pytest style + "RET", # return statements + "PIE", # misc improvements + "PERF", # perf antipatterns +] +ignore = [ + "E501", # line-too-long — let formatter handle +] + +[tool.ruff.lint.per-file-ignores] +"tests/**" = ["B011"] # `assert False` in tests is fine + +# Without this, ruff sees `socialapis` (the editable-installed package) as +# third-party and complains that the import-block grouping in tests/ is +# wrong (blank line
before what it thinks is a third-party import). Tell +# ruff we are the first-party namespace. +[tool.ruff.lint.isort] +known-first-party = ["socialapis"] + +# ============================================================================= +# MYPY — strict typing +# ============================================================================= +[tool.mypy] +python_version = "3.10" +strict = true +warn_unreachable = true +disallow_any_generics = true +disallow_subclassing_any = true +plugins = ["pydantic.mypy"] +# `httpx.Response.json()` is typed as `Any` (it returns whatever the +# server sends — could be anything JSON-able). Every method that does +# `return response.json()` therefore triggers `no-any-return` under +# mypy strict. Disabling project-wide is the pragmatic choice; the +# alternative is wrapping 60+ call sites in `cast(dict[str, Any], ...)` +# which adds noise without changing runtime behavior. We can revisit +# by writing a typed helper if a real type bug ever slips through. +disable_error_code = ["no-any-return"] + +[[tool.mypy.overrides]] +module = ["tests.*"] +disallow_untyped_defs = false # Test fixtures can use Any for brevity + +# ============================================================================= +# PYTEST +# ============================================================================= +[tool.pytest.ini_options] +minversion = "8.0" +asyncio_mode = "auto" +testpaths = ["tests"] +addopts = [ + "-ra", + "--strict-markers", + "--strict-config", + "--cov=socialapis", + "--cov-report=term-missing", + # Set deliberately lenient for v0.1 — the SDK ships with 51 methods + # but ~20 are wired through respx mocks. Raising the gate is a + # follow-up once we add per-method tests for the niche endpoints + # (search_ads, marketplace_*, Instagram reels by audio, etc.). 
+ "--cov-fail-under=70", +] +filterwarnings = [ + "error", # warnings → errors in tests + "ignore::DeprecationWarning:pydantic.*", # noisy upstream +] + +[tool.coverage.report] +exclude_also = [ + "if TYPE_CHECKING:", + "raise NotImplementedError", +] diff --git a/socialapis/__init__.py b/socialapis/__init__.py new file mode 100644 index 0000000..f638159 --- /dev/null +++ b/socialapis/__init__.py @@ -0,0 +1,111 @@ +"""SocialAPIs Python SDK — Facebook + Instagram public data. + +Quick start:: + + from socialapis import Facebook, Instagram + + fb = Facebook(api_token="YOUR_API_TOKEN") + page = fb.get_page_info("EngenSA") + + ig = Instagram(api_token="YOUR_API_TOKEN") + profile = ig.get_profile_details("instagram") + +Async variants:: + + from socialapis import AsyncFacebook, AsyncInstagram + + async with AsyncFacebook(api_token="...") as fb: + page = await fb.get_page_info("EngenSA") + +Migration aliases — the import line is the only change from kevinzg / +arc298 abandoned scrapers:: + + from socialapis import FacebookScraper # alias of Facebook + from socialapis import InstagramScraper # alias of Instagram + +Errors callers commonly handle:: + + from socialapis import ( + AuthenticationError, # 401 — bad token + InsufficientCreditsError, # 402 — out of credits + RateLimitError, # 429 — slow down + ) + +Account info (free, doesn't consume credits):: + + from socialapis import Account + + with Account(api_token="...") as acc: + usage = acc.get_usage() + +Free 200 calls / month: https://socialapis.io/auth/signup +Full docs: https://docs.socialapis.io +""" + +from ._account import Account, AsyncAccount +from ._errors import ( + APIConnectionError, + APIError, + APIServerError, + AuthenticationError, + BadRequestError, + InsufficientCreditsError, + RateLimitError, + SocialAPIsError, +) +from ._version import __version__ +from .facebook import AsyncFacebook, Facebook, GroupInfo, PageInfo +from .instagram import AsyncInstagram, Instagram, ProfileInfo + +# 
--------------------------------------------------------------------------- +# Migration aliases — preserve familiar names from abandoned libraries so +# devs can swap their import line and keep running. +# +# `FacebookScraper` mirrors the kevinzg/facebook-scraper entry point +# (9.5k stars on GitHub, abandoned since ~2022). +# +# `InstagramScraper` mirrors arc298/instagram-scraper (8.5k stars, +# sporadic maintenance). +# +# Aliases are EXACT references — identical behavior, identical +# type signatures, just different names. `test_aliases.py` asserts +# this contract so accidental decoupling fails CI. +# +# When a new abandoned library becomes worth capturing, add an alias +# here. +# --------------------------------------------------------------------------- +FacebookScraper = Facebook +AsyncFacebookScraper = AsyncFacebook +InstagramScraper = Instagram +AsyncInstagramScraper = AsyncInstagram + + +__all__ = [ + # Primary clients + "Facebook", + "AsyncFacebook", + "Instagram", + "AsyncInstagram", + "Account", + "AsyncAccount", + # Migration aliases (kevinzg + arc298 capture) + "FacebookScraper", + "AsyncFacebookScraper", + "InstagramScraper", + "AsyncInstagramScraper", + # Response models + "PageInfo", + "GroupInfo", + "ProfileInfo", + # Exceptions + "SocialAPIsError", + "APIError", + "APIConnectionError", + "APIServerError", + "AuthenticationError", + "BadRequestError", + "InsufficientCreditsError", + "RateLimitError", + # Metadata + "__version__", +] diff --git a/socialapis/_account.py b/socialapis/_account.py new file mode 100644 index 0000000..b2ea8f1 --- /dev/null +++ b/socialapis/_account.py @@ -0,0 +1,153 @@ +"""Account-level endpoints — usage, credits, rate-limit info. + +Different from the Facebook / Instagram namespaces because these calls are +about YOUR socialapis.io account, not about scraped data. They don't +consume credits (free to call) and they're useful for paid integrations +that want to monitor their own budget programmatically. 
+ +Exposed at the package top-level via ``socialapis.Account``. +""" + +from __future__ import annotations + +from types import TracebackType +from typing import TYPE_CHECKING, Any + +import httpx + +from ._client import BaseClient +from ._errors import APIConnectionError + +if TYPE_CHECKING: + # `Self` is in typing as of Python 3.11; for our 3.10 baseline we + # use the typing_extensions backport. typing_extensions is already + # a transitive dependency of pydantic, so no extra install. + from typing_extensions import Self + + +class Account(BaseClient): + """Synchronous account-info client. None of these calls consume credits. + + Quick start:: + + from socialapis import Account + + with Account(api_token="YOUR_API_TOKEN") as acc: + usage = acc.get_usage() + print(usage["credits"]["remaining"]) + """ + + def __init__( + self, + api_token: str, + *, + base_url: str = "https://api.socialapis.io", + timeout: float = 30.0, + transport: httpx.Client | None = None, + ) -> None: + super().__init__(api_token=api_token, base_url=base_url, timeout=timeout) + self._transport = transport or httpx.Client( + timeout=self.timeout, + headers=self._default_headers(), + ) + self._owns_transport = transport is None + + def __enter__(self) -> Self: + return self + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + tb: TracebackType | None, + ) -> None: + self.close() + + def close(self) -> None: + if self._owns_transport: + self._transport.close() + + def get_usage(self) -> dict[str, Any]: + """Return current credit balance, usage, plan info, billing period. + + Backed by ``GET /usage``. Free — does not consume credits. + """ + return self._get("/usage").json() + + def get_top_ups(self) -> dict[str, Any]: + """Return auto top-up settings + recent history + lifetime spend. + + Backed by ``GET /usage/top-ups``. Free. 
+ """ + return self._get("/usage/top-ups").json() + + def get_limits(self) -> dict[str, Any]: + """Return your plan's rate limit, concurrent-task cap, and allowed + top-up packages. + + Backed by ``GET /usage/limits``. Free. + """ + return self._get("/usage/limits").json() + + def _get(self, path: str) -> httpx.Response: + url = self._build_url(path) + try: + response = self._transport.get(url) + except httpx.RequestError as exc: + raise APIConnectionError(f"Request failed: {exc}") from exc + self._raise_for_status(response) + return response + + +class AsyncAccount(BaseClient): + """Asynchronous account-info client. Same surface as :class:`Account`; + every method is a coroutine. + """ + + def __init__( + self, + api_token: str, + *, + base_url: str = "https://api.socialapis.io", + timeout: float = 30.0, + transport: httpx.AsyncClient | None = None, + ) -> None: + super().__init__(api_token=api_token, base_url=base_url, timeout=timeout) + self._transport = transport or httpx.AsyncClient( + timeout=self.timeout, + headers=self._default_headers(), + ) + self._owns_transport = transport is None + + async def __aenter__(self) -> Self: + return self + + async def __aexit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + tb: TracebackType | None, + ) -> None: + await self.aclose() + + async def aclose(self) -> None: + if self._owns_transport: + await self._transport.aclose() + + async def get_usage(self) -> dict[str, Any]: + return (await self._get("/usage")).json() + + async def get_top_ups(self) -> dict[str, Any]: + return (await self._get("/usage/top-ups")).json() + + async def get_limits(self) -> dict[str, Any]: + return (await self._get("/usage/limits")).json() + + async def _get(self, path: str) -> httpx.Response: + url = self._build_url(path) + try: + response = await self._transport.get(url) + except httpx.RequestError as exc: + raise APIConnectionError(f"Request failed: {exc}") from exc + self._raise_for_status(response) + return 
response diff --git a/socialapis/_client.py b/socialapis/_client.py new file mode 100644 index 0000000..23a01a4 --- /dev/null +++ b/socialapis/_client.py @@ -0,0 +1,173 @@ +"""Internal HTTP client used by both sync and async public APIs. + +Why a single internal client: HTTP error mapping, default headers, retry +policy, and timeout config should live in ONE place — not duplicated +between `Facebook` (sync) and `AsyncFacebook` (async). Both call into this +module's helpers. + +Architecture: + BaseClient — config + URL building + error mapping (no I/O) + SyncTransport (httpx.Client) + AsyncTransport (httpx.AsyncClient) + +Public callers never import from here; they import `Facebook` / `AsyncFacebook` +from the top-level `socialapis` namespace. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +import httpx + +from ._errors import ( + APIConnectionError, + APIServerError, + AuthenticationError, + BadRequestError, + InsufficientCreditsError, + RateLimitError, +) +from ._version import __version__ + +if TYPE_CHECKING: + from collections.abc import Mapping + + +DEFAULT_BASE_URL = "https://api.socialapis.io" +DEFAULT_TIMEOUT = 30.0 +USER_AGENT = f"socialapis-python/{__version__}" + + +class BaseClient: + """Shared config + helpers between sync and async public clients. + + Holds the API token, base URL, and default timeout. Knows how to build + request URLs and translate httpx Responses into typed exceptions. + + NOT meant to be instantiated by end users — `Facebook(api_token=...)` + and `AsyncFacebook(api_token=...)` wrap this internally. + """ + + def __init__( + self, + api_token: str, + *, + base_url: str = DEFAULT_BASE_URL, + timeout: float = DEFAULT_TIMEOUT, + ) -> None: + if not api_token: + raise ValueError( + "api_token is required. Get a free key at " + "https://socialapis.io/auth/signup (200 calls/month, no card)." 
+ ) + self.api_token = api_token + self.base_url = base_url.rstrip("/") + self.timeout = timeout + + # ---- request preparation ------------------------------------------------ + + def _build_url(self, path: str) -> str: + """Concatenate base URL + path. Path must start with '/'.""" + if not path.startswith("/"): + raise ValueError(f"path must start with '/', got: {path!r}") + return f"{self.base_url}{path}" + + def _default_headers(self) -> dict[str, str]: + return { + "x-api-token": self.api_token, + "Accept": "application/json", + "User-Agent": USER_AGENT, + } + + # ---- response → exception mapping --------------------------------------- + + def _raise_for_status(self, response: httpx.Response) -> None: + """Translate an HTTP error response into a typed SDK exception. + + 2xx responses are no-ops. Anything else raises a subclass of + APIError that callers can catch specifically (RateLimitError, + InsufficientCreditsError, AuthenticationError, etc.). + """ + if response.is_success: + return + + body = _safe_json(response) + message = _extract_message(body) or response.text or response.reason_phrase + request_id = response.headers.get("x-request-id") + + status = response.status_code + + if status == 401: + raise AuthenticationError( + message, + status_code=status, + request_id=request_id, + body=body, + ) + if status == 402: + raise InsufficientCreditsError( + message, + status_code=status, + request_id=request_id, + body=body, + ) + if status == 429: + raw_retry_after = response.headers.get("retry-after") + try: + retry_after = float(raw_retry_after) if raw_retry_after else None + except ValueError: + # Retry-After may be an HTTP-date rather than delay-seconds; + # report "unknown" instead of leaking ValueError to callers. + retry_after = None + raise RateLimitError( + message, + status_code=status, + retry_after_seconds=retry_after, + request_id=request_id, + body=body, + ) + if 400 <= status < 500: + raise BadRequestError( + message, + status_code=status, + request_id=request_id, + body=body, + ) + if 500 <= status < 600: + raise APIServerError( + message, + status_code=status, + request_id=request_id, + body=body, + ) + + # Defensive — unreachable for valid HTTP 
2xx/4xx/5xx responses; a 3xx still lands here + raise APIConnectionError(f"Unexpected status code {status}: {message}") + + +def _safe_json(response: httpx.Response) -> dict[str, Any]: + """Parse the response body as JSON without raising. Non-JSON bodies + return an empty dict — let the caller decide what to do.""" + try: + data = response.json() + except ValueError: + return {} + return data if isinstance(data, dict) else {} + + +def _extract_message(body: Mapping[str, Any]) -> str | None: + """Pull a human-readable error message from the API's error envelope. + + The API uses one of several conventions across endpoints — try the + common ones in order. + """ + for key in ("error", "message", "detail"): + value = body.get(key) + if isinstance(value, str) and value: + return value + if isinstance(value, dict): + nested = value.get("message") + if isinstance(nested, str) and nested: + return nested + return None diff --git a/socialapis/_errors.py b/socialapis/_errors.py new file mode 100644 index 0000000..1089491 --- /dev/null +++ b/socialapis/_errors.py @@ -0,0 +1,97 @@ +"""Typed exception hierarchy for the SocialAPIs SDK. + +Why a hierarchy: callers can catch broadly (`SocialAPIsError`) for "anything +the SDK threw" OR narrowly (`RateLimitError`, `AuthenticationError`, +`InsufficientCreditsError`) for retry/UX decisions. Generic exceptions +(httpx.HTTPError, ValueError, etc.) leaking out of public methods would +force callers to handle library internals — bad SDK shape. + +The classes mirror the API's documented error semantics: +- 401 → AuthenticationError +- 402 → InsufficientCreditsError (returned when free-tier budget is spent) +- 429 → RateLimitError +- 4xx (other) → BadRequestError +- 5xx → APIServerError + +Anything else (network failure, JSON parse failure) → APIConnectionError. +""" + +from __future__ import annotations + +from typing import Any + + +class SocialAPIsError(Exception): + """Base class for every exception raised by this SDK. 
+ + Catch this if you want one handler for any SDK-originating failure. + """ + + +class APIConnectionError(SocialAPIsError): + """Network failure, timeout, or non-JSON response from the API. + + Almost always transient. Safe to retry with backoff. + """ + + +class APIError(SocialAPIsError): + """An HTTP error response from the API (4xx or 5xx). + + Subclasses below give callers a typed dispatch on the failure class. + """ + + def __init__( + self, + message: str, + *, + status_code: int, + request_id: str | None = None, + body: dict[str, Any] | None = None, + ) -> None: + super().__init__(message) + self.status_code = status_code + self.request_id = request_id + self.body = body or {} + + +class BadRequestError(APIError): + """4xx (excluding 401/402/429). Client-side mistake — missing parameter, + invalid value, wrong endpoint. NOT safe to retry without fixing input.""" + + +class AuthenticationError(APIError): + """401 — invalid or missing API token. Retrying won't help; the user + needs to fix their `api_token`.""" + + +class InsufficientCreditsError(APIError): + """402 — credit balance exhausted. Retrying after a refill / upgrade + works. Tracked as a distinct exception so paid integrations can + auto-top-up on this signal.""" + + +class RateLimitError(APIError): + """429 — request rate exceeded. Retrying after the `Retry-After` + interval (exposed as `retry_after_seconds`) is safe and idempotent.""" + + def __init__( + self, + message: str, + *, + status_code: int = 429, + retry_after_seconds: float | None = None, + request_id: str | None = None, + body: dict[str, Any] | None = None, + ) -> None: + super().__init__( + message, + status_code=status_code, + request_id=request_id, + body=body, + ) + self.retry_after_seconds = retry_after_seconds + + +class APIServerError(APIError): + """5xx — the API failed. 
Safe to retry with exponential backoff.""" diff --git a/socialapis/_version.py b/socialapis/_version.py new file mode 100644 index 0000000..bed78ed --- /dev/null +++ b/socialapis/_version.py @@ -0,0 +1,7 @@ +# Single source of truth for the package version. +# +# Bump this by hand, commit, then push a matching `vX.Y.Z` tag. The release +# workflow (.github/workflows/release.yml) never edits this file — it fails if +# the tag and the hatchling-built version (see pyproject.toml) disagree. + +__version__ = "0.1.0" diff --git a/socialapis/facebook/__init__.py b/socialapis/facebook/__init__.py new file mode 100644 index 0000000..05b1b6e --- /dev/null +++ b/socialapis/facebook/__init__.py @@ -0,0 +1,23 @@ +"""Facebook namespace. + +Public entry points: + socialapis.Facebook — synchronous client + socialapis.AsyncFacebook — asyncio client + +Both share the same method signatures; only the call pattern differs. + + sync: + from socialapis import Facebook + fb = Facebook(api_token="...") + page = fb.get_page_info("EngenSA") + + async: + from socialapis import AsyncFacebook + async with AsyncFacebook(api_token="...") as fb: + page = await fb.get_page_info("EngenSA") +""" + +from ._client import AsyncFacebook, Facebook +from ._types import GroupInfo, PageInfo + +__all__ = ["AsyncFacebook", "Facebook", "GroupInfo", "PageInfo"] diff --git a/socialapis/facebook/_client.py b/socialapis/facebook/_client.py new file mode 100644 index 0000000..59dcf03 --- /dev/null +++ b/socialapis/facebook/_client.py @@ -0,0 +1,975 @@ +"""Public sync + async Facebook clients. + +Method coverage mirrors the SocialAPIs.io REST surface for Facebook (Pages, +Groups, Posts, Search, Ads Library, Marketplace, Media). Each method is a +thin wrapper: + + 1. Normalise the primary identifier (a Facebook URL/slug → a `link` + query param; a bare ID → the appropriate `_id` query param). + 2. 
Forward any additional query params via `**kwargs` so the SDK + stays forward-compatible when the API adds a new filter — no + client release needed to pick up new behavior. + 3. Issue the HTTP call (sync via `httpx.Client`, async via + `httpx.AsyncClient`). + 4. Return the parsed JSON. Only `get_page_info` and `get_group_details` + return typed Pydantic models (PageInfo, GroupInfo) for IDE autocomplete + on the most-used responses; the rest return plain `dict[str, Any]` — + callers who want type safety can validate themselves. + +Pagination: when the API returns a cursor, it appears in the response +body under whichever key the endpoint documents (varies by route — +`end_cursor`, `cursor_token`, `next`, etc.). Pass that cursor back in +via `**kwargs` on the next call. We do NOT impose a `limit=` parameter +— the API decides page size, callers iterate cursors. This matches the +underlying REST semantics and avoids drift between SDK and API. +""" + +from __future__ import annotations + +from types import TracebackType +from typing import TYPE_CHECKING, Any + +import httpx + +from .._client import BaseClient +from .._errors import APIConnectionError +from ._types import GroupInfo, PageInfo + +if TYPE_CHECKING: + # `Self` is in typing as of Python 3.11; for our 3.10 baseline we + # use the typing_extensions backport. typing_extensions is already + # a transitive dependency of pydantic, so no extra install. + from typing_extensions import Self + + +# --------------------------------------------------------------------------- +# Identifier normalisation +# +# Several endpoints accept either a full Facebook URL OR a bare slug/ID, +# but the API itself only takes one shape per endpoint. These helpers let +# users pass whichever form is natural and we coerce to what the API +# expects. 
+# --------------------------------------------------------------------------- + + +def _as_facebook_url(value: str, base: str = "https://www.facebook.com") -> str: + """Normalise a slug or full URL to a canonical Facebook URL. + + Examples:: + + "EngenSA" → "https://www.facebook.com/EngenSA" + "https://www.facebook.com/EngenSA" → unchanged + "https://m.facebook.com/EngenSA" → unchanged + """ + value = value.strip() + if not value: + raise ValueError("identifier is required") + if value.startswith(("http://", "https://")): + return value + return f"{base}/{value.lstrip('/')}" + + +def _as_facebook_group_url(value: str) -> str: + """Normalise a Group identifier (slug, numeric ID, or full URL) to a + canonical Facebook group URL.""" + value = value.strip() + if not value: + raise ValueError("group identifier is required") + if value.startswith(("http://", "https://")): + return value + return f"https://www.facebook.com/groups/{value.lstrip('/')}" + + +def _params(*pairs: tuple[str, Any], extra: dict[str, Any] | None = None) -> dict[str, str]: + """Build a query-string-safe dict, dropping None values + stringifying. + + Lets methods declare their primary params as `("link", url)` etc. and + splat `extra` for any **kwargs. Keeps each method's body to ~5 lines. + """ + result: dict[str, str] = {} + for key, value in pairs: + if value is None: + continue + result[key] = str(value) + if extra: + for key, value in extra.items(): + if value is None: + continue + result[key] = str(value) + return result + + +# =========================================================================== +# SYNC CLIENT +# =========================================================================== +class Facebook(BaseClient): + """Synchronous Facebook client. + + Drop-in alternative to the abandoned kevinzg/facebook-scraper library. + Use ``socialapis.FacebookScraper`` as a name alias for migration ease. 
+ + All public-data calls route through socialapis.io — no OAuth, no + Facebook app review, no scraper maintenance. Get a free API token + (200 calls/month) at https://socialapis.io/auth/signup. + + Quick start:: + + from socialapis import Facebook + + with Facebook(api_token="YOUR_API_TOKEN") as fb: + page = fb.get_page_info("EngenSA") + posts = fb.get_page_posts("EngenSA") + """ + + def __init__( + self, + api_token: str, + *, + base_url: str = "https://api.socialapis.io", + timeout: float = 30.0, + transport: httpx.Client | None = None, + ) -> None: + super().__init__(api_token=api_token, base_url=base_url, timeout=timeout) + self._transport = transport or httpx.Client( + timeout=self.timeout, + headers=self._default_headers(), + ) + self._owns_transport = transport is None + + # ---- context-manager interface ----------------------------------------- + + def __enter__(self) -> Self: + return self + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + tb: TracebackType | None, + ) -> None: + self.close() + + def close(self) -> None: + """Close the underlying connection pool. Safe to call repeatedly.""" + if self._owns_transport: + self._transport.close() + + # ======================================================================= + # PAGES + # ======================================================================= + + def get_page_id(self, page: str, **kwargs: Any) -> dict[str, Any]: + """Return the numeric Facebook Page ID for a given page URL or slug. + + Backed by ``GET /facebook/pages/id``. + """ + return self._get( + "/facebook/pages/id", + _params(("link", _as_facebook_url(page)), extra=kwargs), + ).json() + + def get_page_info(self, page: str, **kwargs: Any) -> PageInfo: + """Return public metadata for a Facebook Page. + + Backed by ``GET /facebook/pages/details``. Returns a typed + :class:`PageInfo` — additional fields the API may return are + preserved in ``model_extra``. 
+ """ + response = self._get( + "/facebook/pages/details", + _params(("link", _as_facebook_url(page)), extra=kwargs), + ) + return PageInfo.model_validate(response.json()) + + def get_page_posts(self, page: str, **kwargs: Any) -> dict[str, Any]: + """Return recent posts from a Facebook Page. + + Backed by ``GET /facebook/pages/posts``. The API decides page size. + For subsequent pages, pass the cursor from the previous response + (key varies — see the docs for the specific endpoint). + """ + return self._get( + "/facebook/pages/posts", + _params(("link", _as_facebook_url(page)), extra=kwargs), + ).json() + + def get_page_reels(self, page: str, **kwargs: Any) -> dict[str, Any]: + """Return Reels (short videos) from a Facebook Page. + + Backed by ``GET /facebook/pages/reels``. + """ + return self._get( + "/facebook/pages/reels", + _params(("link", _as_facebook_url(page)), extra=kwargs), + ).json() + + def get_page_videos(self, page: str, **kwargs: Any) -> dict[str, Any]: + """Return long-form videos from a Facebook Page. + + Backed by ``GET /facebook/pages/videos``. + """ + return self._get( + "/facebook/pages/videos", + _params(("link", _as_facebook_url(page)), extra=kwargs), + ).json() + + # ======================================================================= + # GROUPS + # ======================================================================= + + def get_group_id(self, group: str, **kwargs: Any) -> dict[str, Any]: + """Return the numeric Facebook Group ID. + + Backed by ``GET /facebook/groups/id``. + """ + return self._get( + "/facebook/groups/id", + _params(("link", _as_facebook_group_url(group)), extra=kwargs), + ).json() + + def get_group_details(self, group: str, **kwargs: Any) -> GroupInfo: + """Return rich metadata for a Facebook Group (rules, members, activity). + + Backed by ``GET /facebook/groups/details``. 
+ """ + response = self._get( + "/facebook/groups/details", + _params(("link", _as_facebook_group_url(group)), extra=kwargs), + ) + return GroupInfo.model_validate(response.json()) + + def get_group_metadata(self, group: str, **kwargs: Any) -> dict[str, Any]: + """Return lightweight Group metadata (name, id, url, image). + + Cheaper than ``get_group_details`` when you only need IDs/names. + Backed by ``GET /facebook/groups/metadata``. + """ + return self._get( + "/facebook/groups/metadata", + _params(("link", _as_facebook_group_url(group)), extra=kwargs), + ).json() + + def get_group_posts(self, group: str, **kwargs: Any) -> dict[str, Any]: + """Return recent posts from a Facebook Group. + + Backed by ``GET /facebook/groups/posts``. + """ + return self._get( + "/facebook/groups/posts", + _params(("link", _as_facebook_group_url(group)), extra=kwargs), + ).json() + + def get_group_videos(self, group_id: str, **kwargs: Any) -> dict[str, Any]: + """Return videos posted to a Facebook Group. + + Backed by ``GET /facebook/groups/videos``. Takes a numeric + ``group_id`` (use :meth:`get_group_id` to resolve a URL first). + """ + return self._get( + "/facebook/groups/videos", + _params(("group_id", group_id), extra=kwargs), + ).json() + + # ======================================================================= + # POSTS + # ======================================================================= + + def get_post_id(self, post: str, **kwargs: Any) -> dict[str, Any]: + """Extract the numeric Facebook post ID from a post URL. + + Backed by ``GET /facebook/posts/id``. + """ + return self._get( + "/facebook/posts/id", + _params(("link", post), extra=kwargs), + ).json() + + def get_post_details(self, post: str, **kwargs: Any) -> dict[str, Any]: + """Return full details of a Facebook post (reactions, media, author). + + Backed by ``GET /facebook/posts/details``. 
+ """ + return self._get( + "/facebook/posts/details", + _params(("link", post), extra=kwargs), + ).json() + + def get_post_details_extended(self, post: str, **kwargs: Any) -> dict[str, Any]: + """Return extended post details (views, video URLs, music info, + author verification). + + Backed by ``GET /facebook/posts/details/extended``. + """ + return self._get( + "/facebook/posts/details/extended", + _params(("link", post), extra=kwargs), + ).json() + + def get_post_comments(self, post: str, **kwargs: Any) -> dict[str, Any]: + """Return comments on a Facebook post or reel. + + Backed by ``GET /facebook/posts/comments``. Pass + ``include_reply_info="true"`` to get the cursor needed for + :meth:`get_comment_replies`. + """ + return self._get( + "/facebook/posts/comments", + _params(("link", post), extra=kwargs), + ).json() + + def get_comment_replies( + self, + comment_feedback_id: str, + expansion_token: str, + **kwargs: Any, + ) -> dict[str, Any]: + """Return replies to a specific comment. + + Backed by ``GET /facebook/posts/comments/replies``. Both inputs + come from :meth:`get_post_comments` when called with + ``include_reply_info=true``. + """ + return self._get( + "/facebook/posts/comments/replies", + _params( + ("comment_feedback_id", comment_feedback_id), + ("expansion_token", expansion_token), + extra=kwargs, + ), + ).json() + + def get_post_attachments(self, post_id: str, **kwargs: Any) -> dict[str, Any]: + """Return all media attachments (photos, videos) from a post. + + Backed by ``GET /facebook/posts/attachments``. + """ + return self._get( + "/facebook/posts/attachments", + _params(("post_id", post_id), extra=kwargs), + ).json() + + def get_video_post_details(self, video_id: str, **kwargs: Any) -> dict[str, Any]: + """Return title, reactions, and play counts for a video post. + + Backed by ``GET /facebook/posts/video``. 
+ """ + return self._get( + "/facebook/posts/video", + _params(("video_id", video_id), extra=kwargs), + ).json() + + # ======================================================================= + # SEARCH + # ======================================================================= + + def search_pages(self, query: str, **kwargs: Any) -> dict[str, Any]: + """Search Facebook pages by keyword. Optional location filtering + via ``location_id`` (use :meth:`search_locations` to resolve a + place to an ID). + + Backed by ``GET /facebook/search/pages``. + """ + return self._get( + "/facebook/search/pages", + _params(("query", query), extra=kwargs), + ).json() + + def search_people(self, query: str, **kwargs: Any) -> dict[str, Any]: + """Search Facebook profiles by keyword. + + Backed by ``GET /facebook/search/people``. + """ + return self._get( + "/facebook/search/people", + _params(("query", query), extra=kwargs), + ).json() + + def search_locations(self, query: str, **kwargs: Any) -> dict[str, Any]: + """Search Facebook for locations matching a keyword. Returns + location UIDs used by other geo-filtered search endpoints. + + Backed by ``GET /facebook/search/locations``. + """ + return self._get( + "/facebook/search/locations", + _params(("query", query), extra=kwargs), + ).json() + + def search_posts(self, query: str, **kwargs: Any) -> dict[str, Any]: + """Search Facebook posts by keyword, with optional location and + time filters. + + Backed by ``GET /facebook/search/posts``. + """ + return self._get( + "/facebook/search/posts", + _params(("query", query), extra=kwargs), + ).json() + + def search_videos(self, query: str, **kwargs: Any) -> dict[str, Any]: + """Search Facebook videos by keyword, with optional recency / live + filters. + + Backed by ``GET /facebook/search/videos``. 
+ """ + return self._get( + "/facebook/search/videos", + _params(("query", query), extra=kwargs), + ).json() + + # ======================================================================= + # ADS LIBRARY (Meta Ads transparency) + # ======================================================================= + + def get_ads_countries(self, **kwargs: Any) -> dict[str, Any]: + """Return all country codes supported by the Meta Ads Library. + + Backed by ``GET /facebook/ads/countries``. + """ + return self._get("/facebook/ads/countries", _params(extra=kwargs)).json() + + def search_ads(self, query: str, **kwargs: Any) -> dict[str, Any]: + """Search ads in the Meta Ad Library by keyword. + + Backed by ``GET /facebook/ads/search``. Common filters: + ``country``, ``activeStatus`` (Active / All / Inactive). + """ + return self._get( + "/facebook/ads/search", + _params(("query", query), extra=kwargs), + ).json() + + def get_ads_page_details(self, page_id: str, **kwargs: Any) -> dict[str, Any]: + """Return Ads-Library metadata for a Facebook Page. + + Backed by ``GET /facebook/ads/page-details``. + """ + return self._get( + "/facebook/ads/page-details", + _params(("page_id", page_id), extra=kwargs), + ).json() + + def get_ad_archive_details( + self, + ad_archive_id: str, + page_id: str, + **kwargs: Any, + ) -> dict[str, Any]: + """Return detailed info for a specific archived ad: creative, + spend, impressions. + + Backed by ``GET /facebook/ads/archive-details``. + """ + return self._get( + "/facebook/ads/archive-details", + _params( + ("ad_archive_id", ad_archive_id), + ("page_id", page_id), + extra=kwargs, + ), + ).json() + + def search_ads_by_keywords(self, query: str, **kwargs: Any) -> dict[str, Any]: + """Search ads in the Ad Library by keyword with country filter. + + Backed by ``GET /facebook/ads/keywords``. 
+ """ + return self._get( + "/facebook/ads/keywords", + _params(("query", query), extra=kwargs), + ).json() + + # ======================================================================= + # MARKETPLACE + # ======================================================================= + + def search_marketplace(self, query: str, **kwargs: Any) -> dict[str, Any]: + """Search Facebook Marketplace listings. + + Backed by ``GET /facebook/marketplace/search``. Common filters: + ``filter_location_latitude``, ``filter_location_longitude``, + ``filter_price_max``, ``proxy_country``, ``sort_by``. + """ + return self._get( + "/facebook/marketplace/search", + _params(("query", query), extra=kwargs), + ).json() + + def get_listing_details(self, listing_id: str, **kwargs: Any) -> dict[str, Any]: + """Return full info for a Marketplace listing: photos, price, + seller, delivery. + + Backed by ``GET /facebook/marketplace/listing``. + """ + return self._get( + "/facebook/marketplace/listing", + _params(("listing_id", listing_id), extra=kwargs), + ).json() + + def get_seller_details(self, seller_id: str, **kwargs: Any) -> dict[str, Any]: + """Return seller profile, ratings, reviews, and badges from + Marketplace. + + Backed by ``GET /facebook/marketplace/seller``. + """ + return self._get( + "/facebook/marketplace/seller", + _params(("seller_id", seller_id), extra=kwargs), + ).json() + + def get_marketplace_categories(self, **kwargs: Any) -> dict[str, Any]: + """Return all Marketplace categories with SEO URLs and IDs. + + Backed by ``GET /facebook/marketplace/categories``. + """ + return self._get( + "/facebook/marketplace/categories", + _params(extra=kwargs), + ).json() + + def get_city_coordinates(self, city: str, **kwargs: Any) -> dict[str, Any]: + """Resolve a city name to GPS coordinates, for use as a + Marketplace location filter. + + Backed by ``GET /facebook/marketplace/city-coordinates``. Pass + ``exactly_one="true"`` to return the top match only. 
+ """ + return self._get( + "/facebook/marketplace/city-coordinates", + _params(("city", city), extra=kwargs), + ).json() + + def search_vehicles(self, **kwargs: Any) -> dict[str, Any]: + """Search Marketplace vehicle listings. + + Backed by ``GET /facebook/marketplace/vehicles``. Required-ish + params: ``filter_location_latitude`` + ``filter_location_longitude``. + """ + return self._get( + "/facebook/marketplace/vehicles", + _params(extra=kwargs), + ).json() + + def search_rentals(self, **kwargs: Any) -> dict[str, Any]: + """Search Marketplace rental-property listings. + + Backed by ``GET /facebook/marketplace/rentals``. Filters: + ``filter_bedrooms_min``, ``filter_bathrooms_min``, + ``filter_price_max``, plus the location lat/lng. + """ + return self._get( + "/facebook/marketplace/rentals", + _params(extra=kwargs), + ).json() + + # ======================================================================= + # MEDIA + # ======================================================================= + + def download_media(self, url: str, **kwargs: Any) -> dict[str, Any]: + """Resolve a Facebook video/photo URL to a direct downloadable + media URL. + + Backed by ``GET /facebook/media/download``. 
+ """ + return self._get( + "/facebook/media/download", + _params(("url", url), extra=kwargs), + ).json() + + # ======================================================================= + # INTERNAL: shared request driver + # ======================================================================= + + def _get(self, path: str, params: dict[str, str]) -> httpx.Response: + url = self._build_url(path) + try: + response = self._transport.get(url, params=params) + except httpx.RequestError as exc: + raise APIConnectionError(f"Request failed: {exc}") from exc + self._raise_for_status(response) + return response + + +# =========================================================================== +# ASYNC CLIENT +# =========================================================================== +class AsyncFacebook(BaseClient): + """Asynchronous Facebook client. Same method shape as :class:`Facebook`, + but every public method is a coroutine. + + Quick start:: + + from socialapis import AsyncFacebook + + async with AsyncFacebook(api_token="YOUR_API_TOKEN") as fb: + page = await fb.get_page_info("EngenSA") + posts = await fb.get_page_posts("EngenSA") + """ + + def __init__( + self, + api_token: str, + *, + base_url: str = "https://api.socialapis.io", + timeout: float = 30.0, + transport: httpx.AsyncClient | None = None, + ) -> None: + super().__init__(api_token=api_token, base_url=base_url, timeout=timeout) + self._transport = transport or httpx.AsyncClient( + timeout=self.timeout, + headers=self._default_headers(), + ) + self._owns_transport = transport is None + + async def __aenter__(self) -> Self: + return self + + async def __aexit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + tb: TracebackType | None, + ) -> None: + await self.aclose() + + async def aclose(self) -> None: + if self._owns_transport: + await self._transport.aclose() + + # ======================================================================= + # PAGES + # 
async def get_page_info(self, page: str, **kwargs: Any) -> PageInfo:
    """Fetch a page's public metadata as a typed :class:`PageInfo`.

    Calls ``GET /facebook/pages/details``. ``page`` is run through
    ``_as_facebook_url`` and sent as the ``link`` parameter; ``kwargs``
    is handed to ``_params`` as ``extra``.

    Returns the JSON body validated into a ``PageInfo`` model.
    """
    link = _as_facebook_url(page)
    query_params = _params(("link", link), extra=kwargs)
    reply = await self._get("/facebook/pages/details", query_params)
    payload = reply.json()
    return PageInfo.model_validate(payload)
async def get_comment_replies(
    self,
    comment_feedback_id: str,
    expansion_token: str,
    **kwargs: Any,
) -> dict[str, Any]:
    """Fetch the replies under a single comment.

    Calls ``GET /facebook/posts/comments/replies`` with both required
    identifiers, ``comment_feedback_id`` and ``expansion_token``, plus
    anything supplied in ``kwargs``.

    Returns the decoded JSON body of the response.
    """
    query_params = _params(
        ("comment_feedback_id", comment_feedback_id),
        ("expansion_token", expansion_token),
        extra=kwargs,
    )
    reply = await self._get("/facebook/posts/comments/replies", query_params)
    return reply.json()
async def search_posts(self, query: str, **kwargs: Any) -> dict[str, Any]:
    """Search Facebook posts for ``query``.

    Calls ``GET /facebook/search/posts``; ``query`` is sent under the
    ``query`` key and ``kwargs`` is handed to ``_params`` as ``extra``.

    Returns the decoded JSON body of the response.
    """
    query_params = _params(("query", query), extra=kwargs)
    reply = await self._get("/facebook/search/posts", query_params)
    return reply.json()
async def get_ad_archive_details(
    self,
    ad_archive_id: str,
    page_id: str,
    **kwargs: Any,
) -> dict[str, Any]:
    """Fetch the Ads Library archive record for one ad.

    Calls ``GET /facebook/ads/archive-details`` with ``ad_archive_id``
    and ``page_id``, plus anything supplied in ``kwargs``.

    Returns the decoded JSON body of the response.
    """
    query_params = _params(
        ("ad_archive_id", ad_archive_id),
        ("page_id", page_id),
        extra=kwargs,
    )
    reply = await self._get("/facebook/ads/archive-details", query_params)
    return reply.json()
async def _get(self, path: str, params: dict[str, str]) -> httpx.Response:
    """Send an async GET request for ``path`` and return the checked response.

    Args:
        path: Endpoint path; turned into a full URL by ``_build_url``.
        params: Query-string parameters for the request.

    Returns:
        The ``httpx.Response`` after ``_raise_for_status`` has accepted it.

    Raises:
        APIConnectionError: If httpx raises ``RequestError`` while the
            request is being sent.

    Whatever ``_raise_for_status`` raises for an error response is
    propagated unchanged.
    """
    url = self._build_url(path)
    # ``__init__`` only installs the default headers on a client it
    # creates itself. Sending them per request means a caller-supplied
    # ``transport`` carries them too; for the owned client the values are
    # identical, so nothing changes.
    headers = self._default_headers()
    try:
        response = await self._transport.get(url, params=params, headers=headers)
    except httpx.RequestError as exc:
        raise APIConnectionError(f"Request failed: {exc}") from exc
    self._raise_for_status(response)
    return response
class PageInfo(_Model):
    """Public metadata returned by `Facebook.get_page_info()` and
    `AsyncFacebook.get_page_info()`.

    Backed by `GET /facebook/pages/details`. Common fields are typed for
    autocomplete; anything else the API returns is preserved in
    `model_extra` because the shared base config sets `extra="allow"`.

    Every field is optional and defaults to `None`, so a response that
    omits a field still validates. The two image fields carry camelCase
    aliases (`profileImageUrl`, `coverImageUrl`); with
    `populate_by_name=True` on the base config they can be populated by
    either the alias or the snake_case attribute name.
    """

    # Identity and descriptive fields.
    id: str | None = Field(default=None, description="Facebook's internal page identifier.")
    name: str | None = Field(default=None, description="Display name of the page.")
    url: str | None = Field(default=None, description="Canonical Facebook URL.")
    category: str | None = Field(default=None, description="Page category, e.g. 'Public figure'.")
    # Audience counters.
    likes: int | None = Field(default=None, description="Cumulative like count, when available.")
    followers: int | None = Field(default=None, description="Follower count.")
    verified: bool | None = Field(
        default=None, description="Whether the page has a blue checkmark."
    )
    about: str | None = Field(default=None, description="Free-text 'About' description.")
    website: str | None = Field(default=None, description="Linked external website, when present.")
    # Image URLs: aliased to the camelCase keys used in the API payload.
    profile_image_url: str | None = Field(
        default=None,
        description="URL to the page's profile image.",
        alias="profileImageUrl",
    )
    cover_image_url: str | None = Field(
        default=None,
        description="URL to the page's cover image.",
        alias="coverImageUrl",
    )
Backed by `GET /facebook/groups/details`.""" + + id: str | None = None + name: str | None = None + url: str | None = None + description: str | None = None + member_count: int | None = Field(default=None, alias="memberCount") + privacy: str | None = None + is_public: bool | None = Field(default=None, alias="isPublic") diff --git a/socialapis/instagram/__init__.py b/socialapis/instagram/__init__.py new file mode 100644 index 0000000..591f4ca --- /dev/null +++ b/socialapis/instagram/__init__.py @@ -0,0 +1,15 @@ +"""Instagram namespace — Profiles, Posts, Reels, Highlights, Search, +Locations. Mirrors the SocialAPIs.io Instagram REST surface. + +Public entry points:: + + from socialapis import Instagram, AsyncInstagram + +The `InstagramScraper` alias also exists at the package level for users +migrating from the abandoned `arc298/instagram-scraper` library. +""" + +from ._client import AsyncInstagram, Instagram +from ._types import ProfileInfo + +__all__ = ["AsyncInstagram", "Instagram", "ProfileInfo"] diff --git a/socialapis/instagram/_client.py b/socialapis/instagram/_client.py new file mode 100644 index 0000000..ef97776 --- /dev/null +++ b/socialapis/instagram/_client.py @@ -0,0 +1,415 @@ +"""Public sync + async Instagram clients. + +Coverage mirrors the SocialAPIs.io Instagram REST surface: profiles, +posts, reels, highlights, search, locations. + +Same design as the Facebook clients: each method is a thin wrapper that +normalises the primary identifier and forwards extra params via +``**kwargs`` for forward-compat. No ``limit=`` — the API decides page +size. 
+""" + +from __future__ import annotations + +from types import TracebackType +from typing import TYPE_CHECKING, Any + +import httpx + +from .._client import BaseClient +from .._errors import APIConnectionError +from ..facebook._client import _params # reuse the param-builder +from ._types import ProfileInfo + +if TYPE_CHECKING: + # `Self` is in typing as of Python 3.11; for our 3.10 baseline we + # use the typing_extensions backport. typing_extensions is already + # a transitive dependency of pydantic, so no extra install. + from typing_extensions import Self + + +def _as_instagram_url(value: str) -> str: + """Normalise an Instagram identifier (username or full URL) to a + canonical Instagram profile URL.""" + value = value.strip() + if not value: + raise ValueError("identifier is required") + if value.startswith(("http://", "https://")): + return value + return f"https://www.instagram.com/{value.lstrip('/').rstrip('/')}" + + +# =========================================================================== +# SYNC CLIENT +# =========================================================================== +class Instagram(BaseClient): + """Synchronous Instagram client. + + Drop-in alternative to ``arc298/instagram-scraper``. Use + ``socialapis.InstagramScraper`` as a name alias for migration ease. 
def get_profile_details(self, username: str, **kwargs: Any) -> ProfileInfo:
    """Fetch public Instagram profile metadata as a :class:`ProfileInfo`.

    Calls ``GET /instagram/profile/details`` with ``username``;
    ``kwargs`` is handed to ``_params`` as ``extra``.

    Returns the JSON body validated into a ``ProfileInfo`` model.
    """
    query_params = _params(("username", username), extra=kwargs)
    reply = self._get("/instagram/profile/details", query_params)
    payload = reply.json()
    return ProfileInfo.model_validate(payload)
def get_highlight_details(self, highlight_id: str, **kwargs: Any) -> dict[str, Any]:
    """Fetch every story contained in one Highlight.

    Calls ``GET /instagram/highlight/details`` with ``highlight_id``;
    ``kwargs`` is handed to ``_params`` as ``extra``.

    Returns the decoded JSON body of the response.
    """
    query_params = _params(("highlight_id", highlight_id), extra=kwargs)
    reply = self._get("/instagram/highlight/details", query_params)
    return reply.json()
def get_location_posts(self, location_id: str, **kwargs: Any) -> dict[str, Any]:
    """Fetch posts tagged at one Instagram location.

    Calls ``GET /instagram/location/posts`` with ``location_id``. Supply
    ``tab="ranked"`` for top posts or ``tab="recent"`` for the most
    recent ones; it is forwarded with the rest of ``kwargs``.

    Returns the decoded JSON body of the response.
    """
    query_params = _params(("location_id", location_id), extra=kwargs)
    reply = self._get("/instagram/location/posts", query_params)
    return reply.json()
def _get(self, path: str, params: dict[str, str]) -> httpx.Response:
    """Send a GET request for ``path`` and return the checked response.

    Args:
        path: Endpoint path; turned into a full URL by ``_build_url``.
        params: Query-string parameters for the request.

    Returns:
        The ``httpx.Response`` after ``_raise_for_status`` has accepted it.

    Raises:
        APIConnectionError: If httpx raises ``RequestError`` while the
            request is being sent.

    Whatever ``_raise_for_status`` raises for an error response is
    propagated unchanged.
    """
    url = self._build_url(path)
    # ``__init__`` only installs the default headers on a client it
    # creates itself. Sending them per request means a caller-supplied
    # ``transport`` carries them too; for the owned client the values are
    # identical, so nothing changes.
    headers = self._default_headers()
    try:
        response = self._transport.get(url, params=params, headers=headers)
    except httpx.RequestError as exc:
        raise APIConnectionError(f"Request failed: {exc}") from exc
    self._raise_for_status(response)
    return response
async def get_reels_feed(self, **kwargs: Any) -> dict[str, Any]:
    """Fetch the Reels feed.

    Calls ``GET /instagram/reels/feed``. There are no positional
    arguments; everything (for example a ``user_id`` to get the
    chained-author feed instead of the trending one) travels through
    ``kwargs``, which is handed to ``_params`` as ``extra``.

    Returns the decoded JSON body of the response.
    """
    query_params = _params(extra=kwargs)
    reply = await self._get("/instagram/reels/feed", query_params)
    return reply.json()
async def _get(self, path: str, params: dict[str, str]) -> httpx.Response:
    """Send an async GET request for ``path`` and return the checked response.

    Args:
        path: Endpoint path; turned into a full URL by ``_build_url``.
        params: Query-string parameters for the request.

    Returns:
        The ``httpx.Response`` after ``_raise_for_status`` has accepted it.

    Raises:
        APIConnectionError: If httpx raises ``RequestError`` while the
            request is being sent.

    Whatever ``_raise_for_status`` raises for an error response is
    propagated unchanged.
    """
    url = self._build_url(path)
    # ``__init__`` only installs the default headers on a client it
    # creates itself. Sending them per request means a caller-supplied
    # ``transport`` carries them too; for the owned client the values are
    # identical, so nothing changes.
    headers = self._default_headers()
    try:
        response = await self._transport.get(url, params=params, headers=headers)
    except httpx.RequestError as exc:
        raise APIConnectionError(f"Request failed: {exc}") from exc
    self._raise_for_status(response)
    return response
@respx.mock
def test_get_usage_routes_to_usage_endpoint() -> None:
    """``Account.get_usage`` returns the JSON served by ``/usage``."""
    canned = httpx.Response(
        200,
        json={"credits": {"remaining": 198, "limit": 200}, "plan": "free"},
    )
    respx.get("https://api.socialapis.io/usage").mock(return_value=canned)

    with Account(api_token="t") as account:
        usage = account.get_usage()

    remaining = usage["credits"]["remaining"]
    assert remaining == 198
@pytest.mark.asyncio
@respx.mock
async def test_async_account_works() -> None:
    """``AsyncAccount.get_usage`` returns the JSON served by ``/usage``."""
    canned = httpx.Response(200, json={"credits": {"remaining": 100}})
    respx.get("https://api.socialapis.io/usage").mock(return_value=canned)

    async with AsyncAccount(api_token="t") as account:
        usage = await account.get_usage()

    remaining = usage["credits"]["remaining"]
    assert remaining == 100
def test_facebook_scraper_instantiates_like_facebook() -> None:
    """Constructing via the ``FacebookScraper`` alias yields a ``Facebook``."""
    fb = FacebookScraper(api_token="t")
    try:
        assert isinstance(fb, Facebook)
    finally:
        # Release the underlying HTTP client even if the assertion fails;
        # previously a failure skipped ``close()`` and leaked the client.
        fb.close()
@respx.mock
def test_get_page_info_returns_typed_model() -> None:
    """``get_page_info`` parses the details payload into a ``PageInfo``."""
    canned = httpx.Response(200, json=SAMPLE_PAGE_INFO)
    respx.get("https://api.socialapis.io/facebook/pages/details").mock(
        return_value=canned
    )

    with Facebook(api_token="t") as client:
        page = client.get_page_info("EngenSA")

    assert isinstance(page, PageInfo)
    assert page.id == "143568085655519"
    assert page.name == "Engen SA"
    assert page.likes == 1_234_567
    assert page.verified is True
    # The camelCase key in the payload fills the snake_case attribute.
    expected_image = "https://scontent.fbcdn.net/profile.jpg"
    assert page.profile_image_url == expected_image
return_value=httpx.Response(200, json=SAMPLE_PAGE_INFO) + ) + with Facebook(api_token="t") as fb: + fb.get_page_info("EngenSA") + assert route.calls.last.request.url.params["link"] == "https://www.facebook.com/EngenSA" + + +@respx.mock +def test_get_page_info_sends_auth_header() -> None: + route = respx.get("https://api.socialapis.io/facebook/pages/details").mock( + return_value=httpx.Response(200, json=SAMPLE_PAGE_INFO) + ) + with Facebook(api_token="my_secret") as fb: + fb.get_page_info("EngenSA") + assert route.calls.last.request.headers["x-api-token"] == "my_secret" + + +def test_missing_api_token_raises_immediately() -> None: + with pytest.raises(ValueError, match="api_token is required"): + Facebook(api_token="") + + +# ============================================================================ +# Endpoint coverage — one assertion per category to confirm URL routing +# ============================================================================ + + +@respx.mock +def test_get_page_posts_hits_pages_posts_endpoint() -> None: + route = respx.get("https://api.socialapis.io/facebook/pages/posts").mock( + return_value=httpx.Response(200, json={"posts": []}) + ) + with Facebook(api_token="t") as fb: + fb.get_page_posts("EngenSA") + assert route.called + + +@respx.mock +def test_get_group_id_routes_to_groups_id_endpoint() -> None: + route = respx.get("https://api.socialapis.io/facebook/groups/id").mock( + return_value=httpx.Response(200, json={"id": "187988788687356"}) + ) + with Facebook(api_token="t") as fb: + fb.get_group_id("gieldagryplanszowe") + # Bare slug normalises to /groups/ URL + assert route.calls.last.request.url.params["link"] == ( + "https://www.facebook.com/groups/gieldagryplanszowe" + ) + + +@respx.mock +def test_search_pages_passes_query_and_extra_kwargs() -> None: + route = respx.get("https://api.socialapis.io/facebook/search/pages").mock( + return_value=httpx.Response(200, json={"results": []}) + ) + with Facebook(api_token="t") as fb: + 
fb.search_pages("marketing", location_id="103006566409959") + params = route.calls.last.request.url.params + assert params["query"] == "marketing" + assert params["location_id"] == "103006566409959" + + +@respx.mock +def test_search_ads_routes_to_ads_search() -> None: + route = respx.get("https://api.socialapis.io/facebook/ads/search").mock( + return_value=httpx.Response(200, json={"ads": []}) + ) + with Facebook(api_token="t") as fb: + fb.search_ads("fitness", country="US", activeStatus="Active") + params = route.calls.last.request.url.params + assert params["query"] == "fitness" + assert params["country"] == "US" + assert params["activeStatus"] == "Active" + + +@respx.mock +def test_search_marketplace_routes_to_marketplace_search() -> None: + route = respx.get("https://api.socialapis.io/facebook/marketplace/search").mock( + return_value=httpx.Response(200, json={"listings": []}) + ) + with Facebook(api_token="t") as fb: + fb.search_marketplace( + "cars", + filter_location_latitude="40.7142", + filter_location_longitude="-74.0064", + ) + assert route.called + + +@respx.mock +def test_get_comment_replies_takes_both_required_params() -> None: + route = respx.get("https://api.socialapis.io/facebook/posts/comments/replies").mock( + return_value=httpx.Response(200, json={"replies": []}) + ) + with Facebook(api_token="t") as fb: + fb.get_comment_replies("FB_COMMENT_ID_X", "EXPANSION_TOKEN_Y") + params = route.calls.last.request.url.params + assert params["comment_feedback_id"] == "FB_COMMENT_ID_X" + assert params["expansion_token"] == "EXPANSION_TOKEN_Y" + + +@respx.mock +def test_extra_kwargs_forward_to_query_string() -> None: + """kwargs should land on the request as raw query params — the SDK + doesn't filter or validate them. 
This is what makes the SDK + forward-compatible when the API adds a new filter.""" + route = respx.get("https://api.socialapis.io/facebook/pages/posts").mock( + return_value=httpx.Response(200, json={"posts": []}) + ) + with Facebook(api_token="t") as fb: + fb.get_page_posts("EngenSA", end_cursor="abc123", some_future_param="x") + params = route.calls.last.request.url.params + assert params["end_cursor"] == "abc123" + assert params["some_future_param"] == "x" + + +# ============================================================================ +# Error mapping — one per HTTP status the API documents +# ============================================================================ + + +@respx.mock +def test_401_maps_to_authentication_error() -> None: + respx.get("https://api.socialapis.io/facebook/pages/details").mock( + return_value=httpx.Response(401, json={"error": "Invalid API token"}) + ) + with Facebook(api_token="bad") as fb, pytest.raises(AuthenticationError) as exc_info: + fb.get_page_info("EngenSA") + assert exc_info.value.status_code == 401 + + +@respx.mock +def test_402_maps_to_insufficient_credits_error() -> None: + respx.get("https://api.socialapis.io/facebook/pages/details").mock( + return_value=httpx.Response(402, json={"error": "Credit balance exhausted"}) + ) + with Facebook(api_token="t") as fb, pytest.raises(InsufficientCreditsError): + fb.get_page_info("EngenSA") + + +@respx.mock +def test_429_maps_to_rate_limit_error_with_retry_after() -> None: + respx.get("https://api.socialapis.io/facebook/pages/details").mock( + return_value=httpx.Response( + 429, + json={"error": "Rate limit exceeded"}, + headers={"retry-after": "12"}, + ) + ) + with Facebook(api_token="t") as fb, pytest.raises(RateLimitError) as exc_info: + fb.get_page_info("EngenSA") + assert exc_info.value.retry_after_seconds == 12.0 + + +@respx.mock +def test_400_maps_to_bad_request_error() -> None: + respx.get("https://api.socialapis.io/facebook/pages/details").mock( + 
return_value=httpx.Response(400, json={"error": "page not found"}) + ) + with Facebook(api_token="t") as fb, pytest.raises(BadRequestError): + fb.get_page_info("EngenSA") + + +# ============================================================================ +# Async client smoke test +# ============================================================================ + + +@pytest.mark.asyncio +@respx.mock +async def test_async_get_page_info_works() -> None: + respx.get("https://api.socialapis.io/facebook/pages/details").mock( + return_value=httpx.Response(200, json=SAMPLE_PAGE_INFO) + ) + async with AsyncFacebook(api_token="t") as fb: + page = await fb.get_page_info("EngenSA") + assert page.name == "Engen SA" + + +@pytest.mark.asyncio +@respx.mock +async def test_async_search_marketplace_works() -> None: + respx.get("https://api.socialapis.io/facebook/marketplace/search").mock( + return_value=httpx.Response(200, json={"listings": []}) + ) + async with AsyncFacebook(api_token="t") as fb: + result = await fb.search_marketplace("cars") + assert result == {"listings": []} diff --git a/tests/test_instagram.py b/tests/test_instagram.py new file mode 100644 index 0000000..281c831 --- /dev/null +++ b/tests/test_instagram.py @@ -0,0 +1,86 @@ +"""Tests for the Instagram client. + +Same shape as test_facebook.py — respx-mocked HTTP, no live calls in CI. 
+"""
+
+from __future__ import annotations
+
+import httpx
+import pytest
+import respx
+
+from socialapis import AsyncInstagram, Instagram, ProfileInfo
+
+SAMPLE_PROFILE = {
+    "id": "25025320",
+    "username": "instagram",
+    "fullName": "Instagram",
+    "biography": "Discover what's new on Instagram 🌟",
+    "followerCount": 670_000_000,
+    "followingCount": 50,
+    "postsCount": 7_900,
+    "isVerified": True,
+    "isPrivate": False,
+    "isBusiness": True,
+    "profilePictureUrl": "https://scontent.cdninstagram.com/profile.jpg",
+}
+
+
+@respx.mock
+def test_get_profile_details_returns_typed_model() -> None:
+    respx.get("https://api.socialapis.io/instagram/profile/details").mock(
+        return_value=httpx.Response(200, json=SAMPLE_PROFILE)
+    )
+
+    with Instagram(api_token="t") as client:
+        details = client.get_profile_details("instagram")
+
+    assert isinstance(details, ProfileInfo)
+    assert details.username == "instagram"
+    assert details.id == "25025320"
+    assert details.full_name == "Instagram"
+    assert details.is_verified is True
+    assert details.followers == 670_000_000
+
+
+@respx.mock
+def test_get_user_id_normalises_username_to_url() -> None:
+    stub = respx.get("https://api.socialapis.io/instagram/user/id").mock(
+        return_value=httpx.Response(200, json={"id": "25025320"})
+    )
+    with Instagram(api_token="t") as client:
+        client.get_user_id("instagram")
+    assert stub.calls.last.request.url.params["link"] == "https://www.instagram.com/instagram"
+
+
+@respx.mock
+def test_search_hits_instagram_search_endpoint() -> None:
+    stub = respx.get("https://api.socialapis.io/instagram/search").mock(
+        return_value=httpx.Response(200, json={"users": [], "hashtags": []})
+    )
+    with Instagram(api_token="t") as client:
+        client.search("travel")
+    assert stub.calls.last.request.url.params["keyword"] == "travel"
+
+
+@respx.mock
+def test_get_location_posts_passes_tab_kwarg() -> None:
+    stub = respx.get("https://api.socialapis.io/instagram/location/posts").mock(
+        return_value=httpx.Response(200, json={"posts": []})
+    )
+    with Instagram(api_token="t") as client:
+        client.get_location_posts("454547536", tab="ranked")
+    sent = stub.calls.last.request.url.params
+    assert sent["tab"] == "ranked"
+    assert sent["location_id"] == "454547536"
+
+
+@pytest.mark.asyncio
+@respx.mock
+async def test_async_get_profile_details_works() -> None:
+    respx.get("https://api.socialapis.io/instagram/profile/details").mock(
+        return_value=httpx.Response(200, json=SAMPLE_PROFILE)
+    )
+    async with AsyncInstagram(api_token="t") as client:
+        details = await client.get_profile_details("instagram")
+    assert details.username == "instagram"