import time

from fastapi import FastAPI
from pydantic import BaseModel
from seleniumbase import SB

app = FastAPI(
    title="SeleniumBase Image Scraper API",
    description="An API that scrolls pages and returns their rendered HTML content.",
    version="1.0.0",
)


class ScrapeRequest(BaseModel):
    """Request body for the scrape endpoint."""

    url: str  # Target page URL to load and scroll.


@app.post("/")
def _get_image_urls(req: ScrapeRequest):
    """Load ``req.url`` in a headless UC-mode browser, scroll to the bottom
    of the page to trigger lazy loading, and return the rendered HTML.

    Despite the name, this returns the full page source, not image URLs.

    Args:
        req: Request body containing the target ``url``.

    Returns:
        dict: ``{"pageSource": <full HTML after scrolling>}``
    """
    with SB(uc=True, headless=True) as sb:
        sb.activate_cdp_mode(req.url)
        sb.sleep(5)  # Wait for initial load.

        # Scroll two screen-heights per step so lazy loaders fire quickly.
        step = sb.execute_script("return window.screen.height") * 2
        scroll_count = 1
        max_scrolls = 500  # Safety limit to prevent infinite loops.

        # Infinite-scroll loop: keep scrolling until we pass the document
        # bottom or hit the safety limit.
        while True:
            current_scroll = step * scroll_count
            sb.execute_script(f"window.scrollTo(0, {current_scroll});")
            scroll_count += 1
            # Use sb.sleep consistently (was time.sleep) so the pause goes
            # through the same driver-aware wait as the rest of the flow.
            sb.sleep(0.5)

            doc_height = sb.execute_script("return document.body.scrollHeight")
            # Stop once we have scrolled past the bottom or exhausted the limit.
            if current_scroll > doc_height or scroll_count > max_scrolls:
                break

        # Final wait for any remaining lazy-loaded elements.
        sb.sleep(2)

        # Extract the fully rendered HTML.
        page_source = sb.get_page_source()
        return {"pageSource": page_source}


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8001)