web-scrapper/app.py

45 lines
1.2 KiB
Python

import json
import time
import threading
from flask import Flask, request
from seleniumbase import SB
# TCP port the REST endpoint is served on (read by the __main__ entry point).
rest_port = 8090

# Process-wide mutex: request handlers take it so that at most one
# SeleniumBase browser session exists at any moment.
sb_lock = threading.Lock()

# Flask application object that the route decorators register against.
app = Flask(__name__)
def _json_response(payload, status):
    """Serialize *payload* to JSON and pair it with *status* and a JSON Content-Type."""
    return json.dumps(payload), status, {'Content-Type': 'application/json'}


@app.route("/url", methods=['POST'])
def process_url():
    """Load a URL in a SeleniumBase browser and return the resulting page source.

    Expects a JSON object body containing a ``url`` key whose value is an
    ``http://`` or ``https://`` URL string.

    Returns:
        A ``(body, status, headers)`` tuple whose body is always JSON:
        * 200 with ``{"page_source": ...}`` on success,
        * 400 with ``{"error": ...}`` when the body or URL is invalid,
        * 500 with ``{"error": ...}`` when the browser session raises.
    """
    # silent=True makes Flask return None for a wrong content type or
    # malformed JSON instead of aborting with its own non-JSON error page.
    payload = request.get_json(silent=True)
    # Require a JSON *object*: a bare string or list could otherwise pass the
    # membership test and then fail on the subscript below.
    if not isinstance(payload, dict) or 'url' not in payload:
        return _json_response({"error": "URL parameter is required"}, 400)

    url = payload['url']
    # The URL comes from an untrusted client and is opened by a browser on
    # this host; only web schemes are accepted so that file://, chrome://,
    # view-source: and similar URLs cannot be used to read local resources.
    # NOTE(review): this does not block requests to internal network hosts.
    if not isinstance(url, str) or not url.lower().startswith(("http://", "https://")):
        return _json_response({"error": "URL must be an http:// or https:// string"}, 400)

    try:
        # Only one request at a time may own a browser session; concurrent
        # requests wait here until the lock is released.
        with sb_lock:
            with SB(uc=True, pls="none") as sb:
                sb.activate_cdp_mode(url)
                sb.sleep(5)  # pause before attempting the captcha click
                sb.uc_gui_click_captcha()
                time.sleep(10)  # pause again before capturing the page source
                page_source = sb.get_page_source()
    except Exception as e:
        # Request boundary: record the traceback server-side and report the
        # failure to the client rather than letting the exception escape.
        app.logger.exception("Failed to fetch page source for %s", url)
        return _json_response({"error": str(e)}, 500)

    # Serialization happens after the browser is closed and the lock released.
    return _json_response({"page_source": page_source}, 200)
if __name__ == "__main__":
    # The threaded server accepts requests concurrently; each handler then
    # waits its turn on sb_lock before driving a browser.
    server_options = {
        "host": '0.0.0.0',
        "port": rest_port,
        "threaded": True,
    }
    app.run(**server_options)