commit a87e00bc807705b905418e29cfc6e57e9d2320af Author: Rodrigo Verdiani Date: Tue Oct 21 13:16:20 2025 -0300 feat: initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..18fb8a3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,248 @@ +# Created by https://www.toptal.com/developers/gitignore/api/linux,intellij,jetbrains,java,react +# Edit at https://www.toptal.com/developers/gitignore?templates=linux,intellij,jetbrains,java,react + +### Intellij ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# AWS User-specific +.idea/**/aws.xml + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. 
+# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# SonarLint plugin +.idea/sonarlint/ + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + +### Intellij Patch ### +# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 + +# *.iml +# modules.xml +# .idea/misc.xml +# *.ipr + +# Sonarlint plugin +# https://plugins.jetbrains.com/plugin/7973-sonarlint +.idea/**/sonarlint/ + +# SonarQube Plugin +# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin +.idea/**/sonarIssues.xml + +# Markdown Navigator plugin +# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced +.idea/**/markdown-navigator.xml +.idea/**/markdown-navigator-enh.xml +.idea/**/markdown-navigator/ + +# Cache file creation bug +# See https://youtrack.jetbrains.com/issue/JBR-2257 +.idea/$CACHE_FILE$ + +# CodeStream plugin +# https://plugins.jetbrains.com/plugin/12206-codestream +.idea/codestream.xml + +# Azure Toolkit for IntelliJ plugin +# https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij +.idea/**/azureSettings.xml + +### Java ### +# Compiled class file +*.class + +# Log file +*.log + +# BlueJ files +*.ctxt + +# Mobile Tools for Java (J2ME) +.mtj.tmp/ + +# Package Files # +*.jar +*.war +*.nar +*.ear +*.zip +*.tar.gz +*.rar + +# virtual machine crash logs, see 
http://www.java.com/en/download/help/error_hotspot.xml +hs_err_pid* +replay_pid* + +### JetBrains ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff + +# AWS User-specific + +# Generated files + +# Sensitive or high-churn files + +# Gradle + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. +# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake + +# Mongo Explorer plugin + +# File-based project format + +# IntelliJ + +# mpeltonen/sbt-idea plugin + +# JIRA plugin + +# Cursive Clojure plugin + +# SonarLint plugin + +# Crashlytics plugin (for Android Studio and IntelliJ) + +# Editor-based Rest Client + +# Android studio 3.1+ serialized cache file + +### JetBrains Patch ### +# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 + +# *.iml +# modules.xml +# .idea/misc.xml +# *.ipr + +# Sonarlint plugin +# https://plugins.jetbrains.com/plugin/7973-sonarlint + +# SonarQube Plugin +# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin + +# Markdown Navigator plugin +# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced + +# Cache file creation bug +# See https://youtrack.jetbrains.com/issue/JBR-2257 + +# CodeStream plugin +# https://plugins.jetbrains.com/plugin/12206-codestream + +# Azure Toolkit for IntelliJ plugin +# https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij + +### Linux ### +*~ + +# temporary files which can be created if a process still has a handle open of a deleted file +.fuse_hidden* + +# KDE directory preferences +.directory + +# Linux trash 
folder which might appear on any partition or disk
+.Trash-*
+
+# .nfs files are created when an open file is removed but is still being accessed
+.nfs*
+
+### react ###
+.DS_*
+logs
+**/*.backup.*
+**/*.back.*
+
+node_modules
+bower_components
+
+*.sublime*
+
+psd
+thumb
+sketch
+
+# End of https://www.toptal.com/developers/gitignore/api/linux,intellij,jetbrains,java,react
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..431a099
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,122 @@
+# SeleniumBase Docker Image: Ubuntu 22.04 base; RUN steps use bash with pipefail
+FROM ubuntu:22.04
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+ENV PYTHONUNBUFFERED=1
+ENV PYTHONIOENCODING=UTF-8
+
+#======================
+# Locale Configuration: en_US.UTF-8 locale, America/New_York timezone
+#======================
+RUN apt-get update
+RUN apt-get install -y --no-install-recommends tzdata locales
+RUN sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen && locale-gen
+ENV TZ=America/New_York
+RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
+ENV LANG=en_US.UTF-8
+ENV LANGUAGE=en_US:en
+ENV LC_ALL=en_US.UTF-8
+RUN echo "LC_ALL=en_US.UTF-8" >> /etc/environment
+RUN echo "en_US.UTF-8 UTF-8" >> /etc/locale.gen
+RUN echo "LANG=en_US.UTF-8" > /etc/locale.conf
+RUN locale-gen en_US.UTF-8
+
+#======================
+# Install Common Fonts
+#======================
+RUN apt-get update
+RUN apt-get install -y \
+    fonts-liberation \
+    fonts-liberation2 \
+    fonts-font-awesome \
+    fonts-ubuntu \
+    fonts-terminus \
+    fonts-powerline \
+    fonts-open-sans \
+    fonts-mononoki \
+    fonts-roboto \
+    fonts-lato
+
+#============================
+# Install Linux Dependencies
+#============================
+RUN apt-get update
+RUN apt-get install -y \
+    libasound2 \
+    libatk-bridge2.0-0 \
+    libatk1.0-0 \
+    libatspi2.0-0 \
+    libcups2 \
+    libdbus-1-3 \
+    libdrm2 \
+    libgbm1 \
+    libgtk-3-0 \
+    libnspr4 \
+    libnss3 \
+    libu2f-udev \
+    libvulkan1 \
+    libwayland-client0 \
+    libxcomposite1 \
+    libxdamage1 \
+    libxfixes3 \
+    libxkbcommon0 \
+    libxrandr2
+
+#==========================
+# Install useful utilities
+#==========================
+RUN apt-get update
+RUN apt-get install -y xdg-utils ca-certificates git
+
+#=================================
+# Install Bash Command Line Tools
+#=================================
+RUN apt-get update
+RUN apt-get -qy --no-install-recommends install \
+    curl \
+    sudo \
+    unzip \
+    vim \
+    wget \
+    xvfb
+
+#================
+# Install Chrome
+#================
+RUN apt-get update
+RUN wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
+RUN apt-get install -y ./google-chrome-stable_current_amd64.deb
+RUN rm ./google-chrome-stable_current_amd64.deb
+
+#================
+# Install Python
+#================
+RUN apt-get update
+RUN apt-get install -y python3 python3-pip python3-setuptools python3-dev python3-tk
+# A shell alias does not survive the RUN step that defines it, so `python` is a symlink below.
+RUN echo "alias python=python3" >> ~/.bashrc
+RUN apt-get -qy --no-install-recommends install python3.10
+RUN rm /usr/bin/python3
+RUN ln -s python3.10 /usr/bin/python3 && ln -sf python3 /usr/bin/python
+
+#===============
+# Cleanup Lists
+#===============
+RUN apt-get clean
+RUN rm -rf /var/lib/apt/lists/*
+# Clone SeleniumBase into /SeleniumBase; purge stale .pyc files from that tree only.
+RUN git clone https://github.com/seleniumbase/SeleniumBase.git
+RUN find /SeleniumBase -name '*.pyc' -delete
+RUN pip install --upgrade pip setuptools wheel
+RUN cd /SeleniumBase && ls && pip install -r requirements.txt --upgrade
+RUN cd /SeleniumBase && pip install .
+RUN pip install pyautogui
+
+#=======================
+# Download chromedriver
+#=======================
+RUN seleniumbase get chromedriver --path
+
+RUN pip install flask py-eureka-client httpx
+
+COPY flask_app.py ./flask_app.py
+CMD ["python3", "./flask_app.py"]
diff --git a/flask_app.py b/flask_app.py
new file mode 100644
index 0000000..146897e
--- /dev/null
+++ b/flask_app.py
@@ -0,0 +1,49 @@
+"""HTTP service that loads a URL with SeleniumBase and returns the page source."""
+import json
+import time
+
+from flask import Flask, request
+from seleniumbase import SB
+
+rest_port = 8090
+
+app = Flask(__name__)
+
+JSON_HEADERS = {'Content-Type': 'application/json'}
+
+
+@app.route("/url", methods=['POST'])
+def hello():
+    """Load the posted URL in a browser and return its page source as JSON.
+
+    The request body must be a JSON object with a non-empty string "url".
+    Returns {"pageSource": ...} with status 200, or {"error": ...} with
+    status 400 when the body is not such an object.
+    """
+    # silent=True yields None for a malformed or non-JSON body, so those
+    # requests get the JSON 400 below instead of Flask's own error page.
+    payload = request.get_json(silent=True)
+
+    url = payload.get('url') if isinstance(payload, dict) else None
+    if not isinstance(url, str) or not url:
+        error = {"error": "URL parameter is required"}
+        return json.dumps(error), 400, JSON_HEADERS
+
+    # NOTE(review): the URL comes from the caller and is opened unchecked;
+    # consider restricting schemes/hosts if this port is reachable by
+    # untrusted clients.
+    with SB(uc=True) as sb:
+        sb.driver.page_load_strategy = "eager"
+        sb.maximize_window()
+        sb.activate_cdp_mode(url)
+        sb.sleep(5)
+        sb.uc_gui_click_captcha()
+        time.sleep(10)
+        # Capture the source while the browser session is still open.
+        result = {"pageSource": sb.get_page_source()}
+
+    return json.dumps(result), 200, JSON_HEADERS
+
+
+if __name__ == "__main__":
+    app.run(host='0.0.0.0', port=rest_port)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..e6b4c19
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+seleniumbase
+flask
+httpx