feat: initial commit
This commit is contained in:
commit
a87e00bc80
248
.gitignore
vendored
Normal file
248
.gitignore
vendored
Normal file
@ -0,0 +1,248 @@
|
|||||||
|
# Created by https://www.toptal.com/developers/gitignore/api/linux,intellij,jetbrains,java,react
|
||||||
|
# Edit at https://www.toptal.com/developers/gitignore?templates=linux,intellij,jetbrains,java,react
|
||||||
|
|
||||||
|
### Intellij ###
|
||||||
|
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
|
||||||
|
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
|
||||||
|
|
||||||
|
# User-specific stuff
|
||||||
|
.idea/**/workspace.xml
|
||||||
|
.idea/**/tasks.xml
|
||||||
|
.idea/**/usage.statistics.xml
|
||||||
|
.idea/**/dictionaries
|
||||||
|
.idea/**/shelf
|
||||||
|
|
||||||
|
# AWS User-specific
|
||||||
|
.idea/**/aws.xml
|
||||||
|
|
||||||
|
# Generated files
|
||||||
|
.idea/**/contentModel.xml
|
||||||
|
|
||||||
|
# Sensitive or high-churn files
|
||||||
|
.idea/**/dataSources/
|
||||||
|
.idea/**/dataSources.ids
|
||||||
|
.idea/**/dataSources.local.xml
|
||||||
|
.idea/**/sqlDataSources.xml
|
||||||
|
.idea/**/dynamic.xml
|
||||||
|
.idea/**/uiDesigner.xml
|
||||||
|
.idea/**/dbnavigator.xml
|
||||||
|
|
||||||
|
# Gradle
|
||||||
|
.idea/**/gradle.xml
|
||||||
|
.idea/**/libraries
|
||||||
|
|
||||||
|
# Gradle and Maven with auto-import
|
||||||
|
# When using Gradle or Maven with auto-import, you should exclude module files,
|
||||||
|
# since they will be recreated, and may cause churn. Uncomment if using
|
||||||
|
# auto-import.
|
||||||
|
# .idea/artifacts
|
||||||
|
# .idea/compiler.xml
|
||||||
|
# .idea/jarRepositories.xml
|
||||||
|
# .idea/modules.xml
|
||||||
|
# .idea/*.iml
|
||||||
|
# .idea/modules
|
||||||
|
# *.iml
|
||||||
|
# *.ipr
|
||||||
|
|
||||||
|
# CMake
|
||||||
|
cmake-build-*/
|
||||||
|
|
||||||
|
# Mongo Explorer plugin
|
||||||
|
.idea/**/mongoSettings.xml
|
||||||
|
|
||||||
|
# File-based project format
|
||||||
|
*.iws
|
||||||
|
|
||||||
|
# IntelliJ
|
||||||
|
out/
|
||||||
|
|
||||||
|
# mpeltonen/sbt-idea plugin
|
||||||
|
.idea_modules/
|
||||||
|
|
||||||
|
# JIRA plugin
|
||||||
|
atlassian-ide-plugin.xml
|
||||||
|
|
||||||
|
# Cursive Clojure plugin
|
||||||
|
.idea/replstate.xml
|
||||||
|
|
||||||
|
# SonarLint plugin
|
||||||
|
.idea/sonarlint/
|
||||||
|
|
||||||
|
# Crashlytics plugin (for Android Studio and IntelliJ)
|
||||||
|
com_crashlytics_export_strings.xml
|
||||||
|
crashlytics.properties
|
||||||
|
crashlytics-build.properties
|
||||||
|
fabric.properties
|
||||||
|
|
||||||
|
# Editor-based Rest Client
|
||||||
|
.idea/httpRequests
|
||||||
|
|
||||||
|
# Android studio 3.1+ serialized cache file
|
||||||
|
.idea/caches/build_file_checksums.ser
|
||||||
|
|
||||||
|
### Intellij Patch ###
|
||||||
|
# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
|
||||||
|
|
||||||
|
# *.iml
|
||||||
|
# modules.xml
|
||||||
|
# .idea/misc.xml
|
||||||
|
# *.ipr
|
||||||
|
|
||||||
|
# Sonarlint plugin
|
||||||
|
# https://plugins.jetbrains.com/plugin/7973-sonarlint
|
||||||
|
.idea/**/sonarlint/
|
||||||
|
|
||||||
|
# SonarQube Plugin
|
||||||
|
# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin
|
||||||
|
.idea/**/sonarIssues.xml
|
||||||
|
|
||||||
|
# Markdown Navigator plugin
|
||||||
|
# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced
|
||||||
|
.idea/**/markdown-navigator.xml
|
||||||
|
.idea/**/markdown-navigator-enh.xml
|
||||||
|
.idea/**/markdown-navigator/
|
||||||
|
|
||||||
|
# Cache file creation bug
|
||||||
|
# See https://youtrack.jetbrains.com/issue/JBR-2257
|
||||||
|
.idea/$CACHE_FILE$
|
||||||
|
|
||||||
|
# CodeStream plugin
|
||||||
|
# https://plugins.jetbrains.com/plugin/12206-codestream
|
||||||
|
.idea/codestream.xml
|
||||||
|
|
||||||
|
# Azure Toolkit for IntelliJ plugin
|
||||||
|
# https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij
|
||||||
|
.idea/**/azureSettings.xml
|
||||||
|
|
||||||
|
### Java ###
|
||||||
|
# Compiled class file
|
||||||
|
*.class
|
||||||
|
|
||||||
|
# Log file
|
||||||
|
*.log
|
||||||
|
|
||||||
|
# BlueJ files
|
||||||
|
*.ctxt
|
||||||
|
|
||||||
|
# Mobile Tools for Java (J2ME)
|
||||||
|
.mtj.tmp/
|
||||||
|
|
||||||
|
# Package Files #
|
||||||
|
*.jar
|
||||||
|
*.war
|
||||||
|
*.nar
|
||||||
|
*.ear
|
||||||
|
*.zip
|
||||||
|
*.tar.gz
|
||||||
|
*.rar
|
||||||
|
|
||||||
|
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
|
||||||
|
hs_err_pid*
|
||||||
|
replay_pid*
|
||||||
|
|
||||||
|
### JetBrains ###
|
||||||
|
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
|
||||||
|
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
|
||||||
|
|
||||||
|
# User-specific stuff
|
||||||
|
|
||||||
|
# AWS User-specific
|
||||||
|
|
||||||
|
# Generated files
|
||||||
|
|
||||||
|
# Sensitive or high-churn files
|
||||||
|
|
||||||
|
# Gradle
|
||||||
|
|
||||||
|
# Gradle and Maven with auto-import
|
||||||
|
# When using Gradle or Maven with auto-import, you should exclude module files,
|
||||||
|
# since they will be recreated, and may cause churn. Uncomment if using
|
||||||
|
# auto-import.
|
||||||
|
# .idea/artifacts
|
||||||
|
# .idea/compiler.xml
|
||||||
|
# .idea/jarRepositories.xml
|
||||||
|
# .idea/modules.xml
|
||||||
|
# .idea/*.iml
|
||||||
|
# .idea/modules
|
||||||
|
# *.iml
|
||||||
|
# *.ipr
|
||||||
|
|
||||||
|
# CMake
|
||||||
|
|
||||||
|
# Mongo Explorer plugin
|
||||||
|
|
||||||
|
# File-based project format
|
||||||
|
|
||||||
|
# IntelliJ
|
||||||
|
|
||||||
|
# mpeltonen/sbt-idea plugin
|
||||||
|
|
||||||
|
# JIRA plugin
|
||||||
|
|
||||||
|
# Cursive Clojure plugin
|
||||||
|
|
||||||
|
# SonarLint plugin
|
||||||
|
|
||||||
|
# Crashlytics plugin (for Android Studio and IntelliJ)
|
||||||
|
|
||||||
|
# Editor-based Rest Client
|
||||||
|
|
||||||
|
# Android studio 3.1+ serialized cache file
|
||||||
|
|
||||||
|
### JetBrains Patch ###
|
||||||
|
# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
|
||||||
|
|
||||||
|
# *.iml
|
||||||
|
# modules.xml
|
||||||
|
# .idea/misc.xml
|
||||||
|
# *.ipr
|
||||||
|
|
||||||
|
# Sonarlint plugin
|
||||||
|
# https://plugins.jetbrains.com/plugin/7973-sonarlint
|
||||||
|
|
||||||
|
# SonarQube Plugin
|
||||||
|
# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin
|
||||||
|
|
||||||
|
# Markdown Navigator plugin
|
||||||
|
# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced
|
||||||
|
|
||||||
|
# Cache file creation bug
|
||||||
|
# See https://youtrack.jetbrains.com/issue/JBR-2257
|
||||||
|
|
||||||
|
# CodeStream plugin
|
||||||
|
# https://plugins.jetbrains.com/plugin/12206-codestream
|
||||||
|
|
||||||
|
# Azure Toolkit for IntelliJ plugin
|
||||||
|
# https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij
|
||||||
|
|
||||||
|
### Linux ###
|
||||||
|
*~
|
||||||
|
|
||||||
|
# temporary files which can be created if a process still has a handle open of a deleted file
|
||||||
|
.fuse_hidden*
|
||||||
|
|
||||||
|
# KDE directory preferences
|
||||||
|
.directory
|
||||||
|
|
||||||
|
# Linux trash folder which might appear on any partition or disk
|
||||||
|
.Trash-*
|
||||||
|
|
||||||
|
# .nfs files are created when an open file is removed but is still being accessed
|
||||||
|
.nfs*
|
||||||
|
|
||||||
|
### react ###
|
||||||
|
.DS_*
|
||||||
|
logs
|
||||||
|
**/*.backup.*
|
||||||
|
**/*.back.*
|
||||||
|
|
||||||
|
node_modules
|
||||||
|
bower_components
|
||||||
|
|
||||||
|
*.sublime*
|
||||||
|
|
||||||
|
psd
|
||||||
|
thumb
|
||||||
|
sketch
|
||||||
|
|
||||||
|
# End of https://www.toptal.com/developers/gitignore/api/linux,intellij,jetbrains,java,react
|
||||||
122
Dockerfile
Normal file
122
Dockerfile
Normal file
@ -0,0 +1,122 @@
|
|||||||
|
# SeleniumBase Docker Image
|
||||||
|
# Base image; bash with pipefail so a failure anywhere in a piped RUN fails the build.
FROM ubuntu:22.04
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
# Unbuffered, UTF-8 encoded Python stdio.
ENV PYTHONUNBUFFERED=1 \
    PYTHONIOENCODING=UTF-8
|
||||||
|
|
||||||
|
#======================
|
||||||
|
# Locale Configuration
|
||||||
|
#======================
|
||||||
|
# Refresh the package index and install in the SAME layer: a separately cached
# `apt-get update` layer can otherwise be paired with a newer install step and
# point at package versions that no longer exist on the mirror.
# DEBIAN_FRONTEND is set only for this command so tzdata never prompts.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata locales \
    && sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen \
    && locale-gen
ENV TZ=America/New_York
# Quote the expansions so the commands stay correct for any TZ value.
RUN ln -snf "/usr/share/zoneinfo/${TZ}" /etc/localtime && echo "${TZ}" > /etc/timezone
ENV LANG=en_US.UTF-8
ENV LANGUAGE=en_US:en
ENV LC_ALL=en_US.UTF-8
# Persist the locale settings in the system config files and regenerate once.
RUN echo "LC_ALL=en_US.UTF-8" >> /etc/environment \
    && echo "en_US.UTF-8 UTF-8" >> /etc/locale.gen \
    && echo "LANG=en_US.UTF-8" > /etc/locale.conf \
    && locale-gen en_US.UTF-8
|
||||||
|
|
||||||
|
#======================
|
||||||
|
# Install Common Fonts
|
||||||
|
#======================
|
||||||
|
# Index refresh and install share one layer so the install never runs against
# a stale, separately cached package index.
RUN apt-get update \
    && apt-get install -y \
        fonts-liberation \
        fonts-liberation2 \
        fonts-font-awesome \
        fonts-ubuntu \
        fonts-terminus \
        fonts-powerline \
        fonts-open-sans \
        fonts-mononoki \
        fonts-roboto \
        fonts-lato
|
||||||
|
|
||||||
|
#============================
|
||||||
|
# Install Linux Dependencies
|
||||||
|
#============================
|
||||||
|
# Index refresh and install share one layer so the install never runs against
# a stale, separately cached package index.
RUN apt-get update \
    && apt-get install -y \
        libasound2 \
        libatk-bridge2.0-0 \
        libatk1.0-0 \
        libatspi2.0-0 \
        libcups2 \
        libdbus-1-3 \
        libdrm2 \
        libgbm1 \
        libgtk-3-0 \
        libnspr4 \
        libnss3 \
        libu2f-udev \
        libvulkan1 \
        libwayland-client0 \
        libxcomposite1 \
        libxdamage1 \
        libxfixes3 \
        libxkbcommon0 \
        libxrandr2
|
||||||
|
|
||||||
|
#==========================
|
||||||
|
# Install useful utilities
|
||||||
|
#==========================
|
||||||
|
# Index refresh and install share one layer so the install never runs against
# a stale, separately cached package index.
RUN apt-get update \
    && apt-get install -y xdg-utils ca-certificates git
|
||||||
|
|
||||||
|
#=================================
|
||||||
|
# Install Bash Command Line Tools
|
||||||
|
#=================================
|
||||||
|
# Index refresh and install share one layer so the install never runs against
# a stale, separately cached package index.
RUN apt-get update \
    && apt-get -qy --no-install-recommends install \
        curl \
        sudo \
        unzip \
        vim \
        wget \
        xvfb
|
||||||
|
|
||||||
|
#================
|
||||||
|
# Install Chrome
|
||||||
|
#================
|
||||||
|
# Download, install and delete the Chrome package inside ONE layer. Removing
# the .deb in a later RUN would leave it stored in the earlier layer, so the
# image would not get any smaller.
RUN apt-get update \
    && wget -O /tmp/google-chrome-stable_current_amd64.deb \
        https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb \
    && apt-get install -y /tmp/google-chrome-stable_current_amd64.deb \
    && rm /tmp/google-chrome-stable_current_amd64.deb
|
||||||
|
|
||||||
|
#================
|
||||||
|
# Install Python
|
||||||
|
#================
|
||||||
|
# Install Python and point /usr/bin/python3 at python3.10 in one layer.
# The former standalone `RUN alias python=python3` was dropped: an alias only
# lives in the shell of the RUN that defines it, so it had no effect on the
# image. The ~/.bashrc entry is kept for interactive bash sessions.
# `ln -sf` replaces the existing link, equivalent to the previous rm + ln -s.
RUN apt-get update \
    && apt-get install -y python3 python3-pip python3-setuptools python3-dev python3-tk \
    && apt-get -qy --no-install-recommends install python3.10 \
    && ln -sf python3.10 /usr/bin/python3 \
    && echo "alias python=python3" >> ~/.bashrc
|
||||||
|
|
||||||
|
#===============
|
||||||
|
# Cleanup Lists
|
||||||
|
#===============
|
||||||
|
# Drop the apt package cache and the downloaded package indexes.
RUN apt-get clean \
    && rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Clone into an explicit /SeleniumBase path (the steps below rely on it) and
# fetch only the latest commit; the history is not needed to install.
RUN git clone --depth 1 https://github.com/seleniumbase/SeleniumBase.git /SeleniumBase
# Limit the bytecode purge to the checkout. With no WORKDIR set, `find .`
# walked the entire filesystem from `/`.
RUN find /SeleniumBase -name '*.pyc' -delete
RUN pip install --upgrade pip setuptools wheel
# Install SeleniumBase's requirements, then the package itself from source.
RUN cd /SeleniumBase \
    && pip install -r requirements.txt --upgrade \
    && pip install .
RUN pip install pyautogui
|
||||||
|
|
||||||
|
#=======================
|
||||||
|
# Download chromedriver
|
||||||
|
#=======================
|
||||||
|
# Fetch chromedriver at build time via the SeleniumBase CLI.
# NOTE(review): `--path` presumably also places the driver on the system PATH;
# confirm against the SeleniumBase console-scripts documentation.
RUN seleniumbase get chromedriver --path
|
||||||
|
|
||||||
|
# Web-service dependencies; --no-cache-dir keeps pip's download cache out of the layer.
RUN pip install --no-cache-dir flask py-eureka-client httpx

COPY flask_app.py ./flask_app.py
# flask_app.py serves HTTP on port 8090; declare it for users of the image.
EXPOSE 8090
CMD ["python3", "./flask_app.py"]
|
||||||
35
flask_app.py
Normal file
35
flask_app.py
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
import json
|
||||||
|
|
||||||
|
from flask import Flask, request
|
||||||
|
from seleniumbase import SB
|
||||||
|
import time
|
||||||
|
|
||||||
|
# TCP port passed to app.run() when this module is executed as a script.
rest_port = 8090
|
||||||
|
|
||||||
|
# Flask application object; the @app.route decorator in this module registers handlers on it.
app = Flask(__name__)
|
||||||
|
|
||||||
|
@app.route("/url", methods=['POST'])
def hello():
    """Load a URL in a SeleniumBase browser and return its page source as JSON.

    Expects a POST whose JSON body is an object with a ``url`` key. The URL is
    opened through ``SB(uc=True)`` with CDP mode activated, the handler waits,
    calls ``uc_gui_click_captcha()``, waits again, and then reads the page
    source.

    Returns:
        A ``(body, status, headers)`` tuple. On success the body is
        ``{"pageSource": <html>}`` with status 200. When the body is not a
        JSON object containing a non-empty string ``url``, the body is
        ``{"error": <message>}`` with status 400. Both carry a JSON
        Content-Type header.
    """
    json_headers = {'Content-Type': 'application/json'}

    # silent=True turns a missing or malformed JSON body into None instead of
    # letting Flask abort with its own (non-JSON) error page, so every client
    # error leaves this handler in the same JSON shape.
    payload = request.get_json(silent=True)

    # Require a JSON object: a list or string body would pass a bare
    # `'url' in ...` check and then fail on the key lookup.
    if not isinstance(payload, dict) or 'url' not in payload:
        return json.dumps({"error": "URL parameter is required"}), 400, json_headers

    url = payload['url']
    if not isinstance(url, str) or not url.strip():
        return (
            json.dumps({"error": "URL parameter must be a non-empty string"}),
            400,
            json_headers,
        )

    # NOTE(review): `url` is caller-controlled and is opened by a real browser
    # inside this service (server-side request forgery / local-resource
    # exposure risk). Restrict the allowed schemes and hosts before exposing
    # this endpoint beyond trusted callers.
    with SB(uc=True) as sb:
        # NOTE(review): assigning page_load_strategy on an already-created
        # driver may have no effect - confirm against the SeleniumBase docs.
        sb.driver.page_load_strategy = "eager"
        sb.maximize_window()
        sb.activate_cdp_mode(url)
        sb.sleep(5)  # wait before attempting the captcha click
        sb.uc_gui_click_captcha()
        time.sleep(10)  # wait again before reading the page

        page_source = sb.get_page_source()

    return json.dumps({"pageSource": page_source}), 200, json_headers
|
||||||
|
|
||||||
|
# Script entry point: start Flask's built-in server on all interfaces.
if __name__ == "__main__":
    app.run(
        port=rest_port,
        host='0.0.0.0',
    )
|
||||||
3
requirements.txt
Normal file
3
requirements.txt
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
seleniumbase
|
||||||
|
flask
|
||||||
|
httpx
|
||||||
Loading…
x
Reference in New Issue
Block a user