feat: initial commit
This commit is contained in:
commit
a87e00bc80
248
.gitignore
vendored
Normal file
248
.gitignore
vendored
Normal file
@ -0,0 +1,248 @@
|
||||
# Created by https://www.toptal.com/developers/gitignore/api/linux,intellij,jetbrains,java,react
|
||||
# Edit at https://www.toptal.com/developers/gitignore?templates=linux,intellij,jetbrains,java,react
|
||||
|
||||
### Intellij ###
|
||||
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
|
||||
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
|
||||
|
||||
# User-specific stuff
|
||||
.idea/**/workspace.xml
|
||||
.idea/**/tasks.xml
|
||||
.idea/**/usage.statistics.xml
|
||||
.idea/**/dictionaries
|
||||
.idea/**/shelf
|
||||
|
||||
# AWS User-specific
|
||||
.idea/**/aws.xml
|
||||
|
||||
# Generated files
|
||||
.idea/**/contentModel.xml
|
||||
|
||||
# Sensitive or high-churn files
|
||||
.idea/**/dataSources/
|
||||
.idea/**/dataSources.ids
|
||||
.idea/**/dataSources.local.xml
|
||||
.idea/**/sqlDataSources.xml
|
||||
.idea/**/dynamic.xml
|
||||
.idea/**/uiDesigner.xml
|
||||
.idea/**/dbnavigator.xml
|
||||
|
||||
# Gradle
|
||||
.idea/**/gradle.xml
|
||||
.idea/**/libraries
|
||||
|
||||
# Gradle and Maven with auto-import
|
||||
# When using Gradle or Maven with auto-import, you should exclude module files,
|
||||
# since they will be recreated, and may cause churn. Uncomment if using
|
||||
# auto-import.
|
||||
# .idea/artifacts
|
||||
# .idea/compiler.xml
|
||||
# .idea/jarRepositories.xml
|
||||
# .idea/modules.xml
|
||||
# .idea/*.iml
|
||||
# .idea/modules
|
||||
# *.iml
|
||||
# *.ipr
|
||||
|
||||
# CMake
|
||||
cmake-build-*/
|
||||
|
||||
# Mongo Explorer plugin
|
||||
.idea/**/mongoSettings.xml
|
||||
|
||||
# File-based project format
|
||||
*.iws
|
||||
|
||||
# IntelliJ
|
||||
out/
|
||||
|
||||
# mpeltonen/sbt-idea plugin
|
||||
.idea_modules/
|
||||
|
||||
# JIRA plugin
|
||||
atlassian-ide-plugin.xml
|
||||
|
||||
# Cursive Clojure plugin
|
||||
.idea/replstate.xml
|
||||
|
||||
# SonarLint plugin
|
||||
.idea/sonarlint/
|
||||
|
||||
# Crashlytics plugin (for Android Studio and IntelliJ)
|
||||
com_crashlytics_export_strings.xml
|
||||
crashlytics.properties
|
||||
crashlytics-build.properties
|
||||
fabric.properties
|
||||
|
||||
# Editor-based Rest Client
|
||||
.idea/httpRequests
|
||||
|
||||
# Android studio 3.1+ serialized cache file
|
||||
.idea/caches/build_file_checksums.ser
|
||||
|
||||
### Intellij Patch ###
|
||||
# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
|
||||
|
||||
# *.iml
|
||||
# modules.xml
|
||||
# .idea/misc.xml
|
||||
# *.ipr
|
||||
|
||||
# Sonarlint plugin
|
||||
# https://plugins.jetbrains.com/plugin/7973-sonarlint
|
||||
.idea/**/sonarlint/
|
||||
|
||||
# SonarQube Plugin
|
||||
# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin
|
||||
.idea/**/sonarIssues.xml
|
||||
|
||||
# Markdown Navigator plugin
|
||||
# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced
|
||||
.idea/**/markdown-navigator.xml
|
||||
.idea/**/markdown-navigator-enh.xml
|
||||
.idea/**/markdown-navigator/
|
||||
|
||||
# Cache file creation bug
|
||||
# See https://youtrack.jetbrains.com/issue/JBR-2257
|
||||
.idea/$CACHE_FILE$
|
||||
|
||||
# CodeStream plugin
|
||||
# https://plugins.jetbrains.com/plugin/12206-codestream
|
||||
.idea/codestream.xml
|
||||
|
||||
# Azure Toolkit for IntelliJ plugin
|
||||
# https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij
|
||||
.idea/**/azureSettings.xml
|
||||
|
||||
### Java ###
|
||||
# Compiled class file
|
||||
*.class
|
||||
|
||||
# Log file
|
||||
*.log
|
||||
|
||||
# BlueJ files
|
||||
*.ctxt
|
||||
|
||||
# Mobile Tools for Java (J2ME)
|
||||
.mtj.tmp/
|
||||
|
||||
# Package Files #
|
||||
*.jar
|
||||
*.war
|
||||
*.nar
|
||||
*.ear
|
||||
*.zip
|
||||
*.tar.gz
|
||||
*.rar
|
||||
|
||||
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
|
||||
hs_err_pid*
|
||||
replay_pid*
|
||||
|
||||
### JetBrains ###
|
||||
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
|
||||
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
|
||||
|
||||
# User-specific stuff
|
||||
|
||||
# AWS User-specific
|
||||
|
||||
# Generated files
|
||||
|
||||
# Sensitive or high-churn files
|
||||
|
||||
# Gradle
|
||||
|
||||
# Gradle and Maven with auto-import
|
||||
# When using Gradle or Maven with auto-import, you should exclude module files,
|
||||
# since they will be recreated, and may cause churn. Uncomment if using
|
||||
# auto-import.
|
||||
# .idea/artifacts
|
||||
# .idea/compiler.xml
|
||||
# .idea/jarRepositories.xml
|
||||
# .idea/modules.xml
|
||||
# .idea/*.iml
|
||||
# .idea/modules
|
||||
# *.iml
|
||||
# *.ipr
|
||||
|
||||
# CMake
|
||||
|
||||
# Mongo Explorer plugin
|
||||
|
||||
# File-based project format
|
||||
|
||||
# IntelliJ
|
||||
|
||||
# mpeltonen/sbt-idea plugin
|
||||
|
||||
# JIRA plugin
|
||||
|
||||
# Cursive Clojure plugin
|
||||
|
||||
# SonarLint plugin
|
||||
|
||||
# Crashlytics plugin (for Android Studio and IntelliJ)
|
||||
|
||||
# Editor-based Rest Client
|
||||
|
||||
# Android studio 3.1+ serialized cache file
|
||||
|
||||
### JetBrains Patch ###
|
||||
# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
|
||||
|
||||
# *.iml
|
||||
# modules.xml
|
||||
# .idea/misc.xml
|
||||
# *.ipr
|
||||
|
||||
# Sonarlint plugin
|
||||
# https://plugins.jetbrains.com/plugin/7973-sonarlint
|
||||
|
||||
# SonarQube Plugin
|
||||
# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin
|
||||
|
||||
# Markdown Navigator plugin
|
||||
# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced
|
||||
|
||||
# Cache file creation bug
|
||||
# See https://youtrack.jetbrains.com/issue/JBR-2257
|
||||
|
||||
# CodeStream plugin
|
||||
# https://plugins.jetbrains.com/plugin/12206-codestream
|
||||
|
||||
# Azure Toolkit for IntelliJ plugin
|
||||
# https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij
|
||||
|
||||
### Linux ###
|
||||
*~
|
||||
|
||||
# temporary files which can be created if a process still has a handle open of a deleted file
|
||||
.fuse_hidden*
|
||||
|
||||
# KDE directory preferences
|
||||
.directory
|
||||
|
||||
# Linux trash folder which might appear on any partition or disk
|
||||
.Trash-*
|
||||
|
||||
# .nfs files are created when an open file is removed but is still being accessed
|
||||
.nfs*
|
||||
|
||||
### react ###
|
||||
.DS_*
|
||||
logs
|
||||
**/*.backup.*
|
||||
**/*.back.*
|
||||
|
||||
node_modules
|
||||
bower_components
|
||||
|
||||
*.sublime*
|
||||
|
||||
psd
|
||||
thumb
|
||||
sketch
|
||||
|
||||
# End of https://www.toptal.com/developers/gitignore/api/linux,intellij,jetbrains,java,react
|
||||
122
Dockerfile
Normal file
122
Dockerfile
Normal file
@ -0,0 +1,122 @@
|
||||
# SeleniumBase Docker Image
#
# Builds an Ubuntu 22.04 image with Google Chrome, Python 3.10, SeleniumBase
# (installed from its GitHub repository) and the Flask service in flask_app.py.
#
# Layering rule used throughout: every `apt-get install` runs in the same RUN
# as its `apt-get update` and removes /var/lib/apt/lists afterwards. A separate
# `apt-get update` layer can be served from the build cache and leave a later
# install working with stale package indexes, and a cleanup done in a later
# layer cannot shrink the layers that came before it.
FROM ubuntu:22.04
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
ENV PYTHONUNBUFFERED=1
ENV PYTHONIOENCODING=UTF-8
# Build-time only (ARG, not ENV): keeps debconf from prompting, e.g. for tzdata.
ARG DEBIAN_FRONTEND=noninteractive

#======================
# Locale Configuration
#======================
RUN apt-get update \
    && apt-get install -y --no-install-recommends tzdata locales \
    && rm -rf /var/lib/apt/lists/* \
    && sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen \
    && locale-gen
ENV TZ=America/New_York
RUN ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone
ENV LANG=en_US.UTF-8
ENV LANGUAGE=en_US:en
ENV LC_ALL=en_US.UTF-8
RUN echo "LC_ALL=en_US.UTF-8" >> /etc/environment \
    && echo "en_US.UTF-8 UTF-8" >> /etc/locale.gen \
    && echo "LANG=en_US.UTF-8" > /etc/locale.conf \
    && locale-gen en_US.UTF-8

#======================
# Install Common Fonts
#======================
RUN apt-get update \
    && apt-get install -y \
        fonts-liberation \
        fonts-liberation2 \
        fonts-font-awesome \
        fonts-ubuntu \
        fonts-terminus \
        fonts-powerline \
        fonts-open-sans \
        fonts-mononoki \
        fonts-roboto \
        fonts-lato \
    && rm -rf /var/lib/apt/lists/*

#============================
# Install Linux Dependencies
#============================
RUN apt-get update \
    && apt-get install -y \
        libasound2 \
        libatk-bridge2.0-0 \
        libatk1.0-0 \
        libatspi2.0-0 \
        libcups2 \
        libdbus-1-3 \
        libdrm2 \
        libgbm1 \
        libgtk-3-0 \
        libnspr4 \
        libnss3 \
        libu2f-udev \
        libvulkan1 \
        libwayland-client0 \
        libxcomposite1 \
        libxdamage1 \
        libxfixes3 \
        libxkbcommon0 \
        libxrandr2 \
    && rm -rf /var/lib/apt/lists/*

#==========================
# Install useful utilities
#==========================
RUN apt-get update \
    && apt-get install -y xdg-utils ca-certificates git \
    && rm -rf /var/lib/apt/lists/*

#=================================
# Install Bash Command Line Tools
#=================================
RUN apt-get update \
    && apt-get -qy --no-install-recommends install \
        curl \
        sudo \
        unzip \
        vim \
        wget \
        xvfb \
    && rm -rf /var/lib/apt/lists/*

#================
# Install Chrome
#================
# Download, install and delete the .deb in one layer so the package file is
# not kept in the image. `apt-get install ./file.deb` resolves Chrome's
# dependencies from the package indexes, hence the update in the same RUN.
RUN apt-get update \
    && wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb \
    && apt-get install -y ./google-chrome-stable_current_amd64.deb \
    && rm ./google-chrome-stable_current_amd64.deb \
    && rm -rf /var/lib/apt/lists/*

#================
# Install Python
#================
# `ln -sf` replaces the previous `rm` + `ln -s` pair: /usr/bin/python3 ends up
# as a symlink to python3.10 either way.
RUN apt-get update \
    && apt-get install -y python3 python3-pip python3-setuptools python3-dev python3-tk \
    && apt-get -qy --no-install-recommends install python3.10 \
    && ln -sf python3.10 /usr/bin/python3 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Only interactive bash sessions get the `python` alias. The former
# `RUN alias python=python3` was dropped: an alias lives only in the shell of
# that single RUN step, so it had no effect on the image.
RUN echo "alias python=python3" >> ~/.bashrc

#======================
# Install SeleniumBase
#======================
# Explicit destination (the original relied on the build's working directory
# being /). A shallow clone is enough to install the package.
RUN git clone --depth 1 https://github.com/seleniumbase/SeleniumBase.git /SeleniumBase
# Limit the .pyc purge to the cloned repo instead of walking the whole root filesystem.
RUN find /SeleniumBase -name '*.pyc' -delete
RUN pip install --upgrade pip setuptools wheel
RUN cd /SeleniumBase && ls && pip install -r requirements.txt --upgrade
RUN cd /SeleniumBase && pip install .
RUN pip install pyautogui

#=======================
# Download chromedriver
#=======================
RUN seleniumbase get chromedriver --path

#==========================
# Install the Flask service
#==========================
RUN pip install flask py-eureka-client httpx

COPY flask_app.py ./flask_app.py
# flask_app.py listens on port 8090 (rest_port); EXPOSE documents it for users of the image.
EXPOSE 8090
CMD ["python3", "./flask_app.py"]
|
||||
35
flask_app.py
Normal file
35
flask_app.py
Normal file
@ -0,0 +1,35 @@
|
||||
import json
|
||||
|
||||
from flask import Flask, request
|
||||
from seleniumbase import SB
|
||||
import time
|
||||
|
||||
# TCP port passed to app.run() when this module is executed as a script.
rest_port = 8090

# Flask application object; the route handler below is registered on it.
app = Flask(__name__)
|
||||
|
||||
# Only web URLs may be handed to the browser; anything else (file://, chrome://,
# data:, ...) would let a caller read local or internal resources through the
# returned page source.
_ALLOWED_URL_PREFIXES = ("http://", "https://")


def _json_response(payload, status):
    """Return a Flask ``(body, status, headers)`` tuple with *payload* as JSON."""
    return json.dumps(payload), status, {'Content-Type': 'application/json'}


@app.route("/url", methods=['POST'])
def hello():
    """Load the posted URL in a SeleniumBase browser and return its page source.

    Expects a JSON object body of the form ``{"url": "<http(s) URL>"}``.

    Returns:
        200 with ``{"pageSource": "<html>"}`` on success.
        400 with ``{"error": "..."}`` when the body is not a JSON object, the
        ``url`` key is missing, or the URL is not an http(s) URL.

    Every response, including errors, is sent as ``application/json``.
    """
    # silent=True: a missing or malformed JSON body yields None instead of
    # Flask's own error response, so the JSON error below is what the caller sees.
    payload = request.get_json(silent=True)

    # Require a JSON object: a list or string body would pass an `in` check
    # but fail on payload['url'].
    if not isinstance(payload, dict) or 'url' not in payload:
        return _json_response({"error": "URL parameter is required"}, 400)

    url = payload['url']
    if not isinstance(url, str):
        return _json_response({"error": "URL must be an http(s) URL"}, 400)
    url = url.strip()
    if not url.lower().startswith(_ALLOWED_URL_PREFIXES):
        return _json_response({"error": "URL must be an http(s) URL"}, 400)

    with SB(uc=True) as sb:
        # NOTE(review): assigning this attribute after the driver already exists
        # may have no effect; SB() presumably accepts a page-load-strategy
        # option instead - confirm against the SeleniumBase docs.
        sb.driver.page_load_strategy = "eager"
        sb.maximize_window()
        sb.activate_cdp_mode(url)
        sb.sleep(5)
        # Presumably clicks a CAPTCHA checkbox if one is shown - confirm
        # behavior when no CAPTCHA is present.
        sb.uc_gui_click_captcha()
        # Fixed 10-second wait before the page source is read.
        time.sleep(10)

        # Read the source while the browser session is still open.
        page_source = sb.get_page_source()

    return _json_response({"pageSource": page_source}, 200)
|
||||
|
||||
# Start Flask's built-in server only when this file is run as a script
# (the Dockerfile CMD runs `python3 ./flask_app.py`).
if __name__ == "__main__":
    # host='0.0.0.0' listens on all interfaces rather than only on localhost.
    app.run(host='0.0.0.0', port=rest_port)
|
||||
3
requirements.txt
Normal file
3
requirements.txt
Normal file
@ -0,0 +1,3 @@
|
||||
seleniumbase
|
||||
flask
|
||||
httpx
|
||||
Loading…
x
Reference in New Issue
Block a user