Minor improvements
MPCodeWriter21 committed Jul 25, 2022
1 parent 6154fd0 commit 1c9e7d7
Showing 7 changed files with 73 additions and 35 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/automatic-release.yml
@@ -23,10 +23,12 @@ jobs:
       - name: Build
         run: |
           python setup.py sdist bdist_wheel
+          pip install dist/*.whl
+          echo "PACKAGE_VERSION=$(python -c 'import ProxyEater; print(ProxyEater.__version__)')" >> $GITHUB_ENV
       - uses: "marvinpinto/action-automatic-releases@latest"
         with:
           repo_token: "${{ secrets.GITHUB_TOKEN }}"
-          automatic_release_tag: "latest"
+          automatic_release_tag: "${{ env.PACKAGE_VERSION }}"
           title: "Auto Build"
           files: |
             dist/*
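
The two added lines install the freshly built wheel and write its version into the GITHUB_ENV file, which is how one GitHub Actions step hands an environment variable to later steps; the release step can then tag the release with the real package version instead of the fixed "latest" tag. A minimal Python equivalent of that shell one-liner, assuming it runs inside an Actions step (the runner provides the GITHUB_ENV file path):

# Sketch: export the installed package's version to subsequent steps.
# Assumes a GitHub Actions runner, which points GITHUB_ENV at a writable file.
import os

import ProxyEater  # the wheel installed by `pip install dist/*.whl`

with open(os.environ["GITHUB_ENV"], "a", encoding="utf-8") as env_file:
    # Each NAME=value line becomes an environment variable in later steps.
    env_file.write(f"PACKAGE_VERSION={ProxyEater.__version__}\n")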
2 changes: 1 addition & 1 deletion ProxyEater/Proxy.py
@@ -243,13 +243,13 @@ def check_proxy(proxy_: Proxy):
             thread = threading.Thread(target=check_proxy, args=(proxy,))
             threads.append(thread)
             thread.start()
-            on_progress_callback(self, (i + 1) / length * 100)
             while len(threads) >= threads_no:
                 for thread in threads:
                     if not thread.is_alive():
                         threads.remove(thread)
                         break
                 time.sleep(0.1)
+            on_progress_callback(self, (i + 1) / length * 100)

         for thread in threads:
             thread.join()
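
The moved line changes when progress is reported: before, the callback fired as soon as each thread was spawned, so the percentage raced ahead of the actual checks; after, each update waits for the throttle loop, so the reported progress tracks the worker pool more closely. A stripped-down sketch of the same bounded-worker pattern (run_bounded and check_one are illustrative names, not ProxyEater's API):

# Start one thread per item but keep at most `threads_no` alive at a time.
import threading
import time

def run_bounded(items, check_one, threads_no=25, on_progress=None):
    threads = []
    length = len(items)
    for i, item in enumerate(items):
        thread = threading.Thread(target=check_one, args=(item,))
        threads.append(thread)
        thread.start()
        # Throttle: block until one of the running workers finishes.
        while len(threads) >= threads_no:
            for t in threads:
                if not t.is_alive():
                    threads.remove(t)
                    break
            time.sleep(0.1)
        # Report progress only after a slot frees, as in the commit.
        if on_progress:
            on_progress((i + 1) / length * 100)
    for t in threads:
        t.join()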
2 changes: 1 addition & 1 deletion ProxyEater/__init__.py
@@ -1,7 +1,7 @@
 # ProxyEater
 # CodeWriter21

-__version__ = "1.4.0"
+__version__ = "1.4.1"
 __author__ = "CodeWriter21"
 __email__ = "[email protected]"
 __license__ = "Apache-2.0"
67 changes: 39 additions & 28 deletions ProxyEater/__main__.py
@@ -1,6 +1,6 @@
 # ProxyEater.__main__.py
 # CodeWriter21

+import os
 import sys
 import json
 import pathlib
@@ -60,7 +60,8 @@ def scrape(args):
     for config in source_data:
         progress_callback = finish_callback = error_callback = checking_callback = None
         if args.verbose:
-            logger.progress_bar = log21.ProgressBar(format_='{prefix}{bar}{suffix} {percentage}%', style='{')
+            logger.progress_bar = log21.ProgressBar(format_='Proxies: {count} {prefix}{bar}{suffix} {percentage}%',
+                                                    style='{', additional_variables={'count': 0})

             def progress_callback(scraper_: Scraper, progress: float, page: int):
                 logger.info(f'{scraper_.name}: Collected: {scraper_.proxies.count}; Page: {page}, {progress:.2f}%',
@@ -74,7 +75,7 @@ def error_callback(scraper_: Scraper, error: Exception):
                 logger.error(f'{scraper_.name}: {error.__class__.__name__}: {error}')

             def checking_callback(proxy_list: ProxyList, progress: float):
-                logger.progress_bar(progress, 100)
+                logger.progress_bar(progress, 100, count=proxy_list.count)

         logger.info(f'Scraping {config.get("id")}...')
         scraper = Scraper(config.get('url'), config.get('parser'), method=config.get('method'),
@@ -100,18 +101,20 @@ def checking_callback(proxy_list: ProxyList, progress: float):
         proxies.update(proxies_)
     logger.info(f'Scraped {len(proxies)} proxies.')

-    if args.verbose:
-        logger.info(f'Writing {proxies.count} proxies to {args.output}...')
-    # Write to file
-    if args.format == 'text':
-        proxies.to_text_file(args.output, '\n')
-    elif args.format == 'json':
-        proxies.to_json_file(args.output, include_status=args.include_status,
-                             include_geolocation=args.include_geolocation)
-    elif args.format == 'csv':
-        proxies.to_csv_file(args.output, include_status=args.include_status,
-                            include_geolocation=args.include_geolocation)
-    logger.info(f'Wrote {proxies.count} proxies to {args.output}.')
+    if proxies.count > 0:
+        if args.verbose:
+            logger.info(f'Writing {proxies.count} proxies to {args.output}...')
+        # Write to file
+        if args.format == 'text':
+            proxies.to_text_file(args.output, '\n')
+        elif args.format == 'json':
+            proxies.to_json_file(args.output, include_status=args.include_status,
+                                 include_geolocation=args.include_geolocation)
+        elif args.format == 'csv':
+            proxies.to_csv_file(args.output, include_status=args.include_status,
+                                include_geolocation=args.include_geolocation)
+    if proxies.count > 0:
+        logger.info(f'Wrote {proxies.count} proxies to {args.output}.')


 def check(args):
@@ -138,10 +141,11 @@ def check(args):
         logger.error(f'The source format {args.source_format} is not valid.')
         return

-    logger.progress_bar = log21.ProgressBar(format_='{prefix}{bar}{suffix} {percentage}%', style='{')
+    logger.progress_bar = log21.ProgressBar(format_='Proxies: {count} {prefix}{bar}{suffix} {percentage}%', style='{',
+                                            additional_variables={'count': 0})

     def checking_callback(proxy_list: ProxyList, progress: float):
-        logger.progress_bar(progress, 100)
+        logger.progress_bar(progress, 100, count=proxy_list.count)

     # Check the proxies
     count = proxies.count
@@ -154,16 +158,17 @@ def checking_callback(proxy_list: ProxyList, progress: float):
     logger.info(f'Removed {count - proxies.count} dead proxies.')
     logger.info(f'Alive proxies: {proxies.count}')

-    # Write to file
-    if args.format == 'text':
-        proxies.to_text_file(args.output, '\n')
-    elif args.format == 'json':
-        proxies.to_json_file(args.output, include_status=args.include_status,
-                             include_geolocation=args.include_geolocation)
-    elif args.format == 'csv':
-        proxies.to_csv_file(args.output, include_status=args.include_status,
-                            include_geolocation=args.include_geolocation)
-    logger.info(f'Wrote {proxies.count} proxies to {args.output}.')
+    if proxies.count > 0:
+        # Write to file
+        if args.format == 'text':
+            proxies.to_text_file(args.output, '\n')
+        elif args.format == 'json':
+            proxies.to_json_file(args.output, include_status=args.include_status,
+                                 include_geolocation=args.include_geolocation)
+        elif args.format == 'csv':
+            proxies.to_csv_file(args.output, include_status=args.include_status,
+                                include_geolocation=args.include_geolocation)
+        logger.info(f'Wrote {proxies.count} proxies to {args.output}.')


 def main():
@@ -249,9 +254,15 @@ def main():
         elif args.mode == 'check':
             check(args)
     except KeyboardInterrupt:
+        try:
+            terminal_size = os.get_terminal_size()[0] - 1
+        except OSError:
+            terminal_size = 50
+        if not terminal_size:
+            terminal_size = 50
+        logger.clear_line(terminal_size)
         logger.error('KeyboardInterrupt: Exiting...')
-        sys.exit()
+        return


 if __name__ == '__main__':
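
Both scrape() and check() now share one progress-bar pattern: a custom 'count' placeholder is declared when the bar is created and refreshed on every update, so the bar shows how many proxies have been collected alongside the percentage; the KeyboardInterrupt handler similarly gains a terminal-size fallback so clear_line works even without a usable TTY. A minimal sketch of the progress-bar pattern, reusing exactly the calls from this commit and assuming log21 >= 2.3.3 (the version setup.py now pins) renders it as shown:

import log21

progress_bar = log21.ProgressBar(
    format_='Proxies: {count} {prefix}{bar}{suffix} {percentage}%',
    style='{',                          # str.format-style placeholders
    additional_variables={'count': 0},  # default until a real value arrives
)

# Each call redraws the bar; `count` fills the custom {count} placeholder.
for done, found in [(25, 3), (50, 9), (100, 17)]:
    progress_bar(done, 100, count=found)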
26 changes: 25 additions & 1 deletion ProxyEater/sources.json
@@ -237,7 +237,7 @@
         }
     },
     {
-        "id": "github.com/TheSpeedX/PROXY-List",
+        "id": "github.com/TheSpeedX/PROXY-List/http.txt",
         "url": "https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/http.txt",
         "method": "GET",
         "parser": {
@@ -248,6 +248,30 @@
             }
         }
     },
+    {
+        "id": "github.com/TheSpeedX/PROXY-List/socks5.txt",
+        "url": "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks5.txt",
+        "method": "GET",
+        "parser": {
+            "text": {
+                "type": {
+                    "default": "SOCKS5"
+                }
+            }
+        }
+    },
+    {
+        "id": "github.com/TheSpeedX/PROXY-List/socks4",
+        "url": "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks4.txt",
+        "method": "GET",
+        "parser": {
+            "text": {
+                "type": {
+                    "default": "SOCKS4"
+                }
+            }
+        }
+    },
     {
         "id": "https://github.com/mertguvencli/http-proxy-list",
         "url": "https://raw.githubusercontent.com/mertguvencli/http-proxy-list/main/proxy-list/data.txt",
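
All three TheSpeedX entries use the "text" parser with a default proxy type, which suits raw lists where every line is a bare host:port and the protocol is implied by the file itself. A sketch of how such an entry could be consumed — the real logic lives in ProxyEater's Scraper; parse_text_source here is a hypothetical stand-in to illustrate the config's shape:

import requests

source = {
    "id": "github.com/TheSpeedX/PROXY-List/socks5.txt",
    "url": "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks5.txt",
    "method": "GET",
    "parser": {"text": {"type": {"default": "SOCKS5"}}},
}

def parse_text_source(config: dict) -> list:
    """Fetch a plain-text proxy list and tag each host:port line
    with the configured default protocol."""
    default_type = config["parser"]["text"]["type"]["default"]
    response = requests.request(config["method"], config["url"], timeout=10)
    proxies = []
    for line in response.text.splitlines():
        line = line.strip()
        if not line:
            continue
        host, _, port = line.partition(":")
        proxies.append((host, int(port), default_type))
    return proxies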
2 changes: 1 addition & 1 deletion README.md
@@ -1,4 +1,4 @@
-ProxyEater\[1.4.0\]
+ProxyEater\[1.4.1\]
 ===================

 ![version](https://img.shields.io/pypi/v/ProxyEater)
5 changes: 3 additions & 2 deletions setup.py
@@ -7,7 +7,7 @@

 setup(
     name='ProxyEater',
-    version='1.4.0',
+    version='1.4.1',
     author='CodeWriter21',
     author_email='[email protected]',
     description='A Python Proxy Scraper for gathering fresh proxies.',
@@ -20,7 +20,8 @@
             'ProxyEater=ProxyEater.__main__:main'
         ]
     },
-    install_requires=['requests', 'beautifulsoup4', 'lxml', 'pandas', 'html5lib', 'log21', 'importlib_resources'],
+    install_requires=['requests', 'beautifulsoup4', 'lxml', 'pandas', 'html5lib', 'log21>=2.3.3',
+                      'importlib_resources', 'random_user_agent'],
     classifiers=[
         'Programming Language :: Python :: 3',
     ],
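
The requirements change pins log21 at 2.3.3 or newer — presumably the first release with the additional_variables progress-bar option used above — and adds random_user_agent. A tiny illustrative runtime guard (not part of ProxyEater; pip already enforces the floor at install time):

from importlib.metadata import version

# Compare numeric release segments only; pre-release tags would need `packaging`.
installed = tuple(int(part) for part in version("log21").split(".")[:3])
assert installed >= (2, 3, 3), f"log21 {installed} is older than 2.3.3"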
