diff --git a/.github/workflows/automatic-release.yml b/.github/workflows/automatic-release.yml index 0a609a4..55dc002 100644 --- a/.github/workflows/automatic-release.yml +++ b/.github/workflows/automatic-release.yml @@ -23,10 +23,12 @@ jobs: - name: Build run: | python setup.py sdist bdist_wheel + pip install dist/*.whl + echo "PACKAGE_VERSION=$(python -c 'import ProxyEater; print(ProxyEater.__version__)')" >> $GITHUB_ENV - uses: "marvinpinto/action-automatic-releases@latest" with: repo_token: "${{ secrets.GITHUB_TOKEN }}" - automatic_release_tag: "latest" + automatic_release_tag: "${{ env.PACKAGE_VERSION }}" title: "Auto Build" files: | dist/* diff --git a/ProxyEater/Proxy.py b/ProxyEater/Proxy.py index a22743f..90af38a 100644 --- a/ProxyEater/Proxy.py +++ b/ProxyEater/Proxy.py @@ -243,13 +243,13 @@ def check_proxy(proxy_: Proxy): thread = threading.Thread(target=check_proxy, args=(proxy,)) threads.append(thread) thread.start() - on_progress_callback(self, (i + 1) / length * 100) while len(threads) >= threads_no: for thread in threads: if not thread.is_alive(): threads.remove(thread) break time.sleep(0.1) + on_progress_callback(self, (i + 1) / length * 100) for thread in threads: thread.join() diff --git a/ProxyEater/__init__.py b/ProxyEater/__init__.py index dc33aa0..2025fd4 100644 --- a/ProxyEater/__init__.py +++ b/ProxyEater/__init__.py @@ -1,7 +1,7 @@ # ProxyEater # CodeWriter21 -__version__ = "1.4.0" +__version__ = "1.4.1" __author__ = "CodeWriter21" __email__ = "CodeWriter21@gmail.com" __license__ = "Apache-2.0" diff --git a/ProxyEater/__main__.py b/ProxyEater/__main__.py index 3520086..e8718bd 100644 --- a/ProxyEater/__main__.py +++ b/ProxyEater/__main__.py @@ -1,6 +1,6 @@ # ProxyEater.__main__.py # CodeWriter21 - +import os import sys import json import pathlib @@ -60,7 +60,8 @@ def scrape(args): for config in source_data: progress_callback = finish_callback = error_callback = checking_callback = None if args.verbose: - logger.progress_bar = log21.ProgressBar(format_='{prefix}{bar}{suffix} {percentage}%', style='{') + logger.progress_bar = log21.ProgressBar(format_='Proxies: {count} {prefix}{bar}{suffix} {percentage}%', + style='{', additional_variables={'count': 0}) def progress_callback(scraper_: Scraper, progress: float, page: int): logger.info(f'{scraper_.name}: Collected: {scraper_.proxies.count}; Page: {page}, {progress:.2f}%', @@ -74,7 +75,7 @@ def error_callback(scraper_: Scraper, error: Exception): logger.error(f'{scraper_.name}: {error.__class__.__name__}: {error}') def checking_callback(proxy_list: ProxyList, progress: float): - logger.progress_bar(progress, 100) + logger.progress_bar(progress, 100, count=proxy_list.count) logger.info(f'Scraping {config.get("id")}...') scraper = Scraper(config.get('url'), config.get('parser'), method=config.get('method'), @@ -100,18 +101,20 @@ def checking_callback(proxy_list: ProxyList, progress: float): proxies.update(proxies_) logger.info(f'Scraped {len(proxies)} proxies.') - if args.verbose: - logger.info(f'Writing {proxies.count} proxies to {args.output}...') - # Write to file - if args.format == 'text': - proxies.to_text_file(args.output, '\n') - elif args.format == 'json': - proxies.to_json_file(args.output, include_status=args.include_status, - include_geolocation=args.include_geolocation) - elif args.format == 'csv': - proxies.to_csv_file(args.output, include_status=args.include_status, - include_geolocation=args.include_geolocation) - logger.info(f'Wrote {proxies.count} proxies to {args.output}.') + if proxies.count > 0: + if args.verbose: + logger.info(f'Writing {proxies.count} proxies to {args.output}...') + # Write to file + if args.format == 'text': + proxies.to_text_file(args.output, '\n') + elif args.format == 'json': + proxies.to_json_file(args.output, include_status=args.include_status, + include_geolocation=args.include_geolocation) + elif args.format == 'csv': + proxies.to_csv_file(args.output, include_status=args.include_status, + include_geolocation=args.include_geolocation) + if proxies.count > 0: + logger.info(f'Wrote {proxies.count} proxies to {args.output}.') def check(args): @@ -138,10 +141,11 @@ def check(args): logger.error(f'The source format {args.source_format} is not valid.') return - logger.progress_bar = log21.ProgressBar(format_='{prefix}{bar}{suffix} {percentage}%', style='{') + logger.progress_bar = log21.ProgressBar(format_='Proxies: {count} {prefix}{bar}{suffix} {percentage}%', style='{', + additional_variables={'count': 0}) def checking_callback(proxy_list: ProxyList, progress: float): - logger.progress_bar(progress, 100) + logger.progress_bar(progress, 100, count=proxy_list.count) # Check the proxies count = proxies.count @@ -154,16 +158,17 @@ def checking_callback(proxy_list: ProxyList, progress: float): logger.info(f'Removed {count - proxies.count} dead proxies.') logger.info(f'Alive proxies: {proxies.count}') - # Write to file - if args.format == 'text': - proxies.to_text_file(args.output, '\n') - elif args.format == 'json': - proxies.to_json_file(args.output, include_status=args.include_status, - include_geolocation=args.include_geolocation) - elif args.format == 'csv': - proxies.to_csv_file(args.output, include_status=args.include_status, - include_geolocation=args.include_geolocation) - logger.info(f'Wrote {proxies.count} proxies to {args.output}.') + if proxies.count > 0: + # Write to file + if args.format == 'text': + proxies.to_text_file(args.output, '\n') + elif args.format == 'json': + proxies.to_json_file(args.output, include_status=args.include_status, + include_geolocation=args.include_geolocation) + elif args.format == 'csv': + proxies.to_csv_file(args.output, include_status=args.include_status, + include_geolocation=args.include_geolocation) + logger.info(f'Wrote {proxies.count} proxies to {args.output}.') def main(): @@ -249,9 +254,15 @@ def main(): elif args.mode == 'check': check(args) except KeyboardInterrupt: + try: + terminal_size = os.get_terminal_size()[0] - 1 + except OSError: + terminal_size = 50 + if not terminal_size: + terminal_size = 50 + logger.clear_line(terminal_size) logger.error('KeyboardInterrupt: Exiting...') sys.exit() - return if __name__ == '__main__': diff --git a/ProxyEater/sources.json b/ProxyEater/sources.json index 5944487..caa6ed7 100644 --- a/ProxyEater/sources.json +++ b/ProxyEater/sources.json @@ -237,7 +237,7 @@ } }, { - "id": "github.com/TheSpeedX/PROXY-List", + "id": "github.com/TheSpeedX/PROXY-List/http.txt", "url": "https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/http.txt", "method": "GET", "parser": { @@ -248,6 +248,30 @@ } } }, + { + "id": "github.com/TheSpeedX/PROXY-List/socks5.txt", + "url": "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks5.txt", + "method": "GET", + "parser": { + "text": { + "type": { + "default": "SOCKS5" + } + } + } + }, + { + "id": "github.com/TheSpeedX/PROXY-List/socks4", + "url": "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks4.txt", + "method": "GET", + "parser": { + "text": { + "type": { + "default": "SOCKS4" + } + } + } + }, { "id": "https://github.com/mertguvencli/http-proxy-list", "url": "https://raw.githubusercontent.com/mertguvencli/http-proxy-list/main/proxy-list/data.txt", diff --git a/README.md b/README.md index 31b2932..eb06fe0 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -ProxyEater\[1.4.0\] +ProxyEater\[1.4.1\] =================== ![version](https://img.shields.io/pypi/v/ProxyEater) diff --git a/setup.py b/setup.py index 8e43b3d..e304578 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name='ProxyEater', - version='1.4.0', + version='1.4.1', author='CodeWriter21', author_email='CodeWriter21@gmail.com', description='A Python Proxy Scraper for gathering fresh proxies.', @@ -20,7 +20,8 @@ 'ProxyEater=ProxyEater.__main__:main' ] }, - install_requires=['requests', 'beautifulsoup4', 'lxml', 'pandas', 'html5lib', 'log21', 'importlib_resources'], + install_requires=['requests', 'beautifulsoup4', 'lxml', 'pandas', 'html5lib', 'log21>=2.3.3', + 'importlib_resources', 'random_user_agent'], classifiers=[ 'Programming Language :: Python :: 3', ],