From c987b41c9b94af2e6922491540cca7572a121ba2 Mon Sep 17 00:00:00 2001 From: jhinzmann Date: Sun, 18 Oct 2015 02:23:30 +0200 Subject: [PATCH] Add support for `.jshintignore` files Search for `.jshintignore` files in parent directories and evaluate ignore patterns by utilizing the python pathspec library This resolves #59 and resolves #113 --- JSHint.py | 51 +++ python-path-specification/.gitignore | 34 ++ python-path-specification/CHANGES.rst | 61 +++ python-path-specification/LICENSE | 373 ++++++++++++++++++ python-path-specification/MANIFEST.in | 1 + python-path-specification/README.rst | 139 +++++++ .../pathspec/__init__.py | 32 ++ python-path-specification/pathspec/compat.py | 23 ++ .../pathspec/gitignore.py | 261 ++++++++++++ .../pathspec/pathspec.py | 98 +++++ python-path-specification/pathspec/pattern.py | 93 +++++ .../pathspec/tests/__init__.py | 0 .../pathspec/tests/test_gitignore.py | 220 +++++++++++ .../pathspec/tests/test_pathspec.py | 35 ++ .../pathspec/tests/test_util.py | 273 +++++++++++++ python-path-specification/pathspec/util.py | 261 ++++++++++++ python-path-specification/pypi-upload.sh | 2 + python-path-specification/setup.cfg | 1 + python-path-specification/setup.py | 40 ++ python-path-specification/tox.ini | 5 + 20 files changed, 2003 insertions(+) create mode 100644 python-path-specification/.gitignore create mode 100644 python-path-specification/CHANGES.rst create mode 100644 python-path-specification/LICENSE create mode 100644 python-path-specification/MANIFEST.in create mode 100644 python-path-specification/README.rst create mode 100644 python-path-specification/pathspec/__init__.py create mode 100644 python-path-specification/pathspec/compat.py create mode 100644 python-path-specification/pathspec/gitignore.py create mode 100644 python-path-specification/pathspec/pathspec.py create mode 100644 python-path-specification/pathspec/pattern.py create mode 100644 python-path-specification/pathspec/tests/__init__.py create mode 100644 
python-path-specification/pathspec/tests/test_gitignore.py create mode 100644 python-path-specification/pathspec/tests/test_pathspec.py create mode 100644 python-path-specification/pathspec/tests/test_util.py create mode 100644 python-path-specification/pathspec/util.py create mode 100755 python-path-specification/pypi-upload.sh create mode 100644 python-path-specification/setup.cfg create mode 100644 python-path-specification/setup.py create mode 100644 python-path-specification/tox.ini diff --git a/JSHint.py b/JSHint.py index 2aafc7a..16aa1a6 100644 --- a/JSHint.py +++ b/JSHint.py @@ -6,6 +6,10 @@ import os, sys, subprocess, codecs, re, webbrowser from threading import Timer +sys.path.append(os.path.join(os.path.dirname(__file__), "python-path-specification")) + +import pathspec + try: import commands except ImportError: @@ -13,6 +17,7 @@ PLUGIN_FOLDER = os.path.dirname(os.path.realpath(__file__)) RC_FILE = ".jshintrc" +IGNORE_FILE = ".jshintignore" SETTINGS_FILE = "JSHint.sublime-settings" KEYMAP_FILE = "Default ($PLATFORM).sublime-keymap" OUTPUT_VALID = b"*** JSHint output ***" @@ -23,6 +28,13 @@ def run(self, edit, show_regions=True, show_panel=True): if self.file_unsupported(): return + # Do not lint if file is ignored by a .jshintignore, reset if linted before + if self.file_ignored(): + JshintGlobalStore.reset() + JshintEventListeners.reset() + self.view.erase_regions("jshint_errors") + return + # Get the current text in the buffer and save it in a temporary file. # This allows for scratch buffers and dirty files to be linted as well. temp_file_path = self.save_buffer_to_temp_file() @@ -74,6 +86,45 @@ def file_unsupported(self): has_json_syntax = bool(re.search("JSON", view_settings.get("syntax"), re.I)) return has_json_syntax or (not has_js_or_html_extension and not has_js_or_html_syntax) + def file_ignored(self): + """Check if current file is matched by a .jshintignore file. + Use pathspec library to evaluate the .jshintignore patterns. 
+ """ + ignore_files = self.find_jshintignore_files() + + for ignorefile in reversed(ignore_files): + with open(ignorefile) as jshintignore: + spec = pathspec.PathSpec.from_lines('gitignore', jshintignore) + matches = spec.match_tree(os.path.dirname(ignorefile)) + for match in matches: + abs_match = os.path.join(os.path.dirname(ignorefile),match) + if abs_match == self.view.file_name(): + if PluginUtils.get_pref('print_diagnostics'): + print(abs_match + " ignored in " + ignorefile) + return True + + return False + + def find_jshintignore_files(self): + """Search for .jshintignore files along the root path of the current file. + Return a list of paths to .jshintignore files found, sorted from root to current parent directory. + """ + parent_dir = os.path.abspath(os.path.join(self.view.file_name(), os.pardir)) + root_path = os.path.abspath(os.sep) + + ignore_files = [] + + if os.path.isfile(os.path.join(root_path, IGNORE_FILE)): + ignore_files.append(os.path.join(root_path, IGNORE_FILE)) + + for path in parent_dir.split(os.sep): + root_path = os.path.join(root_path, path) + ignore_file_path = os.path.join(root_path, IGNORE_FILE) + if os.path.isfile(ignore_file_path): + ignore_files.append(ignore_file_path) + + return ignore_files + def save_buffer_to_temp_file(self): buffer_text = self.view.substr(sublime.Region(0, self.view.size())) temp_file_name = ".__temp__" diff --git a/python-path-specification/.gitignore b/python-path-specification/.gitignore new file mode 100644 index 0000000..5212c65 --- /dev/null +++ b/python-path-specification/.gitignore @@ -0,0 +1,34 @@ +# Compiled python files. +*.py[cod] + +# C extensions. +*.so + +# Packages. +*.egg +*.egg-info +dist +build +eggs +parts +bin +var +sdist +develop-eggs +.installed.cfg +lib +lib64 +__pycache__ +/MANIFEST +.tox + +# Installer logs. +pip-log.txt + +# Temporary files. +*~ +*.swp + +# Hidden files. 
+.* +!.gitignore diff --git a/python-path-specification/CHANGES.rst b/python-path-specification/CHANGES.rst new file mode 100644 index 0000000..e3ce289 --- /dev/null +++ b/python-path-specification/CHANGES.rst @@ -0,0 +1,61 @@ + +Change History +============== + + +0.3.4 (2015-08-24) +------------------ + +- Issue #7: Fixed non-recursive links. +- Issue #8: Fixed edge cases in gitignore patterns. +- Issue #9: Fixed minor usage documentation. +- Fixed recursion detection. +- Fixed trivial incompatibility with Python 3.2. + + +0.3.3 (2014-11-21) +------------------ + +- Improved documentation. + + +0.3.2 (2014-11-08) +------------------ + +- Improved documentation. +- Issue #6: Fixed matching Windows paths. +- API change: `spec.match_tree` and `spec.match_files` now return iterators instead of sets + + +0.3.1 (2014-09-17) +------------------ + +- Updated README. + + +0.3.0 (2014-09-17) +------------------ + +- Added registered patterns. +- Issue #3: Fixed trailing slash in gitignore patterns. +- Issue #4: Fixed test for trailing slash in gitignore patterns. + + +0.2.2 (2013-12-17) +------------------ + +- Fixed setup.py + + +0.2.1 (2013-12-17) +------------------ + +- Added tests. +- Fixed comment gitignore patterns. +- Fixed relative path gitignore patterns. + + +0.2.0 (2013-12-07) +------------------ + +- Initial release. diff --git a/python-path-specification/LICENSE b/python-path-specification/LICENSE new file mode 100644 index 0000000..14e2f77 --- /dev/null +++ b/python-path-specification/LICENSE @@ -0,0 +1,373 @@ +Mozilla Public License Version 2.0 +================================== + +1. Definitions +-------------- + +1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. 
"Contribution" + means Covered Software of a particular Contributor. + +1.4. "Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. "Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. "Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. "Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. "Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. 
"Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. 
+Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. 
Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. 
However, You may do so only on Your own behalf, and not on +behalf of any Contributor. You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. 
Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. * +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. No use of any Covered Software is * +* authorized under this License except under this disclaimer. 
* +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. +Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. 
Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. If a copy of the MPL was not distributed with this + file, You can obtain one at http://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. 
+ +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. diff --git a/python-path-specification/MANIFEST.in b/python-path-specification/MANIFEST.in new file mode 100644 index 0000000..bb37a27 --- /dev/null +++ b/python-path-specification/MANIFEST.in @@ -0,0 +1 @@ +include *.rst diff --git a/python-path-specification/README.rst b/python-path-specification/README.rst new file mode 100644 index 0000000..1171f37 --- /dev/null +++ b/python-path-specification/README.rst @@ -0,0 +1,139 @@ + +*pathspec*: Path Specification +============================== + +*pathspec* is a utility library for pattern matching of file paths. So +far this only includes `gitignore`_ style pattern matching which itself +incorporates POSIX `glob`_ patterns. + +.. _`gitignore`: http://git-scm.com/docs/gitignore +.. _`glob`: http://man7.org/linux/man-pages/man7/glob.7.html + + +Tutorial +-------- + +Say you have a "Projects" directory and you want to back it up, but only +certain files, and ignore others depending on certain conditions:: + + >>> import pathspec + >>> # The gitignore-style patterns for files to select, but we're including + >>> # instead of ignoring. + >>> spec = """ + ... + ... # This is a comment because the line begins with a hash: "#" + ... + ... # Include several project directories (and all descendants) relative to + ... # the current directory. To reference a directory you must end with a + ... # slash: "/" + ... /project-a/ + ... /project-b/ + ... /project-c/ + ... + ... # Patterns can be negated by prefixing with exclamation mark: "!" + ... + ... # Ignore temporary files beginning or ending with "~" and ending with + ... # ".swp". + ... !~* + ... !*~ + ... !*.swp + ... + ... 
# These are python projects so ignore compiled python files from + ... # testing. + ... !*.pyc + ... + ... # Ignore the build directories but only directly under the project + ... # directories. + ... !/*/build/ + ... + ... """ + +We want to use the ``GitIgnorePattern`` class to compile our patterns, and the +``PathSpec`` to provide an interface around them:: + + >>> spec = pathspec.PathSpec.from_lines(pathspec.GitIgnorePattern, spec.splitlines()) + +That may be a mouthful but it allows for additional patterns to be implemented +in the future without them having to deal with anything but matching the paths +sent to them. ``GitIgnorePattern`` is the implementation of the actual pattern +which internally gets converted into a regular expression. ``PathSpec`` is a +simple wrapper around a list of compiled patterns. + +To make things simpler, we can use the registered name for a pattern class +instead of always having to provide a reference to the class itself. The +``GitIgnorePattern`` class is registered as **gitignore**:: + + >>> spec = pathspec.PathSpec.from_lines('gitignore', spec.splitlines()) + +If we wanted to manually compile the patterns we can just do the following:: + + >>> patterns = map(pathspec.GitIgnorePattern, spec.splitlines()) + >>> spec = pathspec.PathSpec(patterns) + +``PathSpec.from_lines()`` is simply a class method that does just that. + +If you want to load the patterns from file, you can pass the file object directly +as well:: + + >>> with open('patterns.list', 'r') as fh: + >>> spec = pathspec.PathSpec.from_lines('gitignore', fh) + +You can perform matching on a whole directory tree with:: + + >>> matches = spec.match_tree('path/to/directory') + +Or you can perform matching on a specific set of file paths with:: + + >>> matches = spec.match_files(file_paths) + + + +License +------- + +*pathspec* is licensed under the `Mozilla Public License Version 2.0`_. See +`LICENSE`_ or the `FAQ`_ for more information. 
+ +In summary, you may use *pathspec* with any closed or open source project +without affecting the license of the larger work so long as you: + +- give credit where credit is due, + +- and release any custom changes made to *pathspec*. + +.. _`Mozilla Public License Version 2.0`: http://www.mozilla.org/MPL/2.0 +.. _`LICENSE`: LICENSE +.. _`FAQ`: http://www.mozilla.org/MPL/2.0/FAQ.html + + +Source +------ + +The source code for *pathspec* is available from the GitHub repo +`cpburnz/python-path-specification`_. + +.. _`cpburnz/python-path-specification`: https://github.com/cpburnz/python-path-specification + + +Installation +------------ + +*pathspec* requires the following packages: + +- `setuptools`_ + +*pathspec* can be installed from source with:: + + python setup.py install + +*pathspec* is also available for install through `PyPI`_:: + + pip install pathspec + +.. _`setuptools`: https://pypi.python.org/pypi/setuptools +.. _`PyPI`: http://pypi.python.org/pypi/pathspec + + +.. image:: https://d2weczhvl823v0.cloudfront.net/cpburnz/python-path-specification/trend.png + :alt: Bitdeli badge + :target: https://bitdeli.com/free diff --git a/python-path-specification/pathspec/__init__.py b/python-path-specification/pathspec/__init__.py new file mode 100644 index 0000000..e93d2cc --- /dev/null +++ b/python-path-specification/pathspec/__init__.py @@ -0,0 +1,32 @@ +# encoding: utf-8 +""" +The *pathspec* package provides pattern matching for file paths. So far +this only includes gitignore style pattern matching. + +See "README.rst" or +for more information. Or you can always scour the source code. +""" +from __future__ import unicode_literals + +__author__ = "Caleb P. Burns" +__copyright__ = "Copyright © 2013-2015 Caleb P. 
Burns" +__created__ = "2013-10-12" +__credits__ = [ + "dahlia ", + "highb ", + "029xue ", + "mikexstudios ", + "nhumrich ", + "davidfraser ", +] +__email__ = "cpburnz@gmail.com" +__license__ = "MPL 2.0" +__project__ = "pathspec" +__status__ = "Development" +__updated__ = "2015-08-24" +__version__ = "0.3.5.dev1" + +from .gitignore import GitIgnorePattern +from .pathspec import PathSpec +from .pattern import Pattern, RegexPattern +from .util import iter_tree, match_files, RecursionError diff --git a/python-path-specification/pathspec/compat.py b/python-path-specification/pathspec/compat.py new file mode 100644 index 0000000..4f9e450 --- /dev/null +++ b/python-path-specification/pathspec/compat.py @@ -0,0 +1,23 @@ +# encoding: utf-8 +""" +This module provides compatibility between Python 2 and 3. Hardly +anything is used by this project to constitute including `six`_. + +.. _`six`: http://pythonhosted.org/six +""" + +import sys + +if sys.version_info[0] < 3: + # Python 2. + string_types = (basestring,) + + def viewkeys(mapping): + return mapping.viewkeys() + +else: + # Python 3. + string_types = (str,) + + def viewkeys(mapping): + return mapping.keys() diff --git a/python-path-specification/pathspec/gitignore.py b/python-path-specification/pathspec/gitignore.py new file mode 100644 index 0000000..1576100 --- /dev/null +++ b/python-path-specification/pathspec/gitignore.py @@ -0,0 +1,261 @@ +# encoding: utf-8 +""" +This module implements gitignore style pattern matching which +incorporates POSIX glob patterns. +""" + +import re + +from . import util +from .compat import string_types +from .pattern import RegexPattern + + +class GitIgnorePattern(RegexPattern): + """ + The ``GitIgnorePattern`` class represents a compiled gitignore + pattern. + """ + + # Keep the dict-less class hierarchy. + __slots__ = () + + def __init__(self, pattern): + """ + Initializes the ``GitIgnorePattern`` instance. + + *pattern* (``str``) is the gitignore pattern. 
+ """ + + if not isinstance(pattern, string_types): + raise TypeError("pattern:{!r} is not a string.".format(pattern)) + + pattern = pattern.strip() + + if pattern.startswith('#'): + # A pattern starting with a hash ('#') serves as a comment + # (neither includes nor excludes files). Escape the hash with a + # back-slash to match a literal hash (i.e., '\#'). + regex = None + include = None + + elif pattern == '/': + # EDGE CASE: According to git check-ignore (v2.4.1)), a single '/' + # does not match any file. + regex = None + include = None + + elif pattern: + + if pattern.startswith('!'): + # A pattern starting with an exclamation mark ('!') negates the + # pattern (exclude instead of include). Escape the exclamation + # mark with a back-slash to match a literal exclamation mark + # (i.e., '\!'). + include = False + # Remove leading exclamation mark. + pattern = pattern[1:] + else: + include = True + + if pattern.startswith('\\'): + # Remove leading back-slash escape for escaped hash ('#') or + # exclamation mark ('!'). + pattern = pattern[1:] + + # Split pattern into segments. + pattern_segs = pattern.split('/') + + # Normalize pattern to make processing easier. + + if not pattern_segs[0]: + # A pattern beginning with a slash ('/') will only match paths + # directly on the root directory instead of any descendant + # paths. So, remove empty first segment to make pattern relative + # to root. + del pattern_segs[0] + elif len(pattern_segs) == 1 or \ + (len(pattern_segs) == 2 and not pattern_segs[1]): + # A **single** pattern without a beginning slash ('/') will + # match any descendant path. This is equivalent to + # "**/{pattern}". So, prepend with double-asterisks to make + # pattern relative to root. + # EDGE CASE: This also holds for a single pattern with a + # trailing slash (e.g. dir/). 
+ if pattern_segs[0] != '**': + pattern_segs.insert(0, '**') + else: + # EDGE CASE: A pattern without a beginning slash ('/') but + # contains at least one prepended directory (e.g. + # "dir/{pattern}") should not match "**/dir/{pattern}", + # according to `git check-ignore` (v2.4.1). + pass + + if not pattern_segs[-1] and len(pattern_segs) > 1: + # A pattern ending with a slash ('/') will match all descendant + # paths if it is a directory but not if it is a regular file. + # This is equivilent to "{pattern}/**". So, set last segment to + # double asterisks to include all descendants. + pattern_segs[-1] = '**' + + # Build regular expression from pattern. + regex = ['^'] + need_slash = False + end = len(pattern_segs) - 1 + for i, seg in enumerate(pattern_segs): + if seg == '**': + if i == 0 and i == end: + # A pattern consisting solely of double-asterisks ('**') + # will match every path. + regex.append('.+') + elif i == 0: + # A normalized pattern beginning with double-asterisks + # ('**') will match any leading path segments. + regex.append('(?:.+/)?') + need_slash = False + elif i == end: + # A normalized pattern ending with double-asterisks ('**') + # will match any trailing path segments. + regex.append('/.*') + else: + # A pattern with inner double-asterisks ('**') will match + # multiple (or zero) inner path segments. + regex.append('(?:/.+)?') + need_slash = True + elif seg == '*': + # Match single path segment. + if need_slash: + regex.append('/') + regex.append('[^/]+') + need_slash = True + else: + # Match segment glob pattern. + if need_slash: + regex.append('/') + regex.append(self._translate_segment_glob(seg)) + if i == end and include == True: + # A pattern ending without a slash ('/') will match a file + # or a directory (with paths underneath it). + # e.g. foo matches: foo, foo/bar, foo/bar/baz, etc. + # EDGE CASE: However, this does not hold for exclusion cases + # according to `git check-ignore` (v2.4.1). 
@staticmethod
def _translate_segment_glob(pattern):
    """
    Converts a single path-segment glob into regular expression source.

    *pattern* (``str``) is the glob pattern for one path segment.

    Returns the regular expression (``str``). The result is not
    anchored; wildcards never match a slash ('/').
    """
    # NOTE: Modelled on `fnmatch.translate()`; the behaviour is close to
    # POSIX `fnmatch()` with the `FNM_PATHNAME` flag set.

    pieces = []
    escaped = False
    pos, length = 0, len(pattern)
    while pos < length:
        ch = pattern[pos]
        pos += 1

        if escaped:
            # Previous character was a back-slash: take this one
            # literally.
            escaped = False
            pieces.append(re.escape(ch))
            continue

        if ch == '\\':
            # Back-slash escapes the next character. A trailing
            # back-slash has nothing to escape and emits nothing.
            escaped = True
            continue

        if ch == '*':
            # Any run of non-slash characters, possibly empty.
            pieces.append('[^/]*')
            continue

        if ch == '?':
            # Exactly one non-slash character.
            pieces.append('[^/]')
            continue

        if ch != '[':
            # Ordinary character, matched literally.
            pieces.append(re.escape(ch))
            continue

        # Bracket expression. Apart from a leading '!', it can be reused
        # as regex once its closing bracket is located:
        # - "[][!]" matches ']', '[' and '!'.
        # - "[]-]" matches ']' and '-'.
        # - "[!]a-]" matches any character except ']', 'a' and '-'.
        scan = pos
        # Skip the negation mark.
        if scan < length and pattern[scan] == '!':
            scan += 1
        # A ']' in first position is a member, not the terminator.
        if scan < length and pattern[scan] == ']':
            scan += 1
        close = pattern.find(']', scan)

        if close == -1:
            # Unterminated: the '[' is just a literal bracket.
            pieces.append('\\[')
            continue

        # One past the closing bracket.
        close += 1

        first = pattern[pos]
        if first == '!':
            # Glob negation becomes regex negation.
            opener = '[^'
            pos += 1
        elif first == '^':
            # POSIX leaves "[^...]" undefined in globs and
            # `fnmatch.translate()` treats the caret as a literal; do
            # the same here.
            opener = '[\\^'
            pos += 1
        else:
            opener = '['

        # Double every back-slash so the regex engine reads it as a
        # literal back-slash inside the set.
        pieces.append(opener + pattern[pos:close].replace('\\', '\\\\'))
        pos = close

    return ''.join(pieces)
@classmethod
def from_lines(cls, pattern_factory, lines):
    """
    Builds a path-spec by compiling each pattern line.

    *pattern_factory* is either the name (``str``) of a registered
    pattern factory, or a ``callable`` that takes an uncompiled pattern
    (``str``) and returns the compiled pattern (``pathspec.Pattern``).

    *lines* (``Iterable``) yields each uncompiled pattern (``str``),
    e.g. an open file, ``io.StringIO(text)`` or the result of
    ``str.splitlines()``.

    Raises ``TypeError`` if the resolved factory is not callable.

    Returns the ``PathSpec`` instance.
    """
    factory = pattern_factory
    if isinstance(factory, string_types):
        # A string names a registered factory; resolve it.
        factory = util.lookup_pattern(factory)
    if not callable(factory):
        raise TypeError("pattern_factory:{!r} is not callable.".format(factory))

    compiled = []
    for line in lines:
        # Falsy lines (e.g. empty strings) are skipped, not compiled.
        if line:
            compiled.append(factory(line))
    return cls(compiled)
class Pattern(object):
    """
    Abstract base for a compiled pattern. Subclasses supply ``match()``.
    """

    # Declaring slots keeps instances free of a per-instance ``__dict__``.
    __slots__ = ('include',)

    def __init__(self, include):
        """
        Initializes the ``Pattern`` instance.

        *include* (``bool``) tells what to do with matched files:
        include them (``True``), exclude them (``False``), or nothing at
        all for a null-operation (``None``).
        """
        # *include* is stored exactly as given (``True``, ``False`` or
        # ``None``).
        self.include = include

    def match(self, files):
        """
        Matches this pattern against the given files.

        *files* (``Iterable``) contains each file (``str``) relative to
        the root directory (e.g., "relative/path/to/file").

        Returns an ``Iterable`` yielding each matched file path
        (``str``). The base class always raises ``NotImplementedError``.
        """
        klass = self.__class__
        raise NotImplementedError("{}.{} must override match().".format(klass.__module__, klass.__name__))
+ """ + + # Make the class dict-less. + __slots__ = ('regex',) + + def __init__(self, regex, *args, **kw): + """ + Initializes the ``RegexPattern`` instance. + + *regex* (``RegexObject`` or ``str``) is the regular expression for + the pattern. + + `*args` are positional arguments to send to the ``Pattern`` + constructor. + + `**kw` are keyword arguments to send to the ``Pattern`` constructor. + """ + + self.regex = None + """ + *regex* (``RegexObject``) is the regular expression for the pattern. + """ + + # NOTE: Make sure to allow a null regex pattern to be passed for a + # null-operation. + if isinstance(regex, string_types): + regex = re.compile(regex) + + super(RegexPattern, self).__init__(*args, **kw) + + self.regex = regex + + def match(self, files): + """ + Matches this pattern against the specified files. + + *files* (``Iterable``) contains each file (``str``) relative to the + root directory (e.g., "relative/path/to/file"). + + Returns an ``Iterable`` yielding each matched file path (``str``). + """ + if self.include is not None: + for path in files: + if self.regex.match(path) is not None: + yield path diff --git a/python-path-specification/pathspec/tests/__init__.py b/python-path-specification/pathspec/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/python-path-specification/pathspec/tests/test_gitignore.py b/python-path-specification/pathspec/tests/test_gitignore.py new file mode 100644 index 0000000..af1ee7a --- /dev/null +++ b/python-path-specification/pathspec/tests/test_gitignore.py @@ -0,0 +1,220 @@ +# encoding: utf-8 +""" +This script tests ``GitIgnorePattern``. +""" + +import unittest + +import pathspec.util +from pathspec import GitIgnorePattern + +class GitIgnoreTest(unittest.TestCase): + """ + The ``GitIgnoreTest`` class tests the ``GitIgnorePattern`` + implementation. + """ + + def test_00_empty(self): + """ + Tests an empty pattern. 
+ """ + spec = GitIgnorePattern('') + self.assertIsNone(spec.include) + self.assertIsNone(spec.regex) + + def test_01_absolute_root(self): + """ + Tests a single root absolute path pattern. + + This should NOT match any file (according to git check-ignore (v2.4.1)). + """ + spec = GitIgnorePattern('/') + self.assertIsNone(spec.include) + self.assertIsNone(spec.regex) + + def test_01_absolute(self): + """ + Tests an absolute path pattern. + + This should match: + an/absolute/file/path + an/absolute/file/path/foo + + This should NOT match: + foo/an/absolute/file/path + """ + spec = GitIgnorePattern('/an/absolute/file/path') + self.assertTrue(spec.include) + self.assertEqual(spec.regex.pattern, '^an/absolute/file/path(?:/.*)?$') + + def test_01_relative(self): + """ + Tests a relative path pattern. + + This should match: + spam + spam/ + foo/spam + spam/foo + foo/spam/bar + """ + spec = GitIgnorePattern('spam') + self.assertTrue(spec.include) + self.assertEqual(spec.regex.pattern, '^(?:.+/)?spam(?:/.*)?$') + + def test_01_relative_nested(self): + """ + Tests a relative nested path pattern. + + This should match: + foo/spam + foo/spam/bar + + This should **not** match (according to git check-ignore (v2.4.1)): + bar/foo/spam + """ + spec = GitIgnorePattern('foo/spam') + self.assertTrue(spec.include) + self.assertEqual(spec.regex.pattern, '^foo/spam(?:/.*)?$') + + def test_02_comment(self): + """ + Tests a comment pattern. + """ + spec = GitIgnorePattern('# Cork soakers.') + self.assertIsNone(spec.include) + self.assertIsNone(spec.regex) + + def test_02_ignore(self): + """ + Tests an exclude pattern. + + This should NOT match (according to git check-ignore (v2.4.1)): + temp/foo + """ + spec = GitIgnorePattern('!temp') + self.assertIsNotNone(spec.include) + self.assertFalse(spec.include) + self.assertEqual(spec.regex.pattern, '^(?:.+/)?temp$') + + def test_03_child_double_asterisk(self): + """ + Tests a directory name with a double-asterisk child + directory. 
+ + This should match: + spam/bar + + This should **not** match (according to git check-ignore (v2.4.1)): + foo/spam/bar + """ + spec = GitIgnorePattern('spam/**') + self.assertTrue(spec.include) + self.assertEqual(spec.regex.pattern, '^spam/.*$') + + def test_03_inner_double_asterisk(self): + """ + Tests a path with an inner double-asterisk directory. + + This should match: + left/bar/right + left/foo/bar/right + left/bar/right/foo + + This should **not** match (according to git check-ignore (v2.4.1)): + foo/left/bar/right + """ + spec = GitIgnorePattern('left/**/right') + self.assertTrue(spec.include) + self.assertEqual(spec.regex.pattern, '^left(?:/.+)?/right(?:/.*)?$') + + def test_03_only_double_asterisk(self): + """ + Tests a double-asterisk pattern which matches everything. + """ + spec = GitIgnorePattern('**') + self.assertTrue(spec.include) + self.assertEqual(spec.regex.pattern, '^.+$') + + def test_03_parent_double_asterisk(self): + """ + Tests a file name with a double-asterisk parent directory. + + This should match: + foo/spam + foo/spam/bar + """ + spec = GitIgnorePattern('**/spam') + self.assertTrue(spec.include) + self.assertEqual(spec.regex.pattern, '^(?:.+/)?spam(?:/.*)?$') + + def test_04_infix_wildcard(self): + """ + Tests a pattern with an infix wildcard. + + This should match: + foo--bar + foo-hello-bar + a/foo-hello-bar + foo-hello-bar/b + a/foo-hello-bar/b + """ + spec = GitIgnorePattern('foo-*-bar') + self.assertTrue(spec.include) + self.assertEqual(spec.regex.pattern, '^(?:.+/)?foo\\-[^/]*\\-bar(?:/.*)?$') + + def test_04_postfix_wildcard(self): + """ + Tests a pattern with a postfix wildcard. + + This should match: + ~temp- + ~temp-foo + ~temp-foo/bar + foo/~temp-bar + foo/~temp-bar/baz + """ + spec = GitIgnorePattern('~temp-*') + self.assertTrue(spec.include) + self.assertEqual(spec.regex.pattern, '^(?:.+/)?\\~temp\\-[^/]*(?:/.*)?$') + + def test_04_prefix_wildcard(self): + """ + Tests a pattern with a prefix wildcard. 
+ + This should match: + bar.py + bar.py/ + foo/bar.py + foo/bar.py/baz + """ + spec = GitIgnorePattern('*.py') + self.assertTrue(spec.include) + self.assertEqual(spec.regex.pattern, '^(?:.+/)?[^/]*\\.py(?:/.*)?$') + + def test_05_directory(self): + """ + Tests a directory pattern. + + This should match: + dir/ + foo/dir/ + foo/dir/bar + + This should **not** match: + dir + """ + spec = GitIgnorePattern('dir/') + self.assertTrue(spec.include) + self.assertEqual(spec.regex.pattern, '^(?:.+/)?dir/.*$') + + def test_05_registered(self): + """ + Tests that the pattern is registered. + """ + self.assertIs(pathspec.util.lookup_pattern('gitignore'), GitIgnorePattern) + + +if __name__ == '__main__': + suite = unittest.TestLoader().loadTestsFromTestCase(GitIgnoreTest) + unittest.TextTestRunner(verbosity=2).run(suite) diff --git a/python-path-specification/pathspec/tests/test_pathspec.py b/python-path-specification/pathspec/tests/test_pathspec.py new file mode 100644 index 0000000..75adb92 --- /dev/null +++ b/python-path-specification/pathspec/tests/test_pathspec.py @@ -0,0 +1,35 @@ +# encoding: utf-8 +""" +This script tests ``PathSpec``. +""" + +import unittest + +import pathspec + +class PathSpecTest(unittest.TestCase): + """ + The ``PathSpecTest`` class tests the ``PathSpec`` class. + """ + + def test_01_windows_paths(self): + """ + Tests that Windows paths will be properly normalized and matched. 
+ """ + spec = pathspec.PathSpec.from_lines('gitignore', [ + '*.txt', + '!test1/', + ]) + results = set(spec.match_files([ + 'src\\test1\\a.txt', + 'src\\test1\\b.txt', + 'src\\test1\\c\\c.txt', + 'src\\test2\\a.txt', + 'src\\test2\\b.txt', + 'src\\test2\\c\\c.txt', + ], separators=('\\',))) + self.assertEqual(results, { + 'src\\test2\\a.txt', + 'src\\test2\\b.txt', + 'src\\test2\\c\\c.txt', + }) diff --git a/python-path-specification/pathspec/tests/test_util.py b/python-path-specification/pathspec/tests/test_util.py new file mode 100644 index 0000000..2bdec8f --- /dev/null +++ b/python-path-specification/pathspec/tests/test_util.py @@ -0,0 +1,273 @@ +# encoding: utf-8 +""" +This script tests utility functions. +""" + +import os +import os.path +import shutil +import sys +import tempfile +import unittest + +from pathspec.util import iter_tree, RecursionError + +class IterTreeTest(unittest.TestCase): + """ + The ``IterTreeTest`` class tests `pathspec.util.iter_tree()`. + """ + + def make_dirs(self, dirs): + """ + Create the specified directories. + """ + for dir in dirs: + os.mkdir(os.path.join(self.temp_dir, self.ospath(dir))) + + def make_files(self, files): + """ + Create the specified files. + """ + for file in files: + self.mkfile(os.path.join(self.temp_dir, self.ospath(file))) + + def make_links(self, links): + """ + Create the specified links. + """ + for link, node in links: + os.symlink(os.path.join(self.temp_dir, self.ospath(node)), os.path.join(self.temp_dir, self.ospath(link))) + + @staticmethod + def mkfile(file): + """ + Creates an empty file. + """ + with open(file, 'wb'): + pass + + @staticmethod + def ospath(path): + """ + Convert the POSIX path to a native OS path. + """ + return os.path.join(*path.split('/')) + + def require_realpath(self): + """ + Skips the test if `os.path.realpath` does not properly support + symlinks. 
+ """ + if self.broken_realpath: + raise unittest.SkipTest("`os.path.realpath` is broken.") + + def require_symlink(self): + """ + Skips the test if `os.symlink` is not supported. + """ + if self.no_symlink: + raise unittest.SkipTest("`os.symlink` is not supported.") + + def setUp(self): + """ + Called before each test. + """ + self.temp_dir = tempfile.mkdtemp() + + def tearDown(self): + """ + Called after each test. + """ + shutil.rmtree(self.temp_dir) + + def test_1_files(self): + """ + Tests to make sure all files are found. + """ + self.make_dirs([ + 'Empty', + 'Dir', + 'Dir/Inner', + ]) + self.make_files([ + 'a', + 'b', + 'Dir/c', + 'Dir/d', + 'Dir/Inner/e', + 'Dir/Inner/f', + ]) + results = set(iter_tree(self.temp_dir)) + self.assertEqual(results, set(map(self.ospath, { + 'a', + 'b', + 'Dir/c', + 'Dir/d', + 'Dir/Inner/e', + 'Dir/Inner/f', + }))) + + def test_2_0_check_symlink(self): + """ + Tests whether links can be created. + """ + # NOTE: Windows does not support `os.symlink` for Python 2. Windows Vista + # and greater supports `os.symlink` for Python 3.2+. + no_symlink = None + try: + file = os.path.join(self.temp_dir, 'file') + link = os.path.join(self.temp_dir, 'link') + self.mkfile(file) + + try: + os.symlink(file, link) + except (AttributeError, NotImplementedError): + no_symlink = True + raise + no_symlink = False + + finally: + self.__class__.no_symlink = no_symlink + + def test_2_1_check_realpath(self): + """ + Tests whether `os.path.realpath` works properly with symlinks. + """ + # NOTE: Windows does not follow symlinks with `os.path.realpath` which is + # what we use to detect recursion. See + # for details. 
+ broken_realpath = None + try: + self.require_symlink() + file = os.path.join(self.temp_dir, 'file') + link = os.path.join(self.temp_dir, 'link') + self.mkfile(file) + os.symlink(file, link) + + try: + self.assertEqual(os.path.realpath(file), os.path.realpath(link)) + except AssertionError: + broken_realpath = True + raise + broken_realpath = False + + finally: + self.__class__.broken_realpath = broken_realpath + + def test_2_2_links(self): + """ + Tests to make sure links to directories and files work. + """ + self.require_symlink() + self.make_dirs([ + 'Dir' + ]) + self.make_files([ + 'a', + 'b', + 'Dir/c', + 'Dir/d', + ]) + self.make_links([ + ('ax', 'a'), + ('bx', 'b'), + ('Dir/cx', 'Dir/c'), + ('Dir/dx', 'Dir/d'), + ]) + results = set(iter_tree(self.temp_dir)) + self.assertEqual(results, set(map(self.ospath, { + 'a', + 'ax', + 'b', + 'bx', + 'Dir/c', + 'Dir/cx', + 'Dir/d', + 'Dir/dx', + }))) + + def test_2_3_sideways_links(self): + """ + Tests to make sure the same directory can be encountered multiple + times via links. + """ + self.require_symlink() + self.make_dirs([ + 'Dir', + 'Dir/Target', + ]) + self.make_files([ + 'Dir/Target/file', + ]) + self.make_links([ + ('Ax', 'Dir'), + ('Bx', 'Dir'), + ('Cx', 'Dir/Target'), + ('Dx', 'Dir/Target'), + ('Dir/Ex', 'Dir/Target'), + ('Dir/Fx', 'Dir/Target'), + ]) + results = set(iter_tree(self.temp_dir)) + self.assertEqual(results, set(map(self.ospath, { + 'Ax/Ex/file', + 'Ax/Fx/file', + 'Ax/Target/file', + 'Bx/Ex/file', + 'Bx/Fx/file', + 'Bx/Target/file', + 'Cx/file', + 'Dx/file', + 'Dir/Ex/file', + 'Dir/Fx/file', + 'Dir/Target/file', + }))) + + def test_2_4_recursive_links(self): + """ + Tests detection of recursive links. 
+ """ + self.require_symlink() + self.require_realpath() + self.make_dirs([ + 'Dir', + ]) + self.make_files([ + 'Dir/file', + ]) + self.make_links([ + ('Dir/Self', 'Dir'), + ]) + with self.assertRaises(RecursionError) as context: + set(iter_tree(self.temp_dir)) + self.assertEqual(context.exception.first_path, 'Dir') + self.assertEqual(context.exception.second_path, self.ospath('Dir/Self')) + + def test_2_5_recursive_circular_links(self): + """ + Tests detection of recursion through circular links. + """ + self.require_symlink() + self.require_realpath() + self.make_dirs([ + 'A', + 'B', + 'C', + ]) + self.make_files([ + 'A/d', + 'B/e', + 'C/f' + ]) + self.make_links([ + ('A/Bx', 'B'), + ('B/Cx', 'C'), + ('C/Ax', 'A'), + ]) + with self.assertRaises(RecursionError) as context: + set(iter_tree(self.temp_dir)) + self.assertIn(context.exception.first_path, ('A', 'B', 'C')) + self.assertEqual(context.exception.second_path, { + 'A': self.ospath('A/Bx/Cx/Ax'), + 'B': self.ospath('B/Cx/Ax/Bx'), + 'C': self.ospath('C/Ax/Bx/Cx'), + }[context.exception.first_path]) diff --git a/python-path-specification/pathspec/util.py b/python-path-specification/pathspec/util.py new file mode 100644 index 0000000..045ac7a --- /dev/null +++ b/python-path-specification/pathspec/util.py @@ -0,0 +1,261 @@ +# encoding: utf-8 +""" +This module provides utility methods for dealing with path-specs. +""" + +import collections +import os +import os.path +import posixpath +import stat + +from .compat import string_types + +NORMALIZE_PATH_SEPS = [sep for sep in [os.sep, os.altsep] if sep and sep != posixpath.sep] +""" +*NORMALIZE_PATH_SEPS* (``list`` of ``str``) contains the path separators +that need to be normalized to the POSIX separator for the current +operating system. +""" + +_registered_patterns = {} +""" +*_registered_patterns* (``dict``) maps a name (``str``) to the +registered pattern factory (``callable``). +""" + +def iter_tree(root): + """ + Walks the specified directory for all files. 
def _iter_tree_next(root_full, dir_rel, memo):
    """
    Recursively yields every regular file found below one directory.

    *root_full* (``str``) is the absolute path of the root directory.

    *dir_rel* (``str``) is the directory to scan, relative to
    *root_full*.

    *memo* (``dict``) records the directories currently being walked,
    mapping each real path (``str``) to the relative path (``str``) it
    was reached by.

    Raises ``RecursionError`` if the real path of the directory is
    already in *memo*.

    Yields each file path (``str``) relative to *root_full*.
    """
    current_full = os.path.join(root_full, dir_rel)
    current_real = os.path.realpath(current_full)

    # Meeting the same real path twice while it is still being walked
    # means a link loops back to one of its ancestors.
    if current_real in memo:
        raise RecursionError(real_path=current_real, first_path=memo[current_real], second_path=dir_rel)
    memo[current_real] = dir_rel

    for name in os.listdir(current_full):
        child_rel = os.path.join(dir_rel, name)
        mode = os.stat(os.path.join(root_full, child_rel)).st_mode

        if stat.S_ISREG(mode):
            # Regular file: report it.
            yield child_rel

        elif stat.S_ISDIR(mode):
            # Directory: descend and pass along everything below it.
            for descendant in _iter_tree_next(root_full, child_rel, memo):
                yield descendant

    # NOTE: Forget this directory once it is fully walked so it may be
    # reached again through another path. Leaving it in the memo would
    # make a second visit look like recursion.
    del memo[current_real]
def match_files(patterns, files):
    """
    Matches the files to the patterns.

    *patterns* (``Iterable`` of ``pathspec.Pattern``) contains the
    patterns to use. They are applied in order, so a later pattern
    overrides an earlier one for the files it matches.

    *files* (``Iterable`` of ``str``) contains the normalized files to be
    matched against *patterns*.

    Returns the matched files (``set`` of ``str``).
    """
    # The ABC aliases in ``collections`` were removed in Python 3.10, so
    # take ``Container`` from ``collections.abc`` and fall back to the old
    # location only on Python 2.
    try:
        from collections.abc import Container
    except ImportError:
        from collections import Container

    # Every pattern scans the files, so a one-shot iterator has to be
    # materialized first.
    all_files = files if isinstance(files, Container) else list(files)

    return_files = set()
    for pattern in patterns:
        if pattern.include is None:
            # Null-operation pattern (comment or blank line).
            continue
        result_files = pattern.match(all_files)
        if pattern.include:
            return_files.update(result_files)
        else:
            return_files.difference_update(result_files)
    return return_files
class AlreadyRegisteredError(Exception):
    """
    The ``AlreadyRegisteredError`` exception is raised when a pattern
    factory is registered under a name already in use.
    """

    def __init__(self, name, pattern_factory):
        """
        Initializes the ``AlreadyRegisteredError`` instance.

        *name* (``str``) is the name of the registered pattern.

        *pattern_factory* (``callable``) is the registered pattern factory.
        """
        # Both values live in ``args``; the properties below read them
        # back from there.
        super(AlreadyRegisteredError, self).__init__(name, pattern_factory)

    @property
    def message(self):
        """
        *message* (``str``) is the error message.
        """
        # Both fields are named: a bare "{!r}" is positional and raises
        # ``IndexError`` when only keyword arguments are supplied.
        return "{name!r} is already registered for pattern factory:{pattern_factory!r}.".format(
            name=self.name,
            pattern_factory=self.pattern_factory,
        )

    @property
    def name(self):
        """
        *name* (``str``) is the name of the registered pattern.
        """
        return self.args[0]

    @property
    def pattern_factory(self):
        """
        *pattern_factory* (``callable``) is the registered pattern factory.
        """
        return self.args[1]
+ + *first_path* (``str``) is the first path encountered for + *real_path*. + + *second_path* (``str``) is the second path encountered for + *real_path*. + """ + super(RecursionError, self).__init__(real_path, first_path, second_path) + + @property + def first_path(self): + """ + *first_path* (``str``) is the first path encountered for + *real_path*. + """ + return self.args[1] + + @property + def message(self): + """ + *message* (``str``) is the error message. + """ + return "Real path {real!r} was encountered at {first!r} and then {second!r}.".format( + real=self.real_path, + first=self.first_path, + second=self.second_path, + ) + + @property + def real_path(self): + """ + *real_path* (``str``) is the real path that recursion was + encountered on. + """ + return self.args[0] + + @property + def second_path(self): + """ + *second_path* (``str``) is the second path encountered for + *real_path*. + """ + return self.args[2] diff --git a/python-path-specification/pypi-upload.sh b/python-path-specification/pypi-upload.sh new file mode 100755 index 0000000..936d078 --- /dev/null +++ b/python-path-specification/pypi-upload.sh @@ -0,0 +1,2 @@ +#!/bin/bash +python setup.py sdist register upload diff --git a/python-path-specification/setup.cfg b/python-path-specification/setup.cfg new file mode 100644 index 0000000..fa9fe18 --- /dev/null +++ b/python-path-specification/setup.cfg @@ -0,0 +1 @@ +[egg_info] diff --git a/python-path-specification/setup.py b/python-path-specification/setup.py new file mode 100644 index 0000000..b65135b --- /dev/null +++ b/python-path-specification/setup.py @@ -0,0 +1,40 @@ +# encoding: utf-8 + +import io +from setuptools import setup, find_packages + +from pathspec import __author__, __email__, __license__, __project__, __version__ + +# Read readme and changes files. 
+with io.open('README.rst', mode='r', encoding='UTF-8') as fh: + readme = fh.read().strip() +with io.open('CHANGES.rst', mode='r', encoding='UTF-8') as fh: + changes = fh.read().strip() + +setup( + name=__project__, + version=__version__, + author=__author__, + author_email=__email__, + url="https://github.com/cpburnz/python-path-specification", + description="Utility library for gitignore style pattern matching of file paths.", + long_description=readme + "\n\n" + changes, + classifiers=[ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)", + "Operating System :: OS Independent", + "Programming Language :: Python", + "Programming Language :: Python :: 2", + "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.2", + "Programming Language :: Python :: 3.3", + "Programming Language :: Python :: 3.4", + "Topic :: Software Development :: Libraries :: Python Modules", + "Topic :: Utilities", + ], + license=__license__, + packages=find_packages(), + test_suite='pathspec.tests', +) diff --git a/python-path-specification/tox.ini b/python-path-specification/tox.ini new file mode 100644 index 0000000..c3d2972 --- /dev/null +++ b/python-path-specification/tox.ini @@ -0,0 +1,5 @@ +[tox] +envlist = py27, py32, py33, py34, pypy, pypy3 + +[testenv] +commands = python setup.py test