diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..b677c2a --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +tests/_data/* filter=lfs diff=lfs merge=lfs -text diff --git a/.github/workflows/dissect-ci.yml b/.github/workflows/dissect-ci.yml new file mode 100644 index 0000000..b7b844a --- /dev/null +++ b/.github/workflows/dissect-ci.yml @@ -0,0 +1,37 @@ +name: Dissect CI +on: + push: + branches: + - main + tags: + - '*' + pull_request: + workflow_dispatch: + +jobs: + ci: + uses: fox-it/dissect-workflow-templates/.github/workflows/dissect-ci-template.yml@main + secrets: inherit + + publish: + if: ${{ github.ref_name == 'main' || github.ref_type == 'tag' }} + needs: [ci] + runs-on: ubuntu-latest + environment: dissect_publish + permissions: + id-token: write + steps: + - uses: actions/download-artifact@v4 + with: + name: packages + path: dist/ + # According to the documentation, it automatically looks inside the `dist/` folder for packages. + - name: Publish package distributions to Pypi + uses: pypa/gh-action-pypi-publish@release/v1 + + trigger-tests: + needs: [publish] + uses: fox-it/dissect-workflow-templates/.github/workflows/dissect-ci-demand-test-template.yml@main + secrets: inherit + with: + on-demand-test: 'dissect.target' diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..74cecaf --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +coverage.xml +.coverage +dist/ +.eggs/ +*.egg-info/ +*.pyc +__pycache__/ +.pytest_cache/ +tests/docs/api +tests/docs/build +.tox/ diff --git a/COPYRIGHT b/COPYRIGHT new file mode 100644 index 0000000..f941877 --- /dev/null +++ b/COPYRIGHT @@ -0,0 +1,5 @@ +Dissect is released as open source by Fox-IT (https://www.fox-it.com) part of NCC Group Plc (https://www.nccgroup.com) + +Developed by the Dissect Team (dissect@fox-it.com) and made available at https://github.com/fox-it/dissect.fve + +License terms: AGPL3 (https://www.gnu.org/licenses/agpl-3.0.html) diff --git a/LICENSE b/LICENSE new 
file mode 100644 index 0000000..be3f7b2 --- /dev/null +++ b/LICENSE @@ -0,0 +1,661 @@ + GNU AFFERO GENERAL PUBLIC LICENSE + Version 3, 19 November 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +our General Public Licenses are intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + Developers that use our General Public Licenses protect your rights +with two steps: (1) assert copyright on the software, and (2) offer +you this License which gives you legal permission to copy, distribute +and/or modify the software. + + A secondary benefit of defending all users' freedom is that +improvements made in alternate versions of the program, if they +receive widespread use, become available for other developers to +incorporate. Many developers of free software are heartened and +encouraged by the resulting cooperation. However, in the case of +software used on network servers, this result may fail to come about. 
+The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its +source code to the public. + + The GNU Affero General Public License is designed specifically to +ensure that, in such cases, the modified source code becomes available +to the community. It requires the operator of a network server to +provide the source code of the modified version running there to the +users of that server. Therefore, public use of a modified version, on +a publicly accessible server, gives the public access to the source +code of the modified version. + + An older license, called the Affero General Public License and +published by Affero, was designed to accomplish similar goals. This is +a different license, not a version of the Affero GPL, but Affero has +released a new version of the Affero GPL which permits relicensing under +this license. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU Affero General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. 
+ + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. 
+ + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. 
The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. 
This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. 
For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. 
Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. 
Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Remote Network Interaction; Use with the GNU General Public License. 
+ + Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users +interacting with it remotely through a computer network (if your version +supports such interaction) an opportunity to receive the Corresponding +Source of your version by providing access to the Corresponding Source +from a network server at no charge, through some standard or customary +means of facilitating copying of software. This Corresponding Source +shall include the Corresponding Source for any work covered by version 3 +of the GNU General Public License that is incorporated pursuant to the +following paragraph. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the work with which it is combined will remain governed by version +3 of the GNU General Public License. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU Affero General Public License from time to time. Such new versions +will be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU Affero General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU Affero General Public License, you may choose any version ever published +by the Free Software Foundation. 
+ + If the Program specifies that a proxy can decide which future +versions of the GNU Affero General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. 
+ + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If your software can interact with users remotely through a computer +network, you should also make sure that it provides a way for users to +get its source. For example, if your program is a web application, its +interface could display a "Source" link that leads users to an archive +of the code. 
There are many ways you could offer source, and different +solutions will be better for different programs; see section 13 for the +specific requirements. + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU AGPL, see +<https://www.gnu.org/licenses/>. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..9ae349b --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,2 @@ +exclude .gitignore +recursive-exclude .github/ * diff --git a/README.md b/README.md new file mode 100644 index 0000000..4a24523 --- /dev/null +++ b/README.md @@ -0,0 +1,55 @@ +# dissect.fve + +A Dissect module implementing parsers for full volume encryption implementations, currently Linux Unified Key Setup (LUKS1 and LUKS2) and Microsoft's Bitlocker Disk Encryption. +For more information, please see [the documentation](https://docs.dissect.tools/en/latest/projects/dissect.fve/index.html). + +## Requirements + +This project is part of the Dissect framework and requires Python. + +Information on the supported Python versions can be found in the Getting Started section of [the documentation](https://docs.dissect.tools/en/latest/index.html#getting-started). + +## Installation + +`dissect.fve` is available on [PyPI](https://pypi.org/project/dissect.fve/). + +```bash +pip install dissect.fve +``` + +This module is also automatically installed if you install the `dissect` package. + +## Build and test instructions + +This project uses `tox` to build source and wheel distributions. Run the following command from the root folder to build +these: + +```bash +tox -e build +``` + +The build artifacts can be found in the `dist/` directory. + +`tox` is also used to run linting and unit tests in a self-contained environment. 
To run both linting and unit tests +using the default installed Python version, run: + +```bash +tox +``` + +For a more elaborate explanation on how to build and test the project, please see [the +documentation](https://docs.dissect.tools/en/latest/contributing/tooling.html). + +## Contributing + +The Dissect project encourages any contribution to the codebase. To make your contribution fit into the project, please +refer to [the development guide](https://docs.dissect.tools/en/latest/contributing/developing.html). + +## Copyright and license + +Dissect is released as open source by Fox-IT (<https://www.fox-it.com>) part of NCC Group Plc +(<https://www.nccgroup.com>). + +Developed by the Dissect Team (<dissect@fox-it.com>) and made available at <https://github.com/fox-it/dissect.fve>. + +License terms: AGPL3 (<https://www.gnu.org/licenses/agpl-3.0.html>). For more information, see the LICENSE file. diff --git a/dissect/fve/__init__.py b/dissect/fve/__init__.py new file mode 100644 index 0000000..93416be --- /dev/null +++ b/dissect/fve/__init__.py @@ -0,0 +1,5 @@ +from dissect.fve.exceptions import Error + +__all__ = [ + "Error", +] diff --git a/dissect/fve/bde/__init__.py b/dissect/fve/bde/__init__.py new file mode 100644 index 0000000..396526c --- /dev/null +++ b/dissect/fve/bde/__init__.py @@ -0,0 +1,6 @@ +from dissect.fve.bde.bde import BDE, is_bde_volume + +__all__ = [ + "BDE", + "is_bde_volume", +] diff --git a/dissect/fve/bde/bde.py b/dissect/fve/bde/bde.py new file mode 100644 index 0000000..439c831 --- /dev/null +++ b/dissect/fve/bde/bde.py @@ -0,0 +1,521 @@ +# References: +# - https://github.com/libyal/libbde +# - https://github.com/Aorimn/dislocker +# - https://github.com/thewhiteninja/ntfstool +# - https://gitlab.com/cryptsetup/cryptsetup +# - fvevol.sys +# - fveapi.dll + +from __future__ import annotations + +import io +import logging +import os +import struct +from bisect import bisect_right +from operator import itemgetter +from typing import BinaryIO, Iterator +from uuid import UUID + +from dissect.util.stream import AlignedStream + +from dissect.fve.bde.c_bde import ( + BITLOCKER_SIGNATURE, + 
CIPHER_MAP, + EOW_INFORMATION_OFFSET_GUID, + FVE_DATUM_ROLE, + FVE_DATUM_TYPE, + FVE_STATE, + INFORMATION_OFFSET_GUID, + c_bde, +) +from dissect.fve.bde.eow import EowInformation +from dissect.fve.bde.information import Dataset, Information, KeyDatum, VmkInfoDatum +from dissect.fve.bde.keys import derive_recovery_key, derive_user_key, stretch +from dissect.fve.crypto import create_cipher +from dissect.fve.exceptions import InvalidHeaderError + +Run = tuple[int, int, int] + +log = logging.getLogger(__name__) +log.setLevel(os.getenv("DISSECT_LOG_BDE", "CRITICAL")) + + +class BDE: + """Bitlocker disk encryption.""" + + def __init__(self, fh: BinaryIO): + self.fh = fh + self.boot_sector = BootSector(fh) + + self._available_information: list[Information] = [] + for offset in self.boot_sector.information_offsets: + try: + self._available_information.append(Information(self.fh, offset)) + except InvalidHeaderError as e: + log.warning("Failed to parse BDE information at offset 0x%x", offset, exc_info=e) + + self._valid_information = [info for info in self._available_information if info.is_valid()] + if not self._valid_information: + raise InvalidHeaderError("No valid BDE information found") + self.information = self._valid_information[0] + + self.eow_information = None + self._available_eow_information: list[EowInformation] = [] + for offset in self.boot_sector.eow_offsets: + try: + self._available_eow_information.append(EowInformation(self.fh, offset)) + except InvalidHeaderError as e: + log.warning("Failed to parse BDE EOW information at offset 0x%x", offset, exc_info=e) + + self._valid_eow_information = [info for info in self._available_eow_information if info.is_valid()] + if self._available_eow_information and not self._valid_eow_information: + raise InvalidHeaderError("No valid EOW information found") + elif self._valid_eow_information: + self.eow_information = self._valid_eow_information[0] + + self._fvek = None + + @property + def identifiers(self) -> list[UUID]: + 
datums = self.information.dataset.find_datum( + role=FVE_DATUM_ROLE.VOLUME_MASTER_KEY_INFO, + type_=FVE_DATUM_TYPE.VOLUME_MASTER_KEY_INFO, + ) + identifiers = [d.identifier for d in datums] + return identifiers + + @property + def sector_size(self) -> int: + return self.boot_sector.sector_size + + @property + def version(self) -> int: + return self.information.version + + @property + def paused(self) -> bool: + return self.information.current_state == FVE_STATE.PAUSED + + @property + def decrypted(self) -> bool: + return self.information.current_state == FVE_STATE.DECRYPTED + + @property + def encrypted(self) -> bool: + return not self.decrypted + + @property + def switching(self) -> bool: + return self.information.current_state not in (FVE_STATE.DECRYPTED, FVE_STATE.ENCRYPTED) + + @property + def unlocked(self) -> bool: + return self._fvek is not None or self.information.current_state == FVE_STATE.DECRYPTED + + def description(self) -> str | None: + """Return the volume description, if present.""" + return self.information.dataset.find_description() + + def has_clear_key(self) -> bool: + """Return whether this volume has a clear/obfuscated encryption key. 
Used in paused volumes.""" + return self.information.dataset.find_clear_vmk() is not None + + def has_recovery_password(self) -> bool: + """Return whether this volume can be unlocked with a recovery password.""" + return len(list(self.information.dataset.find_recovery_vmk())) != 0 + + def has_passphrase(self) -> bool: + """Return whether this volume can be unlocked with a user passphrase.""" + return len(list(self.information.dataset.find_passphrase_vmk())) != 0 + + def has_bek(self) -> bool: + """Return whether this volume can be unlocked with a BEK file.""" + return len(list(self.information.dataset.find_external_vmk())) != 0 + + def unlock(self, key: bytes) -> None: + """Unlock this volume with the specified encryption key.""" + self.information.check_integrity(key) + + fvek = self.information.dataset.find_fvek() + if not fvek: + raise ValueError("No FVEK found") + + fvek = fvek.unbox(key) + if not isinstance(fvek, KeyDatum): + raise ValueError("Invalid unboxed FVEK") + + self._fvek = fvek + + def unlock_with_clear_key(self) -> None: + """Unlock this volume with the clear/obfuscated key.""" + vmk = self.information.dataset.find_clear_vmk() + if not vmk: + raise ValueError("No clear VMK found") + + self.unlock(vmk.decrypt(vmk.clear_key())) + + def unlock_with_recovery_password(self, recovery_password: str, identifier: UUID | str | None = None) -> None: + """Unlock this volume with the recovery password.""" + recovery_key = derive_recovery_key(recovery_password) + self._unlock_with_user_key(self.information.dataset.find_recovery_vmk(), recovery_key, identifier) + + def unlock_with_passphrase(self, passphrase: str, identifier: UUID | str | None = None) -> None: + """Unlock this volume with the user passphrase.""" + user_key = derive_user_key(passphrase) + self._unlock_with_user_key(self.information.dataset.find_passphrase_vmk(), user_key, identifier) + + def unlock_with_bek(self, bek_fh: BinaryIO) -> None: + """Unlock this volume with a BEK file.""" + bek_ds = 
Dataset(bek_fh) + startup_key = bek_ds.find_startup_key() + if not startup_key: + raise ValueError("No startup key found") + + for vmk in self.information.dataset.find_external_vmk(): + if vmk.identifier == startup_key.identifier: + break + else: + raise ValueError("No compatible VMK found") + + decrypted_key = vmk.decrypt(startup_key.external_key()) + self.unlock(decrypted_key) + + def _unlock_with_user_key( + self, vmks: list[VmkInfoDatum], user_key: bytes, identifier: UUID | str | None = None + ) -> None: + decrypted_key = None + for vmk in vmks: + if identifier and str(identifier) != str(vmk.identifier): + continue + + # There should only be one stretch key + stretch_key = vmk.stretch_key(None) + if not stretch_key: + continue # Shouldn't happen + + aes_key = stretch(user_key, stretch_key.salt) + try: + decrypted_key = vmk.decrypt(aes_key) + break + except ValueError: + continue + else: + raise ValueError("No compatible VMK found") + + self.unlock(decrypted_key) + + def open(self) -> BitlockerStream: + """Open this volume and return a readable (decrypted) stream.""" + if not self.unlocked: + raise ValueError("Volume is locked") + return BitlockerStream(self) + + def reserved_regions(self) -> list[tuple[int, int]]: + """Return a list of reserved regions for this volume. + + Some areas of the volume must "fake" return all null bytes when read. + This includes things like the information regions. 
+ + Reference: + - InitializeFilterData + - FveLibIdentifyCurrentRegionTypeAndEnd + """ + regions = [] + + if self.version == 1: + information_size = (self.boot_sector.cluster_size + 0x3FFF) & ~(self.boot_sector.cluster_size - 1) + elif self.version >= 2: + information_size = ~(self.sector_size - 1) & (self.sector_size + 0xFFFF) + + # All information offsets are reserved regions + for offset in self.information.information_offset: + regions.append((offset // self.sector_size, information_size // self.sector_size)) + + if self.version >= 2: + num_sectors = self.information.virtualized_sectors or 1 + regions.append((self.information.virtualized_block_offset // self.sector_size, num_sectors)) + + for eow_info in self._valid_eow_information: + eow_information_size = ~(self.sector_size - 1) & (eow_info.size + self.sector_size - 1) + regions.append((eow_info.offset // self.sector_size, eow_information_size // self.sector_size)) + + for bitmap in eow_info.bitmaps: + regions.append((bitmap.offset // self.sector_size, bitmap.size // self.sector_size)) + regions.append((bitmap.conv_log_offset // self.sector_size, eow_info.conv_log_size // self.sector_size)) + + # In progress encryption/decryption with dirty state + if self.information.current_state == FVE_STATE.SWITCHING_DIRTY and self.information.state_size: + regions.append( + ( + self.information.state_offset // self.sector_size, + self.information.state_size // self.sector_size, + ) + ) + + return sorted(set(regions), key=itemgetter(0)) + + +class BootSector: + """Bitlocker boot sector parsing. + + Bitlocker seems to do some funny stuff with the boot sector. Instead of trying to make sense of that, + just do what Microsoft does in their driver: looking for specific GUIDs to determine the version. + + If no GUIDs can be found, but the Oem string still says -FVE-FS-, we're dealing with a legacy Vista volume. 
+ """ + + def __init__(self, fh: BinaryIO): + buf = fh.read(512) + + self.boot_sector = c_bde.BOOT_SECTOR(buf) + self.sector_size = self.boot_sector.Bpb.BytesPerSector + self.cluster_size = self.sector_size * self.boot_sector.Bpb.SectorsPerCluster + + self.guid = None + self.information_offsets = [] + self.eow_offsets = [] + + info_guid_offset = buf.find(INFORMATION_OFFSET_GUID.bytes_le) + eow_guid_offset = buf.find(EOW_INFORMATION_OFFSET_GUID.bytes_le) + + if eow_guid_offset != -1: + info = c_bde.FVE_EOW_GUID_RECOGNITION(buf[eow_guid_offset:]) + self.guid = EOW_INFORMATION_OFFSET_GUID + self.information_offsets = info.InformationOffset + self.eow_offsets = info.EowOffset + elif info_guid_offset != -1: + info = c_bde.FVE_GUID_RECOGNITION(buf[info_guid_offset:]) + self.guid = INFORMATION_OFFSET_GUID + self.information_offsets = info.InformationOffset + elif self.boot_sector.Oem == BITLOCKER_SIGNATURE: + self.information_offsets = [self.boot_sector.InformationLcn * self.cluster_size] + else: + raise ValueError("Not a BDE volume") + + +class BitlockerStream(AlignedStream): + """Transparently decrypting Bitlocker stream. + + Provides a transparently decrypted Bitlocker stream for reading. Takes care of the reserved regions, as well + as the virtualized blocks in Vista and newer Bitlocker versions. + + For Vista, the first 0x2000 bytes aren't actually encrypted. The very first sector is obviously modified to + contain the Bitlocker information, so when reading that sector we must patch the Oem ID to be the NTFS one, + as well as replacing the secondary MFT location with one that's located in the Information structure. + + For newer versions, the first N sectors (usually 16) _are_ encrypted, but have been placed elsewhere on the + volume. The location and amount of so-called virtualized sectors are specified in the Information structure. + + The Microsoft implementation works on a byte level, for the time being it's easier for us to work on sector + level. 
I haven't seen a reason why this would break, yet. + """ + + RUN_PLAIN = 0 + RUN_VISTA_HEADER = 1 + RUN_SPARSE = 2 + RUN_ENCRYPTED = 3 + + def __init__(self, bde: BDE): + self.bde = bde + self._fh = bde.fh + + size = getattr(bde.fh, "size", None) + try: + if size is None: + bde.fh.seek(0, io.SEEK_END) + size = bde.fh.tell() + except Exception: + pass + + self.sector_size = bde.sector_size + if self.bde.encrypted: + self.encrypted = True + self.cipher = create_cipher( + CIPHER_MAP[bde._fvek.key_type], + bde._fvek.data, + sector_size=self.sector_size, + iv_sector_size=self.sector_size, + ) + else: + self.encrypted = False + self.cipher = None + + self._reserved_regions = bde.reserved_regions() + self._virtualized_block_offset = bde.information.virtualized_block_offset + self._virtualized_block_sector = self._virtualized_block_offset // self.sector_size + self._virtualized_sector_count = bde.information.virtualized_sectors + + self._state_offset_sector = bde.information.state_offset // self.sector_size + + self.is_eow = bde.eow_information is not None + self._eow_bitmaps = [] + self._eow_bitmap_lookup = [] + self._eow_sectors_per_chunk = None + + if self.is_eow: + self._eow_bitmaps = bde.eow_information.bitmaps + self._eow_bitmap_lookup = [bm.region_offset // self.sector_size for bm in self._eow_bitmaps] + self._eow_sectors_per_chunk = self.bde.eow_information.chunk_size // self.sector_size + + super().__init__(size=size) + + def _iter_run_state(self, sector: int, count: int) -> Iterator[Run]: + while count > 0: + if self.is_eow: + bitmap_idx = bisect_right(self._eow_bitmap_lookup, sector) + bitmap = self._eow_bitmaps[bitmap_idx - 1] + + relative_sector = sector - (bitmap.region_offset // self.sector_size) + chunk = (relative_sector * self.sector_size) // self.bde.eow_information.chunk_size + chunk_count = -(-count // self._eow_sectors_per_chunk) + + for bit_set, bit_count in bitmap.runs(chunk, chunk_count): + run_type = BitlockerStream.RUN_ENCRYPTED if bit_set else 
BitlockerStream.RUN_PLAIN + run_count = min(count, bit_count * self._eow_sectors_per_chunk) + + yield (run_type, sector, run_count) + + sector += run_count + count -= run_count + else: + # The StateOffset determines how much of the volume is encrypted. + # In pre-EOW Bitlocker, it's actually used to determine how much of the volume is encrypted + # for partially encrypted volumes, but since EOW, it seems to just contain the volume size. + # Pre-EOW volumes are encrypted back to front, so reading beyond the StateOffset means reading + # plaintext data. + if self._state_offset_sector and sector < self._state_offset_sector: + remaining_sectors = min(self._state_offset_sector - sector, count) + yield (BitlockerStream.RUN_ENCRYPTED, sector, remaining_sectors) + + sector += remaining_sectors + count -= remaining_sectors + + if self._state_offset_sector and sector >= self._state_offset_sector: + yield (BitlockerStream.RUN_PLAIN, sector, count) + elif self.encrypted: + yield (BitlockerStream.RUN_ENCRYPTED, sector, count) + else: + yield (BitlockerStream.RUN_PLAIN, sector, count) + + sector += count + count -= count + + def _iter_runs(self, offset: int, length: int) -> Iterator[Run]: + sector = offset // self.sector_size + count = -(-length // self.sector_size) + + while count != 0: + # Vista volume header behaviour + if self.bde.version == 1 and sector < 0x2000 // self.sector_size: + if sector == 0: + yield (BitlockerStream.RUN_VISTA_HEADER, sector, 1) + + sector += 1 + count -= 1 + + # Intentionally fall through + if count: + remaining_sectors = min((0x2000 // self.sector_size) - sector, count) + + yield (BitlockerStream.RUN_PLAIN, sector, remaining_sectors) + + sector += remaining_sectors + count -= remaining_sectors + + # Only on Bitlocker version >= 2 + if sector < self._virtualized_sector_count: + remaining_sectors = min(self._virtualized_sector_count - sector, count) + + yield from self._iter_run_state(sector + self._virtualized_block_sector, remaining_sectors) + + 
sector += remaining_sectors + count -= remaining_sectors + + for region_start, region_size in self._reserved_regions: + if count == 0: + break + + region_end = region_start + region_size + + # Starts outside a region but ends in or after it + if sector < region_start < sector + count: + remaining_sectors = min(region_start - sector, count) + + yield from self._iter_run_state(sector, remaining_sectors) + + sector += remaining_sectors + count -= remaining_sectors + + # Starts in a region + if region_start <= sector < region_end: + remaining_sectors = min(region_end - sector, count) + + yield (BitlockerStream.RUN_SPARSE, sector, remaining_sectors) + + sector += remaining_sectors + count -= remaining_sectors + else: + yield from self._iter_run_state(sector, count) + + sector += count + count -= count + + def _read(self, offset: int, length: int) -> bytes: + result = [] + + for run_type, read_sector, sector_count in _consolidate_runs(self._iter_runs(offset, length)): + if run_type == BitlockerStream.RUN_PLAIN: + self._fh.seek(read_sector * self.sector_size) + result.append(self._fh.read(sector_count * self.sector_size)) + elif run_type == BitlockerStream.RUN_VISTA_HEADER: + self._fh.seek(read_sector * self.sector_size) + buf = bytearray(self._fh.read(sector_count * self.sector_size)) + buf[0x03:0x0B] = b"NTFS " + buf[0x38:0x40] = struct.pack(" Iterator[Run]: + current_type = None + current_sector = None + current_count = 0 + + for run_type, sector, count in it: + if current_type is None: + current_type = run_type + current_sector = sector + current_count = count + continue + + if current_type != run_type or current_sector + current_count != sector: + yield (current_type, current_sector, current_count) + + current_type = run_type + current_sector = sector + current_count = count + else: + current_count += count + + if current_type is not None: + yield (current_type, current_sector, current_count) + + +def is_bde_volume(fh: BinaryIO) -> bool: + stored_position = fh.tell() 
+ try: + fh.seek(0) + BootSector(fh) + return True + except ValueError: + return False + finally: + fh.seek(stored_position) diff --git a/dissect/fve/bde/c_bde.py b/dissect/fve/bde/c_bde.py new file mode 100644 index 0000000..b766fc1 --- /dev/null +++ b/dissect/fve/bde/c_bde.py @@ -0,0 +1,377 @@ +from uuid import UUID + +from dissect.cstruct import cstruct + +bde_def = """ +/* ======== Volume header information ======== */ + +typedef struct _FVE_GUID_RECOGNITION { + CHAR Guid[16]; + QWORD InformationOffset[3]; +} FVE_GUID_RECOGNITION; + +typedef struct _FVE_EOW_GUID_RECOGNITION { + CHAR Guid[16]; + QWORD InformationOffset[3]; + QWORD EowOffset[2]; +} FVE_EOW_GUID_RECOGNITION; + +typedef struct _BIOS_PARAMETER_BLOCK { + USHORT BytesPerSector; + UCHAR SectorsPerCluster; + USHORT ReservedSectors; + UCHAR Fats; + USHORT RootEntries; + USHORT Sectors; + UCHAR Media; + USHORT SectorsPerFat; + USHORT SectorsPerTrack; + USHORT Heads; + ULONG HiddenSectors; + ULONG LargeSectors; +} BIOS_PARAMETER_BLOCK; + +typedef struct _BOOT_SECTOR { + CHAR Jump[3]; + CHAR Oem[8]; + BIOS_PARAMETER_BLOCK Bpb; + CHAR Unused0[20]; + union { + ULONG64 InformationLcn; + ULONG64 Mft2StartLcn; + }; + CHAR Unused1[8]; + ULONG64 PartitionLength; + CHAR Unused2[28]; + UCHAR BytesPerSectorShift; + UCHAR SectorsPerClusterShift; + CHAR Unused3[402]; +} BOOT_SECTOR; + +/* ======== FVE information and dataset ======== */ + +enum FVE_STATE { + DECRYPTED = 1, /* Decrypted state */ + SWITCHING_DIRTY = 2, /* In-progress encryption or decryption of large volumes */ + /* StateSize will be non-zero, and there will be a conversion log */ + PAUSED = 3, /* Seen on Vista volume with paused encryption/decryption */ + ENCRYPTED = 4, /* The most common state */ + SWITCHING = 5, /* In-progress encryption or decryption of small volumes */ + /* Seen when detaching VHD during encryption/decryption of small disks */ +}; + +enum FVE_KEY_TYPE { + NONE = 0x0000, + EXTERNAL = 0x0005, /* External VMKs have a USE_KEY with this 
key type */ + + STRETCH_KEY = 0x1000, + STRETCH_KEY_1 = 0x1001, + AES_CCM_256_0 = 0x2000, + AES_CCM_256_1 = 0x2001, + EXTERN_KEY = 0x2002, + VMK = 0x2003, + AES_CCM_256_2 = 0x2004, + HASH_256 = 0x2005, + + AES_128_DIFFUSER = 0x8000, + AES_256_DIFFUSER = 0x8001, + AES_128 = 0x8002, + AES_256 = 0x8003, + AES_XTS_128 = 0x8004, + AES_XTS_256 = 0x8005, +}; + +flag FVE_KEY_PROTECTOR { + CLEAR = 0x0000, /* Also known as "obfuscated" */ + TPM = 0x0100, + EXTERNAL = 0x0200, /* Startup key */ + TPM_PIN = 0x0400, + RECOVERY_PASSWORD = 0x0800, /* Recovery password */ + PASSPHRASE = 0x2000, /* User passphrase */ +}; + +flag FVE_KEY_FLAG { + NONE = 0x00, + ENHANCED_PIN = 0x04, + ENHANCED_CRYPTO = 0x10, + PBKDF2 = 0x40, +}; + +enum FVE_DATUM_ROLE : USHORT { + PROPERTY = 0x0000, + + UNKNOWN_1 = 0x0001, + + VOLUME_MASTER_KEY_INFO = 0x0002, + FULL_VOLUME_ENCRYPTION_KEY = 0x0003, + VALIDATION = 0x0004, + + UNKNOWN_5 = 0x0005, + + STARTUP_KEY = 0x0006, + DESCRIPTION = 0x0007, + + UNKNOWN_8 = 0x0008, + UNKNOWN_9 = 0x0009, + UNKNOWN_A = 0x000A, + AUTO_UNLOCK = 0x000B, + FULL_VOLUME_ENCRYPTION_KEY_2 = 0x000C, + UNKNOWN_D = 0x000D, + UNKNOWN_E = 0x000E, + + VIRTUALIZATION_INFO = 0x000F, + VALIDATION_HASH = 0x0011, +}; + +enum FVE_DATUM_TYPE : USHORT { + ERASED = 0x0000, + KEY = 0x0001, + UNICODE = 0x0002, + STRETCH_KEY = 0x0003, + USE_KEY = 0x0004, + AES_CCM_ENCRYPTED_KEY = 0x0005, + TPM_ENCRYPTED_BLOB = 0x0006, + VALIDATION_INFO = 0x0007, + VOLUME_MASTER_KEY_INFO = 0x0008, + EXTERNAL_INFO = 0x0009, + UPDATE = 0x000A, + ERROR_LOG = 0x000B, + ASYMMETRIC_ENCRYPTED_KEY = 0x000C, + EXPORTED_KEY = 0x000D, + PUBLIC_KEY_INFO = 0x000E, + VIRTUALIZATION_INFO = 0x000F, + SIMPLE_1 = 0x0010, + SIMPLE_2 = 0x0011, + CONCAT_HASH_KEY = 0x0012, + SIMPLE_3 = 0x0013, + SIMPLE_LARGE = 0x0014, + BACKUP_INFO = 0x0015, +}; + +typedef struct _FVE_INFORMATION { + CHAR Signature[8]; + USHORT HeaderSize; + USHORT Version; + USHORT CurrentState; + USHORT NextState; + ULONG64 StateOffset; + ULONG StateSize; + ULONG 
VirtualizedSectors; + ULONG64 InformationOffset[3]; + union { + ULONG64 Mft2StartLcn; + ULONG64 VirtualizedBlockOffset; + }; +} FVE_INFORMATION; + +typedef struct _FVE_DATASET { + ULONG Size; + ULONG Version; + ULONG StartOffset; + ULONG EndOffset; + CHAR Identification[16]; + ULONG NonceCounter; + USHORT FvekType; + USHORT _Unknown; + ULONG64 CreationTime; +} FVE_DATASET; + +typedef struct _FVE_DATUM { + USHORT Size; + USHORT Role; + USHORT Type; + USHORT Flags; +} FVE_DATUM; + +typedef struct _FVE_VALIDATION { + USHORT Size; + USHORT Version; + ULONG Crc32; + // FVE_DATUM IntegrityCheck; +} FVE_VALIDATION; + +/* ======== FVE datums ======== */ + +typedef struct _FVE_DATUM_SIMPLE { + ULONG Data; +} FVE_DATUM_SIMPLE; + +typedef struct _FVE_DATUM_SIMPLE_LARGE { + ULONG64 Data; +} FVE_DATUM_SIMPLE_LARGE; + +typedef struct _FVE_DATUM_GUID { + CHAR Guid[16]; +} FVE_DATUM_GUID; + +typedef struct _FVE_DATUM_KEY { + USHORT KeyType; + USHORT KeyFlags; + // CHAR Data[]; +} FVE_DATUM_KEY; + +typedef struct _FVE_DATUM_UNICODE { + // wchar Text[]; +} FVE_DATUM_UNICODE; + +typedef struct _FVE_DATUM_STRETCH_KEY { + USHORT KeyType; + USHORT KeyFlags; + CHAR Salt[16]; +} FVE_DATUM_STRETCH_KEY; + +typedef struct _FVE_DATUM_USE_KEY { + USHORT KeyType; + USHORT KeyFlags; +} FVE_DATUM_USE_KEY; + +typedef struct _FVE_NONCE { + ULONG64 DateTime; + ULONG Counter; +} FVE_NONCE; + +typedef struct _FVE_DATUM_AESCCM_ENC { + FVE_NONCE Nonce; + CHAR MAC[16]; + // CHAR Data[]; +} FVE_DATUM_AESCCM_ENC; + +typedef struct _FVE_DATUM_TPM_ENC_BLOB { + ULONG PcrBitmap; + // CHAR Data[]; +} FVE_DATUM_TPM_ENC_BLOB; + +typedef struct _FVE_DATUM_VALIDATION_ENTRY { + ULONG _Unknown1; + ULONG _Unknown2; + CHAR Hash[32]; +} FVE_DATUM_VALIDATION_ENTRY; + +typedef struct _FVE_DATUM_VALIDATION_INFO { + // FVE_DATUM_VALIDATION_ENTRY AllowList[]; +} FVE_DATUM_VALIDATION_INFO; + +typedef struct _FVE_DATUM_VMK_INFO { + CHAR Identifier[16]; + ULONG64 DateTime; + USHORT _Unknown1; + USHORT Priority; +} 
FVE_DATUM_VMK_INFO; + +typedef struct _FVE_DATUM_EXTERNAL_INFO { + CHAR Identifier[16]; + ULONG64 DateTime; +} FVE_DATUM_EXTERNAL_INFO; + +typedef struct _FVE_DATUM_UPDATE { + // Unknown +} FVE_DATUM_UPDATE; + +typedef struct _FVE_DATUM_ERROR_LOG { + // Unknown +} FVE_DATUM_ERROR_LOG; + +typedef struct _FVE_DATUM_ASYM_ENC_BLOB { + // CHAR Data[]; +} FVE_DATUM_ASYM_ENC_BLOB; + +typedef struct _FVE_DATUM_EXPORTED_PUBLIC_KEY { + // CHAR Data[]; +} FVE_DATUM_EXPORTED_PUBLIC_KEY; + +typedef struct _FVE_DATUM_PUBLIC_KEY_INFO { + // CHAR Data[]; +} FVE_DATUM_PUBLIC_KEY_INFO; + +typedef struct _FVE_DATUM_VIRTUALIZATION_INFO { + ULONG64 VirtualizedBlockOffset; + ULONG64 VirtualizedBlockSize; +} FVE_DATUM_VIRTUALIZATION_INFO; + +typedef struct _FVE_DATUM_CONCAT_HASH_KEY { + // Unknown +} FVE_DATUM_CONCAT_HASH_KEY; + +typedef struct _FVE_DATUM_BACKUP_INFO { + // Unknown +} FVE_DATUM_BACKUP_INFO; + +typedef struct _FVE_DATUM_AESCBC256_HMAC_SHA512_ENC { + CHAR Iv[16]; + CHAR Mac[64]; + // CHAR Data[]; +} FVE_DATUM_AESCBC256_HMAC_SHA512_ENC; + +/* ======== EOW structures ======== */ + +typedef struct _FVE_EOW_INFORMATION { + CHAR HeaderSignature[8]; + USHORT HeaderSize; + USHORT Size; + ULONG SectorSize; + ULONG _Unknown1; + ULONG ChunkSize; + ULONG ConvLogSize; + ULONG _Unknown2; + ULONG RegionCount; + ULONG Crc32; + ULONG64 EowOffset[2]; + ULONG64 BitmapOffsets[(Size - HeaderSize) / 8]; +} FVE_EOW_INFORMATION; + +typedef struct _FVE_EOW_BITMAP { + CHAR HeaderSignature[10]; + USHORT HeaderSize; + ULONG Size; + ULONG _Unknown1; + ULONG64 RegionOffset; + ULONG64 RegionSize; + ULONG64 ConvLogOffset; + ULONG RecordOffset[2]; + ULONG RecordSize; + ULONG Crc32; +} FVE_EOW_BITMAP; + +typedef struct _FVE_EOW_BITMAP_RECORD { + CHAR HeaderSignature[10]; + USHORT HeaderSize; + ULONG Size; + ULONG BitmapSize; + ULONG64 SequenceNumber; + ULONG Flags; + ULONG Crc32; + // ULONG Bitmap[]; +} FVE_EOW_BITMAP_RECORD; +""" + +c_bde = cstruct().load(bde_def) + +FVE_STATE = c_bde.FVE_STATE 
class EowInformation:
    """Bitlocker EOW Information.

    Reads a ``FVE_EOW_INFORMATION`` header at ``offset`` in ``fh``, checks its signature and
    records whether the CRC32 stored in the header matches the header contents.

    Args:
        fh: File-like object to read the header from.
        offset: Offset in ``fh`` at which the header starts.

    Raises:
        InvalidHeaderError: If the header signature is not the EOW information signature.
    """

    def __init__(self, fh: BinaryIO, offset: int):
        self.fh = fh
        self.offset = offset

        fh.seek(offset)
        header = c_bde.FVE_EOW_INFORMATION(fh)
        self.header = header
        if header.HeaderSignature != EOW_SIGNATURE:
            raise InvalidHeaderError("Invalid EOW information signature")

        # The checksum is calculated over the header with its own Crc32 field zeroed out,
        # so temporarily blank the field and restore it afterwards.
        stored_crc = header.Crc32
        header.Crc32 = 0
        computed_crc = crc32(header.dumps())
        header.Crc32 = stored_crc
        self._valid_checksum = computed_crc == stored_crc

    def is_valid(self) -> bool:
        """Return whether the stored CRC32 matched the header when it was read."""
        return self._valid_checksum

    @property
    def size(self) -> int:
        """The ``Size`` field of the header."""
        return self.header.Size

    @property
    def chunk_size(self) -> int:
        """The ``ChunkSize`` field of the header."""
        return self.header.ChunkSize

    @property
    def conv_log_size(self) -> int:
        """The ``ConvLogSize`` field of the header."""
        return self.header.ConvLogSize

    @cached_property
    def bitmaps(self) -> list[EowBitmap]:
        """One :class:`EowBitmap` for every offset listed in ``BitmapOffsets`` (parsed once, then cached)."""
        return [EowBitmap(self.fh, bitmap_offset) for bitmap_offset in self.header.BitmapOffsets]
latest_record is None: + latest_record = record + continue + + if record.sequence_number > latest_record.sequence_number: + latest_record = record + + return latest_record + + @cached_property + def records(self) -> list[EowBitmapRecord]: + result = [] + + buf = BytesIO(self._record_data) + base = self.header.RecordOffset[0] + for offset in self.header.RecordOffset: + buf.seek(offset - base) + result.append(EowBitmapRecord(buf)) + + return result + + +class EowBitmapRecord: + """Bitlocker EOW Bitmap Record. + + The record holding the actual bitmap. Each bit indicates a chunk with the size defined by + the EOW information. The Lsn is the sequence number of that record. + + The flags are currently unknown, but seem related to an encrypted/decrypted state. + """ + + def __init__(self, fh: BinaryIO): + self.fh = fh + self.header = c_bde.FVE_EOW_BITMAP_RECORD(fh) + if self.header.HeaderSignature != EOW_BR_SIGNATURE: + raise ValueError("Invalid EOW bitmap record signature") + + _crc32 = self.header.Crc32 + self.header.Crc32 = 0 + self._data = memoryview(fh.read(self.header.Size - self.header.HeaderSize)) + self._valid_checksum = crc32(self.header.dumps() + self._data) == _crc32 + self.header.Crc32 = _crc32 + + def __repr__(self) -> str: + return f"" + + def is_valid(self) -> bool: + return self._valid_checksum + + def runs(self, chunk: int, length: int) -> Iterator[tuple[int, int]]: + yield from _iter_bitmap(self.bitmap, self.bitmap_size, chunk, length) + + @property + def size(self) -> int: + return self.header.Size + + @property + def bitmap(self) -> bytes: + return self._data + + @property + def bitmap_size(self) -> int: + return self.header.BitmapSize + + @property + def sequence_number(self) -> int: + return self.header.SequenceNumber + + +def _iter_bitmap(bitmap: bytes, size: int, start: int, count: int) -> Iterator[tuple[int, int]]: + byte_idx, bit_idx = divmod(start, 8) + remaining_bits = size - start + current_bit = (bitmap[byte_idx] & (1 << bit_idx)) >> bit_idx 
+ current_count = 0 + + for byte in bitmap[byte_idx:]: + if count == 0 or remaining_bits == 0: + break + + if (current_bit, byte) == (0, 0) or (current_bit, byte) == (1, 0xFF): + max_count = min(count, remaining_bits, 8 - bit_idx) + current_count += max_count + remaining_bits -= max_count + count -= max_count + bit_idx = 0 + else: + for cur_bit_idx in range(bit_idx, min(count, remaining_bits, 8)): + bit_set = (byte & (1 << cur_bit_idx)) >> cur_bit_idx + + if bit_set == current_bit: + current_count += 1 + else: + yield (current_bit, current_count) + current_bit = bit_set + current_count = 1 + + remaining_bits -= 1 + count -= 1 + + if current_count: + yield (current_bit, current_count) diff --git a/dissect/fve/bde/information.py b/dissect/fve/bde/information.py new file mode 100644 index 0000000..03b023d --- /dev/null +++ b/dissect/fve/bde/information.py @@ -0,0 +1,730 @@ +from __future__ import annotations + +import datetime +import hashlib +from binascii import crc32 +from functools import cached_property +from io import BytesIO +from typing import BinaryIO, Iterator +from uuid import UUID + +from Crypto.Cipher import AES +from dissect.util import ts + +from dissect.fve.bde.c_bde import ( + BITLOCKER_SIGNATURE, + FVE_DATUM_ROLE, + FVE_DATUM_TYPE, + FVE_KEY_FLAG, + FVE_KEY_PROTECTOR, + FVE_KEY_TYPE, + FVE_STATE, + c_bde, +) +from dissect.fve.exceptions import InvalidHeaderError + + +class Information: + """Bitlocker Information. + + Parses Bitlocker Information and Dataset at a specified offset. + + Bitlocker Information consists of a small header, a Dataset and at least a CRC32 validation check. + The CRC32 Validation information is positioned after the Information buffer. + + The ``StateOffset`` field contains the offset to a conversion log, but it also doubles as a "watermark", + containing the offset up until where the Bitlocker encryption is active. 
+ The conversion log as pointed to by the ``StateOffset`` seems to only be used by older Bitlocker + implementations. It looks like more modern implementations (Windows 10+) seem to prefer EOW. + """ + + def __init__(self, fh: BinaryIO, offset: int): + self.offset = offset + fh.seek(offset) + + self.header = c_bde.FVE_INFORMATION(fh) + if self.header.Signature != BITLOCKER_SIGNATURE: + raise InvalidHeaderError("Invalid BDE information signature") + + # Datums are lazily parsed so we can safely parse the dataset header + self.dataset = Dataset(fh) + + fh.seek(offset) + self._buf = fh.read(self.size) + + self.validation = Validation(fh) + self._valid_checksum = crc32(self._buf) == self.validation.crc32 + + def __repr__(self) -> str: + return ( + f"<{self.__class__.__name__} " + f"offset=0x{self.offset:x} current_state={self.current_state} next_state={self.next_state}>" + ) + + def is_valid(self) -> bool: + """Validate the integrity of this Information block.""" + # TODO add sha256 check + return self._valid_checksum + + def check_integrity(self, key: KeyDatum | bytes) -> bool: + """Check the integrity of this Information block.""" + if self.validation.integrity_check: + datum = self.validation.integrity_check.unbox(key) + return hashlib.sha256(self._buf).digest() == datum.data + return self.is_valid() + + @property + def size(self) -> int: + stored_size = self.header.HeaderSize + if self.version >= 2: + stored_size <<= 4 + return stored_size + + @property + def version(self) -> int: + return self.header.Version + + @property + def current_state(self) -> FVE_STATE: + return FVE_STATE(self.header.CurrentState) + + @property + def next_state(self) -> FVE_STATE: + return FVE_STATE(self.header.NextState) + + @property + def state_offset(self) -> int: + return self.header.StateOffset + + @property + def state_size(self) -> int: + return self.header.StateSize + + @property + def virtualized_sectors(self) -> int: + return self.header.VirtualizedSectors + + @property + def 
class Dataset:
    """Bitlocker Information Dataset.

    A dataset is a small header followed by one or more "datum" entries. Every datum carries a
    role and a type, and the ``find_*`` helpers look entries up by walking the parsed datum list
    until a match is found.
    """

    def __init__(self, fh: BinaryIO):
        start = fh.tell()
        self.header = c_bde.FVE_DATASET(fh)
        self.identifier = UUID(bytes_le=self.header.Identification)

        # Rewind and keep the raw dataset (header included) so the datums can be parsed lazily
        fh.seek(start)
        self._buf = fh.read(self.header.Size)

    def __iter__(self) -> Iterator[Datum]:
        yield from self.data

    @cached_property
    def data(self) -> list[Datum]:
        """Return the list of Datum in this Dataset."""
        header = self.header
        stream = BytesIO(memoryview(self._buf)[header.StartOffset :])
        bytes_left = header.EndOffset - header.StartOffset

        datums = []
        while bytes_left >= Datum.MINIMAL_SIZE:
            entry = Datum.from_fh(stream)
            datums.append(entry)
            bytes_left -= entry.size

        return datums

    @property
    def fvek_type(self) -> FVE_KEY_TYPE:
        """The ``FvekType`` header field as a ``FVE_KEY_TYPE``."""
        return FVE_KEY_TYPE(self.header.FvekType)

    def find_datum(self, role: FVE_DATUM_ROLE, type_: FVE_DATUM_TYPE) -> Iterator[Datum]:
        """Find one or more datum specified by role and type.

        Passing ``None`` for ``role`` or ``type_`` disables filtering on that attribute.
        """
        for datum in self:
            role_matches = datum.role == role or role is None
            type_matches = datum.type == type_ or type_ is None
            if role_matches and type_matches:
                yield datum

    def find_description(self) -> str | None:
        """Find the description datum and return its text, or ``None`` if there is none."""
        datum = next(self.find_datum(FVE_DATUM_ROLE.DESCRIPTION, FVE_DATUM_TYPE.UNICODE), None)
        return None if datum is None else datum.text

    def find_virtualization_info(self) -> VirtualizationInfoDatum | None:
        """Find the virtualization info datum."""
        return next(self.find_datum(FVE_DATUM_ROLE.VIRTUALIZATION_INFO, FVE_DATUM_TYPE.VIRTUALIZATION_INFO), None)

    def find_startup_key(self) -> ExternalInfoDatum | None:
        """Find the external startup/recovery key information."""
        return next(self.find_datum(FVE_DATUM_ROLE.STARTUP_KEY, FVE_DATUM_TYPE.EXTERNAL_INFO), None)

    def find_fvek(self) -> AesCcmEncryptedDatum | None:
        """Find the encrypted FVEK."""
        return next(
            self.find_datum(FVE_DATUM_ROLE.FULL_VOLUME_ENCRYPTION_KEY, FVE_DATUM_TYPE.AES_CCM_ENCRYPTED_KEY),
            None,
        )

    def find_vmk(
        self,
        protector_type: FVE_KEY_PROTECTOR | None = None,
        min_priority: int = 0x0000,
        max_priority: int = 0x7FFF,
        mask: int = 0xFF00,
    ) -> Iterator[VmkInfoDatum]:
        """Find one or more VMK datum specified by key priority.

        A VMK is yielded when its priority value lies within ``[min_priority, max_priority]`` and,
        if ``protector_type`` is given, its priority masked with ``mask`` equals ``protector_type``.
        """
        candidates = self.find_datum(FVE_DATUM_ROLE.VOLUME_MASTER_KEY_INFO, FVE_DATUM_TYPE.VOLUME_MASTER_KEY_INFO)
        for vmk in candidates:
            if not (min_priority <= vmk.priority.value <= max_priority):
                continue

            if protector_type is not None and vmk.priority & mask != protector_type:
                continue

            yield vmk

    def find_clear_vmk(self) -> VmkInfoDatum | None:
        """Find the clear key VMK (for paused volumes)."""
        return next(self.find_vmk(FVE_KEY_PROTECTOR.CLEAR, max_priority=0xFF, mask=0x0000), None)

    def find_external_vmk(self) -> Iterator[VmkInfoDatum]:
        """Find the external VMK."""
        for vmk in self.find_vmk(FVE_KEY_PROTECTOR.EXTERNAL):
            yield vmk

    def find_recovery_vmk(self) -> Iterator[VmkInfoDatum]:
        """Find the recovery VMK."""
        for vmk in self.find_vmk(FVE_KEY_PROTECTOR.RECOVERY_PASSWORD):
            yield vmk

    def find_passphrase_vmk(self) -> Iterator[VmkInfoDatum]:
        """Find the passphrase VMK."""
        for vmk in self.find_vmk(FVE_KEY_PROTECTOR.PASSPHRASE):
            yield vmk
+ """ + + __struct__ = None + __complex__ = False + + MINIMAL_SIZE = len(c_bde.FVE_DATUM) + + def __init__(self, fh: BinaryIO): + self.header = c_bde.FVE_DATUM(fh) + self._data = fh.read(self.data_size) + + buf = BytesIO(self._data) + self._datum = self.__struct__(buf) if self.__struct__ else None + self.data_segment = buf.read() if not self.__complex__ else None + + def __repr__(self) -> str: + return f"<{self.__class__.__name__} role={self.role.name} type={self.type.name}>" + + @property + def role(self) -> FVE_DATUM_ROLE: + return FVE_DATUM_ROLE(self.header.Role) + + @property + def type(self) -> FVE_DATUM_TYPE: + return FVE_DATUM_TYPE(self.header.Type) + + @property + def size(self) -> int: + return self.header.Size + + @property + def data_size(self) -> int: + return self.size - self.MINIMAL_SIZE + + @cached_property + def properties(self) -> list[Datum]: + """Return a list of property datum if this datum is complex.""" + result = [] + + if self.__complex__: + remaining = self.data_size - len(self.__struct__) + buf = BytesIO(memoryview(self._data)[len(self.__struct__) :]) + while remaining >= self.MINIMAL_SIZE: + nested = Datum.from_fh(buf) + result.append(nested) + + remaining -= nested.size + + return result + + @classmethod + def from_fh(cls, fh: BinaryIO) -> Datum: + """Read a datum from a file handle.""" + offset = fh.tell() + header = c_bde.FVE_DATUM(fh) + fh.seek(offset) + + datum_type_map = { + FVE_DATUM_TYPE.KEY: KeyDatum, + FVE_DATUM_TYPE.UNICODE: UnicodeDatum, + FVE_DATUM_TYPE.STRETCH_KEY: StretchKeyDatum, + FVE_DATUM_TYPE.USE_KEY: UseKeyDatum, + FVE_DATUM_TYPE.AES_CCM_ENCRYPTED_KEY: AesCcmEncryptedDatum, + FVE_DATUM_TYPE.TPM_ENCRYPTED_BLOB: TpmEncryptedBlobDatum, + FVE_DATUM_TYPE.VALIDATION_INFO: ValidationInfoDatum, + FVE_DATUM_TYPE.VOLUME_MASTER_KEY_INFO: VmkInfoDatum, + FVE_DATUM_TYPE.EXTERNAL_INFO: ExternalInfoDatum, + FVE_DATUM_TYPE.UPDATE: UpdateDatum, + FVE_DATUM_TYPE.ERROR_LOG: ErrorLogDatum, + FVE_DATUM_TYPE.ASYMMETRIC_ENCRYPTED_KEY: 
class KeyDatum(Datum):
    """Datum parsed with the ``FVE_DATUM_KEY`` structure; the bytes after that structure are the key data."""

    __struct__ = c_bde.FVE_DATUM_KEY

    def __repr__(self) -> str:
        cls_name = self.__class__.__name__
        return f"<{cls_name} role={self.role.name} key_type={self.key_type} key_flags={self.key_flags}>"

    @property
    def key_type(self) -> FVE_KEY_TYPE:
        """The ``KeyType`` field as a ``FVE_KEY_TYPE``."""
        raw_type = self._datum.KeyType
        return FVE_KEY_TYPE(raw_type)

    @property
    def key_flags(self) -> FVE_KEY_FLAG:
        """The ``KeyFlags`` field as a ``FVE_KEY_FLAG``."""
        raw_flags = self._datum.KeyFlags
        return FVE_KEY_FLAG(raw_flags)

    @property
    def data(self) -> bytes:
        """The datum payload that follows the ``FVE_DATUM_KEY`` structure."""
        struct_size = len(KeyDatum.__struct__)
        return self._data[struct_size:]
class VmkInfoDatum(Datum):
    """Volume Master Key (VMK) info datum.

    A complex datum parsed with ``FVE_DATUM_VMK_INFO``. Its nested property datums hold the
    protector specific material (encrypted keys, stretch keys, labels, ...). The accessor methods
    return the first matching property, or ``None`` when no such property exists.
    """

    __struct__ = c_bde.FVE_DATUM_VMK_INFO
    __complex__ = True

    def __repr__(self) -> str:
        return (
            f"<{self.__class__.__name__} role={self.role.name} identifier={self.identifier} "
            f"datetime={self.datetime} priority={self.priority}>"
        )

    @property
    def identifier(self) -> UUID:
        """The ``Identifier`` field as a UUID."""
        return UUID(bytes_le=self._datum.Identifier)

    @property
    def datetime(self) -> datetime.datetime:
        """The ``DateTime`` field converted from a Windows timestamp."""
        return ts.wintimestamp(self._datum.DateTime)

    @property
    def priority(self) -> FVE_KEY_PROTECTOR:
        """The ``Priority`` field as a ``FVE_KEY_PROTECTOR``."""
        return FVE_KEY_PROTECTOR(self._datum.Priority)

    def _first_property(self, type_: FVE_DATUM_TYPE) -> Datum | None:
        """Return the first nested property of the given type, or ``None``."""
        return next(self.find_property(type_), None)

    def decrypt(self, key: KeyDatum | bytes) -> KeyDatum:
        """Unbox the AES-CCM encrypted key property of this VMK with ``key``.

        Note that this fails with an ``AttributeError`` if the VMK has no AES-CCM encrypted key
        property, since :meth:`aes_ccm_encrypted_key` returns ``None`` in that case.
        """
        encrypted_key = self.aes_ccm_encrypted_key()
        return encrypted_key.unbox(key)

    def label(self) -> str | None:
        """Return the text of the first unicode property, if any."""
        datum = self._first_property(FVE_DATUM_TYPE.UNICODE)
        return None if datum is None else datum.text

    def asymmetric_encrypted_key(self) -> AsymmetricEncryptedDatum | None:
        """Return the first asymmetric encrypted key property, if any."""
        return self._first_property(FVE_DATUM_TYPE.ASYMMETRIC_ENCRYPTED_KEY)

    def exported_key(self) -> ExportedPublicKeyDatum | None:
        """Return the first exported key property, if any."""
        return self._first_property(FVE_DATUM_TYPE.EXPORTED_KEY)

    def tpm_encrypted_blob(self) -> TpmEncryptedBlobDatum | None:
        """Return the first TPM encrypted blob property, if any."""
        return self._first_property(FVE_DATUM_TYPE.TPM_ENCRYPTED_BLOB)

    def aes_ccm_encrypted_key(self) -> AesCcmEncryptedDatum | None:
        """Return the first AES-CCM encrypted key property, if any."""
        return self._first_property(FVE_DATUM_TYPE.AES_CCM_ENCRYPTED_KEY)

    def public_key_info(self) -> PublicKeyInfoDatum | None:
        """Return the first public key info property, if any."""
        return self._first_property(FVE_DATUM_TYPE.PUBLIC_KEY_INFO)

    def use_keys(self) -> list[UseKeyDatum]:
        """Return all use key properties."""
        return list(self.find_property(FVE_DATUM_TYPE.USE_KEY))

    def use_key(self, key_type: FVE_KEY_TYPE | None) -> UseKeyDatum | None:
        """Return the first use key with the given key type (any key type if ``None``)."""
        for datum in self.use_keys():
            if key_type is None or datum.key_type == key_type:
                return datum
        return None

    def stretch_keys(self) -> list[StretchKeyDatum]:
        """Return all stretch key properties."""
        return list(self.find_property(FVE_DATUM_TYPE.STRETCH_KEY))

    def stretch_key(self, key_type: FVE_KEY_TYPE | None) -> StretchKeyDatum | None:
        """Return the first stretch key with the given key type (any key type if ``None``)."""
        for datum in self.stretch_keys():
            if key_type is None or datum.key_type == key_type:
                return datum
        return None

    def clear_key(self) -> KeyDatum | None:
        """Return the first plain key property, if any."""
        return self._first_property(FVE_DATUM_TYPE.KEY)

    def is_enhanced_pin(self) -> bool:
        """Return whether any ``AES_CCM_256_2`` stretch key has the ``ENHANCED_PIN`` flag set."""
        return any(
            stretch_key.key_type == FVE_KEY_TYPE.AES_CCM_256_2 and stretch_key.key_flags & FVE_KEY_FLAG.ENHANCED_PIN
            for stretch_key in self.stretch_keys()
        )

    def is_enhanced_crypto(self) -> bool:
        """Return whether any ``AES_CCM_256_2`` stretch key has the ``ENHANCED_CRYPTO`` flag set."""
        return any(
            stretch_key.key_type == FVE_KEY_TYPE.AES_CCM_256_2
            and stretch_key.key_flags & FVE_KEY_FLAG.ENHANCED_CRYPTO
            for stretch_key in self.stretch_keys()
        )

    def uses_pbkdf2(self) -> bool:
        """Return whether any stretch key of a stretch/AES-CCM key type has the ``PBKDF2`` flag set."""
        pbkdf2_key_types = (FVE_KEY_TYPE.STRETCH_KEY, FVE_KEY_TYPE.STRETCH_KEY_1, FVE_KEY_TYPE.AES_CCM_256_2)
        # Compare the *key* type: the datum type of a stretch key property is always STRETCH_KEY
        # in the FVE_DATUM_TYPE namespace and can never be one of these FVE_KEY_TYPE values.
        return any(
            stretch_key.key_type in pbkdf2_key_types and stretch_key.key_flags & FVE_KEY_FLAG.PBKDF2
            for stretch_key in self.stretch_keys()
        )
def stretch(password: bytes, salt: bytes, rounds: int = 0x100000) -> bytes:
    """Stretch a password with a specified salt.

    Bitlocker uses this as the key derivation algorithm.

    Every round hashes an 88 byte buffer with SHA256 and writes the digest back into the start
    of that same buffer. The buffer layout is::

        chained hash (32) | user hash (32) | salt (16) | little-endian uint64 round counter (8)

    Args:
        password: The 32 byte (SHA256 sized) user hash.
        salt: The 16 byte salt.
        rounds: Number of hashing rounds to perform.

    Returns:
        The 32 byte chained hash after the final round.

    Raises:
        ValueError: If ``password`` is not 32 bytes or ``salt`` is not 16 bytes long.
    """
    digest_len, salt_len, counter_len = 32, 16, 8

    if len(password) != digest_len:
        raise ValueError("Invalid password length")

    if len(salt) != salt_len:
        raise ValueError("Invalid salt length")

    buffer = memoryview(bytearray(2 * digest_len + salt_len + counter_len))
    chained_hash = buffer[:digest_len]
    counter_field = buffer[-counter_len:]

    # The user hash and salt are fixed, only the chained hash and counter change per round
    buffer[digest_len : 2 * digest_len] = password
    buffer[2 * digest_len : 2 * digest_len + salt_len] = salt

    for round_number in range(rounds):
        counter_field[:] = round_number.to_bytes(counter_len, "little")
        chained_hash[:] = hashlib.sha256(buffer).digest()

    return bytes(chained_hash)
def parse_cipher_spec(
    spec: str, key_size: Optional[int] = None, key_size_hint: Optional[int] = None
) -> tuple[str, str, int, str, Optional[str]]:
    """Parse a cipher specification into a tuple of (cipher, mode, key size, iv mode, iv options).

    Inspired by and accepts LUKS/dm-crypt-like cipher specifications in the form of::

        cipher-mode-keysize-iv:ivopts

    The ``mode``, ``keysize``, ``iv`` and ``ivopts`` are optional and will default to ``cbc``,
    the ``key_size`` argument (or else ``key_size_hint``) and ``plain`` respectively.

    For ``xts`` mode, a resulting key size equal to ``key_size_hint`` is halved.

    Args:
        spec: The cipher specification to parse.
        key_size: Optional key size that overrides the specification key size.
        key_size_hint: Optional key size hint for the amount of bits that the key actually has.

    Raises:
        ValueError: If the specification has an unexpected format or no key size could be determined.
    """
    cipher_name, _, rest = spec.partition("-")
    cipher_mode, _, rest = rest.partition("-")
    size_field, _, rest = rest.partition("-")

    if size_field.isdigit():
        effective_key_size = int(size_field)
    elif rest:
        # A non-numeric third field must be the (final) IV part, nothing may follow it
        raise ValueError("Unexpected cipher spec format")
    else:
        effective_key_size = key_size_hint
        rest = size_field

    if key_size:
        effective_key_size = key_size

    if not effective_key_size:
        raise ValueError("Missing key size")

    iv_name, _, iv_options = rest.partition(":")
    if not iv_name:
        iv_name, iv_options = "plain", None

    if cipher_mode == "xts" and key_size_hint == effective_key_size:
        effective_key_size //= 2

    return cipher_name, cipher_mode or "cbc", effective_key_size, iv_name, iv_options or None
class EcbMode(Cipher):
    """AES-ECB sector cipher for FVE crypto.

    Every sector is passed through a single ECB cipher object; the IV handed to
    :meth:`_crypt_sector` is not used by this mode.
    """

    def __init__(
        self,
        factory: Any,
        key: bytes,
        key_size: int,
        iv_mode: type[IV],
        iv_options: str,
        sector_size: int = 512,
        iv_sector_size: int = 512,
    ):
        if key_size != 128 and key_size != 256:
            raise ValueError(f"Incorrect key size for ECB mode ({key_size} bits)")

        super().__init__(key, key_size, factory.block_size, iv_mode, iv_options, sector_size, iv_sector_size)

        # Only the first ``key_size`` bits of the supplied key material are used
        aes_key = key[: self.key_size_bytes]
        self._cipher = AES.new(aes_key, AES.MODE_ECB)

    def _crypt_sector(self, mode: int, buffer: bytearray, iv: bytes) -> None:
        """Encrypt or decrypt ``buffer`` in place, depending on ``mode``."""
        if mode == ENCRYPT:
            operation = self._cipher.encrypt
        else:
            operation = self._cipher.decrypt

        operation(buffer, output=buffer)
class XtsMode(Cipher):
    """XTS mode implementation for FVE crypto.

    The supplied key must hold two equally sized AES keys: 32 bytes for a ``key_size`` of 128 bits,
    or 64 bytes for a ``key_size`` of 256 bits. The first half is used for the data cipher and the
    second half for the tweak cipher.

    Note that the ``block_size`` passed to the base class is ``key_size // 8`` (the size of one key
    half in bytes), not the 16 byte AES block size. The data itself is always processed in 16 byte blocks.
    """

    def __init__(
        self,
        factory: Any,
        key: bytes,
        key_size: int,
        iv_mode: type[IV],
        iv_options: str,
        sector_size: int = 512,
        iv_sector_size: int = 512,
    ):
        if (len(key), key_size) not in ((32, 128), (64, 256)):
            raise ValueError(f"Incorrect key size for XTS mode ({len(key)} bytes, {key_size} bits)")
        super().__init__(key, key_size, key_size // 8, iv_mode, iv_options, sector_size, iv_sector_size)

        # ``self.block_size`` is the size of one key half here, so these two slices split the key in two
        self._aes_cipher = factory.new(key[: self.block_size], factory.MODE_ECB)
        self._tweak_cipher = factory.new(key[self.block_size :], factory.MODE_ECB)

    def _crypt_sector(self, mode: int, buffer: bytearray, iv: bytes) -> None:
        """Encrypt or decrypt one sector in place.

        Args:
            mode: ``ENCRYPT`` to encrypt, any other value decrypts.
            buffer: The sector data, modified in place.
            iv: The IV for this sector, which is encrypted to obtain the initial tweak.
        """
        # The initial tweak is the IV encrypted with the tweak key
        tweak = self._tweak_cipher.encrypt(iv)
        # Keep the tweak as a little-endian integer as well, so it can be updated with integer arithmetic
        _t = int.from_bytes(tweak, "little")

        crypt = self._aes_cipher.encrypt if mode == ENCRYPT else self._aes_cipher.decrypt

        # NOTE(review): the in-place updates below rely on ``buffer`` being sliceable without copying
        # (e.g. a memoryview, which is what ``Cipher._crypt`` passes) - confirm for other callers
        view = buffer
        block_size = self.block_size

        for _ in range(self.sector_size // 16):
            # XOR with the tweak, run the block cipher, XOR with the tweak again
            block_slice = view[:16]
            xor(block_slice, tweak[:16], output=block_slice)
            crypt(block_slice, output=block_slice)
            xor(tweak[:16], block_slice, output=block_slice)

            # Advance the tweak for the next block: shift left by one bit and, when bit 128 gets set,
            # clear it again and XOR the constant 0x87 into the low byte
            _t <<= 1
            if _t & (1 << 128):
                _t ^= (1 << 128) | (0x87)
            # Only the first 16 bytes of the serialized tweak are used by the XORs above
            tweak = (_t & ((1 << (block_size * 8)) - 1)).to_bytes(block_size, "little")

            view = view[16:]
class ESSIV(IV):
    """Encrypted sector|salt IV.

    The sector number is encrypted with the bulk cipher using a salt as key. The salt should be
    derived from the bulk cipher's key via hashing.

    Args:
        cipher: The cipher this IV generator belongs to.
        key: The key to derive the salt from.
        iv_options: Name of the hash algorithm used to derive the salt. Defaults to ``sha256``,
            also when ``None`` is passed explicitly.
    """

    def __init__(self, cipher: Cipher, key: bytes, iv_options: str | None = "sha256"):
        super().__init__(cipher, key)
        # ``Cipher.__init__`` always passes ``iv_options`` positionally, and it is ``None`` when the
        # cipher specification has no IV options, so the signature default alone is never applied
        hash_name = iv_options or "sha256"
        # Only support one cipher mode for now
        self._cipher = AES.new(hashlib.new(hash_name, key).digest(), AES.MODE_ECB)

    def generate(self, mode: int, iv: bytearray, data: bytearray, sector: int = 0) -> None:
        """Write the encrypted little-endian sector number into ``iv``."""
        self._cipher.encrypt(sector.to_bytes(self.cipher.block_size, "little"), output=iv)
def _elephant(self, mode: int, data: bytearray, sector: int) -> None:
    """Apply the sector key and the Elephant diffusers to ``data`` in place.

    For ``DECRYPT`` the order is diffuser B, diffuser A, sector key XOR. For ``ENCRYPT`` the
    order is reversed: sector key XOR, diffuser A, diffuser B.

    Args:
        mode: Either ``ENCRYPT`` or ``DECRYPT``.
        data: The sector data, modified in place.
        sector: The sector number, used to derive the sector key.
    """
    sector_size = self.cipher.sector_size
    sector_key_view = self._sector_key_view

    # Generate the IV and sector key
    # The 32 byte sector key is built from two ECB encryptions of the sector's byte offset:
    # once as-is for the first half, and once with the last byte set to 0x80 for the second half
    iv = bytearray((sector * sector_size).to_bytes(16, "little"))
    self._ecb_cipher.encrypt(iv, output=sector_key_view[:16])
    iv[15] = 0x80
    self._ecb_cipher.encrypt(iv, output=sector_key_view[16:])

    if mode == DECRYPT:
        # Apply diffuser B
        elephant.diffuser_b_decrypt(data, sector_size)

        # Apply diffuser A
        elephant.diffuser_a_decrypt(data, sector_size)

    # Apply sector key
    # The 32 byte sector key is repeated to cover the sector; this step runs for both modes
    xor(data, self._sector_key * (sector_size // 32), output=data)

    if mode == ENCRYPT:
        # Apply diffuser A
        elephant.diffuser_a_encrypt(data, sector_size)

        # Apply diffuser B
        elephant.diffuser_b_encrypt(data, sector_size)
from __future__ import annotations

ENCRYPT = 0
DECRYPT = 1


class Cipher:
    """Base class for sector based ciphers.

    Subclasses implement :meth:`_crypt_sector`; this class takes care of splitting the input into
    sectors and of generating an IV for every sector using the configured IV mode.

    Args:
        key: The key material.
        key_size: The key size in bits.
        block_size: The block size in bytes. Input must be aligned to this size.
        iv_mode: The :class:`IV` class to instantiate for IV generation.
        iv_options: Optional options passed on to the IV mode.
        sector_size: The amount of bytes that is processed with a single IV.
        iv_sector_size: The sector size that IV sector numbers are expressed in.
    """

    def __init__(
        self,
        key: bytes,
        key_size: int,
        block_size: int,
        iv_mode: type[IV],
        iv_options: str,
        sector_size: int = 512,
        iv_sector_size: int = 512,
    ):
        self.key = key
        self.key_size = key_size
        self.key_size_bytes = key_size // 8
        self.block_size = block_size
        self.sector_size = sector_size
        self.iv_sector_size = iv_sector_size

        # Must be created last, the IV mode reads attributes (such as the block size) from this object
        self.iv_mode = iv_mode(self, key, iv_options)

    def _crypt_sector(self, mode: int, buffer: bytearray, iv: bytes) -> None:
        """Encrypt or decrypt a single sector in place. Must be implemented by subclasses."""
        raise NotImplementedError()

    def _crypt(self, mode: int, ciphertext: bytes, sector: int = 0, output: bytearray | None = None) -> bytes | None:
        """Encrypt or decrypt ``ciphertext``, starting at the given sector number.

        Args:
            mode: Either ``ENCRYPT`` or ``DECRYPT``.
            ciphertext: The input data (despite the name, this is the plaintext when encrypting).
            sector: The sector number of the first sector in the input.
            output: Optional buffer to write the result to. May be the same object as the input.

        Returns:
            The result as ``bytes``, or ``None`` if an ``output`` buffer was given.

        Raises:
            ValueError: If the input length is not a multiple of the block size.
        """
        length = len(ciphertext)

        if length % self.block_size:
            raise ValueError("Ciphertext is not aligned to block size")

        # Compare against None explicitly: an empty (falsy) output buffer is still the caller's buffer,
        # and the result must not be written to a temporary that is thrown away
        out = output if output is not None else bytearray(length)
        out[:] = ciphertext
        out_view = memoryview(out)

        iv = bytearray(self.iv_mode.iv_size)
        iv_view = memoryview(iv)

        iv_mode = self.iv_mode
        sector_size = self.sector_size
        sector_increment = sector_size // self.iv_sector_size

        # NOTE: only whole sectors are processed, any trailing partial sector is copied through unchanged
        for _ in range(length // sector_size):
            out_slice = out_view[:sector_size]

            # Generate the IV
            iv_mode.generate(mode, iv_view, out_slice, sector)

            # Do the crypting
            self._crypt_sector(mode, out_slice, iv)

            # Perform possible post operations for the IV
            iv_mode.post(mode, out_slice, sector)

            out_view = out_view[sector_size:]
            sector += sector_increment

        return None if output is not None else bytes(out)

    def encrypt(self, ciphertext: bytes, sector: int = 0, output: bytearray | None = None) -> bytes | None:
        """Encrypt the given data. See :meth:`_crypt` for the arguments and return value."""
        return self._crypt(ENCRYPT, ciphertext, sector, output)

    def decrypt(self, ciphertext: bytes, sector: int = 0, output: bytearray | None = None) -> bytes | None:
        """Decrypt the given data. See :meth:`_crypt` for the arguments and return value."""
        return self._crypt(DECRYPT, ciphertext, sector, output)


class IV:
    """Base class for IV generators.

    :meth:`generate` is called before a sector is processed and :meth:`post` afterwards.
    Both do nothing by default.
    """

    def __init__(self, cipher: Cipher, key: bytes, iv_options: str | None = None):
        self.cipher = cipher
        self.iv_size = cipher.block_size

    def generate(self, mode: int, iv: bytearray, data: bytearray, sector: int = 0) -> None:
        pass

    def post(self, mode: int, data: bytearray, sector: int = 0) -> None:
        pass


class Plain(IV):
    """IV consisting of the low 32 bits of the sector number (little-endian), padded with zeroes."""

    def generate(self, mode: int, iv: bytearray, data: bytearray, sector: int = 0) -> None:
        iv[:] = b"\x00" * self.iv_size
        iv[:4] = (sector & 0xFFFFFFFF).to_bytes(4, "little")


class Plain64(IV):
    """IV consisting of the 64 bit sector number (little-endian), padded with zeroes."""

    def generate(self, mode: int, iv: bytearray, data: bytearray, sector: int = 0) -> None:
        iv[:] = b"\x00" * self.iv_size
        iv[:8] = sector.to_bytes(8, "little")


class Plain64BE(IV):
    """IV containing the 64 bit sector number in big-endian byte order, padded with zeroes."""

    def generate(self, mode: int, iv: bytearray, data: bytearray, sector: int = 0) -> None:
        iv[:] = b"\x00" * self.iv_size
        # NOTE(review): this stores the sector number in the first 8 bytes of the IV. dm-crypt's
        # plain64be appears to store it in the last 8 bytes of a 16 byte IV - confirm against
        # the dm-crypt documentation before relying on this mode
        iv[:8] = sector.to_bytes(8, "big")


class EBOIV(IV):
    """Placeholder for the encrypted byte-offset IV, which is provided by the crypto backend."""

    def __init__(self, cipher: Cipher, key: bytes, iv_options: str | None = None):
        # Implementation specific
        raise NotImplementedError()


class ESSIV(IV):
    """Placeholder for the encrypted sector|salt IV, which is provided by the crypto backend."""

    def __init__(self, cipher: Cipher, key: bytes, iv_options: str | None = None):
        # Implementation specific
        raise NotImplementedError()


class Elephant(IV):
    """Placeholder for the Elephant diffuser IV, which is provided by the crypto backend."""

    def __init__(self, cipher: Cipher, key: bytes, iv_options: str | None = None):
        # Implementation specific
        raise NotImplementedError()
# Amount of passes every diffuser makes over the sector
_CYCLES_A = 5
_CYCLES_B = 3

# Left-rotation amounts, selected by ``word index % 4``
_ROTATIONS_A = (9, 0, 13, 0)
_ROTATIONS_B = (0, 10, 0, 25)

_WORD_MASK = 0xFFFFFFFF


def diffuser_a_decrypt(buffer: memoryview, sector_size: int) -> None:
    """Run the decrypt direction of diffuser A over ``buffer`` in place.

    The buffer is processed as native-order 32 bit unsigned words, front to back. Words before
    the start are reached through negative indexing, i.e. they wrap around to the end of the buffer.
    """
    words = buffer.cast("I")
    word_count = sector_size >> 2

    for _ in range(_CYCLES_A):
        for idx in range(word_count):
            mixed = words[idx - 2] ^ _rotate_left(words[idx - 5], _ROTATIONS_A[idx % 4])
            words[idx] = (words[idx] + mixed) & _WORD_MASK


def diffuser_a_encrypt(buffer: memoryview, sector_size: int) -> None:
    """Run the encrypt direction of diffuser A over ``buffer`` in place.

    This is the inverse of :func:`diffuser_a_decrypt`: the words are visited back to front and
    the mixed value is subtracted instead of added.
    """
    words = buffer.cast("I")
    word_count = sector_size >> 2

    for _ in range(_CYCLES_A):
        for idx in reversed(range(word_count)):
            mixed = words[idx - 2] ^ _rotate_left(words[idx - 5], _ROTATIONS_A[idx % 4])
            words[idx] = (words[idx] - mixed) & _WORD_MASK


def diffuser_b_decrypt(buffer: memoryview, sector_size: int) -> None:
    """Run the decrypt direction of diffuser B over ``buffer`` in place.

    The buffer is processed as native-order 32 bit unsigned words, front to back. Words past
    the end of the sector wrap around to the start.
    """
    words = buffer.cast("I")
    word_count = sector_size >> 2

    for _ in range(_CYCLES_B):
        for idx in range(word_count):
            near = words[(idx + 2) % word_count]
            far = words[(idx + 5) % word_count]
            words[idx] = (words[idx] + (near ^ _rotate_left(far, _ROTATIONS_B[idx % 4]))) & _WORD_MASK


def diffuser_b_encrypt(buffer: memoryview, sector_size: int) -> None:
    """Run the encrypt direction of diffuser B over ``buffer`` in place.

    This is the inverse of :func:`diffuser_b_decrypt`: the words are visited back to front and
    the mixed value is subtracted instead of added.
    """
    words = buffer.cast("I")
    word_count = sector_size >> 2

    for _ in range(_CYCLES_B):
        for idx in reversed(range(word_count)):
            near = words[(idx + 2) % word_count]
            far = words[(idx + 5) % word_count]
            words[idx] = (words[idx] - (near ^ _rotate_left(far, _ROTATIONS_B[idx % 4]))) & _WORD_MASK


def _rotate_left(num: int, count: int) -> int:
    """Rotate the 32 bit value ``num`` left by ``count`` bits."""
    shifted_out = num >> (32 - count)
    return ((num << count) | shifted_out) & ((0b1 << 32) - 1)
# Retained for backward compatibility; ``diffuse`` asks hashlib for the digest size instead
DIGEST_SIZE = {
    "sha1": 20,
    "sha256": 32,
}


def _hash(buf: bytes, hash: str, iv: int) -> bytes:
    """Hash ``buf`` with the given algorithm, prefixed with ``iv`` as a 32 bit big-endian integer."""
    ctx = hashlib.new(hash)
    ctx.update((iv & 0xFFFFFFFF).to_bytes(4, "big"))
    ctx.update(buf)
    return ctx.digest()


def diffuse(buf: bytes, hash: str) -> bytes:
    """Diffuse a buffer by hashing it in digest sized chunks.

    Every chunk is replaced by the digest of its chunk index followed by the chunk itself.
    A final partial chunk is replaced by a truncated digest.

    Args:
        buf: The buffer to diffuse.
        hash: Name of the hash algorithm, as accepted by :func:`hashlib.new`.

    Returns:
        The diffused buffer, which has the same length as ``buf``.

    Raises:
        ValueError: If the hash algorithm is not supported or has no fixed digest size.
    """
    buf_size = len(buf)
    # Query hashlib rather than a fixed table, so any supported hash (e.g. sha512) works
    digest_size = hashlib.new(hash).digest_size
    if not digest_size:
        raise ValueError(f"Hash algorithm has no fixed digest size: {hash}")

    view = memoryview(buf)
    result = bytearray(buf_size)

    passes, remainder = divmod(buf_size, digest_size)

    for i in range(passes):
        result[i * digest_size : (i + 1) * digest_size] = _hash(view[i * digest_size : (i + 1) * digest_size], hash, i)

    if remainder:
        result[passes * digest_size : buf_size] = _hash(view[buf_size - remainder : buf_size], hash, passes)[:remainder]

    return bytes(result)


def merge(buf: bytes, block_size: int, block_num: int, hash: str) -> bytes:
    """Merge anti-forensic split key material back into a single block.

    All blocks except the last are XORed into an accumulator that is diffused after every block.
    The last block is XORed with the accumulator to produce the result.

    Args:
        buf: The split key material, at least ``block_size * block_num`` bytes.
        block_size: The size of a single block (and of the result) in bytes.
        block_num: The amount of blocks (stripes) in ``buf``.
        hash: Name of the hash algorithm used for diffusion.

    Returns:
        The merged block of ``block_size`` bytes.

    Raises:
        ValueError: If ``buf`` is too small to hold ``block_num`` blocks.
    """
    if block_size * block_num > len(buf):
        # A larger buffer is accepted, so report the comparison that actually failed
        raise ValueError(f"Input buffer too small ({block_size} * {block_num} > {len(buf)})")

    tmp = bytearray(block_size)
    view = memoryview(buf)

    for i in range(block_num - 1):
        block = view[i * block_size : (i + 1) * block_size]
        xor(block, tmp, output=tmp)
        tmp[:] = diffuse(tmp, hash)

    xor(view[(block_num - 1) * block_size : block_num * block_size], tmp, output=tmp)
    return bytes(tmp)
processing */ + uint32_t passwordIterations; + + char passwordSalt[LUKS_SALTSIZE]; + /* parameters used for AF store/load */ + uint32_t keyMaterialOffset; + uint32_t stripes; +}; + +struct luks_phdr { + char magic[LUKS_MAGIC_L]; + uint16_t version; + char cipherName[LUKS_CIPHERNAME_L]; + char cipherMode[LUKS_CIPHERMODE_L]; + char hashSpec[LUKS_HASHSPEC_L]; + uint32_t payloadOffset; + uint32_t keyBytes; + char mkDigest[LUKS_DIGESTSIZE]; + char mkDigestSalt[LUKS_SALTSIZE]; + uint32_t mkDigestIterations; + char uuid[UUID_STRING_L]; + + /* DISSECT: Keyblock structure currently separated out due to a cstruct limitation */ + luks_keyblock keyblock[LUKS_NUMKEYS]; + + /* Align it to 512 sector size */ + char _padding[432]; +}; + +/* =========== LUKS2 =========== */ + +#define LUKS2_MAGIC_L 6 +#define LUKS2_UUID_L 40 +#define LUKS2_LABEL_L 48 +#define LUKS2_SALT_L 64 +#define LUKS2_CHECKSUM_ALG_L 32 +#define LUKS2_CHECKSUM_L 64 + +#define LUKS2_KEYSLOTS_MAX 32 +#define LUKS2_TOKENS_MAX 32 +#define LUKS2_SEGMENT_MAX 32 + +/* + * LUKS2 header on-disk. + * + * Binary header is followed by JSON area. + * JSON area is followed by keyslot area and data area, + * these are described in JSON metadata. + * + * Note: uuid, csum_alg are intentionally on the same offset as LUKS1 + * (checksum alg replaces hash in LUKS1) + * + * String (char) should be zero terminated. + * Padding should be wiped. + * Checksum is calculated with csum zeroed (+ full JSON area). 
+ */ +struct luks2_hdr_disk { + char magic[LUKS2_MAGIC_L]; + uint16_t version; /* Version 2 */ + uint64_t hdr_size; /* in bytes, including JSON area */ + uint64_t seqid; /* increased on every update */ + char label[LUKS2_LABEL_L]; + char checksum_alg[LUKS2_CHECKSUM_ALG_L]; + uint8_t salt[LUKS2_SALT_L]; /* unique for every header/offset */ + char uuid[LUKS2_UUID_L]; + char subsystem[LUKS2_LABEL_L]; /* owner subsystem label */ + uint64_t hdr_offset; /* offset from device start in bytes */ + char _padding[184]; + uint8_t csum[LUKS2_CHECKSUM_L]; + char _padding4096[7*512]; + /* JSON area starts here */ +}; +""" + +c_luks = cstruct(endian=">").load(luks_def) + +LUKS_MAGIC = b"LUKS\xba\xbe" +LUKS2_MAGIC_1ST = b"LUKS\xba\xbe" +LUKS2_MAGIC_2ND = b"SKUL\xba\xbe" + +SECONDARY_HEADER_OFFSETS = [ + 0x00004000, + 0x00008000, + 0x00010000, + 0x00020000, + 0x00040000, + 0x00080000, + 0x00100000, + 0x00200000, + 0x00400000, +] diff --git a/dissect/fve/luks/luks.py b/dissect/fve/luks/luks.py new file mode 100644 index 0000000..1449634 --- /dev/null +++ b/dissect/fve/luks/luks.py @@ -0,0 +1,315 @@ +# References: +# - https://gitlab.com/cryptsetup/cryptsetup +# - https://gitlab.com/cryptsetup/cryptsetup/-/blob/main/docs/on-disk-format-luks2.pdf + +from __future__ import annotations + +import hashlib +import io +from pathlib import Path +from typing import BinaryIO +from uuid import UUID + +import argon2 +from dissect.util.stream import AlignedStream + +from dissect.fve.crypto import create_cipher +from dissect.fve.luks import af +from dissect.fve.luks.c_luks import ( + LUKS2_MAGIC_1ST, + LUKS2_MAGIC_2ND, + SECONDARY_HEADER_OFFSETS, + c_luks, +) +from dissect.fve.luks.metadata import Digest, Keyslot, Metadata, Segment + + +class LUKS: + """LUKS disk encryption.""" + + def __init__(self, fh: BinaryIO): + self.fh = fh + self.header = None + self.header1 = None + self.header2 = None + + first_offset, second_offset, version = find_luks_headers(fh) + if version is None: + raise 
def unlock(self, key: bytes, keyslot: int) -> None:
    """Unlock the volume with the volume encryption key.

    The key is verified against the given keyslot first. On success the key and the keyslot
    are remembered as the active ones.

    Args:
        key: The volume encryption key.
        keyslot: The keyslot to verify the key against.

    Raises:
        ValueError: If the key could not be verified for the keyslot.
    """
    key_is_valid = self._verify_volume_key(key, keyslot)
    if key_is_valid:
        self._active_volume_key = key
        self._active_keyslot_id = keyslot
        return

    raise ValueError(f"Invalid volume key for keyslot {keyslot}")
def _unlock_passphrase(self, passphrase: bytes, keyslot: int | None = None) -> None:
    """Try to unlock this volume with a passphrase, optionally restricted to a single keyslot.

    Every candidate keyslot is tried in turn: a key is derived from the passphrase, the volume key
    is decrypted with it, and the volume key is then verified. The first keyslot that verifies wins.

    Args:
        passphrase: The passphrase bytes.
        keyslot: Optional keyslot number. If ``None``, all keyslots are tried.

    Raises:
        ValueError: If no keyslot could be unlocked. Errors that occurred while deriving or
            decrypting keys are included in the message.
    """
    if keyslot is None:
        candidates = self.metadata.keyslots.items()
    else:
        candidates = [(keyslot, self.metadata.keyslots[keyslot])]

    failures = []
    for slot_id, slot in candidates:
        # Deriving the key and decrypting the volume key can both fail for reasons unrelated
        # to the passphrase; remember those errors and move on to the next keyslot
        try:
            slot_key = derive_passphrase_key(passphrase, slot)
            volume_key = self._unlock_volume_key(slot_key, slot_id)
        except Exception as exc:
            failures.append((slot_id, exc))
            continue

        try:
            self.unlock(volume_key, slot_id)
        except ValueError:
            # Wrong passphrase for this keyslot
            continue

        return

    if failures:
        msg = "\n".join(f"{idx}: {exc}" for idx, exc in failures)
        raise ValueError(f"No valid keyslot found, but there were errors for the following keyslots:\n{msg}")
    raise ValueError("No valid keyslot found")
def derive_passphrase_key(passphrase: bytes, keyslot: Keyslot) -> bytes:
    """Derive a key from a passphrase with the given keyslot KDF information.

    Args:
        passphrase: The passphrase to derive a key from.
        keyslot: The keyslot to use for the derivation.

    Returns:
        The derived key, ``keyslot.key_size`` bytes long.

    Raises:
        ValueError: If the keyslot has no KDF information.
        NotImplementedError: If the KDF algorithm of the keyslot is not supported.
    """
    kdf = keyslot.kdf
    if kdf is None:
        raise ValueError("Keyslot has no KDF information")

    if kdf.type == "pbkdf2":
        return hashlib.pbkdf2_hmac(kdf.hash, passphrase, kdf.salt, kdf.iterations, keyslot.key_size)

    # Match the supported variants exactly, so that any other "argon2*" type is reported as
    # unsupported instead of failing with a bare KeyError
    if kdf.type in ("argon2i", "argon2id"):
        argon2_type = argon2.low_level.Type.I if kdf.type == "argon2i" else argon2.low_level.Type.ID
        return argon2.low_level.hash_secret_raw(
            passphrase,
            kdf.salt,
            kdf.time,
            kdf.memory,
            kdf.cpus,
            keyslot.key_size,
            argon2_type,
        )

    raise NotImplementedError(f"Unsupported kdf algorithm: {kdf.type}")
def find_luks_headers(fh: BinaryIO) -> tuple[int | None, int | None, int | None]:
    """Locate the primary and secondary LUKS headers in a file-like object.

    The primary header is looked for at offset 0, the secondary header at every offset in
    ``SECONDARY_HEADER_OFFSETS``. The version is the 16 bit big-endian integer that follows
    the magic. If both headers are found, the version of the secondary header is returned.

    The stream position is restored before returning, also when reading fails.

    Args:
        fh: The file-like object to inspect.

    Returns:
        A tuple of ``(primary header offset, secondary header offset, version)``, where every
        value is ``None`` if it could not be found.
    """
    stored_position = fh.tell()

    first_header = None
    second_header = None
    version = None

    try:
        fh.seek(0)
        if fh.read(c_luks.LUKS2_MAGIC_L) == LUKS2_MAGIC_1ST:
            first_header = 0
            version = int.from_bytes(fh.read(2), "big")

        for offset in SECONDARY_HEADER_OFFSETS:
            fh.seek(offset)
            if fh.read(c_luks.LUKS2_MAGIC_L) == LUKS2_MAGIC_2ND:
                second_header = offset
                version = int.from_bytes(fh.read(2), "big")
                break
    finally:
        # This is a probe: never leave the caller's stream at a different position
        fh.seek(stored_position)

    return first_header, second_header, version
@dataclass
class JsonItem:
    """Base class for dataclasses that are populated from JSON, driven by their field type hints.

    The dictionary an item was created from is kept in ``_raw``.
    """

    _raw: Optional[dict] = field(init=False, repr=False)

    @classmethod
    def from_json(cls, obj: str) -> "JsonItem":  # Self, but that's >=3.11
        """Create an item from a JSON document."""
        parsed = json.loads(obj)
        return cls.from_dict(parsed)

    @classmethod
    def from_dict(cls, obj: dict[str, Union[str, int, dict, list]]) -> "JsonItem":  # Self, but that's >=3.11
        """Create an item from a dictionary, converting every value to the type of its field.

        Keys that are missing from the dictionary are treated as ``None``.
        """
        item_fields = fields(cls)

        kwargs = {
            fld.name: JsonItem._parse_type(fld.type, obj.get(fld.name, None))
            for fld in item_fields
            if fld.name != "_raw"
        }

        result = cls(**kwargs)
        # Only keep the source dictionary if the class actually has the ``_raw`` field
        result._raw = obj if any(fld.name == "_raw" for fld in item_fields) else None
        return result

    @staticmethod
    def _parse_type(type_: Any, value: Union[str, int, dict, list]) -> Union[str, int, dict, list, bytes]:
        """Convert ``value`` to the given type hint.

        ``Optional`` hints pass ``None`` through, ``Union`` hints use the first member that converts
        without raising (``None`` if none does), ``list`` and ``dict`` hints convert their items,
        ``bytes`` is base64 decoded and :class:`JsonItem` subclasses are created with ``from_dict``.
        Anything else is converted by calling the type with the value.
        """
        if type_ == Optional[type_]:
            if value is None:
                return None
            return JsonItem._parse_type(get_args(type_)[0], value)

        origin = get_origin(type_)

        if origin is Union:
            for candidate in get_args(type_):
                try:
                    return JsonItem._parse_type(candidate, value)
                except Exception:
                    continue
            return None

        if origin is list:
            (item_type,) = get_args(type_)[:1]
            return [JsonItem._parse_type(item_type, item) for item in value]

        if origin is dict:
            key_type, value_type = get_args(type_)
            return {
                JsonItem._parse_type(key_type, key): JsonItem._parse_type(value_type, item)
                for key, item in value.items()
            }

        if type_ is bytes:
            return base64.b64decode(value)

        if issubclass(type_, JsonItem):
            return type_.from_dict(value)

        return type_(value)
@dataclass
class Metadata(JsonItem):
    """Top-level LUKS metadata: the configuration plus keyslots, digests, segments and tokens by integer id."""

    config: Config
    keyslots: dict[int, Keyslot]
    digests: dict[int, Digest]
    segments: dict[int, Segment]
    tokens: dict[int, Token]

    @classmethod
    def from_luks1_header(cls, header: c_luks.luks_phdr) -> "Metadata":
        """Map LUKS1 header information into a :class:`Metadata` dataclass.

        Every key block that is not marked ``LUKS_KEY_DISABLED`` becomes a ``luks1`` keyslot with a
        ``pbkdf2`` KDF and a ``luks1`` anti-forensic splitter. A single ``pbkdf2`` digest (id 0)
        referencing all those keyslots and a single ``crypt`` segment (id 0) with a ``"dynamic"``
        size are generated. No tokens are produced.

        Args:
            header: The parsed LUKS1 on-disk header.

        Returns:
            The metadata object describing ``header``.
        """
        # Offsets in the LUKS1 header are multiplied by this sector size to get byte offsets.
        sector_size = 512

        # Cipher name, mode and hash are stored as NUL padded byte strings.
        cipher_spec = "-".join(raw.rstrip(b"\x00").decode() for raw in (header.cipherName, header.cipherMode))
        hash_spec = header.hashSpec.rstrip(b"\x00").decode()

        keyslots = {}
        for idx, block in enumerate(header.keyblock):
            if block.active == c_luks.LUKS_KEY_DISABLED:
                continue

            keyslots[idx] = Keyslot(
                type="luks1",
                key_size=header.keyBytes,
                area=KeyslotArea(
                    type="raw",
                    offset=block.keyMaterialOffset * sector_size,
                    size=header.keyBytes * block.stripes,
                    encryption=cipher_spec,
                    key_size=header.keyBytes,
                    hash=None,
                    sector_size=None,
                    shift_size=None,
                ),
                priority=None,
                kdf=KeyslotKdf(
                    type="pbkdf2",
                    salt=block.passwordSalt,
                    hash=hash_spec,
                    iterations=block.passwordIterations,
                    time=None,
                    memory=None,
                    cpus=None,
                ),
                af=KeyslotAf(type="luks1", stripes=block.stripes, hash=hash_spec),
                mode=None,
                direction=None,
            )

        digests = {
            0: Digest(
                type="pbkdf2",
                keyslots=list(keyslots),
                segments=[0],
                salt=header.mkDigestSalt,
                digest=header.mkDigest,
                hash=hash_spec,
                iterations=header.mkDigestIterations,
            )
        }

        segments = {
            0: Segment(
                type="crypt",
                offset=header.payloadOffset * sector_size,
                size="dynamic",
                flags=None,
                iv_tweak=0,
                encryption=cipher_spec,
                sector_size=sector_size,
                integrity=None,
            )
        }

        # Use ``cls`` so that subclasses get an instance of their own type.
        return cls(Config(0, None, None, None), keyslots, digests, segments, {})
def stream(
    fhin: BinaryIO,
    fhout: BinaryIO,
    offset: int,
    length: int,
    chunk_size: int = io.DEFAULT_BUFFER_SIZE,
    task_id: int | None = None,
) -> None:
    """Copy ``length`` bytes from ``fhin``, starting at ``offset``, to ``fhout``.

    The data is copied in chunks of at most ``chunk_size`` bytes and the module level progress
    reporter is advanced by the number of bytes that were actually copied.

    Args:
        fhin: Seekable source stream.
        fhout: Destination stream, written at its current position.
        offset: Absolute offset in ``fhin`` to start reading from.
        length: Number of bytes to copy. Nothing is copied when this is zero or negative.
        chunk_size: Maximum number of bytes to read per iteration.
        task_id: Progress task to update.
    """
    fhin.seek(offset)

    # ``> 0`` instead of ``!= 0``: a negative length would otherwise turn into ``read(-n)``,
    # which reads everything up to the end of the source.
    remaining = length
    while remaining > 0:
        chunk = fhin.read(min(remaining, chunk_size))
        if not chunk:
            # Source ended before ``length`` bytes were available.
            break

        fhout.write(chunk)

        # Account for the bytes really read, a read may return less than requested.
        copied = len(chunk)
        progress.update(task_id, advance=copied)
        remaining -= copied
def main() -> None:
    """Command line entry point: write a copy of a disk image with its encrypted volumes decrypted.

    The input container is opened, every volume is unlocked with the supplied passphrase,
    recovery password or unlock file where applicable, and the result is streamed to the output
    file. Data between, before and after the volumes is copied from the container as-is.

    The process exits with status 1 and a message on stderr when the input does not exist or a
    volume could not be opened.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input", type=Path, help="path to container with encrypted volume")
    parser.add_argument("-p", "--passphrase", type=str, help="user passphrase")
    parser.add_argument("-r", "--recovery", type=str, help="recovery passphrase")
    parser.add_argument("-f", "--unlock-file", type=Path, help="unlock file")
    parser.add_argument("--key-slot", type=int, help="LUKS keyslot")
    parser.add_argument("--keyfile-offset", type=int, help="LUKS keyfile offset")
    parser.add_argument("--keyfile-size", type=int, help="LUKS keyfile size")
    parser.add_argument("-o", "--output", type=Path, required=True, help="path to output file")
    # NOTE: the verbosity is parsed but not used anywhere in this function.
    parser.add_argument("-v", "--verbose", action="count", default=3, help="increase output verbosity")
    args = parser.parse_args()

    in_path = args.input.resolve()

    if not in_path.exists():
        # ArgumentParser.exit() takes (status, message); pass both explicitly instead of
        # handing the message in as the exit status.
        parser.exit(1, f"Input file doesn't exist: {in_path}\n")

    disk = container.open(in_path)
    try:
        vs = volume.open(disk)
        disk_volumes = vs.volumes
    except Exception:
        log("Container has no volume system, treating as raw instead")
        disk_volumes = [volume.Volume(disk, 1, 0, disk.size, None, None, disk=disk)]

    volumes = []
    for vol in disk_volumes:
        fve_vol = None

        try:
            fve_vol = open_fve(vol, args)
        except Exception:
            log(traceback.format_exc())
            log("Exception opening FVE volume")

        if fve_vol is None:
            # Either opening raised, or the volume could not be unlocked.
            parser.exit(1, f"Failed to open FVE volume: {vol}\n")

        volumes.append((vol, fve_vol))

    task_id = progress.add_task("decrypt", start=True, visible=True, filename=in_path.name, total=disk.size)

    offset = 0
    with progress:
        with args.output.open("wb") as fh:
            for vol, fve_vol in volumes:
                if offset != vol.offset:
                    # We're not to the beginning of the volume yet, fill in
                    stream(disk, fh, offset, vol.offset - offset, task_id=task_id)
                    offset = vol.offset

                # Stream the decrypted volume
                src_vol = fve_vol or vol
                stream(src_vol, fh, 0, src_vol.size, task_id=task_id)
                offset += src_vol.size

                if isinstance(fve_vol, CryptStream):
                    # LUKS volumes don't actually start at the beginning like Bitlocker
                    offset += fve_vol.offset

            # There's data after the volumes until the end of the disk
            if offset != disk.size:
                stream(disk, fh, offset, disk.size - offset, task_id=task_id)
b/pyproject.toml new file mode 100644 index 0000000..2203c55 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,67 @@ +[build-system] +requires = ["setuptools>=65.5.0", "setuptools_scm[toml]>=6.4.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "dissect.fve" +description = "A Dissect module implementing a parsers for full volume encryption implementations, currently Linux Unified Key Setup (LUKS1 and LUKS2) and Microsoft's Bitlocker Disk Encryption" +readme = "README.md" +requires-python = "~=3.9" +license.text = "Affero General Public License v3" +authors = [ + {name = "Dissect Team", email = "dissect@fox-it.com"} +] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Intended Audience :: Developers", + "Intended Audience :: Information Technology", + "License :: OSI Approved", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Topic :: Internet :: Log Analysis", + "Topic :: Scientific/Engineering :: Information Analysis", + "Topic :: Security", + "Topic :: Utilities", +] +dependencies = [ + "dissect.cstruct>=4,<5", + "dissect.util>=3,<4", + "pycryptodome", + "argon2-cffi", +] +dynamic = ["version"] + +[project.urls] +homepage = "https://dissect.tools" +documentation = "https://docs.dissect.tools/en/latest/projects/dissect.fve" +repository = "https://github.com/fox-it/dissect.fve" + +[project.optional-dependencies] +full = [ + "dissect.target", + "rich", +] +dev = [ + "dissect.cstruct>=4.0.dev,<5.0.dev", + "dissect.util>=3.0.dev,<4.0.dev", +] + +[project.scripts] +fve-dd = "dissect.fve.tools.dd:main" + +[tool.black] +line-length = 120 + +[tool.isort] +profile = "black" +known_first_party = ["dissect.fve"] +known_third_party = ["dissect"] + +[tool.setuptools] +license-files = ["LICENSE", "COPYRIGHT"] + +[tool.setuptools.packages.find] +include = ["dissect.*"] + +[tool.setuptools_scm] diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 
diff --git a/tests/_data/bde/aes-xts_128.bin.gz b/tests/_data/bde/aes-xts_128.bin.gz new file mode 100644 index 0000000..a02216d Binary files /dev/null and b/tests/_data/bde/aes-xts_128.bin.gz differ diff --git a/tests/_data/bde/aes-xts_256.bin.gz b/tests/_data/bde/aes-xts_256.bin.gz new file mode 100644 index 0000000..c460d72 Binary files /dev/null and b/tests/_data/bde/aes-xts_256.bin.gz differ diff --git a/tests/_data/bde/aes_128.bin.gz b/tests/_data/bde/aes_128.bin.gz new file mode 100644 index 0000000..dcbdcc6 Binary files /dev/null and b/tests/_data/bde/aes_128.bin.gz differ diff --git a/tests/_data/bde/aes_128_diffuser.bin.gz b/tests/_data/bde/aes_128_diffuser.bin.gz new file mode 100644 index 0000000..b3a4e15 Binary files /dev/null and b/tests/_data/bde/aes_128_diffuser.bin.gz differ diff --git a/tests/_data/bde/aes_256.bin.gz b/tests/_data/bde/aes_256.bin.gz new file mode 100644 index 0000000..7bf450c Binary files /dev/null and b/tests/_data/bde/aes_256.bin.gz differ diff --git a/tests/_data/bde/aes_256_diffuser.bin.gz b/tests/_data/bde/aes_256_diffuser.bin.gz new file mode 100644 index 0000000..1765145 Binary files /dev/null and b/tests/_data/bde/aes_256_diffuser.bin.gz differ diff --git a/tests/_data/bde/decrypted.bin.gz b/tests/_data/bde/decrypted.bin.gz new file mode 100644 index 0000000..16af6ce Binary files /dev/null and b/tests/_data/bde/decrypted.bin.gz differ diff --git a/tests/_data/bde/eow_partial.bin.gz b/tests/_data/bde/eow_partial.bin.gz new file mode 100644 index 0000000..d3c047a Binary files /dev/null and b/tests/_data/bde/eow_partial.bin.gz differ diff --git a/tests/_data/bde/recovery_key.bek b/tests/_data/bde/recovery_key.bek new file mode 100755 index 0000000..60b2ca3 Binary files /dev/null and b/tests/_data/bde/recovery_key.bek differ diff --git a/tests/_data/bde/recovery_key.bin.gz b/tests/_data/bde/recovery_key.bin.gz new file mode 100644 index 0000000..b33ab86 Binary files /dev/null and b/tests/_data/bde/recovery_key.bin.gz differ 
diff --git a/tests/_data/bde/recovery_password.bin.gz b/tests/_data/bde/recovery_password.bin.gz new file mode 100644 index 0000000..4da0154 Binary files /dev/null and b/tests/_data/bde/recovery_password.bin.gz differ diff --git a/tests/_data/bde/startup_key.bek b/tests/_data/bde/startup_key.bek new file mode 100755 index 0000000..dfa32f0 Binary files /dev/null and b/tests/_data/bde/startup_key.bek differ diff --git a/tests/_data/bde/startup_key.bin.gz b/tests/_data/bde/startup_key.bin.gz new file mode 100644 index 0000000..ce04055 Binary files /dev/null and b/tests/_data/bde/startup_key.bin.gz differ diff --git a/tests/_data/bde/suspended.bin.gz b/tests/_data/bde/suspended.bin.gz new file mode 100644 index 0000000..68117fc Binary files /dev/null and b/tests/_data/bde/suspended.bin.gz differ diff --git a/tests/_data/bde/vista.bin.gz b/tests/_data/bde/vista.bin.gz new file mode 100644 index 0000000..ec2a8bd Binary files /dev/null and b/tests/_data/bde/vista.bin.gz differ diff --git a/tests/_data/bde/win7_partial.bin.gz b/tests/_data/bde/win7_partial.bin.gz new file mode 100644 index 0000000..6575786 Binary files /dev/null and b/tests/_data/bde/win7_partial.bin.gz differ diff --git a/tests/_data/luks1/aes-ecb.bin.gz b/tests/_data/luks1/aes-ecb.bin.gz new file mode 100644 index 0000000..b502312 Binary files /dev/null and b/tests/_data/luks1/aes-ecb.bin.gz differ diff --git a/tests/_data/luks1/sha1.bin.gz b/tests/_data/luks1/sha1.bin.gz new file mode 100644 index 0000000..9e7cfc6 Binary files /dev/null and b/tests/_data/luks1/sha1.bin.gz differ diff --git a/tests/_data/luks2/aes-cbc-essiv.bin.gz b/tests/_data/luks2/aes-cbc-essiv.bin.gz new file mode 100644 index 0000000..b4cd48c Binary files /dev/null and b/tests/_data/luks2/aes-cbc-essiv.bin.gz differ diff --git a/tests/_data/luks2/aes-cbc-plain.bin.gz b/tests/_data/luks2/aes-cbc-plain.bin.gz new file mode 100644 index 0000000..c80d10f Binary files /dev/null and b/tests/_data/luks2/aes-cbc-plain.bin.gz differ diff 
--git a/tests/_data/luks2/aes-ecb-pbkdf2.bin.gz b/tests/_data/luks2/aes-ecb-pbkdf2.bin.gz new file mode 100644 index 0000000..683f74f Binary files /dev/null and b/tests/_data/luks2/aes-ecb-pbkdf2.bin.gz differ diff --git a/tests/_data/luks2/aes-ecb.bin.gz b/tests/_data/luks2/aes-ecb.bin.gz new file mode 100644 index 0000000..e0cdeae Binary files /dev/null and b/tests/_data/luks2/aes-ecb.bin.gz differ diff --git a/tests/_data/luks2/aes-xts-plain64.bin.gz b/tests/_data/luks2/aes-xts-plain64.bin.gz new file mode 100644 index 0000000..883f6c8 Binary files /dev/null and b/tests/_data/luks2/aes-xts-plain64.bin.gz differ diff --git a/tests/_data/luks2/multiple-slots.bin.gz b/tests/_data/luks2/multiple-slots.bin.gz new file mode 100644 index 0000000..931672d Binary files /dev/null and b/tests/_data/luks2/multiple-slots.bin.gz differ diff --git a/tests/_docs/Makefile b/tests/_docs/Makefile new file mode 100644 index 0000000..69e0098 --- /dev/null +++ b/tests/_docs/Makefile @@ -0,0 +1,24 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= -jauto +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: clean help Makefile + +clean: Makefile + rm -rf api + @$(SPHINXBUILD) -M clean "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
# Sphinx configuration for building the API reference of this project.

# Extensions loaded by Sphinx for this build.
extensions = [
    "autoapi.extension",
    "sphinx.ext.autodoc",
    "sphinx.ext.autosectionlabel",
    "sphinx.ext.doctest",
    "sphinx.ext.napoleon",
    "sphinx_argparse_cli",
]

# No source files are excluded from the build.
exclude_patterns = []

html_theme = "furo"

# sphinx-autoapi settings: document the Python sources found under ../../dissect/,
# skipping paths that match the ignore patterns.
autoapi_type = "python"
autoapi_dirs = ["../../dissect/"]
autoapi_ignore = ["*tests*", "*.tox*", "*venv*", "*examples*"]
autoapi_python_use_implicit_namespaces = True
autoapi_add_toctree_entry = False
# Directory name used for the generated API pages.
autoapi_root = "api"
autoapi_options = [
    "members",
    "undoc-members",
    "show-inheritance",
    "show-module-summary",
    "special-members",
    "imported-members",
]
autoapi_keep_files = True
autoapi_template_dir = "_templates/autoapi"

# autodoc presentation settings.
autodoc_typehints = "signature"
autodoc_member_order = "groupwise"

autosectionlabel_prefix_document = True
def absolute_path(filename: str) -> Path:
    """Return ``filename`` resolved relative to the directory that contains this module."""
    module_dir = Path(__file__).parent
    return module_dir.joinpath(filename)
def _verify_passphrase_crypto(test_file: BinaryIO, passphrase: str, fvek_type: c_bde.FVE_KEY_TYPE) -> None:
    """Unlock the BDE volume in ``test_file`` with ``passphrase`` and verify the decrypted stream.

    Asserts that the volume is fully encrypted with the expected ``fvek_type``, starts out
    locked, offers a passphrase protector and is unlocked after supplying the passphrase.
    """
    volume = bde.BDE(test_file)
    info = volume.information

    # Both the current and the next state have to be ENCRYPTED.
    assert info.current_state == info.next_state == c_bde.FVE_STATE.ENCRYPTED
    assert info.dataset.fvek_type == fvek_type

    # Nothing has been unlocked yet, but a passphrase protector must be available.
    assert not volume.unlocked
    assert volume.has_passphrase()

    volume.unlock_with_passphrase(passphrase)
    assert volume.unlocked

    _verify_crypto_stream(volume)
@pytest.mark.parametrize(
    "test_file, passphrase, key_type",
    [
        ("_data/bde/aes_128.bin.gz", "password12!@", c_bde.FVE_KEY_TYPE.AES_128),
        ("_data/bde/aes_256.bin.gz", "password12!@", c_bde.FVE_KEY_TYPE.AES_256),
        ("_data/bde/aes_128_diffuser.bin.gz", "password12!@", c_bde.FVE_KEY_TYPE.AES_128_DIFFUSER),
        ("_data/bde/aes_256_diffuser.bin.gz", "password12!@", c_bde.FVE_KEY_TYPE.AES_256_DIFFUSER),
        ("_data/bde/aes-xts_128.bin.gz", "password12!@", c_bde.FVE_KEY_TYPE.AES_XTS_128),
        ("_data/bde/aes-xts_256.bin.gz", "password12!@", c_bde.FVE_KEY_TYPE.AES_XTS_256),
    ],
)
def test_bde_passphrase(test_file: str, passphrase: str, key_type: c_bde.FVE_KEY_TYPE) -> None:
    """Passphrase unlocking works for every listed encryption method."""
    # ``open_file_gz`` is a plain generator function, wrap it to use it as a context manager.
    open_image = contextlib.contextmanager(open_file_gz)

    with open_image(test_file) as image_fh:
        _verify_passphrase_crypto(image_fh, passphrase, key_type)
@pytest.mark.parametrize(
    "test_file, bek_file, key_type",
    [
        (
            "_data/bde/recovery_key.bin.gz",
            "_data/bde/recovery_key.bek",
            c_bde.FVE_KEY_TYPE.AES_XTS_128,
        ),
        (
            "_data/bde/startup_key.bin.gz",
            "_data/bde/startup_key.bek",
            c_bde.FVE_KEY_TYPE.AES_XTS_128,
        ),
    ],
)
def test_bde_bek(test_file: str, bek_file: str, key_type: c_bde.FVE_KEY_TYPE) -> None:
    """Unlocking with a BEK file works for both the recovery key and the startup key image."""
    # The helpers are plain generator functions, wrap them to use them as context managers.
    open_image = contextlib.contextmanager(open_file_gz)
    open_key = contextlib.contextmanager(open_file)

    with open_image(test_file) as image_fh:
        with open_key(bek_file) as key_fh:
            _verify_bek_crypto(image_fh, key_fh, key_type)
@pytest.mark.parametrize(
    "test_input, expected",
    [
        ((b"\xFF", 8, 0, 8), [(1, 8)]),
        ((b"\xFF", 8, 4, 4), [(1, 4)]),
        ((b"\x00", 8, 0, 8), [(0, 8)]),
        ((b"\x00", 8, 4, 4), [(0, 4)]),
        ((b"\xFF\x00", 16, 0, 8), [(1, 8)]),
        ((b"\xFF\x00", 16, 4, 8), [(1, 4), (0, 4)]),
        ((b"\x00\x00", 16, 0, 12), [(0, 12)]),
        ((b"\x00\xFF", 16, 4, 8), [(0, 4), (1, 4)]),
        ((b"\xF0\xF0", 16, 0, 16), [(0, 4), (1, 4), (0, 4), (1, 4)]),
        ((b"\x0F\x0F", 16, 0, 16), [(1, 4), (0, 4), (1, 4), (0, 4)]),
        ((b"\x00", 8, 0, 6), [(0, 6)]),
        ((b"\x00", 8, 1, 6), [(0, 6)]),
        ((b"\xFF", 4, 0, 8), [(1, 4)]),
        ((b"\xFF", 4, 4, 8), []),
    ],
)
def test_bde_eow_bitmap_runs(test_input: tuple[bytes, int, int, int], expected: list[tuple[int, int]]) -> None:
    """``_iter_bitmap`` yields the expected runs for each tuple of positional arguments."""
    runs = list(_iter_bitmap(*test_input))
    assert runs == expected
"46a8f2cfca6410fa89bbbce18987bdd546a8f2cfca6410fa89bbbce18987bdd5" + "46a8f2cfca6410fa89bbbce18987bdd546a8f2cfca6410fa89bbbce18987bdd5" + "46a8f2cfca6410fa89bbbce18987bdd546a8f2cfca6410fa89bbbce18987bdd5" + "46a8f2cfca6410fa89bbbce18987bdd546a8f2cfca6410fa89bbbce18987bdd5" + "46a8f2cfca6410fa89bbbce18987bdd546a8f2cfca6410fa89bbbce18987bdd5" + "46a8f2cfca6410fa89bbbce18987bdd546a8f2cfca6410fa89bbbce18987bdd5" + "46a8f2cfca6410fa89bbbce18987bdd546a8f2cfca6410fa89bbbce18987bdd5" + "46a8f2cfca6410fa89bbbce18987bdd546a8f2cfca6410fa89bbbce18987bdd5" + "46a8f2cfca6410fa89bbbce18987bdd546a8f2cfca6410fa89bbbce18987bdd5" + "46a8f2cfca6410fa89bbbce18987bdd546a8f2cfca6410fa89bbbce18987bdd5" + "46a8f2cfca6410fa89bbbce18987bdd546a8f2cfca6410fa89bbbce18987bdd5" + "46a8f2cfca6410fa89bbbce18987bdd546a8f2cfca6410fa89bbbce18987bdd5" + "46a8f2cfca6410fa89bbbce18987bdd546a8f2cfca6410fa89bbbce18987bdd5" + ), + 1, + "6caf38d537984e261527b8caef5f990fb91415a1db917198821a79ed28997973", + ), + ( + "aes-cbc-128-eboiv", + "84c3a3157e5f21dee140005220bc940e", + ( + "30d6c53b40e537d33972ed6ac97292f1308fbc15f77599b5dfd5a58fa52df42d" + "24a187bec736d8818fd4a0f0f6f68a2f6070a1045fb32da7d13ee4d06fb8d2bd" + "206c593497936224ea06960aadd0f8442926ed799d7bfef791037903239c11a5" + "1a090e72146eb551cf3c65d3aa3311f1016471339f3fb38c0e3b74ba489c44ad" + "efb454abfa777e8a4ab4cc11361d197717748fb3d53d5cccc3c873be4d90a656" + "32d4503f87160ea0763652438c03d3523b574fc2a9579b4fdc137352a8351add" + "6f21d9ec4e9dcb16404845556d44ce10358f62112ff44d171ea2078824354690" + "f36e7f3751e618b6f2125f85d89aff9e554dd7c3f4a10a0fbc8a691b5a106759" + "0de812794752021abf8bf022ef9316fd30df475074610a3e73cbd4537e785bff" + "16b19746550285df4201fb018a7ad2c1d3c0be0bd03c2e28b33c7cbe5a304ae1" + "00fe35d02f53e7cbf724c3d6654942c7f87cd29ce94c7b7513d77ba1e6cd6587" + "668ab9242b30a2d3fa194c59e737f8c0c47f0a58c120457574b31b2d5628e3a4" + "95c47d00f456af86e2909b10386d9755e08f8dd45a3b442166d10b938b6c4e06" + 
"d934230f5957ae1a78dfd9f54425f079fb0b19f10dc13ba8f8957e05d72ca459" + "83fd58f7c68e2f338e64a53d967ddd68cb4eb36837f447005ace7064af086abd" + "e518cdc3c3651da1eb7d723b35c60a72f02cb15b6113206c7fef62a28d8b6302" + ), + 69632, + "330ae9824fdd919fbe5c0eb7f48c9f9df2a002557eb5daa2f8be93d25f4dccc5", + ), + ( + "aes-cbc-256-eboiv", + "dd3885ef9948c8dc6ad1b54a6c4a4b6fb74b44d1d9775ac7ed186c35f1b59022", + ( + "7018bff0f32d94d4463a0c0ef56825538d2f98f4be7230f240b861678da36e0c" + "30b1a2d71f7d3d3e6e3fc13cb3ec0e9fef09ac344e0c57c9d4aa72c1e7ccb266" + "fb5b9bdacb5f5ddd3db573593e045ec1efb2702831098f30f2d88ab6c6bb03ac" + "af0ada116ea4fe8acb5df6f1c48daabac5d6ac89333e15205dd0e55ba0fb9451" + "89635b16549ee2b8777e7de971dea1a779d9ecbb973b1697af8619c9924874f9" + "86aaebbaa41dec271809f344ac45f0a4d8aef2d22619bbd52cc91e2e3ff66577" + "798a979aa6397ce22f2bbbaed4e57ff0dd202f6273183a454db9d16a87531320" + "c1632fda49a1265929df4d6ece0c385330162cd211677f168e969e913cae39b9" + "70e16e3b2166b6c3f69f73e04451468d72781aa09002eca7734faa04f11377f9" + "552e057fc8dd6b750ab507737bee6235517075588bb67dd1e2ad792bcfd79b91" + "9edf36e6ecef33cd4e654252495d766c47514694cacf1e298a66ac24bbbbd851" + "7bf4874f587b7a6229d01099c9b6b50d29b1d8746a68b8c5c3bcf9f4b4591298" + "5feb89e8f28ccb8b2fab1e6038a3ea25b76fdb047b5e08216464871d9e9c51a2" + "4dc634466a8cbb8babd4e150ff3243df5a12b17af5128078a9a03f8601029037" + "cccc89c70e13c93ec29fa6435b23d75452623a709b3c207a84769d5dea5f3d5c" + "f130170ec80bbaf7717324a87af1e81a7a1d915b35fd3d019aa6b51357dded85" + ), + 69632, + "1c9af69f51facaf1588143be07b2bffe0bfd4be0036b3d569e13f68020a7e469", + ), + ( + "aes-cbc-128-elephant", + ( + "10730f695df62a49cd3aa1b1c9ae3edf2229c338a3740830e2b19d2b83f9cada" + "268af0e0613921085edc89e1b804de354fd265acf4e5c410b47764bb9565666b" + ), + ( + "2895d4deed18488410dc2cd5db6bbf422fa34ac01a2a1322cdcf62ee0767c3b2" + "d4288ed284b4f819d2a1c37618fc9a1408932d8400bda39aaba2d611399220b1" + "6fa0bc2f791b87ab4adbd5cf6e6fcf1821f62fafb33750b2056da73d35356ac0" + 
"eee55bda92582720474bbaac2ff360d4fa168c49984d306ee73bc074a1978b97" + "f09f37d5aeb81d9452b5d04326b490e4e7e3c362cbad3467fe9a9cb6a2d67cf1" + "17c4d482bfc9abeb6b68f694a55c11ccb33bf879ba5e318f4b0f7433d0971a0a" + "18533531d818eb0b5980596d98e07bf82bb126ac531a738013eb7b00a1ab8306" + "6177afd93617c5b31378990e47004974343bd76cdac9e56e6e05a4f831b2655e" + "8263042dbbcc13b4b44f35a862f46f72816c76a36a684399b3ed76a53cdf225b" + "c9693a8de876c871c9479b2cd519838ab2fafe27c9503e154745a3473e3d4ce5" + "c958203cb27bfb24f7da9765ff922a885897bc0835d52e7ee4ff044a47ced516" + "5537ac88f10c5d59f905ea4ffb6ff424ca2e578520c59042a774830063527d75" + "2d5f8272b08160b2e184928876c7befbd9ca9d4c674b90c1b19ea23d01271d3c" + "62f5aac812af0dd44c30c7bda69bd45c01b68f677c6ee416417e5437e799c776" + "b264998dbeb65713269789c7443ae7f57c826eaa6a60116fb0b2950dff4ffc05" + "1282fa9ae6f4c20ce1738ea898f70cd48dbf9e9325673b250e2deaa50408fb47" + ), + 85072, + "d94ac3b56f5307afa9350882f9b5d84401d1eac2dc0a27300376c2e5c80172d8", + ), + ( + "aes-cbc-256-elephant", + ( + "3a600625f8fd5cc506cf8b30c8ca0600cc32f0c6b54c140789f7518c4fb5c71b" + "a272f34f1a920d5be247298b5d233ce6199023c24d0aefec28717232f9894d1f" + ), + ( + "8568f934436237a034086e7acbb9b32f278c7fd1f9803d1d8d08000e8c9b6398" + "0f9b72261175c71ec4b8f5a9e99830334a96460250d4a3030e2866341ac65e33" + "39307198fcb64ed438592804148b917c5cfc2d0bb00c99f89568d68f914321d5" + "4f8b70f6d7835f9b287f43b1ab99c5cadae38de1fb27f389a2ae22930aeb2269" + "0ca3e176af6b1adfdee1d82229768179bec35729125287a2ab20be958c468a1d" + "973e7b157d6562444dd4d7ae2ba21094597d98b8665af025e859dc7568a6cef4" + "f615e02516eac6d4e82479a51548220707ba6a9fe13edf4797b1986085884f52" + "b50f55bbb41f351ba87e388bbb11406a19e4c3fac1407d727c7aa95d41b40b1e" + "bb996cecc7f329faad33af6c5f3e98e18078ab1242a2bc0e9ed90835e75730c9" + "3697d57bf1c68204238ff099050108f1f6ec3fec06ff8f239b5089409a3ca10e" + "8aa37180b62049cd5ce586e57f7c02afcf10f3ddd8328e9810a0c29df3d983b3" + "dbef7cde8cfbc9f0f3f2aece11b332b8593bf94e66af9b3b44f01f056d4c462d" + 
"868123c2b88dcd3fba831f9b9240ab0f985d1cf42d97504d535fa44edd6c371b" + "5e56daadfbc2ba27db9ca65045d1eae97bc4f131f51a67baab3344c4af9eeab3" + "f784b6f0bf1456d9bb0c0863ce7d6f8305ba2cc908fbef535d6a716cc583e934" + "8d07ab73a517e015d52a2d9c496f8776b259995c3851e27c9776a2aa5b6520cb" + ), + 85072, + "d62b8dfa4eb68d5507bc446cb34cd1d5bc1a2425c2dd609c9920c5ec9d87308d", + ), + ( + "aes-xts-128-plain64", + "4eb949c473f0edfc379ad041670ddb9c4da0abdb4482a2c8bb47250493aa1ed5", + ( + "138bae29d1e47e38411a65675406cb9bc18f5eae362c3cdb6b58b9b39bad18d6" + "cd4e3aad995f5681e0949914355e200701bf3d9ad8de9b8ed245be2b7c364b7c" + "37c918181c25ea64fa88f8bb048a87122ea028c82fa05c18caa979d33b86808c" + "18d6791c25eec448960aa4f98a666176a1b1eb50d8a0b96be740b51117b8f278" + "18f5bf5afa794f2908b942bf3eb9725336db7c089a350186ef76c8e6035ca2b7" + "257fb006776990b2304d0c98d6a923d170b621b343dc8be02d71cc4d18706e0e" + "358e745cb0700f4f8250b3f3ebbe5889ed89d804051933e60c80c8ce038b6090" + "5f3f6d23228b91162885dd8bcce2ff3e6f498acf2f4f349be7c931f4e4a1d9f5" + "4e34a5754057df547377418f9e002c30e1e77ed623eddb11a1b7448569af9866" + "34a12e8e71ddddd7395f3d9de5c7e9019eafc864914750f909b5298e14029b51" + "9f4ee34c60be2f45d0b600e1e2f29e9c05c4c0a7ef6be1e82922d699dfcc61e8" + "285850b080e4b9a151abc996a2571a689e9bb2ea53995e4c3191c81f50f8463b" + "6f9e28f6cdd671c220472e0e5a5c4026baaed99c4bb170a2a9708d9974dba25e" + "c102b881307dfa13c7a774243e66b499386523b426a575961444b91f85d7ab60" + "b860a56fbe2c799fc4e29af79c27d2010431caa414fbb3acd3bae79f076f0974" + "f548dd52f3273081b2a17695a0d4f365a4988ed5dc0bcc5503f63bc86ed613c5" + ), + 69632, + "01a264035f380f8edef7f377747cd4dbacff4be8fee23d6e9b8021766c9a3c0f", + ), + ( + "aes-xts-256-plain64", + ( + "c74002df41f5eadeee2549fc009233a2a510726ce08736aba2f84a52ac6e7bbc" + "56b8a824a4dc26cf9c4c2926386319d17427998e045ebfdc789e328e0dc97da4" + ), + ( + "d14389175de9e1ad3b7d9bd4605e16880489ad8ee851bad735c35acef1bc8f3a" + "ff2ec2ee373a77c20fb471e7c9c177f213823629fbec4a369bf3aa58cf510a9f" + 
def _check_diffuser_roundtrip(encrypt, decrypt, digest: str) -> None:
    """Run one Elephant diffuser over a 512-byte sector and back again.

    A sector filled with ``b"a"`` is handed to *encrypt* as a ``memoryview``.
    The backing ``bytearray`` must then hash (SHA-256) to *digest*, and
    *decrypt* must bring it back to its initial contents.
    """
    plaintext = b"a" * 512
    sector = bytearray(plaintext)
    window = memoryview(sector)

    encrypt(window, 512)
    # The digest is taken over the backing bytearray rather than a return
    # value, so the diffuser is expected to have written through the view.
    assert hashlib.sha256(sector).hexdigest() == digest

    decrypt(window, 512)
    assert sector == plaintext


def test_crypto_elephant_diffuser_a() -> None:
    """Diffuser A yields the pinned digest and its decrypt step undoes it."""
    _check_diffuser_roundtrip(
        elephant.diffuser_a_encrypt,
        elephant.diffuser_a_decrypt,
        "f58aa15c1219f893c4ed355d363d8f831bcc0c4a82c6bbffcca321aada9e86ec",
    )


def test_crypto_elephant_diffuser_b() -> None:
    """Diffuser B yields the pinned digest and its decrypt step undoes it."""
    _check_diffuser_roundtrip(
        elephant.diffuser_b_encrypt,
        elephant.diffuser_b_decrypt,
        "1d5a51ae0d0b6309f1f8661376af9ebd880b1274601f6841f5aaeb5273580133",
    )


# Each row: (spec, key_size, key_size_hint, expected parse result).
# The expected tuples pin the following outcomes:
#   * an explicit key_size overrides both the size embedded in the spec and the hint;
#   * the hint is only used when neither of those is present;
#   * for "aes-xts-plain64" a hint of 512 is expected to resolve to 256;
#   * the IV mode is expected to be "plain" when the spec names none.
_PARSE_CIPHER_SPEC_CASES = [
    ("aes", 128, None, ("aes", "cbc", 128, "plain", None)),
    ("aes-cbc", 128, None, ("aes", "cbc", 128, "plain", None)),
    ("aes-cbc", None, 256, ("aes", "cbc", 256, "plain", None)),
    ("aes-cbc", 128, 256, ("aes", "cbc", 128, "plain", None)),
    ("aes-cbc-256", 128, 256, ("aes", "cbc", 128, "plain", None)),
    ("aes-cbc-256", None, None, ("aes", "cbc", 256, "plain", None)),
    ("aes-cbc-256", 128, None, ("aes", "cbc", 128, "plain", None)),
    ("aes-cbc-256-eboiv", None, None, ("aes", "cbc", 256, "eboiv", None)),
    ("aes-cbc-256-essiv:sha256", None, None, ("aes", "cbc", 256, "essiv", "sha256")),
    ("aes-cbc-essiv:sha256", 128, None, ("aes", "cbc", 128, "essiv", "sha256")),
    ("aes-cbc-essiv:sha256", None, 128, ("aes", "cbc", 128, "essiv", "sha256")),
    ("aes-xts-plain64", None, 512, ("aes", "xts", 256, "plain64", None)),
    ("aes-xts-plain64", 128, 512, ("aes", "xts", 128, "plain64", None)),
    ("aes-xts-256-plain64", None, None, ("aes", "xts", 256, "plain64", None)),
]


@pytest.mark.parametrize(("spec", "key_size", "key_size_hint", "expected"), _PARSE_CIPHER_SPEC_CASES)
def test_crypto_parse_cipher_spec(
    spec: str,
    key_size: Optional[int],
    key_size_hint: Optional[int],
    expected: tuple[str, str, int, str, Optional[str]],
) -> None:
    """``parse_cipher_spec`` must return exactly the tuple listed for each table row."""
    parsed = parse_cipher_spec(spec, key_size, key_size_hint)
    assert parsed == expected


def test_crypto_parse_cipher_spec_invalid() -> None:
    """Unparseable specs must raise ``ValueError`` carrying the exact message."""
    rejected = (
        # No key size in the spec and none passed in.
        ("aes", "Missing key size"),
        # Third component is not a key size.
        ("aes-cbc-garbage-essiv", "Unexpected cipher spec format"),
    )
    for bad_spec, message in rejected:
        with pytest.raises(ValueError) as excinfo:
            parse_cipher_spec(bad_spec)
        # Checked after the ``with`` body, where the raised exception is available.
        assert str(excinfo.value) == message
def _verify_crypto_stream(luks_obj: LUKS) -> None:
    """Check the first four 512-byte reads of the opened volume.

    Read number ``n`` (counting from 0) must consist solely of byte value ``n``.
    """
    plain_stream = luks_obj.open()
    for fill in range(4):
        expected_sector = bytes([fill]) * 512
        assert plain_stream.read(512) == expected_sector


def _verify_passphrase_crypto(test_file: BinaryIO, passphrase: str, cipher_type: str) -> None:
    """Unlock the LUKS container in *test_file* with *passphrase* and validate it.

    The container must start out locked and expose keyslots. After unlocking,
    the segment looked up via the active keyslot id must report *cipher_type*
    as its encryption, and the decrypted stream must hold the expected pattern.
    """
    container = LUKS(test_file)

    # Pre-conditions, checked before any unlock attempt.
    assert not container.unlocked
    assert container.keyslots

    container.unlock_with_passphrase(passphrase)

    segment = container.find_segment(container._active_keyslot_id)
    assert segment.encryption == cipher_type

    _verify_crypto_stream(container)


# Each row: (gzip-compressed image path, passphrase, expected segment cipher).
# The multiple-slots image appears twice, once per passphrase.
_LUKS_IMAGES = [
    ("_data/luks1/aes-ecb.bin.gz", "password", "aes-ecb"),
    ("_data/luks1/sha1.bin.gz", "password", "aes-ecb"),
    ("_data/luks2/aes-cbc-plain.bin.gz", "password", "aes-cbc-plain"),
    ("_data/luks2/aes-cbc-essiv.bin.gz", "password", "aes-cbc-essiv:sha256"),
    ("_data/luks2/aes-ecb-pbkdf2.bin.gz", "password", "aes-ecb"),
    ("_data/luks2/aes-xts-plain64.bin.gz", "password", "aes-xts-plain64"),
    ("_data/luks2/multiple-slots.bin.gz", "password", "aes-cbc-plain"),
    ("_data/luks2/multiple-slots.bin.gz", "another", "aes-cbc-plain"),
]


@pytest.mark.parametrize(("test_file", "password", "cipher"), _LUKS_IMAGES)
def test_luks(test_file: str, password: str, cipher: str) -> None:
    """Unlock each sample image with its passphrase and verify cipher and contents."""
    # open_file_gz is wrapped with contextlib.contextmanager at the call site,
    # so it is presumably a generator function -- confirm in tests/_utils.
    open_image = contextlib.contextmanager(open_file_gz)
    with open_image(test_file) as fh:
        _verify_passphrase_crypto(fh, password, cipher)
def _argon2_keyslot(kdf_type: str, salt: str):
    """Build a keyslot whose KDF section is of *kdf_type* with the given *salt*.

    Everything apart from the KDF type and salt is identical for the Argon2
    tests that use this helper: time 5, memory 1048576, cpus 4.
    """
    kdf_section = {
        "type": kdf_type,
        "time": 5,
        "memory": 1048576,
        "cpus": 4,
        "salt": salt,
    }
    area_section = {
        "type": "raw",
        "offset": "32768",
        "size": "131072",
        "encryption": "aes-ecb",
        "key_size": 32,
    }
    return Keyslot.from_dict(
        {
            "type": "luks2",
            "key_size": 32,
            "af": {"type": "luks1", "stripes": 4000, "hash": "sha256"},
            "area": area_section,
            "kdf": kdf_section,
        }
    )


def test_luks_kdf_argon2i() -> None:
    """Deriving from ``b"password"`` with the argon2i keyslot must yield the pinned key."""
    slot = _argon2_keyslot("argon2i", "fsv0tZWR6Q/WkidkWY6p0jiP1A+am8CH8h3D8gYEoYE=")
    wanted = bytes.fromhex("37f6085467d330749f59ea348491908a4faddabcb67523efc649419ecb52bd94")

    assert derive_passphrase_key(b"password", slot) == wanted


def test_luks_kdf_argon2id() -> None:
    """Deriving from ``b"password"`` with the argon2id keyslot must yield the pinned key."""
    slot = _argon2_keyslot("argon2id", "yssVZKHcjdSON4vM096WRyzZBWfz5Pf+a08cRzPdcZg=")
    wanted = bytes.fromhex("15b7b31551e6df90e6dbc58bb5dbc5f82efa8f36fdab9422f8a9bdfa72a8af85")

    assert derive_passphrase_key(b"password", slot) == wanted
"yssVZKHcjdSON4vM096WRyzZBWfz5Pf+a08cRzPdcZg=", + }, + } + }, + "tokens": {}, + "segments": { + "0": { + "type": "crypt", + "offset": "16777216", + "size": "dynamic", + "iv_tweak": "0", + "encryption": "aes-ecb", + "sector_size": 512, + } + }, + "digests": { + "0": { + "type": "pbkdf2", + "keyslots": ["0"], + "segments": ["0"], + "hash": "sha256", + "iterations": 217366, + "salt": "uvTVgMNRd82F6+o5onDIqPtlqNb7N2Ah8ygqPUiK7k0=", + "digest": "DChjy4pamOC06wmGlIJFzZk2hZgocGQ+BXumiMRTlRU=", + } + }, + "config": {"json_size": "12288", "keyslots_size": "16744448"}, + } + + assert Metadata.from_dict(obj) diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..bfcf133 --- /dev/null +++ b/tox.ini @@ -0,0 +1,79 @@ +[tox] +envlist = lint, py3, pypy3 +# This version of tox will autoprovision itself and the requirements defined in +# requires if they are not available on the host system. This requires the +# locally installed tox to have a minimum version of 3.3.0. This means the names +# of the configuration options are still according to the tox 3.x syntax. +minversion = 4.4.3 +# This version of virtualenv will install setuptools version 68.2.2 and pip +# 23.3.1. These versions fully support python projects defined only through a +# pyproject.toml file (PEP-517/PEP-518/PEP-621). This pip version also supports +# the proper version resolving with (sub-)dependencies defining dev extras. 
+requires = virtualenv>=20.24.6 + +[testenv] +extras = dev +deps = + pytest + pytest-cov + coverage +commands = + pytest --basetemp="{envtmpdir}" {posargs:--color=yes --cov=dissect --cov-report=term-missing -v tests} + coverage report + coverage xml + +[testenv:build] +package = skip +deps = + build +commands = + pyproject-build + +[testenv:fix] +package = skip +deps = + black==23.1.0 + isort==5.11.4 +commands = + black dissect tests + isort dissect tests + +[testenv:lint] +package = skip +deps = + black==23.1.0 + flake8 + flake8-black + flake8-isort + isort==5.11.4 + vermin +commands = + flake8 dissect tests + vermin -t=3.9- --no-tips --lint dissect tests + +[flake8] +max-line-length = 120 +extend-ignore = + # See https://github.com/PyCQA/pycodestyle/issues/373 + E203, +statistics = True + +[testenv:docs-build] +allowlist_externals = make +deps = + sphinx + sphinx-autoapi + sphinx_argparse_cli + sphinx-copybutton + sphinx-design + furo +commands = + make -C tests/docs clean + make -C tests/docs html + +[testenv:docs-linkcheck] +allowlist_externals = make +deps = {[testenv:docs-build]deps} +commands = + make -C tests/docs clean + make -C tests/docs linkcheck