From 87bbbbc68d42285ee3fd9bf4e279dde193b43fce Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Thu, 24 Jul 2025 14:40:09 +0900 Subject: [PATCH 01/15] adding ci files --- .github/workflows/kernel_build.yml | 32 +++ .github/workflows/run-blktests.yml | 148 +++++++++++++ CODE_OF_CONDUCT.md | 134 ++++++++++++ LICENSE | 339 +++++++++++++++++++++++++++++ LICENSES/GPL-2.0-or-later.txt | 339 +++++++++++++++++++++++++++++ README.md | 11 + 6 files changed, 1003 insertions(+) create mode 100644 .github/workflows/kernel_build.yml create mode 100644 .github/workflows/run-blktests.yml create mode 100644 CODE_OF_CONDUCT.md create mode 100644 LICENSE create mode 100644 LICENSES/GPL-2.0-or-later.txt create mode 100644 README.md diff --git a/.github/workflows/kernel_build.yml b/.github/workflows/kernel_build.yml new file mode 100644 index 0000000000000..ad344105ec434 --- /dev/null +++ b/.github/workflows/kernel_build.yml @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# +# Copyright (c) 2025 Western Digital Corporation or its affiliates. + +name: blktests-ci + +on: + pull_request: + +jobs: + build-kernel: + runs-on: ubuntu-latest + steps: + - name: Configure git + run: | + git config --global --add safe.directory '*' + - name: Checkout git + run: | + sudo apt-get install -y libelf-dev + mkdir -p linux + cd linux + git init + git remote add origin https://github.com/${{ github.repository }} + git fetch origin --depth=5 ${{ github.event.pull_request.head.sha }} + git reset --hard ${{ github.event.pull_request.head.sha }} + git log -1 + - name: Build kernel + run: | + cd linux + make defconfig + make -j 8 + diff --git a/.github/workflows/run-blktests.yml b/.github/workflows/run-blktests.yml new file mode 100644 index 0000000000000..45d1047046867 --- /dev/null +++ b/.github/workflows/run-blktests.yml @@ -0,0 +1,148 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# +# Copyright (c) 2025 Western Digital Corporation or its affiliates. +# +# Authors: Dennis Maisenbacher (dennis.maisenbacher@wdc.com) + +name: Run blktests + +on: + pull_request: + +env: + KERNEL_REF: "${{ github.event.pull_request.head.sha }}" + KERNEL_TREE: "https://github.com/${{ github.repository }}" + +#This workflow requires an actions-runner-controllers (ARC) to be active. +#The k8s cluster of this ARC needs KubeVirt to be installed. +jobs: + build-and-test-kernel: + #This step runs in a container in the k8s cluster + runs-on: arc-vm-linux-blktests + steps: + - name: Checkout blktests-ci + uses: actions/checkout@v4 + with: + repository: linux-blktests/blktests-ci + path: blktests-ci + + - name: Build kernel and package it into a containerimage + run: | + cd blktests-ci/playbooks/roles/kernel-builder-k8s-job/templates + docker build \ + --build-arg KERNEL_TREE=${KERNEL_TREE} \ + --build-arg KERNEL_REF=${KERNEL_REF} \ + -t linux-kernel-containerdisk \ + -f Dockerfile.linux-kernel-containerdisk . 2>&1 | tee build.log + #Setting KERNEL_VERSION var which is latern needed for notifying the VM what kernel to pick up + cat build.log | grep KERNEL_VERSION | awk '{print $3}' | grep KERNEL_VERSION >> $GITHUB_ENV + + - name: Push the new Fedora containerimage with the freshly build kernel + run: | + docker tag linux-kernel-containerdisk registry-service.docker-registry.svc.cluster.local/linux-kernel-containerdisk:${KERNEL_VERSION} + docker push registry-service.docker-registry.svc.cluster.local/linux-kernel-containerdisk:${KERNEL_VERSION} + + - name: Run in VM + uses: ./blktests-ci/.github/actions/kubevirt-action + with: + kernel_version: ${{ env.KERNEL_VERSION }} + run_cmds: | + #Print VM debug info + uname -a + cat /etc/os-release + lsblk + + #Install build dependencies for blktests + sudo dnf install -y gcc \ + clang \ + make \ + util-linux \ + llvm \ + gawk \ + fio \ + udev \ + kmod \ + coreutils \ + gcc \ + gzip \ + e2fsprogs \ + xfsprogs \ + f2fs-tools \ + btrfs-progs \ + device-mapper-multipath \ + blktrace \ + kernel-headers \ + liburing \ + liburing-devel \ + nbd \ + device-mapper \ + ktls-utils \ + dosfstools \ + mdadm \ + bc \ + libnl3-cli \ + cryptsetup \ + sg3_utils \ + pciutils \ + unzip \ + jq \ + nvme-cli \ + git \ + wget + + git clone https://github.com/linux-blktests/blktests.git + + cd blktests + make + + #ATTENTION! This section formats all available NVMe devices. Be careful when changing the `runs-on` tag! + #This step runs in a VM with the previously compiled kernel in the k8s cluster + + # Run blktests block group + cat > config << EOF + TEST_DEVS=(${BDEV0}) + EOF + export RUN_ZONED_TESTS=1 + sudo ./check block + + # Run blktests nvme group + cat > config << EOF + TEST_DEVS=(${BDEV0}) + NVMET_TRTYPES="loop rdma tcp" + EOF + sudo ./check nvme + + # Run blktests scsi group + cat > config << EOF + TEST_DEVS=() + export RUN_ZONED_TESTS=1 + EOF + sudo ./check scsi + + # Run blktests misc groups + cat > config << EOF + TEST_DEVS=() + EOF + sudo ./check dm loop md nbd throtl ublk + + # Run ZBD blktests without TEST_DEVS + cat > config << EOF + TEST_DEVS=() + EOF + export RUN_ZONED_TESTS=1 + sudo ./check zbd + + # Run ZBD blktests with TEST_DEVS + cat > config << EOF + TEST_DEVS=(${ZBD0}) + DEVICE_ONLY=1 + EOF + + export RUN_ZONED_TESTS=1 + sudo ./check zbd + + # Run blktests srp group + cat > config << EOF + TEST_DEVS=() + EOF + sudo ./check srp diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000000000..4edb70c22d479 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,134 @@ + +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, caste, color, religion, or sexual +identity and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. + +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the overall + community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or advances of + any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email address, + without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official email address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +opensource@wdc.com. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series of +actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or permanent +ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within the +community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.1, available at +[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. + +Community Impact Guidelines were inspired by +[Mozilla's code of conduct enforcement ladder][Mozilla CoC]. + +For answers to common questions about this code of conduct, see the FAQ at +[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at +[https://www.contributor-covenant.org/translations][translations]. + +[homepage]: https://www.contributor-covenant.org +[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html +[Mozilla CoC]: https://github.com/mozilla/diversity +[FAQ]: https://www.contributor-covenant.org/faq +[translations]: https://www.contributor-covenant.org/translations + diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000..d159169d10508 --- /dev/null +++ b/LICENSE @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/LICENSES/GPL-2.0-or-later.txt b/LICENSES/GPL-2.0-or-later.txt new file mode 100644 index 0000000000000..d159169d10508 --- /dev/null +++ b/LICENSES/GPL-2.0-or-later.txt @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/README.md b/README.md new file mode 100644 index 0000000000000..339ce7da25ed4 --- /dev/null +++ b/README.md @@ -0,0 +1,11 @@ + + +# blktests-kpd-ci +This is a collection of GitHub actions workflow scripts for Continuous +Integration (CI) of Linux kernel using blktests. The scripts are intended to be +triggered by [Kernel Patches Daemon](https://github.com/kernel-patches/kernel-patches-daemon). +To use the scripts, specify this repository and the main branch as "ci_repo" and +"ci_branch" parameters respectively in the Kernel Patches Daemon config file. From 99ca72c2f43f8eb3c0f27822c7a31ac03a6f6eb3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 10 Jul 2025 15:33:25 +0200 Subject: [PATCH 02/15] iomap: header diet Drop various unused #include statements. Signed-off-by: Christoph Hellwig Reviewed-by: Joanne Koong Reviewed-by: "Darrick J. Wong" --- fs/iomap/buffered-io.c | 8 -------- fs/iomap/direct-io.c | 5 ----- fs/iomap/fiemap.c | 3 --- fs/iomap/iter.c | 1 - fs/iomap/seek.c | 4 ---- fs/iomap/swapfile.c | 3 --- fs/iomap/trace.c | 1 - 7 files changed, 25 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index fb4519158f3a7..38263b8f4bb86 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -3,18 +3,10 @@ * Copyright (C) 2010 Red Hat, Inc. * Copyright (C) 2016-2023 Christoph Hellwig. */ -#include -#include -#include #include -#include -#include #include -#include #include #include -#include -#include #include #include "internal.h" #include "trace.h" diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 844261a31156c..6f25d4cfea9f7 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -3,14 +3,9 @@ * Copyright (C) 2010 Red Hat, Inc. * Copyright (c) 2016-2025 Christoph Hellwig. */ -#include -#include -#include #include #include #include -#include -#include #include #include "internal.h" #include "trace.h" diff --git a/fs/iomap/fiemap.c b/fs/iomap/fiemap.c index 80675c42e94e1..d11dadff82865 100644 --- a/fs/iomap/fiemap.c +++ b/fs/iomap/fiemap.c @@ -2,9 +2,6 @@ /* * Copyright (c) 2016-2021 Christoph Hellwig. */ -#include -#include -#include #include #include #include diff --git a/fs/iomap/iter.c b/fs/iomap/iter.c index 6ffc6a7b9ba50..cef77ca0c20bc 100644 --- a/fs/iomap/iter.c +++ b/fs/iomap/iter.c @@ -3,7 +3,6 @@ * Copyright (C) 2010 Red Hat, Inc. * Copyright (c) 2016-2021 Christoph Hellwig. */ -#include #include #include "trace.h" diff --git a/fs/iomap/seek.c b/fs/iomap/seek.c index 04d7919636c1f..56db2dd4b10dd 100644 --- a/fs/iomap/seek.c +++ b/fs/iomap/seek.c @@ -3,12 +3,8 @@ * Copyright (C) 2017 Red Hat, Inc. * Copyright (c) 2018-2021 Christoph Hellwig. */ -#include -#include -#include #include #include -#include static int iomap_seek_hole_iter(struct iomap_iter *iter, loff_t *hole_pos) diff --git a/fs/iomap/swapfile.c b/fs/iomap/swapfile.c index c1a762c10ce4c..0db77c449467a 100644 --- a/fs/iomap/swapfile.c +++ b/fs/iomap/swapfile.c @@ -3,9 +3,6 @@ * Copyright (C) 2018 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ -#include -#include -#include #include #include diff --git a/fs/iomap/trace.c b/fs/iomap/trace.c index 728d5443daf58..da217246b1a94 100644 --- a/fs/iomap/trace.c +++ b/fs/iomap/trace.c @@ -3,7 +3,6 @@ * Copyright (c) 2019 Christoph Hellwig */ #include -#include /* * We include this last to have the helpers above available for the trace From f367c1bf93d7c60d6a4d24ca1a3bc5ec4833e0df Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 10 Jul 2025 15:33:26 +0200 Subject: [PATCH 03/15] iomap: pass more arguments using the iomap writeback context Add inode and wpc fields to pass the inode and writeback context that are needed in the entire writeback call chain, and let the callers initialize all fields in the writeback context before calling iomap_writepages to simplify the argument passing. Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Reviewed-by: Joanne Koong Reviewed-by: Johannes Thumshirn Reviewed-by: "Darrick J. Wong" --- block/fops.c | 8 +++++-- fs/gfs2/aops.c | 8 +++++-- fs/iomap/buffered-io.c | 52 +++++++++++++++++++----------------------- fs/xfs/xfs_aops.c | 24 +++++++++++++------ fs/zonefs/file.c | 8 +++++-- include/linux/iomap.h | 6 ++--- 6 files changed, 61 insertions(+), 45 deletions(-) diff --git a/block/fops.c b/block/fops.c index 1309861d4c2c4..3394263d942b7 100644 --- a/block/fops.c +++ b/block/fops.c @@ -558,9 +558,13 @@ static const struct iomap_writeback_ops blkdev_writeback_ops = { static int blkdev_writepages(struct address_space *mapping, struct writeback_control *wbc) { - struct iomap_writepage_ctx wpc = { }; + struct iomap_writepage_ctx wpc = { + .inode = mapping->host, + .wbc = wbc, + .ops = &blkdev_writeback_ops + }; - return iomap_writepages(mapping, wbc, &wpc, &blkdev_writeback_ops); + return iomap_writepages(&wpc); } const struct address_space_operations def_blk_aops = { diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 14f204cd5a82f..47d74afd63ac9 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -159,7 +159,11 @@ static int gfs2_writepages(struct address_space *mapping, struct writeback_control *wbc) { struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping); - struct iomap_writepage_ctx wpc = { }; + struct iomap_writepage_ctx wpc = { + .inode = mapping->host, + .wbc = wbc, + .ops = &gfs2_writeback_ops, + }; int ret; /* @@ -168,7 +172,7 @@ static int gfs2_writepages(struct address_space *mapping, * want balance_dirty_pages() to loop indefinitely trying to write out * pages held in the ail that it can't find. */ - ret = iomap_writepages(mapping, wbc, &wpc, &gfs2_writeback_ops); + ret = iomap_writepages(&wpc); if (ret == 0 && wbc->nr_to_write > 0) set_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags); return ret; diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 38263b8f4bb86..fd6cee35c9483 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1621,20 +1621,19 @@ static int iomap_submit_ioend(struct iomap_writepage_ctx *wpc, int error) } static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writepage_ctx *wpc, - struct writeback_control *wbc, struct inode *inode, loff_t pos, - u16 ioend_flags) + loff_t pos, u16 ioend_flags) { struct bio *bio; bio = bio_alloc_bioset(wpc->iomap.bdev, BIO_MAX_VECS, - REQ_OP_WRITE | wbc_to_write_flags(wbc), + REQ_OP_WRITE | wbc_to_write_flags(wpc->wbc), GFP_NOFS, &iomap_ioend_bioset); bio->bi_iter.bi_sector = iomap_sector(&wpc->iomap, pos); bio->bi_end_io = iomap_writepage_end_bio; - bio->bi_write_hint = inode->i_write_hint; - wbc_init_bio(wbc, bio); + bio->bi_write_hint = wpc->inode->i_write_hint; + wbc_init_bio(wpc->wbc, bio); wpc->nr_folios = 0; - return iomap_init_ioend(inode, bio, pos, ioend_flags); + return iomap_init_ioend(wpc->inode, bio, pos, ioend_flags); } static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos, @@ -1673,9 +1672,7 @@ static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos, * writepage context that the caller will need to submit. */ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, - struct writeback_control *wbc, struct folio *folio, - struct inode *inode, loff_t pos, loff_t end_pos, - unsigned len) + struct folio *folio, loff_t pos, loff_t end_pos, unsigned len) { struct iomap_folio_state *ifs = folio->private; size_t poff = offset_in_folio(folio, pos); @@ -1696,8 +1693,7 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, error = iomap_submit_ioend(wpc, 0); if (error) return error; - wpc->ioend = iomap_alloc_ioend(wpc, wbc, inode, pos, - ioend_flags); + wpc->ioend = iomap_alloc_ioend(wpc, pos, ioend_flags); } if (!bio_add_folio(&wpc->ioend->io_bio, folio, len, poff)) @@ -1751,24 +1747,24 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, if (wpc->ioend->io_offset + wpc->ioend->io_size > end_pos) wpc->ioend->io_size = end_pos - wpc->ioend->io_offset; - wbc_account_cgroup_owner(wbc, folio, len); + wbc_account_cgroup_owner(wpc->wbc, folio, len); return 0; } static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc, - struct writeback_control *wbc, struct folio *folio, - struct inode *inode, u64 pos, u64 end_pos, - unsigned dirty_len, unsigned *count) + struct folio *folio, u64 pos, u64 end_pos, unsigned dirty_len, + unsigned *count) { int error; do { unsigned map_len; - error = wpc->ops->map_blocks(wpc, inode, pos, dirty_len); + error = wpc->ops->map_blocks(wpc, wpc->inode, pos, dirty_len); if (error) break; - trace_iomap_writepage_map(inode, pos, dirty_len, &wpc->iomap); + trace_iomap_writepage_map(wpc->inode, pos, dirty_len, + &wpc->iomap); map_len = min_t(u64, dirty_len, wpc->iomap.offset + wpc->iomap.length - pos); @@ -1782,8 +1778,8 @@ static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc, case IOMAP_HOLE: break; default: - error = iomap_add_to_ioend(wpc, wbc, folio, inode, pos, - end_pos, map_len); + error = iomap_add_to_ioend(wpc, folio, pos, end_pos, + map_len); if (!error) (*count)++; break; @@ -1865,10 +1861,10 @@ static bool iomap_writepage_handle_eof(struct folio *folio, struct inode *inode, } static int iomap_writepage_map(struct iomap_writepage_ctx *wpc, - struct writeback_control *wbc, struct folio *folio) + struct folio *folio) { struct iomap_folio_state *ifs = folio->private; - struct inode *inode = folio->mapping->host; + struct inode *inode = wpc->inode; u64 pos = folio_pos(folio); u64 end_pos = pos + folio_size(folio); u64 end_aligned = 0; @@ -1915,8 +1911,8 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc, */ end_aligned = round_up(end_pos, i_blocksize(inode)); while ((rlen = iomap_find_dirty_range(folio, &pos, end_aligned))) { - error = iomap_writepage_map_blocks(wpc, wbc, folio, inode, - pos, end_pos, rlen, &count); + error = iomap_writepage_map_blocks(wpc, folio, pos, end_pos, + rlen, &count); if (error) break; pos += rlen; @@ -1952,10 +1948,9 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc, } int -iomap_writepages(struct address_space *mapping, struct writeback_control *wbc, - struct iomap_writepage_ctx *wpc, - const struct iomap_writeback_ops *ops) +iomap_writepages(struct iomap_writepage_ctx *wpc) { + struct address_space *mapping = wpc->inode->i_mapping; struct folio *folio = NULL; int error; @@ -1967,9 +1962,8 @@ iomap_writepages(struct address_space *mapping, struct writeback_control *wbc, PF_MEMALLOC)) return -EIO; - wpc->ops = ops; - while ((folio = writeback_iter(mapping, wbc, folio, &error))) - error = iomap_writepage_map(wpc, wbc, folio); + while ((folio = writeback_iter(mapping, wpc->wbc, folio, &error))) + error = iomap_writepage_map(wpc, folio); return iomap_submit_ioend(wpc, error); } EXPORT_SYMBOL_GPL(iomap_writepages); diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 63151feb9c3fd..65485a52df3b3 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -636,19 +636,29 @@ xfs_vm_writepages( xfs_iflags_clear(ip, XFS_ITRUNCATED); if (xfs_is_zoned_inode(ip)) { - struct xfs_zoned_writepage_ctx xc = { }; + struct xfs_zoned_writepage_ctx xc = { + .ctx = { + .inode = mapping->host, + .wbc = wbc, + .ops = &xfs_zoned_writeback_ops + }, + }; int error; - error = iomap_writepages(mapping, wbc, &xc.ctx, - &xfs_zoned_writeback_ops); + error = iomap_writepages(&xc.ctx); if (xc.open_zone) xfs_open_zone_put(xc.open_zone); return error; } else { - struct xfs_writepage_ctx wpc = { }; - - return iomap_writepages(mapping, wbc, &wpc.ctx, - &xfs_writeback_ops); + struct xfs_writepage_ctx wpc = { + .ctx = { + .inode = mapping->host, + .wbc = wbc, + .ops = &xfs_writeback_ops + }, + }; + + return iomap_writepages(&wpc.ctx); } } diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c index 42e2c0065bb30..edca4bbe4b723 100644 --- a/fs/zonefs/file.c +++ b/fs/zonefs/file.c @@ -152,9 +152,13 @@ static const struct iomap_writeback_ops zonefs_writeback_ops = { static int zonefs_writepages(struct address_space *mapping, struct writeback_control *wbc) { - struct iomap_writepage_ctx wpc = { }; + struct iomap_writepage_ctx wpc = { + .inode = mapping->host, + .wbc = wbc, + .ops = &zonefs_writeback_ops, + }; - return iomap_writepages(mapping, wbc, &wpc, &zonefs_writeback_ops); + return iomap_writepages(&wpc); } static int zonefs_swap_activate(struct swap_info_struct *sis, diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 522644d62f30f..00179c9387c5d 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -448,6 +448,8 @@ struct iomap_writeback_ops { struct iomap_writepage_ctx { struct iomap iomap; + struct inode *inode; + struct writeback_control *wbc; struct iomap_ioend *ioend; const struct iomap_writeback_ops *ops; u32 nr_folios; /* folios added to the ioend */ @@ -461,9 +463,7 @@ void iomap_finish_ioends(struct iomap_ioend *ioend, int error); void iomap_ioend_try_merge(struct iomap_ioend *ioend, struct list_head *more_ioends); void iomap_sort_ioends(struct list_head *ioend_list); -int iomap_writepages(struct address_space *mapping, - struct writeback_control *wbc, struct iomap_writepage_ctx *wpc, - const struct iomap_writeback_ops *ops); +int iomap_writepages(struct iomap_writepage_ctx *wpc); /* * Flags for direct I/O ->end_io: From f4802651d1c65d60c79f38facd03075d47b3f725 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Thu, 10 Jul 2025 15:33:27 +0200 Subject: [PATCH 04/15] iomap: cleanup the pending writeback tracking in iomap_writepage_map_blocks We don't care about the count of outstanding ioends, just if there is one. Replace the count variable passed to iomap_writepage_map_blocks with a boolean to make that more clear. Signed-off-by: Joanne Koong [hch: rename the variable, update the commit message] Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Reviewed-by: Johannes Thumshirn Reviewed-by: "Darrick J. Wong" --- fs/iomap/buffered-io.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index fd6cee35c9483..8b87e60a99f56 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1753,7 +1753,7 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc, struct folio *folio, u64 pos, u64 end_pos, unsigned dirty_len, - unsigned *count) + bool *wb_pending) { int error; @@ -1781,7 +1781,7 @@ static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc, error = iomap_add_to_ioend(wpc, folio, pos, end_pos, map_len); if (!error) - (*count)++; + *wb_pending = true; break; } dirty_len -= map_len; @@ -1868,7 +1868,7 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc, u64 pos = folio_pos(folio); u64 end_pos = pos + folio_size(folio); u64 end_aligned = 0; - unsigned count = 0; + bool wb_pending = false; int error = 0; u32 rlen; @@ -1912,13 +1912,13 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc, end_aligned = round_up(end_pos, i_blocksize(inode)); while ((rlen = iomap_find_dirty_range(folio, &pos, end_aligned))) { error = iomap_writepage_map_blocks(wpc, folio, pos, end_pos, - rlen, &count); + rlen, &wb_pending); if (error) break; pos += rlen; } - if (count) + if (wb_pending) wpc->nr_folios++; /* @@ -1940,7 +1940,7 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc, if (atomic_dec_and_test(&ifs->write_bytes_pending)) folio_end_writeback(folio); } else { - if (!count) + if (!wb_pending) folio_end_writeback(folio); } mapping_set_error(inode->i_mapping, error); From f98a6ad0d7fc716d6c1958888173367a886c7e79 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 10 Jul 2025 15:33:28 +0200 Subject: [PATCH 05/15] iomap: refactor the writeback interface Replace ->map_blocks with a new ->writeback_range, which differs in the following ways: - it must also queue up the I/O for writeback, that is called into the slightly refactored and extended in scope iomap_add_to_ioend for each region - can handle only a part of the requested region, that is the retry loop for partial mappings moves to the caller - handles cleanup on failures as well, and thus also replaces the discard_folio method only implemented by XFS. This will allow to use the iomap writeback code also for file systems that are not block based like fuse. Co-developed-by: Joanne Koong Signed-off-by: Joanne Koong Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Reviewed-by: "Darrick J. Wong" Acked-by: Damien Le Moal # zonefs --- .../filesystems/iomap/operations.rst | 32 ++--- block/fops.c | 25 ++-- fs/gfs2/bmap.c | 26 ++-- fs/iomap/buffered-io.c | 96 ++++++------- fs/iomap/trace.h | 2 +- fs/xfs/xfs_aops.c | 128 +++++++++++------- fs/zonefs/file.c | 28 ++-- include/linux/iomap.h | 21 ++- 8 files changed, 197 insertions(+), 161 deletions(-) diff --git a/Documentation/filesystems/iomap/operations.rst b/Documentation/filesystems/iomap/operations.rst index 3b628e370d88b..f07c8fdb20465 100644 --- a/Documentation/filesystems/iomap/operations.rst +++ b/Documentation/filesystems/iomap/operations.rst @@ -271,7 +271,7 @@ writeback. It does not lock ``i_rwsem`` or ``invalidate_lock``. The dirty bit will be cleared for all folios run through the -``->map_blocks`` machinery described below even if the writeback fails. +``->writeback_range`` machinery described below even if the writeback fails. This is to prevent dirty folio clots when storage devices fail; an ``-EIO`` is recorded for userspace to collect via ``fsync``. @@ -283,15 +283,14 @@ The ``ops`` structure must be specified and is as follows: .. code-block:: c struct iomap_writeback_ops { - int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode, - loff_t offset, unsigned len); - int (*submit_ioend)(struct iomap_writepage_ctx *wpc, int status); - void (*discard_folio)(struct folio *folio, loff_t pos); + int (*writeback_range)(struct iomap_writepage_ctx *wpc, + struct folio *folio, u64 pos, unsigned int len, u64 end_pos); + int (*submit_ioend)(struct iomap_writepage_ctx *wpc, int status); }; The fields are as follows: - - ``map_blocks``: Sets ``wpc->iomap`` to the space mapping of the file + - ``writeback_range``: Sets ``wpc->iomap`` to the space mapping of the file range (in bytes) given by ``offset`` and ``len``. iomap calls this function for each dirty fs block in each dirty folio, though it will `reuse mappings @@ -306,6 +305,15 @@ The fields are as follows: This revalidation must be open-coded by the filesystem; it is unclear if ``iomap::validity_cookie`` can be reused for this purpose. + + If this methods fails to schedule I/O for any part of a dirty folio, it + should throw away any reservations that may have been made for the write. + The folio will be marked clean and an ``-EIO`` recorded in the + pagecache. + Filesystems can use this callback to `remove + `_ + delalloc reservations to avoid having delalloc reservations for + clean pagecache. This function must be supplied by the filesystem. - ``submit_ioend``: Allows the file systems to hook into writeback bio @@ -316,18 +324,6 @@ The fields are as follows: transactions from process context before submitting the bio. This function is optional. - - ``discard_folio``: iomap calls this function after ``->map_blocks`` - fails to schedule I/O for any part of a dirty folio. - The function should throw away any reservations that may have been - made for the write. - The folio will be marked clean and an ``-EIO`` recorded in the - pagecache. - Filesystems can use this callback to `remove - `_ - delalloc reservations to avoid having delalloc reservations for - clean pagecache. - This function is optional. - Pagecache Writeback Completion ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/block/fops.c b/block/fops.c index 3394263d942b7..b500ff8f55dd3 100644 --- a/block/fops.c +++ b/block/fops.c @@ -537,22 +537,29 @@ static void blkdev_readahead(struct readahead_control *rac) iomap_readahead(rac, &blkdev_iomap_ops); } -static int blkdev_map_blocks(struct iomap_writepage_ctx *wpc, - struct inode *inode, loff_t offset, unsigned int len) +static ssize_t blkdev_writeback_range(struct iomap_writepage_ctx *wpc, + struct folio *folio, u64 offset, unsigned int len, u64 end_pos) { - loff_t isize = i_size_read(inode); + loff_t isize = i_size_read(wpc->inode); if (WARN_ON_ONCE(offset >= isize)) return -EIO; - if (offset >= wpc->iomap.offset && - offset < wpc->iomap.offset + wpc->iomap.length) - return 0; - return blkdev_iomap_begin(inode, offset, isize - offset, - IOMAP_WRITE, &wpc->iomap, NULL); + + if (offset < wpc->iomap.offset || + offset >= wpc->iomap.offset + wpc->iomap.length) { + int error; + + error = blkdev_iomap_begin(wpc->inode, offset, isize - offset, + IOMAP_WRITE, &wpc->iomap, NULL); + if (error) + return error; + } + + return iomap_add_to_ioend(wpc, folio, offset, end_pos, len); } static const struct iomap_writeback_ops blkdev_writeback_ops = { - .map_blocks = blkdev_map_blocks, + .writeback_range = blkdev_writeback_range, }; static int blkdev_writepages(struct address_space *mapping, diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 7703d04711392..0cc41de54aba6 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -2469,23 +2469,25 @@ int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length) return error; } -static int gfs2_map_blocks(struct iomap_writepage_ctx *wpc, struct inode *inode, - loff_t offset, unsigned int len) +static ssize_t gfs2_writeback_range(struct iomap_writepage_ctx *wpc, + struct folio *folio, u64 offset, unsigned int len, u64 end_pos) { - int ret; - - if (WARN_ON_ONCE(gfs2_is_stuffed(GFS2_I(inode)))) + if (WARN_ON_ONCE(gfs2_is_stuffed(GFS2_I(wpc->inode)))) return -EIO; - if (offset >= wpc->iomap.offset && - offset < wpc->iomap.offset + wpc->iomap.length) - return 0; + if (offset < wpc->iomap.offset || + offset >= wpc->iomap.offset + wpc->iomap.length) { + int ret; - memset(&wpc->iomap, 0, sizeof(wpc->iomap)); - ret = gfs2_iomap_get(inode, offset, INT_MAX, &wpc->iomap); - return ret; + memset(&wpc->iomap, 0, sizeof(wpc->iomap)); + ret = gfs2_iomap_get(wpc->inode, offset, INT_MAX, &wpc->iomap); + if (ret) + return ret; + } + + return iomap_add_to_ioend(wpc, folio, offset, end_pos, len); } const struct iomap_writeback_ops gfs2_writeback_ops = { - .map_blocks = gfs2_map_blocks, + .writeback_range = gfs2_writeback_range, }; diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 8b87e60a99f56..ecc221592acea 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1671,14 +1671,30 @@ static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos, * At the end of a writeback pass, there will be a cached ioend remaining on the * writepage context that the caller will need to submit. */ -static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, - struct folio *folio, loff_t pos, loff_t end_pos, unsigned len) +ssize_t iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct folio *folio, + loff_t pos, loff_t end_pos, unsigned int dirty_len) { struct iomap_folio_state *ifs = folio->private; size_t poff = offset_in_folio(folio, pos); unsigned int ioend_flags = 0; + unsigned int map_len = min_t(u64, dirty_len, + wpc->iomap.offset + wpc->iomap.length - pos); int error; + trace_iomap_add_to_ioend(wpc->inode, pos, dirty_len, &wpc->iomap); + + WARN_ON_ONCE(!folio->private && map_len < dirty_len); + + switch (wpc->iomap.type) { + case IOMAP_INLINE: + WARN_ON_ONCE(1); + return -EIO; + case IOMAP_HOLE: + return map_len; + default: + break; + } + if (wpc->iomap.type == IOMAP_UNWRITTEN) ioend_flags |= IOMAP_IOEND_UNWRITTEN; if (wpc->iomap.flags & IOMAP_F_SHARED) @@ -1696,11 +1712,11 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, wpc->ioend = iomap_alloc_ioend(wpc, pos, ioend_flags); } - if (!bio_add_folio(&wpc->ioend->io_bio, folio, len, poff)) + if (!bio_add_folio(&wpc->ioend->io_bio, folio, map_len, poff)) goto new_ioend; if (ifs) - atomic_add(len, &ifs->write_bytes_pending); + atomic_add(map_len, &ifs->write_bytes_pending); /* * Clamp io_offset and io_size to the incore EOF so that ondisk @@ -1743,63 +1759,39 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, * Note that this defeats the ability to chain the ioends of * appending writes. */ - wpc->ioend->io_size += len; + wpc->ioend->io_size += map_len; if (wpc->ioend->io_offset + wpc->ioend->io_size > end_pos) wpc->ioend->io_size = end_pos - wpc->ioend->io_offset; - wbc_account_cgroup_owner(wpc->wbc, folio, len); - return 0; + wbc_account_cgroup_owner(wpc->wbc, folio, map_len); + return map_len; } +EXPORT_SYMBOL_GPL(iomap_add_to_ioend); -static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc, - struct folio *folio, u64 pos, u64 end_pos, unsigned dirty_len, +static int iomap_writeback_range(struct iomap_writepage_ctx *wpc, + struct folio *folio, u64 pos, u32 rlen, u64 end_pos, bool *wb_pending) { - int error; - do { - unsigned map_len; - - error = wpc->ops->map_blocks(wpc, wpc->inode, pos, dirty_len); - if (error) - break; - trace_iomap_writepage_map(wpc->inode, pos, dirty_len, - &wpc->iomap); + ssize_t ret; - map_len = min_t(u64, dirty_len, - wpc->iomap.offset + wpc->iomap.length - pos); - WARN_ON_ONCE(!folio->private && map_len < dirty_len); + ret = wpc->ops->writeback_range(wpc, folio, pos, rlen, end_pos); + if (WARN_ON_ONCE(ret == 0 || ret > rlen)) + return -EIO; + if (ret < 0) + return ret; + rlen -= ret; + pos += ret; - switch (wpc->iomap.type) { - case IOMAP_INLINE: - WARN_ON_ONCE(1); - error = -EIO; - break; - case IOMAP_HOLE: - break; - default: - error = iomap_add_to_ioend(wpc, folio, pos, end_pos, - map_len); - if (!error) - *wb_pending = true; - break; - } - dirty_len -= map_len; - pos += map_len; - } while (dirty_len && !error); + /* + * Holes are not be written back by ->writeback_range, so track + * if we did handle anything that is not a hole here. + */ + if (wpc->iomap.type != IOMAP_HOLE) + *wb_pending = true; + } while (rlen); - /* - * We cannot cancel the ioend directly here on error. We may have - * already set other pages under writeback and hence we have to run I/O - * completion to mark the error state of the pages under writeback - * appropriately. - * - * Just let the file system know what portion of the folio failed to - * map. - */ - if (error && wpc->ops->discard_folio) - wpc->ops->discard_folio(folio, pos); - return error; + return 0; } /* @@ -1911,8 +1903,8 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc, */ end_aligned = round_up(end_pos, i_blocksize(inode)); while ((rlen = iomap_find_dirty_range(folio, &pos, end_aligned))) { - error = iomap_writepage_map_blocks(wpc, folio, pos, end_pos, - rlen, &wb_pending); + error = iomap_writeback_range(wpc, folio, pos, rlen, end_pos, + &wb_pending); if (error) break; pos += rlen; diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h index 455cc6f90be03..aaea02c9560a3 100644 --- a/fs/iomap/trace.h +++ b/fs/iomap/trace.h @@ -169,7 +169,7 @@ DEFINE_EVENT(iomap_class, name, \ DEFINE_IOMAP_EVENT(iomap_iter_dstmap); DEFINE_IOMAP_EVENT(iomap_iter_srcmap); -TRACE_EVENT(iomap_writepage_map, +TRACE_EVENT(iomap_add_to_ioend, TP_PROTO(struct inode *inode, u64 pos, unsigned int dirty_len, struct iomap *iomap), TP_ARGS(inode, pos, dirty_len, iomap), diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 65485a52df3b3..f6d44ab78442a 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -233,6 +233,47 @@ xfs_end_bio( spin_unlock_irqrestore(&ip->i_ioend_lock, flags); } +/* + * We cannot cancel the ioend directly on error. We may have already set other + * pages under writeback and hence we have to run I/O completion to mark the + * error state of the pages under writeback appropriately. + * + * If the folio has delalloc blocks on it, the caller is asking us to punch them + * out. If we don't, we can leave a stale delalloc mapping covered by a clean + * page that needs to be dirtied again before the delalloc mapping can be + * converted. This stale delalloc mapping can trip up a later direct I/O read + * operation on the same region. + * + * We prevent this by truncating away the delalloc regions on the folio. Because + * they are delalloc, we can do this without needing a transaction. Indeed - if + * we get ENOSPC errors, we have to be able to do this truncation without a + * transaction as there is no space left for block reservation (typically why + * we see a ENOSPC in writeback). + */ +static void +xfs_discard_folio( + struct folio *folio, + loff_t pos) +{ + struct xfs_inode *ip = XFS_I(folio->mapping->host); + struct xfs_mount *mp = ip->i_mount; + + if (xfs_is_shutdown(mp)) + return; + + xfs_alert_ratelimited(mp, + "page discard on page "PTR_FMT", inode 0x%llx, pos %llu.", + folio, ip->i_ino, pos); + + /* + * The end of the punch range is always the offset of the first + * byte of the next folio. Hence the end offset is only dependent on the + * folio itself and not the start offset that is passed in. + */ + xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK, pos, + folio_pos(folio) + folio_size(folio), NULL); +} + /* * Fast revalidation of the cached writeback mapping. Return true if the current * mapping is valid, false otherwise. @@ -278,13 +319,12 @@ xfs_imap_valid( static int xfs_map_blocks( struct iomap_writepage_ctx *wpc, - struct inode *inode, loff_t offset, unsigned int len) { - struct xfs_inode *ip = XFS_I(inode); + struct xfs_inode *ip = XFS_I(wpc->inode); struct xfs_mount *mp = ip->i_mount; - ssize_t count = i_blocksize(inode); + ssize_t count = i_blocksize(wpc->inode); xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count); xfs_fileoff_t cow_fsb; @@ -436,6 +476,24 @@ xfs_map_blocks( return 0; } +static ssize_t +xfs_writeback_range( + struct iomap_writepage_ctx *wpc, + struct folio *folio, + u64 offset, + unsigned int len, + u64 end_pos) +{ + ssize_t ret; + + ret = xfs_map_blocks(wpc, offset, len); + if (!ret) + ret = iomap_add_to_ioend(wpc, folio, offset, end_pos, len); + if (ret < 0) + xfs_discard_folio(folio, offset); + return ret; +} + static bool xfs_ioend_needs_wq_completion( struct iomap_ioend *ioend) @@ -488,47 +546,9 @@ xfs_submit_ioend( return 0; } -/* - * If the folio has delalloc blocks on it, the caller is asking us to punch them - * out. If we don't, we can leave a stale delalloc mapping covered by a clean - * page that needs to be dirtied again before the delalloc mapping can be - * converted. This stale delalloc mapping can trip up a later direct I/O read - * operation on the same region. - * - * We prevent this by truncating away the delalloc regions on the folio. Because - * they are delalloc, we can do this without needing a transaction. Indeed - if - * we get ENOSPC errors, we have to be able to do this truncation without a - * transaction as there is no space left for block reservation (typically why - * we see a ENOSPC in writeback). - */ -static void -xfs_discard_folio( - struct folio *folio, - loff_t pos) -{ - struct xfs_inode *ip = XFS_I(folio->mapping->host); - struct xfs_mount *mp = ip->i_mount; - - if (xfs_is_shutdown(mp)) - return; - - xfs_alert_ratelimited(mp, - "page discard on page "PTR_FMT", inode 0x%llx, pos %llu.", - folio, ip->i_ino, pos); - - /* - * The end of the punch range is always the offset of the first - * byte of the next folio. Hence the end offset is only dependent on the - * folio itself and not the start offset that is passed in. - */ - xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK, pos, - folio_pos(folio) + folio_size(folio), NULL); -} - static const struct iomap_writeback_ops xfs_writeback_ops = { - .map_blocks = xfs_map_blocks, + .writeback_range = xfs_writeback_range, .submit_ioend = xfs_submit_ioend, - .discard_folio = xfs_discard_folio, }; struct xfs_zoned_writepage_ctx { @@ -545,11 +565,10 @@ XFS_ZWPC(struct iomap_writepage_ctx *ctx) static int xfs_zoned_map_blocks( struct iomap_writepage_ctx *wpc, - struct inode *inode, loff_t offset, unsigned int len) { - struct xfs_inode *ip = XFS_I(inode); + struct xfs_inode *ip = XFS_I(wpc->inode); struct xfs_mount *mp = ip->i_mount; xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + len); @@ -608,6 +627,24 @@ xfs_zoned_map_blocks( return 0; } +static ssize_t +xfs_zoned_writeback_range( + struct iomap_writepage_ctx *wpc, + struct folio *folio, + u64 offset, + unsigned int len, + u64 end_pos) +{ + ssize_t ret; + + ret = xfs_zoned_map_blocks(wpc, offset, len); + if (!ret) + ret = iomap_add_to_ioend(wpc, folio, offset, end_pos, len); + if (ret < 0) + xfs_discard_folio(folio, offset); + return ret; +} + static int xfs_zoned_submit_ioend( struct iomap_writepage_ctx *wpc, @@ -621,9 +658,8 @@ xfs_zoned_submit_ioend( } static const struct iomap_writeback_ops xfs_zoned_writeback_ops = { - .map_blocks = xfs_zoned_map_blocks, + .writeback_range = xfs_zoned_writeback_range, .submit_ioend = xfs_zoned_submit_ioend, - .discard_folio = xfs_discard_folio, }; STATIC int diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c index edca4bbe4b723..c88e2c8517533 100644 --- a/fs/zonefs/file.c +++ b/fs/zonefs/file.c @@ -124,29 +124,33 @@ static void zonefs_readahead(struct readahead_control *rac) * Map blocks for page writeback. This is used only on conventional zone files, * which implies that the page range can only be within the fixed inode size. */ -static int zonefs_write_map_blocks(struct iomap_writepage_ctx *wpc, - struct inode *inode, loff_t offset, - unsigned int len) +static ssize_t zonefs_writeback_range(struct iomap_writepage_ctx *wpc, + struct folio *folio, u64 offset, unsigned len, u64 end_pos) { - struct zonefs_zone *z = zonefs_inode_zone(inode); + struct zonefs_zone *z = zonefs_inode_zone(wpc->inode); if (WARN_ON_ONCE(zonefs_zone_is_seq(z))) return -EIO; - if (WARN_ON_ONCE(offset >= i_size_read(inode))) + if (WARN_ON_ONCE(offset >= i_size_read(wpc->inode))) return -EIO; /* If the mapping is already OK, nothing needs to be done */ - if (offset >= wpc->iomap.offset && - offset < wpc->iomap.offset + wpc->iomap.length) - return 0; + if (offset < wpc->iomap.offset || + offset >= wpc->iomap.offset + wpc->iomap.length) { + int error; + + error = zonefs_write_iomap_begin(wpc->inode, offset, + z->z_capacity - offset, IOMAP_WRITE, + &wpc->iomap, NULL); + if (error) + return error; + } - return zonefs_write_iomap_begin(inode, offset, - z->z_capacity - offset, - IOMAP_WRITE, &wpc->iomap, NULL); + return iomap_add_to_ioend(wpc, folio, offset, end_pos, len); } static const struct iomap_writeback_ops zonefs_writeback_ops = { - .map_blocks = zonefs_write_map_blocks, + .writeback_range = zonefs_writeback_range, }; static int zonefs_writepages(struct address_space *mapping, diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 00179c9387c5d..625d7911a2b5f 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -416,18 +416,20 @@ static inline struct iomap_ioend *iomap_ioend_from_bio(struct bio *bio) struct iomap_writeback_ops { /* - * Required, maps the blocks so that writeback can be performed on - * the range starting at offset. + * Required, performs writeback on the passed in range * - * Can return arbitrarily large regions, but we need to call into it at + * Can map arbitrarily large regions, but we need to call into it at * least once per folio to allow the file systems to synchronize with * the write path that could be invalidating mappings. * * An existing mapping from a previous call to this method can be reused * by the file system if it is still valid. + * + * Returns the number of bytes processed or a negative errno. */ - int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode, - loff_t offset, unsigned len); + ssize_t (*writeback_range)(struct iomap_writepage_ctx *wpc, + struct folio *folio, u64 pos, unsigned int len, + u64 end_pos); /* * Optional, allows the file systems to hook into bio submission, @@ -438,12 +440,6 @@ struct iomap_writeback_ops { * the bio could not be submitted. */ int (*submit_ioend)(struct iomap_writepage_ctx *wpc, int status); - - /* - * Optional, allows the file system to discard state on a page where - * we failed to submit any I/O. - */ - void (*discard_folio)(struct folio *folio, loff_t pos); }; struct iomap_writepage_ctx { @@ -463,6 +459,9 @@ void iomap_finish_ioends(struct iomap_ioend *ioend, int error); void iomap_ioend_try_merge(struct iomap_ioend *ioend, struct list_head *more_ioends); void iomap_sort_ioends(struct list_head *ioend_list); +ssize_t iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct folio *folio, + loff_t pos, loff_t end_pos, unsigned int dirty_len); + int iomap_writepages(struct iomap_writepage_ctx *wpc); /* From 19ae4fa835fd882a5283546ed56281217b809ac9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 10 Jul 2025 15:33:29 +0200 Subject: [PATCH 06/15] iomap: hide ioends from the generic writeback code Replace the ioend pointer in iomap_writeback_ctx with a void *wb_ctx one to facilitate non-block, non-ioend writeback for use. Rename the submit_ioend method to writeback_submit and make it mandatory so that the generic writeback code stops seeing ioends and bios. Co-developed-by: Joanne Koong Signed-off-by: Joanne Koong Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Reviewed-by: "Darrick J. Wong" Acked-by: Damien Le Moal --- .../filesystems/iomap/operations.rst | 17 ++-- block/fops.c | 1 + fs/gfs2/bmap.c | 1 + fs/iomap/buffered-io.c | 91 ++++++++++--------- fs/xfs/xfs_aops.c | 60 ++++++------ fs/zonefs/file.c | 1 + include/linux/iomap.h | 19 ++-- 7 files changed, 100 insertions(+), 90 deletions(-) diff --git a/Documentation/filesystems/iomap/operations.rst b/Documentation/filesystems/iomap/operations.rst index f07c8fdb20465..4b93c5f7841a5 100644 --- a/Documentation/filesystems/iomap/operations.rst +++ b/Documentation/filesystems/iomap/operations.rst @@ -284,8 +284,8 @@ The ``ops`` structure must be specified and is as follows: struct iomap_writeback_ops { int (*writeback_range)(struct iomap_writepage_ctx *wpc, - struct folio *folio, u64 pos, unsigned int len, u64 end_pos); - int (*submit_ioend)(struct iomap_writepage_ctx *wpc, int status); + struct folio *folio, u64 pos, unsigned int len, u64 end_pos); + int (*writeback_submit)(struct iomap_writepage_ctx *wpc, int error); }; The fields are as follows: @@ -316,13 +316,15 @@ The fields are as follows: clean pagecache. This function must be supplied by the filesystem. - - ``submit_ioend``: Allows the file systems to hook into writeback bio - submission. + - ``writeback_submit``: Submit the previous built writeback context. + Block based file systems should use the iomap_ioend_writeback_submit + helper, other file system can implement their own. + File systems can optionall to hook into writeback bio submission. This might include pre-write space accounting updates, or installing a custom ``->bi_end_io`` function for internal purposes, such as deferring the ioend completion to a workqueue to run metadata update transactions from process context before submitting the bio. - This function is optional. + This function must be supplied by the filesystem. Pagecache Writeback Completion ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -336,10 +338,9 @@ If the write failed, it will also set the error bits on the folios and the address space. This can happen in interrupt or process context, depending on the storage device. - Filesystems that need to update internal bookkeeping (e.g. unwritten -extent conversions) should provide a ``->submit_ioend`` function to -set ``struct iomap_end::bio::bi_end_io`` to its own function. +extent conversions) should set their own bi_end_io on the bios +submitted by ``->submit_writeback`` This function should call ``iomap_finish_ioends`` after finishing its own work (e.g. unwritten extent conversion). diff --git a/block/fops.c b/block/fops.c index b500ff8f55dd3..0845737c0320a 100644 --- a/block/fops.c +++ b/block/fops.c @@ -560,6 +560,7 @@ static ssize_t blkdev_writeback_range(struct iomap_writepage_ctx *wpc, static const struct iomap_writeback_ops blkdev_writeback_ops = { .writeback_range = blkdev_writeback_range, + .writeback_submit = iomap_ioend_writeback_submit, }; static int blkdev_writepages(struct address_space *mapping, diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 0cc41de54aba6..86045d3577b71 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -2490,4 +2490,5 @@ static ssize_t gfs2_writeback_range(struct iomap_writepage_ctx *wpc, const struct iomap_writeback_ops gfs2_writeback_ops = { .writeback_range = gfs2_writeback_range, + .writeback_submit = iomap_ioend_writeback_submit, }; diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index ecc221592acea..dac6d19f941db 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1574,7 +1574,7 @@ u32 iomap_finish_ioend_buffered(struct iomap_ioend *ioend) return folio_count; } -static void iomap_writepage_end_bio(struct bio *bio) +static void ioend_writeback_end_bio(struct bio *bio) { struct iomap_ioend *ioend = iomap_ioend_from_bio(bio); @@ -1583,42 +1583,30 @@ static void iomap_writepage_end_bio(struct bio *bio) } /* - * Submit an ioend. - * - * If @error is non-zero, it means that we have a situation where some part of - * the submission process has failed after we've marked pages for writeback. - * We cannot cancel ioend directly in that case, so call the bio end I/O handler - * with the error status here to run the normal I/O completion handler to clear - * the writeback bit and let the file system proess the errors. + * We cannot cancel the ioend directly in case of an error, so call the bio end + * I/O handler with the error status here to run the normal I/O completion + * handler. */ -static int iomap_submit_ioend(struct iomap_writepage_ctx *wpc, int error) +int iomap_ioend_writeback_submit(struct iomap_writepage_ctx *wpc, int error) { - if (!wpc->ioend) - return error; + struct iomap_ioend *ioend = wpc->wb_ctx; - /* - * Let the file systems prepare the I/O submission and hook in an I/O - * comletion handler. This also needs to happen in case after a - * failure happened so that the file system end I/O handler gets called - * to clean up. - */ - if (wpc->ops->submit_ioend) { - error = wpc->ops->submit_ioend(wpc, error); - } else { - if (WARN_ON_ONCE(wpc->iomap.flags & IOMAP_F_ANON_WRITE)) - error = -EIO; - if (!error) - submit_bio(&wpc->ioend->io_bio); - } + if (!ioend->io_bio.bi_end_io) + ioend->io_bio.bi_end_io = ioend_writeback_end_bio; + + if (WARN_ON_ONCE(wpc->iomap.flags & IOMAP_F_ANON_WRITE)) + error = -EIO; if (error) { - wpc->ioend->io_bio.bi_status = errno_to_blk_status(error); - bio_endio(&wpc->ioend->io_bio); + ioend->io_bio.bi_status = errno_to_blk_status(error); + bio_endio(&ioend->io_bio); + return error; } - wpc->ioend = NULL; - return error; + submit_bio(&ioend->io_bio); + return 0; } +EXPORT_SYMBOL_GPL(iomap_ioend_writeback_submit); static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writepage_ctx *wpc, loff_t pos, u16 ioend_flags) @@ -1629,7 +1617,6 @@ static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writepage_ctx *wpc, REQ_OP_WRITE | wbc_to_write_flags(wpc->wbc), GFP_NOFS, &iomap_ioend_bioset); bio->bi_iter.bi_sector = iomap_sector(&wpc->iomap, pos); - bio->bi_end_io = iomap_writepage_end_bio; bio->bi_write_hint = wpc->inode->i_write_hint; wbc_init_bio(wpc->wbc, bio); wpc->nr_folios = 0; @@ -1639,16 +1626,17 @@ static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writepage_ctx *wpc, static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos, u16 ioend_flags) { + struct iomap_ioend *ioend = wpc->wb_ctx; + if (ioend_flags & IOMAP_IOEND_BOUNDARY) return false; if ((ioend_flags & IOMAP_IOEND_NOMERGE_FLAGS) != - (wpc->ioend->io_flags & IOMAP_IOEND_NOMERGE_FLAGS)) + (ioend->io_flags & IOMAP_IOEND_NOMERGE_FLAGS)) return false; - if (pos != wpc->ioend->io_offset + wpc->ioend->io_size) + if (pos != ioend->io_offset + ioend->io_size) return false; if (!(wpc->iomap.flags & IOMAP_F_ANON_WRITE) && - iomap_sector(&wpc->iomap, pos) != - bio_end_sector(&wpc->ioend->io_bio)) + iomap_sector(&wpc->iomap, pos) != bio_end_sector(&ioend->io_bio)) return false; /* * Limit ioend bio chain lengths to minimise IO completion latency. This @@ -1674,6 +1662,7 @@ static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos, ssize_t iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct folio *folio, loff_t pos, loff_t end_pos, unsigned int dirty_len) { + struct iomap_ioend *ioend = wpc->wb_ctx; struct iomap_folio_state *ifs = folio->private; size_t poff = offset_in_folio(folio, pos); unsigned int ioend_flags = 0; @@ -1704,15 +1693,17 @@ ssize_t iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct folio *folio, if (pos == wpc->iomap.offset && (wpc->iomap.flags & IOMAP_F_BOUNDARY)) ioend_flags |= IOMAP_IOEND_BOUNDARY; - if (!wpc->ioend || !iomap_can_add_to_ioend(wpc, pos, ioend_flags)) { + if (!ioend || !iomap_can_add_to_ioend(wpc, pos, ioend_flags)) { new_ioend: - error = iomap_submit_ioend(wpc, 0); - if (error) - return error; - wpc->ioend = iomap_alloc_ioend(wpc, pos, ioend_flags); + if (ioend) { + error = wpc->ops->writeback_submit(wpc, 0); + if (error) + return error; + } + wpc->wb_ctx = ioend = iomap_alloc_ioend(wpc, pos, ioend_flags); } - if (!bio_add_folio(&wpc->ioend->io_bio, folio, map_len, poff)) + if (!bio_add_folio(&ioend->io_bio, folio, map_len, poff)) goto new_ioend; if (ifs) @@ -1759,9 +1750,9 @@ ssize_t iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct folio *folio, * Note that this defeats the ability to chain the ioends of * appending writes. */ - wpc->ioend->io_size += map_len; - if (wpc->ioend->io_offset + wpc->ioend->io_size > end_pos) - wpc->ioend->io_size = end_pos - wpc->ioend->io_offset; + ioend->io_size += map_len; + if (ioend->io_offset + ioend->io_size > end_pos) + ioend->io_size = end_pos - ioend->io_offset; wbc_account_cgroup_owner(wpc->wbc, folio, map_len); return map_len; @@ -1956,6 +1947,18 @@ iomap_writepages(struct iomap_writepage_ctx *wpc) while ((folio = writeback_iter(mapping, wpc->wbc, folio, &error))) error = iomap_writepage_map(wpc, folio); - return iomap_submit_ioend(wpc, error); + + /* + * If @error is non-zero, it means that we have a situation where some + * part of the submission process has failed after we've marked pages + * for writeback. + * + * We cannot cancel the writeback directly in that case, so always call + * ->writeback_submit to run the I/O completion handler to clear the + * writeback bit and let the file system proess the errors. + */ + if (wpc->wb_ctx) + return wpc->ops->writeback_submit(wpc, error); + return error; } EXPORT_SYMBOL_GPL(iomap_writepages); diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index f6d44ab78442a..1ee4f835ac3c3 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -514,41 +514,40 @@ xfs_ioend_needs_wq_completion( } static int -xfs_submit_ioend( - struct iomap_writepage_ctx *wpc, - int status) +xfs_writeback_submit( + struct iomap_writepage_ctx *wpc, + int error) { - struct iomap_ioend *ioend = wpc->ioend; - unsigned int nofs_flag; + struct iomap_ioend *ioend = wpc->wb_ctx; /* - * We can allocate memory here while doing writeback on behalf of - * memory reclaim. To avoid memory allocation deadlocks set the - * task-wide nofs context for the following operations. + * Convert CoW extents to regular. + * + * We can allocate memory here while doing writeback on behalf of memory + * reclaim. To avoid memory allocation deadlocks, set the task-wide + * nofs context. */ - nofs_flag = memalloc_nofs_save(); + if (!error && (ioend->io_flags & IOMAP_IOEND_SHARED)) { + unsigned int nofs_flag; - /* Convert CoW extents to regular */ - if (!status && (ioend->io_flags & IOMAP_IOEND_SHARED)) { - status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode), + nofs_flag = memalloc_nofs_save(); + error = xfs_reflink_convert_cow(XFS_I(ioend->io_inode), ioend->io_offset, ioend->io_size); + memalloc_nofs_restore(nofs_flag); } - memalloc_nofs_restore(nofs_flag); - - /* send ioends that might require a transaction to the completion wq */ + /* + * Send ioends that might require a transaction to the completion wq. + */ if (xfs_ioend_needs_wq_completion(ioend)) ioend->io_bio.bi_end_io = xfs_end_bio; - if (status) - return status; - submit_bio(&ioend->io_bio); - return 0; + return iomap_ioend_writeback_submit(wpc, error); } static const struct iomap_writeback_ops xfs_writeback_ops = { .writeback_range = xfs_writeback_range, - .submit_ioend = xfs_submit_ioend, + .writeback_submit = xfs_writeback_submit, }; struct xfs_zoned_writepage_ctx { @@ -646,20 +645,25 @@ xfs_zoned_writeback_range( } static int -xfs_zoned_submit_ioend( - struct iomap_writepage_ctx *wpc, - int status) +xfs_zoned_writeback_submit( + struct iomap_writepage_ctx *wpc, + int error) { - wpc->ioend->io_bio.bi_end_io = xfs_end_bio; - if (status) - return status; - xfs_zone_alloc_and_submit(wpc->ioend, &XFS_ZWPC(wpc)->open_zone); + struct iomap_ioend *ioend = wpc->wb_ctx; + + ioend->io_bio.bi_end_io = xfs_end_bio; + if (error) { + ioend->io_bio.bi_status = errno_to_blk_status(error); + bio_endio(&ioend->io_bio); + return error; + } + xfs_zone_alloc_and_submit(ioend, &XFS_ZWPC(wpc)->open_zone); return 0; } static const struct iomap_writeback_ops xfs_zoned_writeback_ops = { .writeback_range = xfs_zoned_writeback_range, - .submit_ioend = xfs_zoned_submit_ioend, + .writeback_submit = xfs_zoned_writeback_submit, }; STATIC int diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c index c88e2c8517533..fee9403ad49b7 100644 --- a/fs/zonefs/file.c +++ b/fs/zonefs/file.c @@ -151,6 +151,7 @@ static ssize_t zonefs_writeback_range(struct iomap_writepage_ctx *wpc, static const struct iomap_writeback_ops zonefs_writeback_ops = { .writeback_range = zonefs_writeback_range, + .writeback_submit = iomap_ioend_writeback_submit, }; static int zonefs_writepages(struct address_space *mapping, diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 625d7911a2b5f..9f32dd8dc0750 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -391,8 +391,7 @@ sector_t iomap_bmap(struct address_space *mapping, sector_t bno, /* * Structure for writeback I/O completions. * - * File systems implementing ->submit_ioend (for buffered I/O) or ->submit_io - * for direct I/O) can split a bio generated by iomap. In that case the parent + * File systems can split a bio generated by iomap. In that case the parent * ioend it was split from is recorded in ioend->io_parent. */ struct iomap_ioend { @@ -416,7 +415,7 @@ static inline struct iomap_ioend *iomap_ioend_from_bio(struct bio *bio) struct iomap_writeback_ops { /* - * Required, performs writeback on the passed in range + * Performs writeback on the passed in range * * Can map arbitrarily large regions, but we need to call into it at * least once per folio to allow the file systems to synchronize with @@ -432,23 +431,22 @@ struct iomap_writeback_ops { u64 end_pos); /* - * Optional, allows the file systems to hook into bio submission, - * including overriding the bi_end_io handler. + * Submit a writeback context previously build up by ->writeback_range. * - * Returns 0 if the bio was successfully submitted, or a negative - * error code if status was non-zero or another error happened and - * the bio could not be submitted. + * Returns 0 if the context was successfully submitted, or a negative + * error code if not. If @error is non-zero a failure occurred, and + * the writeback context should be completed with an error. */ - int (*submit_ioend)(struct iomap_writepage_ctx *wpc, int status); + int (*writeback_submit)(struct iomap_writepage_ctx *wpc, int error); }; struct iomap_writepage_ctx { struct iomap iomap; struct inode *inode; struct writeback_control *wbc; - struct iomap_ioend *ioend; const struct iomap_writeback_ops *ops; u32 nr_folios; /* folios added to the ioend */ + void *wb_ctx; /* pending writeback context */ }; struct iomap_ioend *iomap_init_ioend(struct inode *inode, struct bio *bio, @@ -461,6 +459,7 @@ void iomap_ioend_try_merge(struct iomap_ioend *ioend, void iomap_sort_ioends(struct list_head *ioend_list); ssize_t iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct folio *folio, loff_t pos, loff_t end_pos, unsigned int dirty_len); +int iomap_ioend_writeback_submit(struct iomap_writepage_ctx *wpc, int error); int iomap_writepages(struct iomap_writepage_ctx *wpc); From 4ea1ab651562c4992aaf2999a521b7f62463df02 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Thu, 10 Jul 2025 15:33:30 +0200 Subject: [PATCH 07/15] iomap: add public helpers for uptodate state manipulation Add a new iomap_start_folio_write helper to abstract away the write_bytes_pending handling, and export it and the existing iomap_finish_folio_write for non-iomap writeback in fuse. Signed-off-by: Joanne Koong [hch: split from a larger patch] Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Reviewed-by: "Darrick J. Wong" --- fs/iomap/buffered-io.c | 20 +++++++++++++++----- include/linux/iomap.h | 5 +++++ 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index dac6d19f941db..190a919bf9dd1 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1530,7 +1530,18 @@ vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops, } EXPORT_SYMBOL_GPL(iomap_page_mkwrite); -static void iomap_finish_folio_write(struct inode *inode, struct folio *folio, +void iomap_start_folio_write(struct inode *inode, struct folio *folio, + size_t len) +{ + struct iomap_folio_state *ifs = folio->private; + + WARN_ON_ONCE(i_blocks_per_folio(inode, folio) > 1 && !ifs); + if (ifs) + atomic_add(len, &ifs->write_bytes_pending); +} +EXPORT_SYMBOL_GPL(iomap_start_folio_write); + +void iomap_finish_folio_write(struct inode *inode, struct folio *folio, size_t len) { struct iomap_folio_state *ifs = folio->private; @@ -1541,6 +1552,7 @@ static void iomap_finish_folio_write(struct inode *inode, struct folio *folio, if (!ifs || atomic_sub_and_test(len, &ifs->write_bytes_pending)) folio_end_writeback(folio); } +EXPORT_SYMBOL_GPL(iomap_finish_folio_write); /* * We're now finished for good with this ioend structure. Update the page @@ -1663,7 +1675,6 @@ ssize_t iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct folio *folio, loff_t pos, loff_t end_pos, unsigned int dirty_len) { struct iomap_ioend *ioend = wpc->wb_ctx; - struct iomap_folio_state *ifs = folio->private; size_t poff = offset_in_folio(folio, pos); unsigned int ioend_flags = 0; unsigned int map_len = min_t(u64, dirty_len, @@ -1706,8 +1717,7 @@ ssize_t iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct folio *folio, if (!bio_add_folio(&ioend->io_bio, folio, map_len, poff)) goto new_ioend; - if (ifs) - atomic_add(map_len, &ifs->write_bytes_pending); + iomap_start_folio_write(wpc->inode, folio, map_len); /* * Clamp io_offset and io_size to the incore EOF so that ondisk @@ -1880,7 +1890,7 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc, * all blocks. */ WARN_ON_ONCE(atomic_read(&ifs->write_bytes_pending) != 0); - atomic_inc(&ifs->write_bytes_pending); + iomap_start_folio_write(inode, folio, 1); } /* diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 9f32dd8dc0750..cbf9d299a6168 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -461,6 +461,11 @@ ssize_t iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct folio *folio, loff_t pos, loff_t end_pos, unsigned int dirty_len); int iomap_ioend_writeback_submit(struct iomap_writepage_ctx *wpc, int error); +void iomap_start_folio_write(struct inode *inode, struct folio *folio, + size_t len); +void iomap_finish_folio_write(struct inode *inode, struct folio *folio, + size_t len); + int iomap_writepages(struct iomap_writepage_ctx *wpc); /* From e492ed58317f3112543b9831b6a6f7a6c8d931fe Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 10 Jul 2025 15:33:31 +0200 Subject: [PATCH 08/15] iomap: move all ioend handling to ioend.c Now that the writeback code has the proper abstractions, all the ioend code can be self-contained in ioend.c. Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Reviewed-by: Joanne Koong Reviewed-by: "Darrick J. Wong" --- fs/iomap/buffered-io.c | 216 ---------------------------------------- fs/iomap/internal.h | 1 - fs/iomap/ioend.c | 220 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 219 insertions(+), 218 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 190a919bf9dd1..c6836511db996 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -8,7 +8,6 @@ #include #include #include -#include "internal.h" #include "trace.h" #include "../internal.h" @@ -1554,221 +1553,6 @@ void iomap_finish_folio_write(struct inode *inode, struct folio *folio, } EXPORT_SYMBOL_GPL(iomap_finish_folio_write); -/* - * We're now finished for good with this ioend structure. Update the page - * state, release holds on bios, and finally free up memory. Do not use the - * ioend after this. - */ -u32 iomap_finish_ioend_buffered(struct iomap_ioend *ioend) -{ - struct inode *inode = ioend->io_inode; - struct bio *bio = &ioend->io_bio; - struct folio_iter fi; - u32 folio_count = 0; - - if (ioend->io_error) { - mapping_set_error(inode->i_mapping, ioend->io_error); - if (!bio_flagged(bio, BIO_QUIET)) { - pr_err_ratelimited( -"%s: writeback error on inode %lu, offset %lld, sector %llu", - inode->i_sb->s_id, inode->i_ino, - ioend->io_offset, ioend->io_sector); - } - } - - /* walk all folios in bio, ending page IO on them */ - bio_for_each_folio_all(fi, bio) { - iomap_finish_folio_write(inode, fi.folio, fi.length); - folio_count++; - } - - bio_put(bio); /* frees the ioend */ - return folio_count; -} - -static void ioend_writeback_end_bio(struct bio *bio) -{ - struct iomap_ioend *ioend = iomap_ioend_from_bio(bio); - - ioend->io_error = blk_status_to_errno(bio->bi_status); - iomap_finish_ioend_buffered(ioend); -} - -/* - * We cannot cancel the ioend directly in case of an error, so call the bio end - * I/O handler with the error status here to run the normal I/O completion - * handler. - */ -int iomap_ioend_writeback_submit(struct iomap_writepage_ctx *wpc, int error) -{ - struct iomap_ioend *ioend = wpc->wb_ctx; - - if (!ioend->io_bio.bi_end_io) - ioend->io_bio.bi_end_io = ioend_writeback_end_bio; - - if (WARN_ON_ONCE(wpc->iomap.flags & IOMAP_F_ANON_WRITE)) - error = -EIO; - - if (error) { - ioend->io_bio.bi_status = errno_to_blk_status(error); - bio_endio(&ioend->io_bio); - return error; - } - - submit_bio(&ioend->io_bio); - return 0; -} -EXPORT_SYMBOL_GPL(iomap_ioend_writeback_submit); - -static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writepage_ctx *wpc, - loff_t pos, u16 ioend_flags) -{ - struct bio *bio; - - bio = bio_alloc_bioset(wpc->iomap.bdev, BIO_MAX_VECS, - REQ_OP_WRITE | wbc_to_write_flags(wpc->wbc), - GFP_NOFS, &iomap_ioend_bioset); - bio->bi_iter.bi_sector = iomap_sector(&wpc->iomap, pos); - bio->bi_write_hint = wpc->inode->i_write_hint; - wbc_init_bio(wpc->wbc, bio); - wpc->nr_folios = 0; - return iomap_init_ioend(wpc->inode, bio, pos, ioend_flags); -} - -static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos, - u16 ioend_flags) -{ - struct iomap_ioend *ioend = wpc->wb_ctx; - - if (ioend_flags & IOMAP_IOEND_BOUNDARY) - return false; - if ((ioend_flags & IOMAP_IOEND_NOMERGE_FLAGS) != - (ioend->io_flags & IOMAP_IOEND_NOMERGE_FLAGS)) - return false; - if (pos != ioend->io_offset + ioend->io_size) - return false; - if (!(wpc->iomap.flags & IOMAP_F_ANON_WRITE) && - iomap_sector(&wpc->iomap, pos) != bio_end_sector(&ioend->io_bio)) - return false; - /* - * Limit ioend bio chain lengths to minimise IO completion latency. This - * also prevents long tight loops ending page writeback on all the - * folios in the ioend. - */ - if (wpc->nr_folios >= IOEND_BATCH_SIZE) - return false; - return true; -} - -/* - * Test to see if we have an existing ioend structure that we could append to - * first; otherwise finish off the current ioend and start another. - * - * If a new ioend is created and cached, the old ioend is submitted to the block - * layer instantly. Batching optimisations are provided by higher level block - * plugging. - * - * At the end of a writeback pass, there will be a cached ioend remaining on the - * writepage context that the caller will need to submit. - */ -ssize_t iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct folio *folio, - loff_t pos, loff_t end_pos, unsigned int dirty_len) -{ - struct iomap_ioend *ioend = wpc->wb_ctx; - size_t poff = offset_in_folio(folio, pos); - unsigned int ioend_flags = 0; - unsigned int map_len = min_t(u64, dirty_len, - wpc->iomap.offset + wpc->iomap.length - pos); - int error; - - trace_iomap_add_to_ioend(wpc->inode, pos, dirty_len, &wpc->iomap); - - WARN_ON_ONCE(!folio->private && map_len < dirty_len); - - switch (wpc->iomap.type) { - case IOMAP_INLINE: - WARN_ON_ONCE(1); - return -EIO; - case IOMAP_HOLE: - return map_len; - default: - break; - } - - if (wpc->iomap.type == IOMAP_UNWRITTEN) - ioend_flags |= IOMAP_IOEND_UNWRITTEN; - if (wpc->iomap.flags & IOMAP_F_SHARED) - ioend_flags |= IOMAP_IOEND_SHARED; - if (folio_test_dropbehind(folio)) - ioend_flags |= IOMAP_IOEND_DONTCACHE; - if (pos == wpc->iomap.offset && (wpc->iomap.flags & IOMAP_F_BOUNDARY)) - ioend_flags |= IOMAP_IOEND_BOUNDARY; - - if (!ioend || !iomap_can_add_to_ioend(wpc, pos, ioend_flags)) { -new_ioend: - if (ioend) { - error = wpc->ops->writeback_submit(wpc, 0); - if (error) - return error; - } - wpc->wb_ctx = ioend = iomap_alloc_ioend(wpc, pos, ioend_flags); - } - - if (!bio_add_folio(&ioend->io_bio, folio, map_len, poff)) - goto new_ioend; - - iomap_start_folio_write(wpc->inode, folio, map_len); - - /* - * Clamp io_offset and io_size to the incore EOF so that ondisk - * file size updates in the ioend completion are byte-accurate. - * This avoids recovering files with zeroed tail regions when - * writeback races with appending writes: - * - * Thread 1: Thread 2: - * ------------ ----------- - * write [A, A+B] - * update inode size to A+B - * submit I/O [A, A+BS] - * write [A+B, A+B+C] - * update inode size to A+B+C - * - * - * - * After reboot: - * 1) with A+B+C < A+BS, the file has zero padding in range - * [A+B, A+B+C] - * - * |< Block Size (BS) >| - * |DDDDDDDDDDDD0000000000000| - * ^ ^ ^ - * A A+B A+B+C - * (EOF) - * - * 2) with A+B+C > A+BS, the file has zero padding in range - * [A+B, A+BS] - * - * |< Block Size (BS) >|< Block Size (BS) >| - * |DDDDDDDDDDDD0000000000000|00000000000000000000000000| - * ^ ^ ^ ^ - * A A+B A+BS A+B+C - * (EOF) - * - * D = Valid Data - * 0 = Zero Padding - * - * Note that this defeats the ability to chain the ioends of - * appending writes. - */ - ioend->io_size += map_len; - if (ioend->io_offset + ioend->io_size > end_pos) - ioend->io_size = end_pos - ioend->io_offset; - - wbc_account_cgroup_owner(wpc->wbc, folio, map_len); - return map_len; -} -EXPORT_SYMBOL_GPL(iomap_add_to_ioend); - static int iomap_writeback_range(struct iomap_writepage_ctx *wpc, struct folio *folio, u64 pos, u32 rlen, u64 end_pos, bool *wb_pending) diff --git a/fs/iomap/internal.h b/fs/iomap/internal.h index f6992a3bf66a4..d05cb3aed96e7 100644 --- a/fs/iomap/internal.h +++ b/fs/iomap/internal.h @@ -4,7 +4,6 @@ #define IOEND_BATCH_SIZE 4096 -u32 iomap_finish_ioend_buffered(struct iomap_ioend *ioend); u32 iomap_finish_ioend_direct(struct iomap_ioend *ioend); #endif /* _IOMAP_INTERNAL_H */ diff --git a/fs/iomap/ioend.c b/fs/iomap/ioend.c index 18894ebba6db1..b49fa75eab260 100644 --- a/fs/iomap/ioend.c +++ b/fs/iomap/ioend.c @@ -1,10 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (c) 2024-2025 Christoph Hellwig. + * Copyright (c) 2016-2025 Christoph Hellwig. */ #include #include +#include +#include #include "internal.h" +#include "trace.h" struct bio_set iomap_ioend_bioset; EXPORT_SYMBOL_GPL(iomap_ioend_bioset); @@ -28,6 +31,221 @@ struct iomap_ioend *iomap_init_ioend(struct inode *inode, } EXPORT_SYMBOL_GPL(iomap_init_ioend); +/* + * We're now finished for good with this ioend structure. Update the folio + * state, release holds on bios, and finally free up memory. Do not use the + * ioend after this. + */ +static u32 iomap_finish_ioend_buffered(struct iomap_ioend *ioend) +{ + struct inode *inode = ioend->io_inode; + struct bio *bio = &ioend->io_bio; + struct folio_iter fi; + u32 folio_count = 0; + + if (ioend->io_error) { + mapping_set_error(inode->i_mapping, ioend->io_error); + if (!bio_flagged(bio, BIO_QUIET)) { + pr_err_ratelimited( +"%s: writeback error on inode %lu, offset %lld, sector %llu", + inode->i_sb->s_id, inode->i_ino, + ioend->io_offset, ioend->io_sector); + } + } + + /* walk all folios in bio, ending page IO on them */ + bio_for_each_folio_all(fi, bio) { + iomap_finish_folio_write(inode, fi.folio, fi.length); + folio_count++; + } + + bio_put(bio); /* frees the ioend */ + return folio_count; +} + +static void ioend_writeback_end_bio(struct bio *bio) +{ + struct iomap_ioend *ioend = iomap_ioend_from_bio(bio); + + ioend->io_error = blk_status_to_errno(bio->bi_status); + iomap_finish_ioend_buffered(ioend); +} + +/* + * We cannot cancel the ioend directly in case of an error, so call the bio end + * I/O handler with the error status here to run the normal I/O completion + * handler. + */ +int iomap_ioend_writeback_submit(struct iomap_writepage_ctx *wpc, int error) +{ + struct iomap_ioend *ioend = wpc->wb_ctx; + + if (!ioend->io_bio.bi_end_io) + ioend->io_bio.bi_end_io = ioend_writeback_end_bio; + + if (WARN_ON_ONCE(wpc->iomap.flags & IOMAP_F_ANON_WRITE)) + error = -EIO; + + if (error) { + ioend->io_bio.bi_status = errno_to_blk_status(error); + bio_endio(&ioend->io_bio); + return error; + } + + submit_bio(&ioend->io_bio); + return 0; +} +EXPORT_SYMBOL_GPL(iomap_ioend_writeback_submit); + +static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writepage_ctx *wpc, + loff_t pos, u16 ioend_flags) +{ + struct bio *bio; + + bio = bio_alloc_bioset(wpc->iomap.bdev, BIO_MAX_VECS, + REQ_OP_WRITE | wbc_to_write_flags(wpc->wbc), + GFP_NOFS, &iomap_ioend_bioset); + bio->bi_iter.bi_sector = iomap_sector(&wpc->iomap, pos); + bio->bi_write_hint = wpc->inode->i_write_hint; + wbc_init_bio(wpc->wbc, bio); + wpc->nr_folios = 0; + return iomap_init_ioend(wpc->inode, bio, pos, ioend_flags); +} + +static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos, + u16 ioend_flags) +{ + struct iomap_ioend *ioend = wpc->wb_ctx; + + if (ioend_flags & IOMAP_IOEND_BOUNDARY) + return false; + if ((ioend_flags & IOMAP_IOEND_NOMERGE_FLAGS) != + (ioend->io_flags & IOMAP_IOEND_NOMERGE_FLAGS)) + return false; + if (pos != ioend->io_offset + ioend->io_size) + return false; + if (!(wpc->iomap.flags & IOMAP_F_ANON_WRITE) && + iomap_sector(&wpc->iomap, pos) != bio_end_sector(&ioend->io_bio)) + return false; + /* + * Limit ioend bio chain lengths to minimise IO completion latency. This + * also prevents long tight loops ending page writeback on all the + * folios in the ioend. + */ + if (wpc->nr_folios >= IOEND_BATCH_SIZE) + return false; + return true; +} + +/* + * Test to see if we have an existing ioend structure that we could append to + * first; otherwise finish off the current ioend and start another. + * + * If a new ioend is created and cached, the old ioend is submitted to the block + * layer instantly. Batching optimisations are provided by higher level block + * plugging. + * + * At the end of a writeback pass, there will be a cached ioend remaining on the + * writepage context that the caller will need to submit. + */ +ssize_t iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct folio *folio, + loff_t pos, loff_t end_pos, unsigned int dirty_len) +{ + struct iomap_ioend *ioend = wpc->wb_ctx; + size_t poff = offset_in_folio(folio, pos); + unsigned int ioend_flags = 0; + unsigned int map_len = min_t(u64, dirty_len, + wpc->iomap.offset + wpc->iomap.length - pos); + int error; + + trace_iomap_add_to_ioend(wpc->inode, pos, dirty_len, &wpc->iomap); + + WARN_ON_ONCE(!folio->private && map_len < dirty_len); + + switch (wpc->iomap.type) { + case IOMAP_INLINE: + WARN_ON_ONCE(1); + return -EIO; + case IOMAP_HOLE: + return map_len; + default: + break; + } + + if (wpc->iomap.type == IOMAP_UNWRITTEN) + ioend_flags |= IOMAP_IOEND_UNWRITTEN; + if (wpc->iomap.flags & IOMAP_F_SHARED) + ioend_flags |= IOMAP_IOEND_SHARED; + if (folio_test_dropbehind(folio)) + ioend_flags |= IOMAP_IOEND_DONTCACHE; + if (pos == wpc->iomap.offset && (wpc->iomap.flags & IOMAP_F_BOUNDARY)) + ioend_flags |= IOMAP_IOEND_BOUNDARY; + + if (!ioend || !iomap_can_add_to_ioend(wpc, pos, ioend_flags)) { +new_ioend: + if (ioend) { + error = wpc->ops->writeback_submit(wpc, 0); + if (error) + return error; + } + wpc->wb_ctx = ioend = iomap_alloc_ioend(wpc, pos, ioend_flags); + } + + if (!bio_add_folio(&ioend->io_bio, folio, map_len, poff)) + goto new_ioend; + + iomap_start_folio_write(wpc->inode, folio, map_len); + + /* + * Clamp io_offset and io_size to the incore EOF so that ondisk + * file size updates in the ioend completion are byte-accurate. + * This avoids recovering files with zeroed tail regions when + * writeback races with appending writes: + * + * Thread 1: Thread 2: + * ------------ ----------- + * write [A, A+B] + * update inode size to A+B + * submit I/O [A, A+BS] + * write [A+B, A+B+C] + * update inode size to A+B+C + * + * + * + * After reboot: + * 1) with A+B+C < A+BS, the file has zero padding in range + * [A+B, A+B+C] + * + * |< Block Size (BS) >| + * |DDDDDDDDDDDD0000000000000| + * ^ ^ ^ + * A A+B A+B+C + * (EOF) + * + * 2) with A+B+C > A+BS, the file has zero padding in range + * [A+B, A+BS] + * + * |< Block Size (BS) >|< Block Size (BS) >| + * |DDDDDDDDDDDD0000000000000|00000000000000000000000000| + * ^ ^ ^ ^ + * A A+B A+BS A+B+C + * (EOF) + * + * D = Valid Data + * 0 = Zero Padding + * + * Note that this defeats the ability to chain the ioends of + * appending writes. + */ + ioend->io_size += map_len; + if (ioend->io_offset + ioend->io_size > end_pos) + ioend->io_size = end_pos - ioend->io_offset; + + wbc_account_cgroup_owner(wpc->wbc, folio, map_len); + return map_len; +} +EXPORT_SYMBOL_GPL(iomap_add_to_ioend); + static u32 iomap_finish_ioend(struct iomap_ioend *ioend, int error) { if (ioend->io_parent) { From 894930dc556edbf308ff0b6c62513b4b1cc70443 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 10 Jul 2025 15:33:32 +0200 Subject: [PATCH 09/15] iomap: rename iomap_writepage_map to iomap_writeback_folio ->writepage is gone, and our naming wasn't always that great to start with. Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Reviewed-by: Joanne Koong Reviewed-by: "Darrick J. Wong" --- fs/iomap/buffered-io.c | 10 +++++----- fs/iomap/trace.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index c6836511db996..72fe23893f11d 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1585,7 +1585,7 @@ static int iomap_writeback_range(struct iomap_writepage_ctx *wpc, * If the folio is entirely beyond i_size, return false. If it straddles * i_size, adjust end_pos and zero all data beyond i_size. */ -static bool iomap_writepage_handle_eof(struct folio *folio, struct inode *inode, +static bool iomap_writeback_handle_eof(struct folio *folio, struct inode *inode, u64 *end_pos) { u64 isize = i_size_read(inode); @@ -1637,7 +1637,7 @@ static bool iomap_writepage_handle_eof(struct folio *folio, struct inode *inode, return true; } -static int iomap_writepage_map(struct iomap_writepage_ctx *wpc, +static int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, struct folio *folio) { struct iomap_folio_state *ifs = folio->private; @@ -1653,9 +1653,9 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc, WARN_ON_ONCE(folio_test_dirty(folio)); WARN_ON_ONCE(folio_test_writeback(folio)); - trace_iomap_writepage(inode, pos, folio_size(folio)); + trace_iomap_writeback_folio(inode, pos, folio_size(folio)); - if (!iomap_writepage_handle_eof(folio, inode, &end_pos)) { + if (!iomap_writeback_handle_eof(folio, inode, &end_pos)) { folio_unlock(folio); return 0; } @@ -1740,7 +1740,7 @@ iomap_writepages(struct iomap_writepage_ctx *wpc) return -EIO; while ((folio = writeback_iter(mapping, wpc->wbc, folio, &error))) - error = iomap_writepage_map(wpc, folio); + error = iomap_writeback_folio(wpc, folio); /* * If @error is non-zero, it means that we have a situation where some diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h index aaea02c9560a3..6ad66e6ba653e 100644 --- a/fs/iomap/trace.h +++ b/fs/iomap/trace.h @@ -79,7 +79,7 @@ DECLARE_EVENT_CLASS(iomap_range_class, DEFINE_EVENT(iomap_range_class, name, \ TP_PROTO(struct inode *inode, loff_t off, u64 len),\ TP_ARGS(inode, off, len)) -DEFINE_RANGE_EVENT(iomap_writepage); +DEFINE_RANGE_EVENT(iomap_writeback_folio); DEFINE_RANGE_EVENT(iomap_release_folio); DEFINE_RANGE_EVENT(iomap_invalidate_folio); DEFINE_RANGE_EVENT(iomap_dio_invalidate_fail); From 3af2ce4f6fa8698ce5b71bb9254fafb75cd52e83 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Thu, 10 Jul 2025 15:33:33 +0200 Subject: [PATCH 10/15] iomap: move folio_unlock out of iomap_writeback_folio Move unlocking the folio out of iomap_writeback_folio into the caller. This means the end writeback machinery is now run with the folio locked when no writeback happened, or writeback completed extremely fast. Note that having the folio locked over the call to folio_end_writeback in iomap_writeback_folio means that the dropbehind handling there will never run because the trylock fails. The only way this can happen is if the writepage either never wrote back any dirty data at all, in which case the dropbehind handling isn't needed, or if all writeback finished instantly, which is rather unlikely. Even in the latter case the dropbehind handling is an optional optimization so skipping it will not cause correctness issues. This prepares for exporting iomap_writeback_folio for use in folio laundering. Signed-off-by: Joanne Koong [hch: split from a larger patch] Signed-off-by: Christoph Hellwig Reviewed-by: "Darrick J. Wong" --- fs/iomap/buffered-io.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 72fe23893f11d..156262e7e645b 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1655,10 +1655,8 @@ static int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, trace_iomap_writeback_folio(inode, pos, folio_size(folio)); - if (!iomap_writeback_handle_eof(folio, inode, &end_pos)) { - folio_unlock(folio); + if (!iomap_writeback_handle_eof(folio, inode, &end_pos)) return 0; - } WARN_ON_ONCE(end_pos <= pos); if (i_blocks_per_folio(inode, folio) > 1) { @@ -1712,7 +1710,6 @@ static int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, * already at this point. In that case we need to clear the writeback * bit ourselves right after unlocking the page. */ - folio_unlock(folio); if (ifs) { if (atomic_dec_and_test(&ifs->write_bytes_pending)) folio_end_writeback(folio); @@ -1739,8 +1736,10 @@ iomap_writepages(struct iomap_writepage_ctx *wpc) PF_MEMALLOC)) return -EIO; - while ((folio = writeback_iter(mapping, wpc->wbc, folio, &error))) + while ((folio = writeback_iter(mapping, wpc->wbc, folio, &error))) { error = iomap_writeback_folio(wpc, folio); + folio_unlock(folio); + } /* * If @error is non-zero, it means that we have a situation where some From 6568b715ae3af2413a5312dc37bf2cc46dade58c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 10 Jul 2025 15:33:34 +0200 Subject: [PATCH 11/15] iomap: export iomap_writeback_folio Allow fuse to use iomap_writeback_folio for folio laundering. Note that the caller needs to manually submit the pending writeback context. Signed-off-by: Christoph Hellwig Reviewed-by: Joanne Koong Reviewed-by: "Darrick J. Wong" --- fs/iomap/buffered-io.c | 4 ++-- include/linux/iomap.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 156262e7e645b..eed3d3e01aa83 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1637,8 +1637,7 @@ static bool iomap_writeback_handle_eof(struct folio *folio, struct inode *inode, return true; } -static int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, - struct folio *folio) +int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, struct folio *folio) { struct iomap_folio_state *ifs = folio->private; struct inode *inode = wpc->inode; @@ -1720,6 +1719,7 @@ static int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, mapping_set_error(inode->i_mapping, error); return error; } +EXPORT_SYMBOL_GPL(iomap_writeback_folio); int iomap_writepages(struct iomap_writepage_ctx *wpc) diff --git a/include/linux/iomap.h b/include/linux/iomap.h index cbf9d299a6168..b65d3f063bb07 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -466,6 +466,7 @@ void iomap_start_folio_write(struct inode *inode, struct folio *folio, void iomap_finish_folio_write(struct inode *inode, struct folio *folio, size_t len); +int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, struct folio *folio); int iomap_writepages(struct iomap_writepage_ctx *wpc); /* From 836555afb8ab8ea08c950d65f969fdb66e39ebbd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 10 Jul 2025 15:33:35 +0200 Subject: [PATCH 12/15] iomap: replace iomap_folio_ops with iomap_write_ops The iomap_folio_ops are only used for buffered writes, including the zero and unshare variants. Rename them to iomap_write_ops to better describe the usage, and pass them through the call chain like the other operation specific methods instead of through the iomap. xfs_iomap_valid grows a IOMAP_HOLE check to keep the existing behavior that never attached the folio_ops to a iomap representing a hole. Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Reviewed-by: "Darrick J. Wong" Acked-by: Damien Le Moal --- Documentation/filesystems/iomap/design.rst | 3 - .../filesystems/iomap/operations.rst | 8 +- block/fops.c | 3 +- fs/gfs2/bmap.c | 21 ++--- fs/gfs2/bmap.h | 1 + fs/gfs2/file.c | 3 +- fs/iomap/buffered-io.c | 79 +++++++++++-------- fs/xfs/xfs_file.c | 6 +- fs/xfs/xfs_iomap.c | 12 ++- fs/xfs/xfs_iomap.h | 1 + fs/xfs/xfs_reflink.c | 3 +- fs/zonefs/file.c | 3 +- include/linux/iomap.h | 22 +++--- 13 files changed, 89 insertions(+), 76 deletions(-) diff --git a/Documentation/filesystems/iomap/design.rst b/Documentation/filesystems/iomap/design.rst index f2df9b6df9883..0f7672676c0ba 100644 --- a/Documentation/filesystems/iomap/design.rst +++ b/Documentation/filesystems/iomap/design.rst @@ -167,7 +167,6 @@ structure below: struct dax_device *dax_dev; void *inline_data; void *private; - const struct iomap_folio_ops *folio_ops; u64 validity_cookie; }; @@ -292,8 +291,6 @@ The fields are as follows: `_. This value will be passed unchanged to ``->iomap_end``. - * ``folio_ops`` will be covered in the section on pagecache operations. - * ``validity_cookie`` is a magic freshness value set by the filesystem that should be used to detect stale mappings. For pagecache operations this is critical for correct operation diff --git a/Documentation/filesystems/iomap/operations.rst b/Documentation/filesystems/iomap/operations.rst index 4b93c5f7841a5..a9b48ce4af920 100644 --- a/Documentation/filesystems/iomap/operations.rst +++ b/Documentation/filesystems/iomap/operations.rst @@ -57,16 +57,12 @@ The following address space operations can be wrapped easily: * ``bmap`` * ``swap_activate`` -``struct iomap_folio_ops`` +``struct iomap_write_ops`` -------------------------- -The ``->iomap_begin`` function for pagecache operations may set the -``struct iomap::folio_ops`` field to an ops structure to override -default behaviors of iomap: - .. code-block:: c - struct iomap_folio_ops { + struct iomap_write_ops { struct folio *(*get_folio)(struct iomap_iter *iter, loff_t pos, unsigned len); void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied, diff --git a/block/fops.c b/block/fops.c index 0845737c0320a..0c2c010ff3035 100644 --- a/block/fops.c +++ b/block/fops.c @@ -723,7 +723,8 @@ blkdev_direct_write(struct kiocb *iocb, struct iov_iter *from) static ssize_t blkdev_buffered_write(struct kiocb *iocb, struct iov_iter *from) { - return iomap_file_buffered_write(iocb, from, &blkdev_iomap_ops, NULL); + return iomap_file_buffered_write(iocb, from, &blkdev_iomap_ops, NULL, + NULL); } /* diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 86045d3577b71..131091520de6b 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -963,12 +963,16 @@ static struct folio * gfs2_iomap_get_folio(struct iomap_iter *iter, loff_t pos, unsigned len) { struct inode *inode = iter->inode; + struct gfs2_inode *ip = GFS2_I(inode); unsigned int blockmask = i_blocksize(inode) - 1; struct gfs2_sbd *sdp = GFS2_SB(inode); unsigned int blocks; struct folio *folio; int status; + if (!gfs2_is_jdata(ip) && !gfs2_is_stuffed(ip)) + return iomap_get_folio(iter, pos, len); + blocks = ((pos & blockmask) + len + blockmask) >> inode->i_blkbits; status = gfs2_trans_begin(sdp, RES_DINODE + blocks, 0); if (status) @@ -987,7 +991,7 @@ static void gfs2_iomap_put_folio(struct inode *inode, loff_t pos, struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); - if (!gfs2_is_stuffed(ip)) + if (gfs2_is_jdata(ip) && !gfs2_is_stuffed(ip)) gfs2_trans_add_databufs(ip->i_gl, folio, offset_in_folio(folio, pos), copied); @@ -995,13 +999,14 @@ static void gfs2_iomap_put_folio(struct inode *inode, loff_t pos, folio_unlock(folio); folio_put(folio); - if (tr->tr_num_buf_new) - __mark_inode_dirty(inode, I_DIRTY_DATASYNC); - - gfs2_trans_end(sdp); + if (gfs2_is_jdata(ip) || gfs2_is_stuffed(ip)) { + if (tr->tr_num_buf_new) + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); + gfs2_trans_end(sdp); + } } -static const struct iomap_folio_ops gfs2_iomap_folio_ops = { +const struct iomap_write_ops gfs2_iomap_write_ops = { .get_folio = gfs2_iomap_get_folio, .put_folio = gfs2_iomap_put_folio, }; @@ -1078,8 +1083,6 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos, gfs2_trans_end(sdp); } - if (gfs2_is_stuffed(ip) || gfs2_is_jdata(ip)) - iomap->folio_ops = &gfs2_iomap_folio_ops; return 0; out_trans_end: @@ -1304,7 +1307,7 @@ static int gfs2_block_zero_range(struct inode *inode, loff_t from, loff_t length return 0; length = min(length, inode->i_size - from); return iomap_zero_range(inode, from, length, NULL, &gfs2_iomap_ops, - NULL); + &gfs2_iomap_write_ops, NULL); } #define GFS2_JTRUNC_REVOKES 8192 diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h index 4e8b1e8ebdf39..6cdc72dd55a3f 100644 --- a/fs/gfs2/bmap.h +++ b/fs/gfs2/bmap.h @@ -44,6 +44,7 @@ static inline void gfs2_write_calc_reserv(const struct gfs2_inode *ip, } extern const struct iomap_ops gfs2_iomap_ops; +extern const struct iomap_write_ops gfs2_iomap_write_ops; extern const struct iomap_writeback_ops gfs2_writeback_ops; int gfs2_unstuff_dinode(struct gfs2_inode *ip); diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index fd1147aa3891d..2908f5bee21dc 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -1058,7 +1058,8 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb, } pagefault_disable(); - ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops, NULL); + ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops, + &gfs2_iomap_write_ops, NULL); pagefault_enable(); if (ret > 0) written += ret; diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index eed3d3e01aa83..e57014bb0e239 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -736,28 +736,27 @@ static int __iomap_write_begin(const struct iomap_iter *iter, size_t len, return 0; } -static struct folio *__iomap_get_folio(struct iomap_iter *iter, size_t len) +static struct folio *__iomap_get_folio(struct iomap_iter *iter, + const struct iomap_write_ops *write_ops, size_t len) { - const struct iomap_folio_ops *folio_ops = iter->iomap.folio_ops; loff_t pos = iter->pos; if (!mapping_large_folio_support(iter->inode->i_mapping)) len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos)); - if (folio_ops && folio_ops->get_folio) - return folio_ops->get_folio(iter, pos, len); - else - return iomap_get_folio(iter, pos, len); + if (write_ops && write_ops->get_folio) + return write_ops->get_folio(iter, pos, len); + return iomap_get_folio(iter, pos, len); } -static void __iomap_put_folio(struct iomap_iter *iter, size_t ret, +static void __iomap_put_folio(struct iomap_iter *iter, + const struct iomap_write_ops *write_ops, size_t ret, struct folio *folio) { - const struct iomap_folio_ops *folio_ops = iter->iomap.folio_ops; loff_t pos = iter->pos; - if (folio_ops && folio_ops->put_folio) { - folio_ops->put_folio(iter->inode, pos, ret, folio); + if (write_ops && write_ops->put_folio) { + write_ops->put_folio(iter->inode, pos, ret, folio); } else { folio_unlock(folio); folio_put(folio); @@ -794,10 +793,10 @@ static int iomap_write_begin_inline(const struct iomap_iter *iter, * offset, and length. Callers can optionally pass a max length *plen, * otherwise init to zero. */ -static int iomap_write_begin(struct iomap_iter *iter, struct folio **foliop, +static int iomap_write_begin(struct iomap_iter *iter, + const struct iomap_write_ops *write_ops, struct folio **foliop, size_t *poffset, u64 *plen) { - const struct iomap_folio_ops *folio_ops = iter->iomap.folio_ops; const struct iomap *srcmap = iomap_iter_srcmap(iter); loff_t pos = iter->pos; u64 len = min_t(u64, SIZE_MAX, iomap_length(iter)); @@ -812,7 +811,7 @@ static int iomap_write_begin(struct iomap_iter *iter, struct folio **foliop, if (fatal_signal_pending(current)) return -EINTR; - folio = __iomap_get_folio(iter, len); + folio = __iomap_get_folio(iter, write_ops, len); if (IS_ERR(folio)) return PTR_ERR(folio); @@ -826,8 +825,8 @@ static int iomap_write_begin(struct iomap_iter *iter, struct folio **foliop, * could do the wrong thing here (zero a page range incorrectly or fail * to zero) and corrupt data. */ - if (folio_ops && folio_ops->iomap_valid) { - bool iomap_valid = folio_ops->iomap_valid(iter->inode, + if (write_ops && write_ops->iomap_valid) { + bool iomap_valid = write_ops->iomap_valid(iter->inode, &iter->iomap); if (!iomap_valid) { iter->iomap.flags |= IOMAP_F_STALE; @@ -853,8 +852,7 @@ static int iomap_write_begin(struct iomap_iter *iter, struct folio **foliop, return 0; out_unlock: - __iomap_put_folio(iter, 0, folio); - + __iomap_put_folio(iter, write_ops, 0, folio); return status; } @@ -926,7 +924,8 @@ static bool iomap_write_end(struct iomap_iter *iter, size_t len, size_t copied, return __iomap_write_end(iter->inode, pos, len, copied, folio); } -static int iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) +static int iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i, + const struct iomap_write_ops *write_ops) { ssize_t total_written = 0; int status = 0; @@ -970,7 +969,8 @@ static int iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) break; } - status = iomap_write_begin(iter, &folio, &offset, &bytes); + status = iomap_write_begin(iter, write_ops, &folio, &offset, + &bytes); if (unlikely(status)) { iomap_write_failed(iter->inode, iter->pos, bytes); break; @@ -999,7 +999,7 @@ static int iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) i_size_write(iter->inode, pos + written); iter->iomap.flags |= IOMAP_F_SIZE_CHANGED; } - __iomap_put_folio(iter, written, folio); + __iomap_put_folio(iter, write_ops, written, folio); if (old_size < pos) pagecache_isize_extended(iter->inode, old_size, pos); @@ -1032,7 +1032,8 @@ static int iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i, - const struct iomap_ops *ops, void *private) + const struct iomap_ops *ops, + const struct iomap_write_ops *write_ops, void *private) { struct iomap_iter iter = { .inode = iocb->ki_filp->f_mapping->host, @@ -1049,7 +1050,7 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i, iter.flags |= IOMAP_DONTCACHE; while ((ret = iomap_iter(&iter, ops)) > 0) - iter.status = iomap_write_iter(&iter, i); + iter.status = iomap_write_iter(&iter, i, write_ops); if (unlikely(iter.pos == iocb->ki_pos)) return ret; @@ -1283,7 +1284,8 @@ void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte, } EXPORT_SYMBOL_GPL(iomap_write_delalloc_release); -static int iomap_unshare_iter(struct iomap_iter *iter) +static int iomap_unshare_iter(struct iomap_iter *iter, + const struct iomap_write_ops *write_ops) { struct iomap *iomap = &iter->iomap; u64 bytes = iomap_length(iter); @@ -1298,14 +1300,15 @@ static int iomap_unshare_iter(struct iomap_iter *iter) bool ret; bytes = min_t(u64, SIZE_MAX, bytes); - status = iomap_write_begin(iter, &folio, &offset, &bytes); + status = iomap_write_begin(iter, write_ops, &folio, &offset, + &bytes); if (unlikely(status)) return status; if (iomap->flags & IOMAP_F_STALE) break; ret = iomap_write_end(iter, bytes, bytes, folio); - __iomap_put_folio(iter, bytes, folio); + __iomap_put_folio(iter, write_ops, bytes, folio); if (WARN_ON_ONCE(!ret)) return -EIO; @@ -1323,7 +1326,8 @@ static int iomap_unshare_iter(struct iomap_iter *iter) int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len, - const struct iomap_ops *ops) + const struct iomap_ops *ops, + const struct iomap_write_ops *write_ops) { struct iomap_iter iter = { .inode = inode, @@ -1338,7 +1342,7 @@ iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len, iter.len = min(len, size - pos); while ((ret = iomap_iter(&iter, ops)) > 0) - iter.status = iomap_unshare_iter(&iter); + iter.status = iomap_unshare_iter(&iter, write_ops); return ret; } EXPORT_SYMBOL_GPL(iomap_file_unshare); @@ -1357,7 +1361,8 @@ static inline int iomap_zero_iter_flush_and_stale(struct iomap_iter *i) return filemap_write_and_wait_range(mapping, i->pos, end); } -static int iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) +static int iomap_zero_iter(struct iomap_iter *iter, bool *did_zero, + const struct iomap_write_ops *write_ops) { u64 bytes = iomap_length(iter); int status; @@ -1368,7 +1373,8 @@ static int iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) bool ret; bytes = min_t(u64, SIZE_MAX, bytes); - status = iomap_write_begin(iter, &folio, &offset, &bytes); + status = iomap_write_begin(iter, write_ops, &folio, &offset, + &bytes); if (status) return status; if (iter->iomap.flags & IOMAP_F_STALE) @@ -1381,7 +1387,7 @@ static int iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) folio_mark_accessed(folio); ret = iomap_write_end(iter, bytes, bytes, folio); - __iomap_put_folio(iter, bytes, folio); + __iomap_put_folio(iter, write_ops, bytes, folio); if (WARN_ON_ONCE(!ret)) return -EIO; @@ -1397,7 +1403,8 @@ static int iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, - const struct iomap_ops *ops, void *private) + const struct iomap_ops *ops, + const struct iomap_write_ops *write_ops, void *private) { struct iomap_iter iter = { .inode = inode, @@ -1427,7 +1434,8 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, filemap_range_needs_writeback(mapping, pos, pos + plen - 1)) { iter.len = plen; while ((ret = iomap_iter(&iter, ops)) > 0) - iter.status = iomap_zero_iter(&iter, did_zero); + iter.status = iomap_zero_iter(&iter, did_zero, + write_ops); iter.len = len - (iter.pos - pos); if (ret || !iter.len) @@ -1458,7 +1466,7 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, continue; } - iter.status = iomap_zero_iter(&iter, did_zero); + iter.status = iomap_zero_iter(&iter, did_zero, write_ops); } return ret; } @@ -1466,7 +1474,8 @@ EXPORT_SYMBOL_GPL(iomap_zero_range); int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, - const struct iomap_ops *ops, void *private) + const struct iomap_ops *ops, + const struct iomap_write_ops *write_ops, void *private) { unsigned int blocksize = i_blocksize(inode); unsigned int off = pos & (blocksize - 1); @@ -1475,7 +1484,7 @@ iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, if (!off) return 0; return iomap_zero_range(inode, pos, blocksize - off, did_zero, ops, - private); + write_ops, private); } EXPORT_SYMBOL_GPL(iomap_truncate_page); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 38e365b16348d..82d97eceaf622 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -979,7 +979,8 @@ xfs_file_buffered_write( trace_xfs_file_buffered_write(iocb, from); ret = iomap_file_buffered_write(iocb, from, - &xfs_buffered_write_iomap_ops, NULL); + &xfs_buffered_write_iomap_ops, &xfs_iomap_write_ops, + NULL); /* * If we hit a space limit, try to free up some lingering preallocated @@ -1059,7 +1060,8 @@ xfs_file_buffered_write_zoned( retry: trace_xfs_file_buffered_write(iocb, from); ret = iomap_file_buffered_write(iocb, from, - &xfs_buffered_write_iomap_ops, &ac); + &xfs_buffered_write_iomap_ops, &xfs_iomap_write_ops, + &ac); if (ret == -ENOSPC && !cleared_space) { /* * Kick off writeback to convert delalloc space and release the diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index ec30b78bf5c4d..2a74f29573410 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -79,6 +79,9 @@ xfs_iomap_valid( { struct xfs_inode *ip = XFS_I(inode); + if (iomap->type == IOMAP_HOLE) + return true; + if (iomap->validity_cookie != xfs_iomap_inode_sequence(ip, iomap->flags)) { trace_xfs_iomap_invalid(ip, iomap); @@ -89,7 +92,7 @@ xfs_iomap_valid( return true; } -static const struct iomap_folio_ops xfs_iomap_folio_ops = { +const struct iomap_write_ops xfs_iomap_write_ops = { .iomap_valid = xfs_iomap_valid, }; @@ -151,7 +154,6 @@ xfs_bmbt_to_iomap( iomap->flags |= IOMAP_F_DIRTY; iomap->validity_cookie = sequence_cookie; - iomap->folio_ops = &xfs_iomap_folio_ops; return 0; } @@ -2198,7 +2200,8 @@ xfs_zero_range( return dax_zero_range(inode, pos, len, did_zero, &xfs_dax_write_iomap_ops); return iomap_zero_range(inode, pos, len, did_zero, - &xfs_buffered_write_iomap_ops, ac); + &xfs_buffered_write_iomap_ops, &xfs_iomap_write_ops, + ac); } int @@ -2214,5 +2217,6 @@ xfs_truncate_page( return dax_truncate_page(inode, pos, did_zero, &xfs_dax_write_iomap_ops); return iomap_truncate_page(inode, pos, did_zero, - &xfs_buffered_write_iomap_ops, ac); + &xfs_buffered_write_iomap_ops, &xfs_iomap_write_ops, + ac); } diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index 674f8ac1b9bd8..ebcce7d494468 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -57,5 +57,6 @@ extern const struct iomap_ops xfs_seek_iomap_ops; extern const struct iomap_ops xfs_xattr_iomap_ops; extern const struct iomap_ops xfs_dax_write_iomap_ops; extern const struct iomap_ops xfs_atomic_write_cow_iomap_ops; +extern const struct iomap_write_ops xfs_iomap_write_ops; #endif /* __XFS_IOMAP_H__*/ diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index ad3bcb76d8055..3f177b4ec131d 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1881,7 +1881,8 @@ xfs_reflink_unshare( &xfs_dax_write_iomap_ops); else error = iomap_file_unshare(inode, offset, len, - &xfs_buffered_write_iomap_ops); + &xfs_buffered_write_iomap_ops, + &xfs_iomap_write_ops); if (error) goto out; diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c index fee9403ad49b7..24c29c10e27fb 100644 --- a/fs/zonefs/file.c +++ b/fs/zonefs/file.c @@ -572,7 +572,8 @@ static ssize_t zonefs_file_buffered_write(struct kiocb *iocb, if (ret <= 0) goto inode_unlock; - ret = iomap_file_buffered_write(iocb, from, &zonefs_write_iomap_ops, NULL); + ret = iomap_file_buffered_write(iocb, from, &zonefs_write_iomap_ops, + NULL, NULL); if (ret == -EIO) zonefs_io_error(inode, true); diff --git a/include/linux/iomap.h b/include/linux/iomap.h index b65d3f063bb07..80f543cc4fe8a 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -101,8 +101,6 @@ struct vm_fault; */ #define IOMAP_NULL_ADDR -1ULL /* addr is not valid */ -struct iomap_folio_ops; - struct iomap { u64 addr; /* disk offset of mapping, bytes */ loff_t offset; /* file offset of mapping, bytes */ @@ -113,7 +111,6 @@ struct iomap { struct dax_device *dax_dev; /* dax_dev for dax operations */ void *inline_data; void *private; /* filesystem private */ - const struct iomap_folio_ops *folio_ops; u64 validity_cookie; /* used with .iomap_valid() */ }; @@ -143,16 +140,11 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap) } /* - * When a filesystem sets folio_ops in an iomap mapping it returns, get_folio - * and put_folio will be called for each folio written to. This only applies - * to buffered writes as unbuffered writes will not typically have folios - * associated with them. - * * When get_folio succeeds, put_folio will always be called to do any * cleanup work necessary. put_folio is responsible for unlocking and putting * @folio. */ -struct iomap_folio_ops { +struct iomap_write_ops { struct folio *(*get_folio)(struct iomap_iter *iter, loff_t pos, unsigned len); void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied, @@ -335,7 +327,8 @@ static inline bool iomap_want_unshare_iter(const struct iomap_iter *iter) } ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, - const struct iomap_ops *ops, void *private); + const struct iomap_ops *ops, + const struct iomap_write_ops *write_ops, void *private); int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops); void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops); bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count); @@ -344,11 +337,14 @@ bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags); void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len); bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio); int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len, - const struct iomap_ops *ops); + const struct iomap_ops *ops, + const struct iomap_write_ops *write_ops); int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, - bool *did_zero, const struct iomap_ops *ops, void *private); + bool *did_zero, const struct iomap_ops *ops, + const struct iomap_write_ops *write_ops, void *private); int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, - const struct iomap_ops *ops, void *private); + const struct iomap_ops *ops, + const struct iomap_write_ops *write_ops, void *private); vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops, void *private); typedef void (*iomap_punch_t)(struct inode *inode, loff_t offset, loff_t length, From 7d50f37c217e7393719f98bb468f8e8a7be991cf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 10 Jul 2025 15:33:36 +0200 Subject: [PATCH 13/15] iomap: improve argument passing to iomap_read_folio_sync Pass the iomap_iter and derive the map inside iomap_read_folio_sync instead of in the caller, and use the more descriptive srcmap name for the source iomap. Stop passing the offset into folio argument as it can be derived from the folio and the file offset. Rename the variables for the offset into the file and the length to be more descriptive and match the rest of the code. Rename the function itself to iomap_read_folio_range to make the use more clear. Signed-off-by: Christoph Hellwig Reviewed-by: Joanne Koong Reviewed-by: "Darrick J. Wong" --- fs/iomap/buffered-io.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index e57014bb0e239..f9cf4245b3331 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -661,22 +661,22 @@ iomap_write_failed(struct inode *inode, loff_t pos, unsigned len) pos + len - 1); } -static int iomap_read_folio_sync(loff_t block_start, struct folio *folio, - size_t poff, size_t plen, const struct iomap *iomap) +static int iomap_read_folio_range(const struct iomap_iter *iter, + struct folio *folio, loff_t pos, size_t len) { + const struct iomap *srcmap = iomap_iter_srcmap(iter); struct bio_vec bvec; struct bio bio; - bio_init(&bio, iomap->bdev, &bvec, 1, REQ_OP_READ); - bio.bi_iter.bi_sector = iomap_sector(iomap, block_start); - bio_add_folio_nofail(&bio, folio, plen, poff); + bio_init(&bio, srcmap->bdev, &bvec, 1, REQ_OP_READ); + bio.bi_iter.bi_sector = iomap_sector(srcmap, pos); + bio_add_folio_nofail(&bio, folio, len, offset_in_folio(folio, pos)); return submit_bio_wait(&bio); } static int __iomap_write_begin(const struct iomap_iter *iter, size_t len, struct folio *folio) { - const struct iomap *srcmap = iomap_iter_srcmap(iter); struct iomap_folio_state *ifs; loff_t pos = iter->pos; loff_t block_size = i_blocksize(iter->inode); @@ -725,8 +725,8 @@ static int __iomap_write_begin(const struct iomap_iter *iter, size_t len, if (iter->flags & IOMAP_NOWAIT) return -EAGAIN; - status = iomap_read_folio_sync(block_start, folio, - poff, plen, srcmap); + status = iomap_read_folio_range(iter, folio, + block_start, plen); if (status) return status; } From 3613f44a5df91334cf1466f4fda148c929563ff7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 10 Jul 2025 15:33:37 +0200 Subject: [PATCH 14/15] iomap: add read_folio_range() handler for buffered writes Add a read_folio_range() handler for buffered writes that filesystems may pass in if they wish to provide a custom handler for synchronously reading in the contents of a folio. Signed-off-by: Joanne Koong [hch: renamed to read_folio_range, pass less arguments] Signed-off-by: Christoph Hellwig Reviewed-by: "Darrick J. Wong" --- Documentation/filesystems/iomap/operations.rst | 6 ++++++ fs/iomap/buffered-io.c | 13 +++++++++---- include/linux/iomap.h | 10 ++++++++++ 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/Documentation/filesystems/iomap/operations.rst b/Documentation/filesystems/iomap/operations.rst index a9b48ce4af920..067ed8e14ef34 100644 --- a/Documentation/filesystems/iomap/operations.rst +++ b/Documentation/filesystems/iomap/operations.rst @@ -68,6 +68,8 @@ The following address space operations can be wrapped easily: void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied, struct folio *folio); bool (*iomap_valid)(struct inode *inode, const struct iomap *iomap); + int (*read_folio_range)(const struct iomap_iter *iter, + struct folio *folio, loff_t pos, size_t len); }; iomap calls these functions: @@ -123,6 +125,10 @@ iomap calls these functions: ``->iomap_valid``, then the iomap should considered stale and the validation failed. + - ``read_folio_range``: Called to synchronously read in the range that will + be written to. If this function is not provided, iomap will default to + submitting a bio read request. + These ``struct kiocb`` flags are significant for buffered I/O with iomap: * ``IOCB_NOWAIT``: Turns on ``IOMAP_NOWAIT``. diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index f9cf4245b3331..a510066538613 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -674,7 +674,8 @@ static int iomap_read_folio_range(const struct iomap_iter *iter, return submit_bio_wait(&bio); } -static int __iomap_write_begin(const struct iomap_iter *iter, size_t len, +static int __iomap_write_begin(const struct iomap_iter *iter, + const struct iomap_write_ops *write_ops, size_t len, struct folio *folio) { struct iomap_folio_state *ifs; @@ -725,8 +726,12 @@ static int __iomap_write_begin(const struct iomap_iter *iter, size_t len, if (iter->flags & IOMAP_NOWAIT) return -EAGAIN; - status = iomap_read_folio_range(iter, folio, - block_start, plen); + if (write_ops && write_ops->read_folio_range) + status = write_ops->read_folio_range(iter, + folio, block_start, plen); + else + status = iomap_read_folio_range(iter, + folio, block_start, plen); if (status) return status; } @@ -842,7 +847,7 @@ static int iomap_write_begin(struct iomap_iter *iter, else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) status = __block_write_begin_int(folio, pos, len, NULL, srcmap); else - status = __iomap_write_begin(iter, len, folio); + status = __iomap_write_begin(iter, write_ops, len, folio); if (unlikely(status)) goto out_unlock; diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 80f543cc4fe8a..73dceabc21c8c 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -166,6 +166,16 @@ struct iomap_write_ops { * locked by the iomap code. */ bool (*iomap_valid)(struct inode *inode, const struct iomap *iomap); + + /* + * Optional if the filesystem wishes to provide a custom handler for + * reading in the contents of a folio, otherwise iomap will default to + * submitting a bio read request. + * + * The read must be done synchronously. + */ + int (*read_folio_range)(const struct iomap_iter *iter, + struct folio *folio, loff_t pos, size_t len); }; /* From 508ded62be652de15a22410a168e61a87ec86a9b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 10 Jul 2025 15:33:38 +0200 Subject: [PATCH 15/15] iomap: build the writeback code without CONFIG_BLOCK Allow fuse to use the iomap writeback code even when CONFIG_BLOCK is not enabled. Do this with an ifdef instead of a separate file to keep the iomap_folio_state local to buffered-io.c. Signed-off-by: Christoph Hellwig Reviewed-by: "Darrick J. Wong" Reviewed-by: Joanne Koong --- fs/iomap/Makefile | 6 +-- fs/iomap/buffered-io.c | 113 ++++++++++++++++++++++------------------- 2 files changed, 64 insertions(+), 55 deletions(-) diff --git a/fs/iomap/Makefile b/fs/iomap/Makefile index 69e8ebb41302a..f7e1c8534c464 100644 --- a/fs/iomap/Makefile +++ b/fs/iomap/Makefile @@ -9,9 +9,9 @@ ccflags-y += -I $(src) # needed for trace events obj-$(CONFIG_FS_IOMAP) += iomap.o iomap-y += trace.o \ - iter.o -iomap-$(CONFIG_BLOCK) += buffered-io.o \ - direct-io.o \ + iter.o \ + buffered-io.o +iomap-$(CONFIG_BLOCK) += direct-io.o \ ioend.o \ fiemap.o \ seek.o diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index a510066538613..6be7ced9b0313 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -278,6 +278,46 @@ static void iomap_adjust_read_range(struct inode *inode, struct folio *folio, *lenp = plen; } +static inline bool iomap_block_needs_zeroing(const struct iomap_iter *iter, + loff_t pos) +{ + const struct iomap *srcmap = iomap_iter_srcmap(iter); + + return srcmap->type != IOMAP_MAPPED || + (srcmap->flags & IOMAP_F_NEW) || + pos >= i_size_read(iter->inode); +} + +/** + * iomap_read_inline_data - copy inline data into the page cache + * @iter: iteration structure + * @folio: folio to copy to + * + * Copy the inline data in @iter into @folio and zero out the rest of the folio. + * Only a single IOMAP_INLINE extent is allowed at the end of each file. + * Returns zero for success to complete the read, or the usual negative errno. + */ +static int iomap_read_inline_data(const struct iomap_iter *iter, + struct folio *folio) +{ + const struct iomap *iomap = iomap_iter_srcmap(iter); + size_t size = i_size_read(iter->inode) - iomap->offset; + size_t offset = offset_in_folio(folio, iomap->offset); + + if (folio_test_uptodate(folio)) + return 0; + + if (WARN_ON_ONCE(size > iomap->length)) + return -EIO; + if (offset > 0) + ifs_alloc(iter->inode, folio, iter->flags); + + folio_fill_tail(folio, offset, iomap->inline_data, size); + iomap_set_range_uptodate(folio, offset, folio_size(folio) - offset); + return 0; +} + +#ifdef CONFIG_BLOCK static void iomap_finish_folio_read(struct folio *folio, size_t off, size_t len, int error) { @@ -317,45 +357,6 @@ struct iomap_readpage_ctx { struct readahead_control *rac; }; -/** - * iomap_read_inline_data - copy inline data into the page cache - * @iter: iteration structure - * @folio: folio to copy to - * - * Copy the inline data in @iter into @folio and zero out the rest of the folio. - * Only a single IOMAP_INLINE extent is allowed at the end of each file. - * Returns zero for success to complete the read, or the usual negative errno. - */ -static int iomap_read_inline_data(const struct iomap_iter *iter, - struct folio *folio) -{ - const struct iomap *iomap = iomap_iter_srcmap(iter); - size_t size = i_size_read(iter->inode) - iomap->offset; - size_t offset = offset_in_folio(folio, iomap->offset); - - if (folio_test_uptodate(folio)) - return 0; - - if (WARN_ON_ONCE(size > iomap->length)) - return -EIO; - if (offset > 0) - ifs_alloc(iter->inode, folio, iter->flags); - - folio_fill_tail(folio, offset, iomap->inline_data, size); - iomap_set_range_uptodate(folio, offset, folio_size(folio) - offset); - return 0; -} - -static inline bool iomap_block_needs_zeroing(const struct iomap_iter *iter, - loff_t pos) -{ - const struct iomap *srcmap = iomap_iter_srcmap(iter); - - return srcmap->type != IOMAP_MAPPED || - (srcmap->flags & IOMAP_F_NEW) || - pos >= i_size_read(iter->inode); -} - static int iomap_readpage_iter(struct iomap_iter *iter, struct iomap_readpage_ctx *ctx) { @@ -548,6 +549,27 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops) } EXPORT_SYMBOL_GPL(iomap_readahead); +static int iomap_read_folio_range(const struct iomap_iter *iter, + struct folio *folio, loff_t pos, size_t len) +{ + const struct iomap *srcmap = iomap_iter_srcmap(iter); + struct bio_vec bvec; + struct bio bio; + + bio_init(&bio, srcmap->bdev, &bvec, 1, REQ_OP_READ); + bio.bi_iter.bi_sector = iomap_sector(srcmap, pos); + bio_add_folio_nofail(&bio, folio, len, offset_in_folio(folio, pos)); + return submit_bio_wait(&bio); +} +#else +static int iomap_read_folio_range(const struct iomap_iter *iter, + struct folio *folio, loff_t pos, size_t len) +{ + WARN_ON_ONCE(1); + return -EIO; +} +#endif /* CONFIG_BLOCK */ + /* * iomap_is_partially_uptodate checks whether blocks within a folio are * uptodate or not. @@ -661,19 +683,6 @@ iomap_write_failed(struct inode *inode, loff_t pos, unsigned len) pos + len - 1); } -static int iomap_read_folio_range(const struct iomap_iter *iter, - struct folio *folio, loff_t pos, size_t len) -{ - const struct iomap *srcmap = iomap_iter_srcmap(iter); - struct bio_vec bvec; - struct bio bio; - - bio_init(&bio, srcmap->bdev, &bvec, 1, REQ_OP_READ); - bio.bi_iter.bi_sector = iomap_sector(srcmap, pos); - bio_add_folio_nofail(&bio, folio, len, offset_in_folio(folio, pos)); - return submit_bio_wait(&bio); -} - static int __iomap_write_begin(const struct iomap_iter *iter, const struct iomap_write_ops *write_ops, size_t len, struct folio *folio)