chore(snix): s/tvix/snix/

Change-Id: Iae961416eea0a38bc57df7b736f6dda5903b0828
This commit is contained in:
Florian Klink 2025-03-16 17:55:12 +00:00
parent 768f053416
commit 36e4d017f5
1417 changed files with 3741 additions and 3650 deletions

6
snix/.gitignore vendored Normal file
View file

@ -0,0 +1,6 @@
/target
/result-*
/result
target
/*.sled

8
snix/.vscode/extensions.json vendored Normal file
View file

@ -0,0 +1,8 @@
{
"recommendations": [
"rust-lang.rust-analyzer"
],
"unwantedRecommendations": [
"rust-lang.rust"
]
}

6106
snix/Cargo.lock generated Normal file

File diff suppressed because it is too large Load diff

22358
snix/Cargo.nix Normal file

File diff suppressed because it is too large Load diff

172
snix/Cargo.toml Normal file
View file

@ -0,0 +1,172 @@
# This Cargo file is a workspace configuration as per
# https://doc.rust-lang.org/book/ch14-03-cargo-workspaces.html
#
# We add this file to get a coherent set of dependencies across Snix
# crates by sharing a Cargo.lock. This is necessary because of the
# currently limited support for Rust builds in Nix.
#
# Note that this explicitly does *not* mean that //snix should be
# considered "one project": This is simply a workaround for a
# technical limitation and it should be our aim to remove this
# workspace file and make the subprojects independent.
#
# Note also that CI targets for actual projects should *not* be tied
# to //snix, but to its subprojects. A potential file at
# //snix/default.nix should likely *not* expose anything other than
# extra steps or other auxiliary targets.
[workspace]
resolver = "2"
members = [
"build",
"castore",
"cli",
"eval",
"eval/builtin-macros",
"glue",
"nar-bridge",
"nix-compat",
"nix-compat-derive",
"nix-compat-derive-tests",
"nix-daemon",
"serde",
"store",
"tracing",
]
[workspace.lints.clippy]
# Allow blocks_in_conditions due to false positives with #[tracing::instrument(…)]:
# https://github.com/rust-lang/rust-clippy/issues/12281
blocks_in_conditions = "allow"
[workspace.dependencies]
anyhow = "1.0.86"
async-compression = "0.4.12"
async-process = "2.2.4"
async-stream = "0.3.5"
async-tempfile = "0.4.0"
axum = "0.7.5"
axum-extra = "0.9.3"
axum-range = "0.4.0"
bigtable_rs = "0.2.16"
bitflags = "2.6.0"
blake3 = "1.5.4"
bstr = "1.10.0"
bytes = "1.7.1"
clap = "4.5.16"
codemap = "0.1.3"
codemap-diagnostic = "0.1.2"
count-write = "0.1.0"
criterion = "0.5"
data-encoding = "2.6.0"
digest = "0.10.7"
dirs = "4.0.0"
ed25519 = "2.2.3"
ed25519-dalek = "2.1.1"
enum-primitive-derive = "0.3.0"
erased-serde = "0.4.5"
expect-test = "1.5.0"
fastcdc = "3.1.0"
fuse-backend-rs = "0.12.0"
futures = "0.3.30"
genawaiter = { version = "0.99.1", default-features = false }
glob = "0.3.1"
hex-literal = "0.4.1"
http = "1.1.0"
hyper-util = "0.1.7"
indicatif = "0.17.8"
itertools = "0.12.1"
lexical-core = "0.8.5"
libc = "0.2.158"
lru = "0.12.4"
magic = "0.16.2"
md-5 = "0.10.6"
mimalloc = "0.1.43"
nix = "0.27.1"
nohash-hasher = "0.2.0"
nom = "8.0"
num-traits = "0.2.19"
object_store = "0.10.2"
opentelemetry = "0.28.0"
opentelemetry-http = "0.28.0"
opentelemetry-otlp = "0.28.0"
opentelemetry-semantic-conventions = "0.28.0"
opentelemetry_sdk = "0.28.0"
os_str_bytes = "6.6"
parking_lot = "0.12.3"
path-clean = "0.1"
petgraph = "0.6.5"
pin-project = "1.1"
pin-project-lite = "0.2.14"
pretty_assertions = "1.4.0"
proc-macro2 = "1.0.86"
proptest = { version = "1.5.0", default-features = false }
prost = "0.13.1"
prost-build = "0.13.1"
quote = "1.0.37"
redb = "2.1.2"
regex = "1.10.6"
reqwest = { version = "0.12.7", default-features = false }
reqwest-middleware = { version = "0.4.1", default-features = false }
reqwest-tracing = { version = "0.5.6", default-features = false }
rnix = "0.11.0"
rowan = "0.15" # keep in sync with rnix
rstest = "0.19.0"
rstest_reuse = "0.6.0"
rustc-hash = "2.0.0"
rustyline = "10.1.1"
serde = "1.0.209"
serde_json = "1.0"
serde_qs = "0.12.0"
serde_tagged = "0.3.0"
serde_with = "3.9.0"
sha1 = "0.10.6"
sha2 = "0.10.8"
smol_str = "0.2.2"
tabwriter = "1.4"
tempfile = "3.12.0"
test-strategy = "0.2.1"
thiserror = "2.0"
threadpool = "1.8.1"
tokio = "1.39.3"
tokio-listener = "0.4.3"
tokio-retry = "0.3.0"
tokio-stream = "0.1.15"
tokio-tar = "0.3.1"
tokio-test = "0.4.4"
tokio-util = "0.7.11"
tonic = "0.12.2"
tonic-build = "0.12.2"
tonic-health = { version = "0.12.2", default-features = false }
tonic-reflection = "0.12.2"
tower = "0.4.13"
tower-http = "0.6.2"
tower-otel-http-metrics = "0.11.0"
tracing = "0.1.40"
tracing-indicatif = "0.3.6"
tracing-opentelemetry = "0.29.0"
tracing-subscriber = "0.3.18"
tracing-test = "0.2.5"
tracing-tracy = "0.11.2"
trybuild = "1.0.99"
url = "2.5.2"
vhost = "0.6"
vhost-user-backend = "0.8"
virtio-bindings = "0.2.2"
virtio-queue = "0.7"
vm-memory = "0.10"
vmm-sys-util = "0.11"
vu128 = "1.1.0"
walkdir = "2.5.0"
# https://github.com/jneem/wu-manber/pull/1
wu-manber = { git = "https://github.com/tvlfyi/wu-manber.git" }
xattr = "1.3.1"
zstd = "0.13.2"
# Add a profile to all targets that enables release optimisations, but
# retains debug symbols. This is great for use with
# benchmarking/profiling tools.
[profile.release-with-debug]
inherits = "release"
debug = true

674
snix/LICENSE Normal file
View file

@ -0,0 +1,674 @@
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The GNU General Public License is a free, copyleft license for
software and other kinds of works.
The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
the GNU General Public License is intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users. We, the Free Software Foundation, use the
GNU General Public License for most of our software; it applies also to
any other work released this way by its authors. You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.
To protect your rights, we need to prevent others from denying you
these rights or asking you to surrender the rights. Therefore, you have
certain responsibilities if you distribute copies of the software, or if
you modify it: responsibilities to respect the freedom of others.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must pass on to the recipients the same
freedoms that you received. You must make sure that they, too, receive
or can get the source code. And you must show them these terms so they
know their rights.
Developers that use the GNU GPL protect your rights with two steps:
(1) assert copyright on the software, and (2) offer you this License
giving you legal permission to copy, distribute and/or modify it.
For the developers' and authors' protection, the GPL clearly explains
that there is no warranty for this free software. For both users' and
authors' sake, the GPL requires that modified versions be marked as
changed, so that their problems will not be attributed erroneously to
authors of previous versions.
Some devices are designed to deny users access to install or run
modified versions of the software inside them, although the manufacturer
can do so. This is fundamentally incompatible with the aim of
protecting users' freedom to change the software. The systematic
pattern of such abuse occurs in the area of products for individuals to
use, which is precisely where it is most unacceptable. Therefore, we
have designed this version of the GPL to prohibit the practice for those
products. If such problems arise substantially in other domains, we
stand ready to extend this provision to those domains in future versions
of the GPL, as needed to protect the freedom of users.
Finally, every program is threatened constantly by software patents.
States should not allow patents to restrict development and use of
software on general-purpose computers, but in those that do, we wish to
avoid the special danger that patents applied to a free program could
make it effectively proprietary. To prevent this, the GPL assures that
patents cannot be used to render the program non-free.
The precise terms and conditions for copying, distribution and
modification follow.
TERMS AND CONDITIONS
0. Definitions.
"This License" refers to version 3 of the GNU General Public License.
"Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.
"The Program" refers to any copyrightable work licensed under this
License. Each licensee is addressed as "you". "Licensees" and
"recipients" may be individuals or organizations.
To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy. The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.
A "covered work" means either the unmodified Program or a work based
on the Program.
To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy. Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.
To "convey" a work means any kind of propagation that enables other
parties to make or receive copies. Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.
An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License. If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.
1. Source Code.
The "source code" for a work means the preferred form of the work
for making modifications to it. "Object code" means any non-source
form of a work.
A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.
The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form. A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.
The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities. However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work. For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.
The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.
The Corresponding Source for a work in source code form is that
same work.
2. Basic Permissions.
All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met. This License explicitly affirms your unlimited
permission to run the unmodified Program. The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work. This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.
You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force. You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright. Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.
Conveying under any other circumstances is permitted solely under
the conditions stated below. Sublicensing is not allowed; section 10
makes it unnecessary.
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.
When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.
4. Conveying Verbatim Copies.
You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.
You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.
5. Conveying Modified Source Versions.
You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:
a) The work must carry prominent notices stating that you modified
it, and giving a relevant date.
b) The work must carry prominent notices stating that it is
released under this License and any conditions added under section
7. This requirement modifies the requirement in section 4 to
"keep intact all notices".
c) You must license the entire work, as a whole, under this
License to anyone who comes into possession of a copy. This
License will therefore apply, along with any applicable section 7
additional terms, to the whole of the work, and all its parts,
regardless of how they are packaged. This License gives no
permission to license the work in any other way, but it does not
invalidate such permission if you have separately received it.
d) If the work has interactive user interfaces, each must display
Appropriate Legal Notices; however, if the Program has interactive
interfaces that do not display Appropriate Legal Notices, your
work need not make them do so.
A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit. Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.
6. Conveying Non-Source Forms.
You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:
a) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by the
Corresponding Source fixed on a durable physical medium
customarily used for software interchange.
b) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by a
written offer, valid for at least three years and valid for as
long as you offer spare parts or customer support for that product
model, to give anyone who possesses the object code either (1) a
copy of the Corresponding Source for all the software in the
product that is covered by this License, on a durable physical
medium customarily used for software interchange, for a price no
more than your reasonable cost of physically performing this
conveying of source, or (2) access to copy the
Corresponding Source from a network server at no charge.
c) Convey individual copies of the object code with a copy of the
written offer to provide the Corresponding Source. This
alternative is allowed only occasionally and noncommercially, and
only if you received the object code with such an offer, in accord
with subsection 6b.
d) Convey the object code by offering access from a designated
place (gratis or for a charge), and offer equivalent access to the
Corresponding Source in the same way through the same place at no
further charge. You need not require recipients to copy the
Corresponding Source along with the object code. If the place to
copy the object code is a network server, the Corresponding Source
may be on a different server (operated by you or a third party)
that supports equivalent copying facilities, provided you maintain
clear directions next to the object code saying where to find the
Corresponding Source. Regardless of what server hosts the
Corresponding Source, you remain obligated to ensure that it is
available for as long as needed to satisfy these requirements.
e) Convey the object code using peer-to-peer transmission, provided
you inform other peers where the object code and Corresponding
Source of the work are being offered to the general public at no
charge under subsection 6d.
A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.
A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling. In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage. For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product. A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.
"Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source. The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.
If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information. But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).
The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed. Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.
Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.
7. Additional Terms.
"Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law. If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.
When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it. (Additional permissions may be written to require their own
removal in certain cases when you modify the work.) You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.
Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:
a) Disclaiming warranty or limiting liability differently from the
terms of sections 15 and 16 of this License; or
b) Requiring preservation of specified reasonable legal notices or
author attributions in that material or in the Appropriate Legal
Notices displayed by works containing it; or
c) Prohibiting misrepresentation of the origin of that material, or
requiring that modified versions of such material be marked in
reasonable ways as different from the original version; or
d) Limiting the use for publicity purposes of names of licensors or
authors of the material; or
e) Declining to grant rights under trademark law for use of some
trade names, trademarks, or service marks; or
f) Requiring indemnification of licensors and authors of that
material by anyone who conveys the material (or modified versions of
it) with contractual assumptions of liability to the recipient, for
any liability that these contractual assumptions directly impose on
those licensors and authors.
All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10. If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term. If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.
If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.
Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.
8. Termination.
You may not propagate or modify a covered work except as expressly
provided under this License. Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).
However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.
Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.
Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License. If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.
9. Acceptance Not Required for Having Copies.
You are not required to accept this License in order to receive or
run a copy of the Program. Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance. However,
nothing other than this License grants you permission to propagate or
modify any covered work. These actions infringe copyright if you do
not accept this License. Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.
10. Automatic Licensing of Downstream Recipients.
Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License. You are not responsible
for enforcing compliance by third parties with this License.
An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations. If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.
You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License. For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.
11. Patents.
A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based. The
work thus licensed is called the contributor's "contributor version".
A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version. For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.
Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.
In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement). To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.
If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients. "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.
If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.
A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License. You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.
Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.
12. No Surrender of Others' Freedom.
If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all. For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.
13. Use with the GNU Affero General Public License.
Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU Affero General Public License into a single
combined work, and to convey the resulting work. The terms of this
License will continue to apply to the part which is the covered work,
but the special requirements of the GNU Affero General Public License,
section 13, concerning interaction through a network will apply to the
combination as such.
14. Revised Versions of this License.
The Free Software Foundation may publish revised and/or new versions of
the GNU General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the
Program specifies that a certain numbered version of the GNU General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation. If the Program does not specify a version number of the
GNU General Public License, you may choose any version ever published
by the Free Software Foundation.
If the Program specifies that a proxy can decide which future
versions of the GNU General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.
Later license versions may give you additional or different
permissions. However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.
15. Disclaimer of Warranty.
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. Limitation of Liability.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.
17. Interpretation of Sections 15 and 16.
If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
Also add information on how to contact you by electronic and paper mail.
If the program does terminal interaction, make it output a short
notice like this when it starts in an interactive mode:
<program> Copyright (C) <year> <name of author>
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, your program's commands
might be different; for a GUI interface, you would use an "about box".
You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU GPL, see
<https://www.gnu.org/licenses/>.
The GNU General Public License does not permit incorporating your program
into proprietary programs. If your program is a subroutine library, you
may consider it more useful to permit linking proprietary applications with
the library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License. But first, please read
<https://www.gnu.org/licenses/why-not-lgpl.html>.

7
snix/OWNERS Normal file
View file

@ -0,0 +1,7 @@
set noparent
adisbladis
flokli
aspen
sterni
tazjin

153
snix/boot/README.md Normal file
View file

@ -0,0 +1,153 @@
# snix/boot
This directory provides tooling to boot VMs with /nix/store provided by
virtiofs.
In the `tests/` subdirectory, there's some integration tests.
## //snix/boot:runVM
A script spinning up a `snix-store virtiofs` daemon, then starting a cloud-
hypervisor VM.
The cloud-hypervisor VM is using a (semi-)minimal kernel image with virtiofs
support, and a custom initrd (using u-root). It supports various command line
options, to be able to do VM tests, act as an interactive shell or exec a binary
from a closure.
It supports the following env vars:
- `CH_NUM_CPUS=1` controls the number of CPUs available to the VM
- `CH_MEM_SIZE=512M` controls the memory availabe to the VM
- `CH_CMDLINE=` controls the kernel cmdline (which can be used to control the
boot)
### Usage
First, ensure you have `snix-store` in `$PATH`, as that's what `run-snix-vm`
expects:
Assuming you ran `cargo build --profile=release-with-debug` before, and are in
the `snix` directory:
```
export PATH=$PATH:$PWD/target/release-with-debug
```
Now, spin up snix-daemon, connecting to some (local) backends:
```
snix-store --otlp=false daemon \
--blob-service-addr=objectstore+file://$PWD/blobs \
--directory-service-addr=redb://$PWD/directories.redb \
--path-info-service-addr=redb://$PWD/pathinfo.redb &
```
Copy some data into snix-store (we use `nar-bridge` for this for now):
```
mg run //snix:nar-bridge -- --otlp=false &
rm -Rf ~/.cache/nix; nix copy --to http://localhost:9000\?compression\=none $(mg build //third_party/nixpkgs:hello)
pkill nar-bridge
```
By default, the `snix-store virtiofs` command used in the `runVM` script
connects to a running `snix-store daemon` via gRPC - in which case you want to
keep `snix-store daemon` running.
In case you want to have `snix-store virtiofs` open the stores directly, kill
`snix-store daemon` too, and export the addresses from above:
```
pkill snix-store
export BLOB_SERVICE_ADDR=objectstore+file://$PWD/blobs
export DIRECTORY_SERVICE_ADDR=redb://$PWD/directories.redb
export PATH_INFO_SERVICE_ADDR=redb://$PWD/pathinfo.redb
```
#### Interactive shell
Run the VM like this:
```
CH_CMDLINE=snix.shell mg run //snix/boot:runVM --
```
You'll get dropped into an interactive shell, from which you can do things with
the store:
```
______ _ ____ _ __
/_ __/ __(_) __ / _/___ (_) /_
/ / | | / / / |/_/ / // __ \/ / __/
/ / | |/ / /> < _/ // / / / / /_
/_/ |___/_/_/|_| /___/_/ /_/_/\__/
/# ls -la /nix/store/
dr-xr-xr-x root 0 0 Jan 1 00:00 .
dr-xr-xr-x root 0 989 Jan 1 00:00 aw2fw9ag10wr9pf0qk4nk5sxi0q0bn56-glibc-2.37-8
dr-xr-xr-x root 0 3 Jan 1 00:00 jbwb8d8l28lg9z0xzl784wyb9vlbwss6-xgcc-12.3.0-libgcc
dr-xr-xr-x root 0 82 Jan 1 00:00 k8ivghpggjrq1n49xp8sj116i4sh8lia-libidn2-2.3.4
dr-xr-xr-x root 0 141 Jan 1 00:00 mdi7lvrn2mx7rfzv3fdq3v5yw8swiks6-hello-2.12.1
dr-xr-xr-x root 0 5 Jan 1 00:00 s2gi8pfjszy6rq3ydx0z1vwbbskw994i-libunistring-1.1
```
Once you exit the shell, the VM will power off itself.
#### Execute a specific binary
Run the VM like this:
```
hello_cmd=$(mg build //third_party/nixpkgs:hello)/bin/hello
CH_CMDLINE=snix.run=$hello_cmd mg run //snix/boot:runVM --
```
Observe it executing the file (and closure) from the snix-store:
```
[ 0.277486] Run /init as init process
______ _ ____ _ __
/_ __/ __(_) __ / _/___ (_) /_
/ / | | / / / |/_/ / // __ \/ / __/
/ / | |/ / /> < _/ // / / / / /_
/_/ |___/_/_/|_| /___/_/ /_/_/\__/
Hello, world!
2023/09/24 21:10:19 Nothing left to be done, powering off.
[ 0.299122] ACPI: PM: Preparing to enter system sleep state S5
[ 0.299422] reboot: Power down
```
#### Boot a NixOS system closure
It's also possible to boot a system closure. To do this, snix-init honors the
init= cmdline option, and will `switch_root` to it.
Make sure to first copy that system closure into snix-store,
using a similar `nix copy` comamnd as above.
```
CH_CMDLINE=init=/nix/store/…-nixos-system-…/init mg run //snix/boot:runVM --
```
```
______ _ ____ _ __
/_ __/ __(_) __ / _/___ (_) /_
/ / | | / / / |/_/ / // __ \/ / __/
/ / | |/ / /> < _/ // / / / / /_
/_/ |___/_/_/|_| /___/_/ /_/_/\__/
2023/09/24 21:16:43 switch_root: moving mounts
2023/09/24 21:16:43 switch_root: Skipping "/run" as the dir does not exist
2023/09/24 21:16:43 switch_root: Changing directory
2023/09/24 21:16:43 switch_root: Moving /
2023/09/24 21:16:43 switch_root: Changing root!
2023/09/24 21:16:43 switch_root: Deleting old /
2023/09/24 21:16:43 switch_root: executing init
<<< NixOS Stage 2 >>>
[ 0.322096] booting system configuration /nix/store/g657sdxinpqfcdv0162zmb8vv9b5c4c5-nixos-system-client-23.11.git.82102fc37da
running activation script...
setting up /etc...
starting systemd...
[ 0.980740] systemd[1]: systemd 253.6 running in system mode (+PAM +AUDIT -SELINUX +APPARMOR +IMA +SMACK +SECCOMP +GCRYPT -GNUTLS +OPENSSL +ACL +BLKID +CURL +ELFUTILS +FIDO2 +IDN2 -IDN +IPTC +KMOD +LIBCRYPTSETUP +LIBFDISK +PCRE2 -PWQUALITY +P11KIT -QRENCODE +TPM2 +BZIP2 +LZ4 +XZ +ZLIB +ZSTD +BPF_FRAMEWORK -XKBCOMMON +UTMP -SYSVINIT default-hierarchy=unified)
```
This effectively replaces the NixOS Stage 1 entirely.

116
snix/boot/default.nix Normal file
View file

@ -0,0 +1,116 @@
{ lib, pkgs, ... }:
rec {
# A binary that sets up /nix/store from virtiofs, lists all store paths, and
# powers off the machine.
snix-init = pkgs.buildGoModule rec {
name = "snix-init";
src = lib.fileset.toSource {
root = ./.;
fileset = ./snix-init.go;
};
vendorHash = null;
postPatch = "go mod init ${name}";
};
# A kernel with virtiofs support baked in
# TODO: make a smaller kernel, we don't need a gazillion filesystems and
# device drivers in it.
kernel = pkgs.buildLinux ({ } // {
inherit (pkgs.linuxPackages_latest.kernel) src version modDirVersion;
autoModules = false;
kernelPreferBuiltin = true;
ignoreConfigErrors = true;
kernelPatches = [ ];
structuredExtraConfig = with pkgs.lib.kernel; {
FUSE_FS = option yes;
DAX_DRIVER = option yes;
DAX = option yes;
FS_DAX = option yes;
VIRTIO_FS = option yes;
VIRTIO = option yes;
ZONE_DEVICE = option yes;
};
});
# A build framework for minimal initrds
uroot = pkgs.buildGoModule rec {
pname = "u-root";
version = "0.14.0";
src = pkgs.fetchFromGitHub {
owner = "u-root";
repo = "u-root";
rev = "v${version}";
hash = "sha256-8zA3pHf45MdUcq/MA/mf0KCTxB1viHieU/oigYwIPgo=";
};
vendorHash = null;
doCheck = false; # Some tests invoke /bin/bash
};
# Use u-root to build a initrd with our snix-init inside.
initrd = pkgs.stdenv.mkDerivation {
name = "initrd.cpio";
nativeBuildInputs = [ pkgs.go ];
# https://github.com/u-root/u-root/issues/2466
buildCommand = ''
mkdir -p /tmp/go/src/github.com/u-root/
cp -R ${uroot.src} /tmp/go/src/github.com/u-root/u-root
cd /tmp/go/src/github.com/u-root/u-root
chmod +w .
cp ${snix-init}/bin/snix-init snix-init
export HOME=$(mktemp -d)
export GOROOT="$(go env GOROOT)"
GO111MODULE=off GOPATH=/tmp/go GOPROXY=off ${uroot}/bin/u-root -files ./snix-init -initcmd "/snix-init" -o $out
'';
};
# Start a `snix-store` virtiofs daemon from $PATH, then a cloud-hypervisor
# pointed to it.
# Supports the following env vars (and defaults)
# CH_NUM_CPUS=2
# CH_MEM_SIZE=512M
# CH_CMDLINE=""
runVM = pkgs.writers.writeBashBin "run-snix-vm" ''
tempdir=$(mktemp -d)
cleanup() {
kill $virtiofsd_pid
if [[ -n ''${work_dir-} ]]; then
chmod -R u+rw "$tempdir"
rm -rf "$tempdir"
fi
}
trap cleanup EXIT
# Spin up the virtiofs daemon
snix-store --otlp=false virtiofs -l $tempdir/snix.sock &
virtiofsd_pid=$!
# Wait for the socket to exist.
until [ -e $tempdir/snix.sock ]; do sleep 0.1; done
CH_NUM_CPUS="''${CH_NUM_CPUS:-2}"
CH_MEM_SIZE="''${CH_MEM_SIZE:-512M}"
CH_CMDLINE="''${CH_CMDLINE:-}"
# spin up cloud_hypervisor
${pkgs.cloud-hypervisor}/bin/cloud-hypervisor \
--cpus boot=$CH_NUM_CPU \
--memory mergeable=on,shared=on,size=$CH_MEM_SIZE \
--console null \
--serial tty \
--kernel ${kernel}/${pkgs.stdenv.hostPlatform.linux-kernel.target} \
--initramfs ${initrd} \
--cmdline "console=ttyS0 $CH_CMDLINE" \
--fs tag=snix,socket=$tempdir/snix.sock,num_queues=''${CH_NUM_CPU},queue_size=512
'';
meta.ci.targets = [
"initrd"
"kernel"
"runVM"
];
}

138
snix/boot/snix-init.go Normal file
View file

@ -0,0 +1,138 @@
package main
import (
"fmt"
"log"
"os"
"os/exec"
"strings"
"syscall"
)
// run the given command, connecting std{in,err,out} with the OS one.
func run(args ...string) error {
cmd := exec.Command(args[0], args[1:]...)
cmd.Stdin = os.Stdin
cmd.Stderr = os.Stderr
cmd.Stdout = os.Stdout
return cmd.Run()
}
// parse the cmdline, return a map[string]string.
func parseCmdline(cmdline string) map[string]string {
line := strings.TrimSuffix(cmdline, "\n")
fields := strings.Fields(line)
out := make(map[string]string, 0)
for _, arg := range fields {
kv := strings.SplitN(arg, "=", 2)
switch len(kv) {
case 1:
out[kv[0]] = ""
case 2:
out[kv[0]] = kv[1]
}
}
return out
}
// mounts the snix store from the virtiofs tag to the given destination,
// creating the destination if it doesn't exist already.
func mountSnixStore(dest string) error {
if err := os.MkdirAll(dest, os.ModePerm); err != nil {
return fmt.Errorf("unable to mkdir dest: %w", err)
}
if err := run("mount", "-t", "virtiofs", "snix", dest, "-o", "ro"); err != nil {
return fmt.Errorf("unable to run mount: %w", err)
}
return nil
}
func main() {
fmt.Print(`
______ _ ____ _ __
/_ __/ __(_) __ / _/___ (_) /_
/ / | | / / / |/_/ / // __ \/ / __/
/ / | |/ / /> < _/ // / / / / /_
/_/ |___/_/_/|_| /___/_/ /_/_/\__/
`)
// Set PATH to "/bbin", so we can find the u-root tools
os.Setenv("PATH", "/bbin")
if err := run("mount", "-t", "proc", "none", "/proc"); err != nil {
log.Printf("Failed to mount /proc: %v\n", err)
}
if err := run("mount", "-t", "sysfs", "none", "/sys"); err != nil {
log.Printf("Failed to mount /sys: %v\n", err)
}
if err := run("mount", "-t", "devtmpfs", "devtmpfs", "/dev"); err != nil {
log.Printf("Failed to mount /dev: %v\n", err)
}
cmdline, err := os.ReadFile("/proc/cmdline")
if err != nil {
log.Printf("Failed to read cmdline: %s\n", err)
}
cmdlineFields := parseCmdline(string(cmdline))
if _, ok := cmdlineFields["snix.find"]; ok {
// If snix.find is set, invoke find /nix/store
if err := mountSnixStore("/nix/store"); err != nil {
log.Printf("Failed to mount snix store: %v\n", err)
}
if err := run("find", "/nix/store"); err != nil {
log.Printf("Failed to run find command: %s\n", err)
}
} else if _, ok := cmdlineFields["snix.shell"]; ok {
// If snix.shell is set, mount the nix store to /nix/store directly,
// then invoke the elvish shell
if err := mountSnixStore("/nix/store"); err != nil {
log.Printf("Failed to mount snix store: %v\n", err)
}
if err := run("elvish"); err != nil {
log.Printf("Failed to run shell: %s\n", err)
}
} else if v, ok := cmdlineFields["snix.run"]; ok {
// If snix.run is set, mount the nix store to /nix/store directly,
// then invoke the command.
if err := mountSnixStore("/nix/store"); err != nil {
log.Printf("Failed to mount snix store: %v\n", err)
}
if err := run(v); err != nil {
log.Printf("Failed to run command: %s\n", err)
}
} else if v, ok := cmdlineFields["init"]; ok {
// If init is set, invoke the binary specified (with switch_root),
// and prepare /fs beforehand as well.
os.Mkdir("/fs", os.ModePerm)
if err := run("mount", "-t", "tmpfs", "none", "/fs"); err != nil {
log.Fatalf("Failed to mount /fs tmpfs: %s\n", err)
}
// Mount /fs/nix/store
if err := mountSnixStore("/fs/nix/store"); err != nil {
log.Fatalf("Failed to mount snix store: %v\n", err)
}
// Invoke switch_root, which will take care of moving /proc, /sys and /dev.
if err := syscall.Exec("/bbin/switch_root", []string{"switch_root", "/fs", v}, []string{}); err != nil {
log.Printf("Failed to switch root: %s\n", err)
}
} else {
log.Printf("No command detected, not knowing what to do!")
}
// This is only reached in the non switch_root case.
log.Printf("Nothing left to be done, powering off.")
if err := run("poweroff"); err != nil {
log.Printf("Failed to run poweroff command: %v\n", err)
}
}

250
snix/boot/tests/default.nix Normal file
View file

@ -0,0 +1,250 @@
{ depot, pkgs, lib, ... }:
let
# Seed a snix-store with the specified path, then start a VM with the
# snix-boot initrd.
# Allows customizing the cmdline, which can be used to list files,
# or specify what init should be booted.
mkBootTest =
{ blobServiceAddr ? "memory://"
, directoryServiceAddr ? "memory://"
, pathInfoServiceAddr ? "memory://"
# The path to import.
, path
# Whether the path should be imported as a closure.
# If false, importPathName must be specified.
, isClosure ? false
# Whether to use nar-bridge to upload, rather than snix-store copy.
# using nar-bridge currently is "slower", as the `pkgs.mkBinaryCache` build
# takes quite some time.
, useNarBridge ? false
, importPathName ? null
# Commands to run before starting the snix-daemon. Useful to provide
# auxillary mock services.
, preStart ? ""
# The cmdline to pass to the VM.
# Defaults to snix.find, which lists all files in the store.
, vmCmdline ? "snix.find"
# The string we expect to find in the VM output.
# Defaults the value of `path` (the store path we upload).
, assertVMOutput ? path
}:
assert isClosure -> importPathName == null;
assert (!isClosure) -> importPathName != null;
pkgs.stdenv.mkDerivation ({
name = "run-vm";
nativeBuildInputs = [
depot.snix.store
depot.snix.boot.runVM
] ++ lib.optionals (isClosure && useNarBridge) [
depot.snix.nar-bridge
pkgs.curl
pkgs.rush-parallel
pkgs.zstd.bin
pkgs.nix
];
buildCommand = ''
set -eou pipefail
touch $out
# Ensure we can construct http clients.
export SSL_CERT_FILE=/dev/null
${preStart}
# Start the snix daemon, listening on a unix socket.
BLOB_SERVICE_ADDR=${lib.escapeShellArg blobServiceAddr} \
DIRECTORY_SERVICE_ADDR=${lib.escapeShellArg directoryServiceAddr} \
PATH_INFO_SERVICE_ADDR=${lib.escapeShellArg pathInfoServiceAddr} \
snix-store \
--otlp=false \
daemon -l $PWD/snix-store.sock &
# Wait for the service to report healthy.
timeout 22 sh -c "until ${pkgs.ip2unix}/bin/ip2unix -r out,path=$PWD/snix-store.sock ${pkgs.grpc-health-check}/bin/grpc-health-check --address 127.0.0.1 --port 8080; do sleep 1; done"
# Export env vars so that subsequent snix-store commands will talk to
# our snix-store daemon over the unix socket.
export BLOB_SERVICE_ADDR=grpc+unix://$PWD/snix-store.sock
export DIRECTORY_SERVICE_ADDR=grpc+unix://$PWD/snix-store.sock
export PATH_INFO_SERVICE_ADDR=grpc+unix://$PWD/snix-store.sock
'' + lib.optionalString (!isClosure) ''
echo "Importing ${path} into snix-store with name ${importPathName}"
cp -R ${path} ${importPathName}
outpath=$(snix-store import ${importPathName})
echo "imported to $outpath"
'' + lib.optionalString (isClosure && !useNarBridge) ''
echo "Copying closure ${path}"
# This picks up the `closure` key in `$NIX_ATTRS_JSON_FILE` automatically.
snix-store --otlp=false copy
'' + lib.optionalString (isClosure && useNarBridge) ''
echo "Starting nar-bridge"
nar-bridge \
--otlp=false \
-l $PWD/nar-bridge.sock &
# Wait for nar-bridge to report healthy.
timeout 22 sh -c "until ${pkgs.curl}/bin/curl -s --unix-socket $PWD/nar-bridge.sock http:///nix-binary-cache; do sleep 1; done"
# Upload. We can't use nix copy --to http://…, as it wants access to the nix db.
# However, we can use mkBinaryCache to assemble .narinfo and .nar.xz to upload,
# and then drive a HTTP client ourselves.
to_upload=${
pkgs.mkBinaryCache {
rootPaths = [ path ];
# Implemented in https://github.com/NixOS/nixpkgs/pull/376365
compression = "zstd";
}
}
# Upload all NAR files (with some parallelism).
# As mkBinaryCache produces them xz-compressed, unpack them on the fly.
# nar-bridge doesn't care about the path we upload *to*, but a
# subsequent .narinfo upload need to refer to its contents (by narhash).
echo -e "Uploading NARs "
# TODO(flokli): extension of the nar files where changed from .nar.{compression} to .{compression}
# https://github.com/NixOS/nixpkgs/pull/376365
ls -d $to_upload/nar/*.zst | rush -n1 'nar_hash=$(zstdcat < {} | nix-hash --base32 --type sha256 --flat /dev/stdin);zstdcat < {} | curl -s --fail-with-body -T - --unix-socket $PWD/nar-bridge.sock http://localhost:9000/nar/''${nar_hash}.nar'
echo "Done."
# Upload all NARInfo files.
# FUTUREWORK: This doesn't upload them in order, and currently relies
# on PathInfoService not doing any checking.
# In the future, we might want to make this behaviour configurable,
# and disable checking here, to keep the logic simple.
ls -d $to_upload/*.narinfo | rush 'curl -s -T - --unix-socket $PWD/nar-bridge.sock http://localhost:9000/$(basename {}) < {}'
'' + ''
# Invoke a VM using snix as the backing store, ensure the outpath appears in its listing.
echo "Starting VM"
CH_CMDLINE="${vmCmdline}" run-snix-vm 2>&1 | tee output.txt
grep "${assertVMOutput}" output.txt
'';
requiredSystemFeatures = [ "kvm" ];
# HACK: The boot tests are sometimes flaky, and we don't want them to
# periodically fail other build. Have Buildkite auto-retry them 2 times
# on failure.
# Logs for individual failures are still available, so it won't hinder
# flakiness debuggability.
meta.ci.buildkiteExtraStepArgs = {
retry.automatic = true;
};
} // lib.optionalAttrs (isClosure && !useNarBridge) {
__structuredAttrs = true;
exportReferencesGraph.closure = [ path ];
});
testSystem = (pkgs.nixos {
# Set some options necessary to evaluate.
boot.loader.systemd-boot.enable = true;
# TODO: figure out how to disable this without causing eval to fail
fileSystems."/" = {
device = "/dev/root";
fsType = "tmpfs";
};
services.getty.helpLine = "Onwards and upwards.";
systemd.services.do-shutdown = {
after = [ "getty.target" ];
description = "Shut down again";
wantedBy = [ "multi-user.target" ];
serviceConfig.Type = "oneshot";
script = "/run/current-system/sw/bin/systemctl poweroff --when=+10s";
};
# Don't warn about stateVersion.
system.stateVersion = "24.05";
# Speed-up evaluation and building.
documentation.enable = lib.mkForce false;
}).config.system.build.toplevel;
in
depot.nix.readTree.drvTargets {
docs-memory = (mkBootTest {
path = ../../docs;
importPathName = "docs";
});
docs-persistent = (mkBootTest {
blobServiceAddr = "objectstore+file:///build/blobs";
directoryServiceAddr = "redb:///build/directories.redb";
pathInfoServiceAddr = "redb:///build/pathinfo.redb";
path = ../../docs;
importPathName = "docs";
});
closure-snix = (mkBootTest {
blobServiceAddr = "objectstore+file:///build/blobs";
path = depot.snix.store;
isClosure = true;
});
closure-nixos = (mkBootTest {
blobServiceAddr = "objectstore+file:///build/blobs";
pathInfoServiceAddr = "redb:///build/pathinfo.redb";
directoryServiceAddr = "redb:///build/directories.redb";
path = testSystem;
isClosure = true;
vmCmdline = "init=${testSystem}/init panic=-1"; # reboot immediately on panic
assertVMOutput = "Onwards and upwards.";
});
closure-nixos-bigtable = (mkBootTest {
blobServiceAddr = "objectstore+file:///build/blobs";
directoryServiceAddr = "bigtable://instance-1?project_id=project-1&table_name=directories&family_name=cf1";
pathInfoServiceAddr = "bigtable://instance-1?project_id=project-1&table_name=pathinfos&family_name=cf1";
path = testSystem;
useNarBridge = true;
preStart = ''
${pkgs.cbtemulator}/bin/cbtemulator -address $PWD/cbtemulator.sock &
timeout 22 sh -c 'until [ -e $PWD/cbtemulator.sock ]; do sleep 1; done'
export BIGTABLE_EMULATOR_HOST=unix://$PWD/cbtemulator.sock
${pkgs.google-cloud-bigtable-tool}/bin/cbt -instance instance-1 -project project-1 createtable directories
${pkgs.google-cloud-bigtable-tool}/bin/cbt -instance instance-1 -project project-1 createfamily directories cf1
${pkgs.google-cloud-bigtable-tool}/bin/cbt -instance instance-1 -project project-1 createtable pathinfos
${pkgs.google-cloud-bigtable-tool}/bin/cbt -instance instance-1 -project project-1 createfamily pathinfos cf1
'';
isClosure = true;
vmCmdline = "init=${testSystem}/init panic=-1"; # reboot immediately on panic
assertVMOutput = "Onwards and upwards.";
});
closure-nixos-s3 = (mkBootTest {
blobServiceAddr = "objectstore+s3://mybucket/blobs?aws_access_key_id=myaccesskey&aws_secret_access_key=supersecret&aws_endpoint_url=http%3A%2F%2Flocalhost%3A9000&aws_allow_http=1";
# we cannot use s3 here yet without any caching layer, as we don't allow "deeper" access to directories (non-root nodes)
# directoryServiceAddr = "objectstore+s3://mybucket/directories?aws_access_key_id=myaccesskey&aws_secret_access_key=supersecret&endpoint=http%3A%2F%2Flocalhost%3A9000&aws_allow_http=1";
directoryServiceAddr = "memory://";
pathInfoServiceAddr = "memory://";
path = testSystem;
useNarBridge = true;
preStart = ''
MINIO_ACCESS_KEY=myaccesskey MINIO_SECRET_KEY=supersecret MINIO_ADDRESS=127.0.0.1:9000 ${pkgs.minio}/bin/minio server $(mktemp -d) &
timeout 22 sh -c 'until ${pkgs.netcat}/bin/nc -z $0 $1; do sleep 1; done' localhost 9000
mc_config_dir=$(mktemp -d)
${pkgs.minio-client}/bin/mc --config-dir $mc_config_dir alias set 'myminio' 'http://127.0.0.1:9000' 'myaccesskey' 'supersecret'
${pkgs.minio-client}/bin/mc --config-dir $mc_config_dir mb myminio/mybucket
'';
isClosure = true;
vmCmdline = "init=${testSystem}/init panic=-1"; # reboot immediately on panic
assertVMOutput = "Onwards and upwards.";
});
closure-nixos-nar-bridge = (mkBootTest {
blobServiceAddr = "objectstore+file:///build/blobs";
path = testSystem;
useNarBridge = true;
isClosure = true;
vmCmdline = "init=${testSystem}/init panic=-1"; # reboot immediately on panic
assertVMOutput = "Onwards and upwards.";
});
}

22
snix/build-go/LICENSE Normal file
View file

@ -0,0 +1,22 @@
Copyright © The Tvix Authors
Copyright © The Snix Project
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
“Software”), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

10
snix/build-go/README.md Normal file
View file

@ -0,0 +1,10 @@
# build-go
This directory contains generated golang bindings, both for the `snix-build`
data models, as well as the gRPC bindings.
They are generated with `mg run //snix/build-go:regenerate`.
These files end with `.pb.go`, and are ensured to be up to date by a CI check.
Additionally, code useful when interacting with these data structures
(ending just with `.go`) is provided.

672
snix/build-go/build.pb.go Normal file
View file

@ -0,0 +1,672 @@
// SPDX-License-Identifier: MIT
// Copyright © 2022 The Tvix Authors
// Copyright © 2025 The Snix Project
// Code generated by protoc-gen-go. DO NOT EDIT.
// versions:
// protoc-gen-go v1.36.5
// protoc (unknown)
// source: snix/build/protos/build.proto
package buildv1
import (
protoreflect "google.golang.org/protobuf/reflect/protoreflect"
protoimpl "google.golang.org/protobuf/runtime/protoimpl"
reflect "reflect"
proto "snix.dev/castore/proto"
sync "sync"
unsafe "unsafe"
)
const (
// Verify that this generated code is sufficiently up-to-date.
_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
// Verify that runtime/protoimpl is sufficiently up-to-date.
_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
)
// A BuildRequest describes the request of something to be run on the builder.
// It is distinct from an actual [Build] that has already happened, or might be
// currently ongoing.
//
// A BuildRequest can be seen as a more normalized version of a Derivation
// (parsed from A-Term), "writing out" some of the Nix-internal details about
// how e.g. environment variables in the build are set.
//
// Nix has some impurities when building a Derivation, for example the --cores option
// ends up as an environment variable in the build, that's not part of the ATerm.
//
// As of now, we serialize this into the BuildRequest, so builders can stay dumb.
// This might change in the future.
//
// There's also a big difference when it comes to how inputs are modelled:
// - Nix only uses store path (strings) to describe the inputs.
// As store paths can be input-addressed, a certain store path can contain
// different contents (as not all store paths are binary reproducible).
// This requires that for every input-addressed input, the builder has access
// to either the input's deriver (and needs to build it) or else a trusted
// source for the built input.
// to upload input-addressed paths, requiring the trusted users concept.
// - snix-build records a list of snix.castore.v1.Node as inputs.
// These map from the store path base name to their contents, relieving the
// builder from having to "trust" any input-addressed paths, contrary to Nix.
//
// While this approach gives a better hermeticity, it has one downside:
// A BuildRequest can only be sent once the contents of all its inputs are known.
//
// As of now, we're okay to accept this, but it prevents uploading an
// entirely-non-IFD subgraph of BuildRequests eagerly.
//
// FUTUREWORK: We might be introducing another way to refer to inputs, to
// support "send all BuildRequest for a nixpkgs eval to a remote builder and put
// the laptop to sleep" usecases later.
type BuildRequest struct {
state protoimpl.MessageState `protogen:"open.v1"`
// The list of all root nodes that should be visible in `inputs_dir` at the
// time of the build.
// As all references are content-addressed, no additional signatures are
// needed to substitute / make these available in the build environment.
// Inputs MUST be sorted by their names.
Inputs []*proto.Node `protobuf:"bytes,1,rep,name=inputs,proto3" json:"inputs,omitempty"`
// The command (and its args) executed as the build script.
// In the case of a Nix derivation, this is usually
// ["/path/to/some-bash/bin/bash", "-e", "/path/to/some/builder.sh"].
CommandArgs []string `protobuf:"bytes,2,rep,name=command_args,json=commandArgs,proto3" json:"command_args,omitempty"`
// The working dir of the command, relative to the build root.
// "build", in the case of Nix.
// This MUST be a clean relative path, without any ".", "..", or superfluous
// slashes.
WorkingDir string `protobuf:"bytes,3,opt,name=working_dir,json=workingDir,proto3" json:"working_dir,omitempty"`
// A list of "scratch" paths, relative to the build root.
// These will be write-able during the build.
// [build, nix/store] in the case of Nix.
// These MUST be clean relative paths, without any ".", "..", or superfluous
// slashes, and sorted.
ScratchPaths []string `protobuf:"bytes,4,rep,name=scratch_paths,json=scratchPaths,proto3" json:"scratch_paths,omitempty"`
// The path where the castore input nodes will be located at,
// "nix/store" in case of Nix.
// Builds might also write into here (Nix builds do that).
// This MUST be a clean relative path, without any ".", "..", or superfluous
// slashes.
InputsDir string `protobuf:"bytes,5,opt,name=inputs_dir,json=inputsDir,proto3" json:"inputs_dir,omitempty"`
// The list of output paths the build is expected to produce,
// relative to the root.
// If the path is not produced, the build is considered to have failed.
// These MUST be clean relative paths, without any ".", "..", or superfluous
// slashes, and sorted.
Outputs []string `protobuf:"bytes,6,rep,name=outputs,proto3" json:"outputs,omitempty"`
// The list of environment variables and their values that should be set
// inside the build environment.
// This includes both environment vars set inside the derivation, as well as
// more "ephemeral" ones like NIX_BUILD_CORES, controlled by the `--cores`
// CLI option of `nix-build`.
// For now, we consume this as an option when turning a Derivation into a BuildRequest,
// similar to how Nix has a `--cores` option.
// We don't want to bleed these very nix-specific sandbox impl details into
// (dumber) builders if we don't have to.
// Environment variables are sorted by their keys.
EnvironmentVars []*BuildRequest_EnvVar `protobuf:"bytes,7,rep,name=environment_vars,json=environmentVars,proto3" json:"environment_vars,omitempty"`
// A set of constraints that need to be satisfied on a build host before a
// Build can be started.
Constraints *BuildRequest_BuildConstraints `protobuf:"bytes,8,opt,name=constraints,proto3" json:"constraints,omitempty"`
// Additional (small) files and their contents that should be placed into the
// build environment, but outside inputs_dir.
// Used for passAsFile and structuredAttrs in Nix.
AdditionalFiles []*BuildRequest_AdditionalFile `protobuf:"bytes,9,rep,name=additional_files,json=additionalFiles,proto3" json:"additional_files,omitempty"`
// If this is an non-empty list, all paths in `outputs` are scanned for these.
// For Nix, `refscan_needles` would be populated with the nixbase32 hash parts of
// every input store path and output store path. The latter is necessary to scan
// for references between multi-output derivations.
RefscanNeedles []string `protobuf:"bytes,10,rep,name=refscan_needles,json=refscanNeedles,proto3" json:"refscan_needles,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *BuildRequest) Reset() {
*x = BuildRequest{}
mi := &file_snix_build_protos_build_proto_msgTypes[0]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *BuildRequest) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*BuildRequest) ProtoMessage() {}
func (x *BuildRequest) ProtoReflect() protoreflect.Message {
mi := &file_snix_build_protos_build_proto_msgTypes[0]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use BuildRequest.ProtoReflect.Descriptor instead.
func (*BuildRequest) Descriptor() ([]byte, []int) {
return file_snix_build_protos_build_proto_rawDescGZIP(), []int{0}
}
func (x *BuildRequest) GetInputs() []*proto.Node {
if x != nil {
return x.Inputs
}
return nil
}
func (x *BuildRequest) GetCommandArgs() []string {
if x != nil {
return x.CommandArgs
}
return nil
}
func (x *BuildRequest) GetWorkingDir() string {
if x != nil {
return x.WorkingDir
}
return ""
}
func (x *BuildRequest) GetScratchPaths() []string {
if x != nil {
return x.ScratchPaths
}
return nil
}
func (x *BuildRequest) GetInputsDir() string {
if x != nil {
return x.InputsDir
}
return ""
}
func (x *BuildRequest) GetOutputs() []string {
if x != nil {
return x.Outputs
}
return nil
}
func (x *BuildRequest) GetEnvironmentVars() []*BuildRequest_EnvVar {
if x != nil {
return x.EnvironmentVars
}
return nil
}
func (x *BuildRequest) GetConstraints() *BuildRequest_BuildConstraints {
if x != nil {
return x.Constraints
}
return nil
}
func (x *BuildRequest) GetAdditionalFiles() []*BuildRequest_AdditionalFile {
if x != nil {
return x.AdditionalFiles
}
return nil
}
func (x *BuildRequest) GetRefscanNeedles() []string {
if x != nil {
return x.RefscanNeedles
}
return nil
}
// A Build is (one possible) outcome of executing a [BuildRequest].
type Build struct {
state protoimpl.MessageState `protogen:"open.v1"`
// The orginal build request producing the build.
BuildRequest *BuildRequest `protobuf:"bytes,1,opt,name=build_request,json=buildRequest,proto3" json:"build_request,omitempty"` // <- TODO: define hashing scheme for BuildRequest, refer to it by hash?
// The outputs that were produced after successfully building.
// They are sorted by their names.
Outputs []*proto.Node `protobuf:"bytes,2,rep,name=outputs,proto3" json:"outputs,omitempty"`
// Contains the same number of elements as the `outputs` field.
OutputsNeedles []*Build_OutputNeedles `protobuf:"bytes,3,rep,name=outputs_needles,json=outputsNeedles,proto3" json:"outputs_needles,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *Build) Reset() {
*x = Build{}
mi := &file_snix_build_protos_build_proto_msgTypes[1]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *Build) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*Build) ProtoMessage() {}
func (x *Build) ProtoReflect() protoreflect.Message {
mi := &file_snix_build_protos_build_proto_msgTypes[1]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use Build.ProtoReflect.Descriptor instead.
func (*Build) Descriptor() ([]byte, []int) {
return file_snix_build_protos_build_proto_rawDescGZIP(), []int{1}
}
func (x *Build) GetBuildRequest() *BuildRequest {
if x != nil {
return x.BuildRequest
}
return nil
}
func (x *Build) GetOutputs() []*proto.Node {
if x != nil {
return x.Outputs
}
return nil
}
func (x *Build) GetOutputsNeedles() []*Build_OutputNeedles {
if x != nil {
return x.OutputsNeedles
}
return nil
}
type BuildRequest_EnvVar struct {
state protoimpl.MessageState `protogen:"open.v1"`
// name of the environment variable. Must not contain =.
Key string `protobuf:"bytes,1,opt,name=key,proto3" json:"key,omitempty"`
Value []byte `protobuf:"bytes,2,opt,name=value,proto3" json:"value,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *BuildRequest_EnvVar) Reset() {
*x = BuildRequest_EnvVar{}
mi := &file_snix_build_protos_build_proto_msgTypes[2]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *BuildRequest_EnvVar) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*BuildRequest_EnvVar) ProtoMessage() {}
func (x *BuildRequest_EnvVar) ProtoReflect() protoreflect.Message {
mi := &file_snix_build_protos_build_proto_msgTypes[2]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use BuildRequest_EnvVar.ProtoReflect.Descriptor instead.
func (*BuildRequest_EnvVar) Descriptor() ([]byte, []int) {
return file_snix_build_protos_build_proto_rawDescGZIP(), []int{0, 0}
}
func (x *BuildRequest_EnvVar) GetKey() string {
if x != nil {
return x.Key
}
return ""
}
func (x *BuildRequest_EnvVar) GetValue() []byte {
if x != nil {
return x.Value
}
return nil
}
// BuildConstraints represents certain conditions that must be fulfilled
// inside the build environment to be able to build this.
// Constraints can be things like required architecture and minimum amount of memory.
// The required input paths are *not* represented in here, because it
// wouldn't be hermetic enough - see the comment around inputs too.
type BuildRequest_BuildConstraints struct {
state protoimpl.MessageState `protogen:"open.v1"`
// The system that's needed to execute the build.
// Must not be empty.
System string `protobuf:"bytes,1,opt,name=system,proto3" json:"system,omitempty"`
// The amount of memory required to be available for the build, in bytes.
MinMemory uint64 `protobuf:"varint,2,opt,name=min_memory,json=minMemory,proto3" json:"min_memory,omitempty"`
// A list of (absolute) paths that need to be available in the build
// environment, like `/dev/kvm`.
// This is distinct from the castore nodes in inputs.
// TODO: check if these should be individual constraints instead.
// These MUST be clean absolute paths, without any ".", "..", or superfluous
// slashes, and sorted.
AvailableRoPaths []string `protobuf:"bytes,3,rep,name=available_ro_paths,json=availableRoPaths,proto3" json:"available_ro_paths,omitempty"`
// Whether the build should be able to access the network,
NetworkAccess bool `protobuf:"varint,4,opt,name=network_access,json=networkAccess,proto3" json:"network_access,omitempty"`
// Whether to provide a /bin/sh inside the build environment, usually a static bash.
ProvideBinSh bool `protobuf:"varint,5,opt,name=provide_bin_sh,json=provideBinSh,proto3" json:"provide_bin_sh,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *BuildRequest_BuildConstraints) Reset() {
*x = BuildRequest_BuildConstraints{}
mi := &file_snix_build_protos_build_proto_msgTypes[3]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *BuildRequest_BuildConstraints) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*BuildRequest_BuildConstraints) ProtoMessage() {}
func (x *BuildRequest_BuildConstraints) ProtoReflect() protoreflect.Message {
mi := &file_snix_build_protos_build_proto_msgTypes[3]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use BuildRequest_BuildConstraints.ProtoReflect.Descriptor instead.
func (*BuildRequest_BuildConstraints) Descriptor() ([]byte, []int) {
return file_snix_build_protos_build_proto_rawDescGZIP(), []int{0, 1}
}
func (x *BuildRequest_BuildConstraints) GetSystem() string {
if x != nil {
return x.System
}
return ""
}
func (x *BuildRequest_BuildConstraints) GetMinMemory() uint64 {
if x != nil {
return x.MinMemory
}
return 0
}
func (x *BuildRequest_BuildConstraints) GetAvailableRoPaths() []string {
if x != nil {
return x.AvailableRoPaths
}
return nil
}
func (x *BuildRequest_BuildConstraints) GetNetworkAccess() bool {
if x != nil {
return x.NetworkAccess
}
return false
}
func (x *BuildRequest_BuildConstraints) GetProvideBinSh() bool {
if x != nil {
return x.ProvideBinSh
}
return false
}
type BuildRequest_AdditionalFile struct {
state protoimpl.MessageState `protogen:"open.v1"`
Path string `protobuf:"bytes,1,opt,name=path,proto3" json:"path,omitempty"`
Contents []byte `protobuf:"bytes,2,opt,name=contents,proto3" json:"contents,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *BuildRequest_AdditionalFile) Reset() {
*x = BuildRequest_AdditionalFile{}
mi := &file_snix_build_protos_build_proto_msgTypes[4]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *BuildRequest_AdditionalFile) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*BuildRequest_AdditionalFile) ProtoMessage() {}
func (x *BuildRequest_AdditionalFile) ProtoReflect() protoreflect.Message {
mi := &file_snix_build_protos_build_proto_msgTypes[4]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use BuildRequest_AdditionalFile.ProtoReflect.Descriptor instead.
func (*BuildRequest_AdditionalFile) Descriptor() ([]byte, []int) {
return file_snix_build_protos_build_proto_rawDescGZIP(), []int{0, 2}
}
func (x *BuildRequest_AdditionalFile) GetPath() string {
if x != nil {
return x.Path
}
return ""
}
func (x *BuildRequest_AdditionalFile) GetContents() []byte {
if x != nil {
return x.Contents
}
return nil
}
type Build_OutputNeedles struct {
state protoimpl.MessageState `protogen:"open.v1"`
// The numbers are indexing into `refscan_needles` originally specified in the BuildRequest.
Needles []uint64 `protobuf:"varint,1,rep,packed,name=needles,proto3" json:"needles,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *Build_OutputNeedles) Reset() {
*x = Build_OutputNeedles{}
mi := &file_snix_build_protos_build_proto_msgTypes[5]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *Build_OutputNeedles) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*Build_OutputNeedles) ProtoMessage() {}
func (x *Build_OutputNeedles) ProtoReflect() protoreflect.Message {
mi := &file_snix_build_protos_build_proto_msgTypes[5]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use Build_OutputNeedles.ProtoReflect.Descriptor instead.
func (*Build_OutputNeedles) Descriptor() ([]byte, []int) {
return file_snix_build_protos_build_proto_rawDescGZIP(), []int{1, 0}
}
func (x *Build_OutputNeedles) GetNeedles() []uint64 {
if x != nil {
return x.Needles
}
return nil
}
var File_snix_build_protos_build_proto protoreflect.FileDescriptor
var file_snix_build_protos_build_proto_rawDesc = string([]byte{
0x0a, 0x1d, 0x73, 0x6e, 0x69, 0x78, 0x2f, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x2f, 0x70, 0x72, 0x6f,
0x74, 0x6f, 0x73, 0x2f, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12,
0x0d, 0x73, 0x6e, 0x69, 0x78, 0x2e, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x2e, 0x76, 0x31, 0x1a, 0x21,
0x73, 0x6e, 0x69, 0x78, 0x2f, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2f, 0x70, 0x72, 0x6f,
0x74, 0x6f, 0x73, 0x2f, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74,
0x6f, 0x22, 0xb9, 0x06, 0x0a, 0x0c, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x52, 0x65, 0x71, 0x75, 0x65,
0x73, 0x74, 0x12, 0x2d, 0x0a, 0x06, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03,
0x28, 0x0b, 0x32, 0x15, 0x2e, 0x73, 0x6e, 0x69, 0x78, 0x2e, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72,
0x65, 0x2e, 0x76, 0x31, 0x2e, 0x4e, 0x6f, 0x64, 0x65, 0x52, 0x06, 0x69, 0x6e, 0x70, 0x75, 0x74,
0x73, 0x12, 0x21, 0x0a, 0x0c, 0x63, 0x6f, 0x6d, 0x6d, 0x61, 0x6e, 0x64, 0x5f, 0x61, 0x72, 0x67,
0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0b, 0x63, 0x6f, 0x6d, 0x6d, 0x61, 0x6e, 0x64,
0x41, 0x72, 0x67, 0x73, 0x12, 0x1f, 0x0a, 0x0b, 0x77, 0x6f, 0x72, 0x6b, 0x69, 0x6e, 0x67, 0x5f,
0x64, 0x69, 0x72, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x77, 0x6f, 0x72, 0x6b, 0x69,
0x6e, 0x67, 0x44, 0x69, 0x72, 0x12, 0x23, 0x0a, 0x0d, 0x73, 0x63, 0x72, 0x61, 0x74, 0x63, 0x68,
0x5f, 0x70, 0x61, 0x74, 0x68, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0c, 0x73, 0x63,
0x72, 0x61, 0x74, 0x63, 0x68, 0x50, 0x61, 0x74, 0x68, 0x73, 0x12, 0x1d, 0x0a, 0x0a, 0x69, 0x6e,
0x70, 0x75, 0x74, 0x73, 0x5f, 0x64, 0x69, 0x72, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09,
0x69, 0x6e, 0x70, 0x75, 0x74, 0x73, 0x44, 0x69, 0x72, 0x12, 0x18, 0x0a, 0x07, 0x6f, 0x75, 0x74,
0x70, 0x75, 0x74, 0x73, 0x18, 0x06, 0x20, 0x03, 0x28, 0x09, 0x52, 0x07, 0x6f, 0x75, 0x74, 0x70,
0x75, 0x74, 0x73, 0x12, 0x4d, 0x0a, 0x10, 0x65, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65,
0x6e, 0x74, 0x5f, 0x76, 0x61, 0x72, 0x73, 0x18, 0x07, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x22, 0x2e,
0x73, 0x6e, 0x69, 0x78, 0x2e, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x2e, 0x76, 0x31, 0x2e, 0x42, 0x75,
0x69, 0x6c, 0x64, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x2e, 0x45, 0x6e, 0x76, 0x56, 0x61,
0x72, 0x52, 0x0f, 0x65, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0x56, 0x61,
0x72, 0x73, 0x12, 0x4e, 0x0a, 0x0b, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x72, 0x61, 0x69, 0x6e, 0x74,
0x73, 0x18, 0x08, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x2c, 0x2e, 0x73, 0x6e, 0x69, 0x78, 0x2e, 0x62,
0x75, 0x69, 0x6c, 0x64, 0x2e, 0x76, 0x31, 0x2e, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x52, 0x65, 0x71,
0x75, 0x65, 0x73, 0x74, 0x2e, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x43, 0x6f, 0x6e, 0x73, 0x74, 0x72,
0x61, 0x69, 0x6e, 0x74, 0x73, 0x52, 0x0b, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x72, 0x61, 0x69, 0x6e,
0x74, 0x73, 0x12, 0x55, 0x0a, 0x10, 0x61, 0x64, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x61, 0x6c,
0x5f, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x18, 0x09, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2a, 0x2e, 0x73,
0x6e, 0x69, 0x78, 0x2e, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x2e, 0x76, 0x31, 0x2e, 0x42, 0x75, 0x69,
0x6c, 0x64, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x2e, 0x41, 0x64, 0x64, 0x69, 0x74, 0x69,
0x6f, 0x6e, 0x61, 0x6c, 0x46, 0x69, 0x6c, 0x65, 0x52, 0x0f, 0x61, 0x64, 0x64, 0x69, 0x74, 0x69,
0x6f, 0x6e, 0x61, 0x6c, 0x46, 0x69, 0x6c, 0x65, 0x73, 0x12, 0x27, 0x0a, 0x0f, 0x72, 0x65, 0x66,
0x73, 0x63, 0x61, 0x6e, 0x5f, 0x6e, 0x65, 0x65, 0x64, 0x6c, 0x65, 0x73, 0x18, 0x0a, 0x20, 0x03,
0x28, 0x09, 0x52, 0x0e, 0x72, 0x65, 0x66, 0x73, 0x63, 0x61, 0x6e, 0x4e, 0x65, 0x65, 0x64, 0x6c,
0x65, 0x73, 0x1a, 0x30, 0x0a, 0x06, 0x45, 0x6e, 0x76, 0x56, 0x61, 0x72, 0x12, 0x10, 0x0a, 0x03,
0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14,
0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x05, 0x76,
0x61, 0x6c, 0x75, 0x65, 0x1a, 0xc4, 0x01, 0x0a, 0x10, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x43, 0x6f,
0x6e, 0x73, 0x74, 0x72, 0x61, 0x69, 0x6e, 0x74, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x79, 0x73,
0x74, 0x65, 0x6d, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x73, 0x79, 0x73, 0x74, 0x65,
0x6d, 0x12, 0x1d, 0x0a, 0x0a, 0x6d, 0x69, 0x6e, 0x5f, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x18,
0x02, 0x20, 0x01, 0x28, 0x04, 0x52, 0x09, 0x6d, 0x69, 0x6e, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79,
0x12, 0x2c, 0x0a, 0x12, 0x61, 0x76, 0x61, 0x69, 0x6c, 0x61, 0x62, 0x6c, 0x65, 0x5f, 0x72, 0x6f,
0x5f, 0x70, 0x61, 0x74, 0x68, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x09, 0x52, 0x10, 0x61, 0x76,
0x61, 0x69, 0x6c, 0x61, 0x62, 0x6c, 0x65, 0x52, 0x6f, 0x50, 0x61, 0x74, 0x68, 0x73, 0x12, 0x25,
0x0a, 0x0e, 0x6e, 0x65, 0x74, 0x77, 0x6f, 0x72, 0x6b, 0x5f, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73,
0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0d, 0x6e, 0x65, 0x74, 0x77, 0x6f, 0x72, 0x6b, 0x41,
0x63, 0x63, 0x65, 0x73, 0x73, 0x12, 0x24, 0x0a, 0x0e, 0x70, 0x72, 0x6f, 0x76, 0x69, 0x64, 0x65,
0x5f, 0x62, 0x69, 0x6e, 0x5f, 0x73, 0x68, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0c, 0x70,
0x72, 0x6f, 0x76, 0x69, 0x64, 0x65, 0x42, 0x69, 0x6e, 0x53, 0x68, 0x1a, 0x40, 0x0a, 0x0e, 0x41,
0x64, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x61, 0x6c, 0x46, 0x69, 0x6c, 0x65, 0x12, 0x12, 0x0a,
0x04, 0x70, 0x61, 0x74, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x70, 0x61, 0x74,
0x68, 0x12, 0x1a, 0x0a, 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x02, 0x20,
0x01, 0x28, 0x0c, 0x52, 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x73, 0x22, 0xf2, 0x01,
0x0a, 0x05, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x12, 0x40, 0x0a, 0x0d, 0x62, 0x75, 0x69, 0x6c, 0x64,
0x5f, 0x72, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1b,
0x2e, 0x73, 0x6e, 0x69, 0x78, 0x2e, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x2e, 0x76, 0x31, 0x2e, 0x42,
0x75, 0x69, 0x6c, 0x64, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x52, 0x0c, 0x62, 0x75, 0x69,
0x6c, 0x64, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x2f, 0x0a, 0x07, 0x6f, 0x75, 0x74,
0x70, 0x75, 0x74, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x15, 0x2e, 0x73, 0x6e, 0x69,
0x78, 0x2e, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x4e, 0x6f, 0x64,
0x65, 0x52, 0x07, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x73, 0x12, 0x4b, 0x0a, 0x0f, 0x6f, 0x75,
0x74, 0x70, 0x75, 0x74, 0x73, 0x5f, 0x6e, 0x65, 0x65, 0x64, 0x6c, 0x65, 0x73, 0x18, 0x03, 0x20,
0x03, 0x28, 0x0b, 0x32, 0x22, 0x2e, 0x73, 0x6e, 0x69, 0x78, 0x2e, 0x62, 0x75, 0x69, 0x6c, 0x64,
0x2e, 0x76, 0x31, 0x2e, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x2e, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74,
0x4e, 0x65, 0x65, 0x64, 0x6c, 0x65, 0x73, 0x52, 0x0e, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x73,
0x4e, 0x65, 0x65, 0x64, 0x6c, 0x65, 0x73, 0x1a, 0x29, 0x0a, 0x0d, 0x4f, 0x75, 0x74, 0x70, 0x75,
0x74, 0x4e, 0x65, 0x65, 0x64, 0x6c, 0x65, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x6e, 0x65, 0x65, 0x64,
0x6c, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x04, 0x52, 0x07, 0x6e, 0x65, 0x65, 0x64, 0x6c,
0x65, 0x73, 0x42, 0x1e, 0x5a, 0x1c, 0x73, 0x6e, 0x69, 0x78, 0x2e, 0x64, 0x65, 0x76, 0x2f, 0x62,
0x75, 0x69, 0x6c, 0x64, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x3b, 0x62, 0x75, 0x69, 0x6c, 0x64,
0x76, 0x31, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
})
var (
file_snix_build_protos_build_proto_rawDescOnce sync.Once
file_snix_build_protos_build_proto_rawDescData []byte
)
func file_snix_build_protos_build_proto_rawDescGZIP() []byte {
file_snix_build_protos_build_proto_rawDescOnce.Do(func() {
file_snix_build_protos_build_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_snix_build_protos_build_proto_rawDesc), len(file_snix_build_protos_build_proto_rawDesc)))
})
return file_snix_build_protos_build_proto_rawDescData
}
var file_snix_build_protos_build_proto_msgTypes = make([]protoimpl.MessageInfo, 6)
var file_snix_build_protos_build_proto_goTypes = []any{
(*BuildRequest)(nil), // 0: snix.build.v1.BuildRequest
(*Build)(nil), // 1: snix.build.v1.Build
(*BuildRequest_EnvVar)(nil), // 2: snix.build.v1.BuildRequest.EnvVar
(*BuildRequest_BuildConstraints)(nil), // 3: snix.build.v1.BuildRequest.BuildConstraints
(*BuildRequest_AdditionalFile)(nil), // 4: snix.build.v1.BuildRequest.AdditionalFile
(*Build_OutputNeedles)(nil), // 5: snix.build.v1.Build.OutputNeedles
(*proto.Node)(nil), // 6: snix.castore.v1.Node
}
var file_snix_build_protos_build_proto_depIdxs = []int32{
6, // 0: snix.build.v1.BuildRequest.inputs:type_name -> snix.castore.v1.Node
2, // 1: snix.build.v1.BuildRequest.environment_vars:type_name -> snix.build.v1.BuildRequest.EnvVar
3, // 2: snix.build.v1.BuildRequest.constraints:type_name -> snix.build.v1.BuildRequest.BuildConstraints
4, // 3: snix.build.v1.BuildRequest.additional_files:type_name -> snix.build.v1.BuildRequest.AdditionalFile
0, // 4: snix.build.v1.Build.build_request:type_name -> snix.build.v1.BuildRequest
6, // 5: snix.build.v1.Build.outputs:type_name -> snix.castore.v1.Node
5, // 6: snix.build.v1.Build.outputs_needles:type_name -> snix.build.v1.Build.OutputNeedles
7, // [7:7] is the sub-list for method output_type
7, // [7:7] is the sub-list for method input_type
7, // [7:7] is the sub-list for extension type_name
7, // [7:7] is the sub-list for extension extendee
0, // [0:7] is the sub-list for field type_name
}
func init() { file_snix_build_protos_build_proto_init() }
func file_snix_build_protos_build_proto_init() {
if File_snix_build_protos_build_proto != nil {
return
}
type x struct{}
out := protoimpl.TypeBuilder{
File: protoimpl.DescBuilder{
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
RawDescriptor: unsafe.Slice(unsafe.StringData(file_snix_build_protos_build_proto_rawDesc), len(file_snix_build_protos_build_proto_rawDesc)),
NumEnums: 0,
NumMessages: 6,
NumExtensions: 0,
NumServices: 0,
},
GoTypes: file_snix_build_protos_build_proto_goTypes,
DependencyIndexes: file_snix_build_protos_build_proto_depIdxs,
MessageInfos: file_snix_build_protos_build_proto_msgTypes,
}.Build()
File_snix_build_protos_build_proto = out.File
file_snix_build_protos_build_proto_goTypes = nil
file_snix_build_protos_build_proto_depIdxs = nil
}

33
snix/build-go/default.nix Normal file
View file

@ -0,0 +1,33 @@
{ depot, pkgs, ... }:
let
regenerate = pkgs.writeShellScript "regenerate" ''
(cd $(git rev-parse --show-toplevel)/snix/build-go && rm *.pb.go && cp ${depot.snix.build.protos.go-bindings}/*.pb.go . && chmod +w *.pb.go)
'';
in
(pkgs.buildGoModule {
name = "build-go";
src = depot.third_party.gitignoreSource ./.;
vendorHash = "sha256:1j652an8ir1ybyj21znaipsir7mbs3v972mw27ppsjz9dgh2crx6";
}).overrideAttrs (_: {
meta.ci.extraSteps = {
check = {
label = ":water_buffalo: ensure generated protobuf files match";
needsOutput = true;
command = pkgs.writeShellScript "pb-go-check" ''
${regenerate}
if [[ -n "$(git status --porcelain -unormal)" ]]; then
echo "-----------------------------"
echo ".pb.go files need to be updated, mg run //snix/build-go/regenerate"
echo "-----------------------------"
git status -unormal
exit 1
fi
'';
alwaysRun = true;
};
};
# https://git.snix.dev/snix/snix/issues/60
meta.ci.skip = true;
passthru.regenerate = regenerate;
})

21
snix/build-go/go.mod Normal file
View file

@ -0,0 +1,21 @@
module code.tvl.fyi/tvix/build-go
go 1.22
toolchain go1.23.4
require (
code.tvl.fyi/tvix/castore-go v0.0.0-20231105151352-990d6ba2175e
google.golang.org/grpc v1.69.2
google.golang.org/protobuf v1.36.1
)
require (
github.com/golang/protobuf v1.5.4 // indirect
github.com/klauspost/cpuid/v2 v2.0.9 // indirect
golang.org/x/net v0.30.0 // indirect
golang.org/x/sys v0.26.0 // indirect
golang.org/x/text v0.19.0 // indirect
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013 // indirect
lukechampine.com/blake3 v1.1.7 // indirect
)

104
snix/build-go/go.sum Normal file
View file

@ -0,0 +1,104 @@
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
code.tvl.fyi/tvix/castore-go v0.0.0-20231105151352-990d6ba2175e h1:Nj+anfyEYeEdhnIo2BG/N1ZwQl1IvI7AH3TbNDLwUOA=
code.tvl.fyi/tvix/castore-go v0.0.0-20231105151352-990d6ba2175e/go.mod h1:+vKbozsa04yy2TWh3kUVU568jaza3Hf0p1jAEoMoCwA=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4=
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
go.opentelemetry.io/otel v1.31.0 h1:NsJcKPIW0D0H3NgzPDHmo0WW6SptzPdqg/L1zsIm2hY=
go.opentelemetry.io/otel v1.31.0/go.mod h1:O0C14Yl9FgkjqcCZAsE053C13OaddMYr/hz6clDkEJE=
go.opentelemetry.io/otel/metric v1.31.0 h1:FSErL0ATQAmYHUIzSezZibnyVlft1ybhy4ozRPcF2fE=
go.opentelemetry.io/otel/metric v1.31.0/go.mod h1:C3dEloVbLuYoX41KpmAhOqNriGbA+qqH6PQ5E5mUfnY=
go.opentelemetry.io/otel/sdk v1.31.0 h1:xLY3abVHYZ5HSfOg3l2E5LUj2Cwva5Y7yGxnSW9H5Gk=
go.opentelemetry.io/otel/sdk v1.31.0/go.mod h1:TfRbMdhvxIIr/B2N2LQW2S5v9m3gOQ/08KsbbO5BPT0=
go.opentelemetry.io/otel/sdk/metric v1.31.0 h1:i9hxxLJF/9kkvfHppyLL55aW7iIJz4JjxTeYusH7zMc=
go.opentelemetry.io/otel/sdk/metric v1.31.0/go.mod h1:CRInTMVvNhUKgSAMbKyTMxqOBC0zgyxzW55lZzX43Y8=
go.opentelemetry.io/otel/trace v1.31.0 h1:ffjsj1aRouKewfr85U2aGagJ46+MvodynlQ1HYdmJys=
go.opentelemetry.io/otel/trace v1.31.0/go.mod h1:TXZkRk7SM2ZQLtR6eoAWQFIHPvzQ06FJAsO1tJg480A=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4=
golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo=
golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM=
golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013 h1:+kGHl1aib/qcwaRi1CbqBZ1rk19r85MNUf8HaBghugY=
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
google.golang.org/grpc v1.69.2 h1:U3S9QEtbXC0bYNvRtcoklF3xGtLViumSYxWykJS+7AU=
google.golang.org/grpc v1.69.2/go.mod h1:vyjdE6jLBI76dgpDojsFGNaHlxdjXN9ghpnd2o7JGZ4=
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.36.1 h1:yBPeRvTftaleIgM3PZ/WBIZ7XM/eEYAaEyCwvyjq/gk=
google.golang.org/protobuf v1.36.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
lukechampine.com/blake3 v1.1.7 h1:GgRMhmdsuK8+ii6UZFDL8Nb+VyMwadAgcJyfYHxG6n0=
lukechampine.com/blake3 v1.1.7/go.mod h1:tkKEOtDkNtklkXtLNEOGNq5tcV90tJiA1vAA12R78LA=

View file

@ -0,0 +1,81 @@
// SPDX-License-Identifier: MIT
// Copyright © 2022 The Tvix Authors
// Copyright © 2025 The Snix Project
// Code generated by protoc-gen-go. DO NOT EDIT.
// versions:
// protoc-gen-go v1.36.5
// protoc (unknown)
// source: snix/build/protos/rpc_build.proto
package buildv1
import (
protoreflect "google.golang.org/protobuf/reflect/protoreflect"
protoimpl "google.golang.org/protobuf/runtime/protoimpl"
reflect "reflect"
unsafe "unsafe"
)
const (
// Verify that this generated code is sufficiently up-to-date.
_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
// Verify that runtime/protoimpl is sufficiently up-to-date.
_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
)
var File_snix_build_protos_rpc_build_proto protoreflect.FileDescriptor
var file_snix_build_protos_rpc_build_proto_rawDesc = string([]byte{
0x0a, 0x21, 0x73, 0x6e, 0x69, 0x78, 0x2f, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x2f, 0x70, 0x72, 0x6f,
0x74, 0x6f, 0x73, 0x2f, 0x72, 0x70, 0x63, 0x5f, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x2e, 0x70, 0x72,
0x6f, 0x74, 0x6f, 0x12, 0x0d, 0x73, 0x6e, 0x69, 0x78, 0x2e, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x2e,
0x76, 0x31, 0x1a, 0x1d, 0x73, 0x6e, 0x69, 0x78, 0x2f, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x2f, 0x70,
0x72, 0x6f, 0x74, 0x6f, 0x73, 0x2f, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x2e, 0x70, 0x72, 0x6f, 0x74,
0x6f, 0x32, 0x4c, 0x0a, 0x0c, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63,
0x65, 0x12, 0x3c, 0x0a, 0x07, 0x44, 0x6f, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x12, 0x1b, 0x2e, 0x73,
0x6e, 0x69, 0x78, 0x2e, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x2e, 0x76, 0x31, 0x2e, 0x42, 0x75, 0x69,
0x6c, 0x64, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x14, 0x2e, 0x73, 0x6e, 0x69, 0x78,
0x2e, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x2e, 0x76, 0x31, 0x2e, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x42,
0x1e, 0x5a, 0x1c, 0x73, 0x6e, 0x69, 0x78, 0x2e, 0x64, 0x65, 0x76, 0x2f, 0x62, 0x75, 0x69, 0x6c,
0x64, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x3b, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x76, 0x31, 0x62,
0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
})
var file_snix_build_protos_rpc_build_proto_goTypes = []any{
(*BuildRequest)(nil), // 0: snix.build.v1.BuildRequest
(*Build)(nil), // 1: snix.build.v1.Build
}
var file_snix_build_protos_rpc_build_proto_depIdxs = []int32{
0, // 0: snix.build.v1.BuildService.DoBuild:input_type -> snix.build.v1.BuildRequest
1, // 1: snix.build.v1.BuildService.DoBuild:output_type -> snix.build.v1.Build
1, // [1:2] is the sub-list for method output_type
0, // [0:1] is the sub-list for method input_type
0, // [0:0] is the sub-list for extension type_name
0, // [0:0] is the sub-list for extension extendee
0, // [0:0] is the sub-list for field type_name
}
func init() { file_snix_build_protos_rpc_build_proto_init() }
func file_snix_build_protos_rpc_build_proto_init() {
if File_snix_build_protos_rpc_build_proto != nil {
return
}
file_snix_build_protos_build_proto_init()
type x struct{}
out := protoimpl.TypeBuilder{
File: protoimpl.DescBuilder{
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
RawDescriptor: unsafe.Slice(unsafe.StringData(file_snix_build_protos_rpc_build_proto_rawDesc), len(file_snix_build_protos_rpc_build_proto_rawDesc)),
NumEnums: 0,
NumMessages: 0,
NumExtensions: 0,
NumServices: 1,
},
GoTypes: file_snix_build_protos_rpc_build_proto_goTypes,
DependencyIndexes: file_snix_build_protos_rpc_build_proto_depIdxs,
}.Build()
File_snix_build_protos_rpc_build_proto = out.File
file_snix_build_protos_rpc_build_proto_goTypes = nil
file_snix_build_protos_rpc_build_proto_depIdxs = nil
}

View file

@ -0,0 +1,125 @@
// SPDX-License-Identifier: MIT
// Copyright © 2022 The Tvix Authors
// Copyright © 2025 The Snix Project
// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
// versions:
// - protoc-gen-go-grpc v1.5.1
// - protoc (unknown)
// source: snix/build/protos/rpc_build.proto
package buildv1
import (
context "context"
grpc "google.golang.org/grpc"
codes "google.golang.org/grpc/codes"
status "google.golang.org/grpc/status"
)
// This is a compile-time assertion to ensure that this generated file
// is compatible with the grpc package it is being compiled against.
// Requires gRPC-Go v1.64.0 or later.
const _ = grpc.SupportPackageIsVersion9
const (
BuildService_DoBuild_FullMethodName = "/snix.build.v1.BuildService/DoBuild"
)
// BuildServiceClient is the client API for BuildService service.
//
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
type BuildServiceClient interface {
DoBuild(ctx context.Context, in *BuildRequest, opts ...grpc.CallOption) (*Build, error)
}
type buildServiceClient struct {
cc grpc.ClientConnInterface
}
func NewBuildServiceClient(cc grpc.ClientConnInterface) BuildServiceClient {
return &buildServiceClient{cc}
}
func (c *buildServiceClient) DoBuild(ctx context.Context, in *BuildRequest, opts ...grpc.CallOption) (*Build, error) {
cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
out := new(Build)
err := c.cc.Invoke(ctx, BuildService_DoBuild_FullMethodName, in, out, cOpts...)
if err != nil {
return nil, err
}
return out, nil
}
// BuildServiceServer is the server API for BuildService service.
// All implementations must embed UnimplementedBuildServiceServer
// for forward compatibility.
type BuildServiceServer interface {
DoBuild(context.Context, *BuildRequest) (*Build, error)
mustEmbedUnimplementedBuildServiceServer()
}
// UnimplementedBuildServiceServer must be embedded to have
// forward compatible implementations.
//
// NOTE: this should be embedded by value instead of pointer to avoid a nil
// pointer dereference when methods are called.
type UnimplementedBuildServiceServer struct{}
func (UnimplementedBuildServiceServer) DoBuild(context.Context, *BuildRequest) (*Build, error) {
return nil, status.Errorf(codes.Unimplemented, "method DoBuild not implemented")
}
func (UnimplementedBuildServiceServer) mustEmbedUnimplementedBuildServiceServer() {}
func (UnimplementedBuildServiceServer) testEmbeddedByValue() {}
// UnsafeBuildServiceServer may be embedded to opt out of forward compatibility for this service.
// Use of this interface is not recommended, as added methods to BuildServiceServer will
// result in compilation errors.
type UnsafeBuildServiceServer interface {
mustEmbedUnimplementedBuildServiceServer()
}
func RegisterBuildServiceServer(s grpc.ServiceRegistrar, srv BuildServiceServer) {
// If the following call pancis, it indicates UnimplementedBuildServiceServer was
// embedded by pointer and is nil. This will cause panics if an
// unimplemented method is ever invoked, so we test this at initialization
// time to prevent it from happening at runtime later due to I/O.
if t, ok := srv.(interface{ testEmbeddedByValue() }); ok {
t.testEmbeddedByValue()
}
s.RegisterService(&BuildService_ServiceDesc, srv)
}
func _BuildService_DoBuild_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(BuildRequest)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(BuildServiceServer).DoBuild(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: BuildService_DoBuild_FullMethodName,
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(BuildServiceServer).DoBuild(ctx, req.(*BuildRequest))
}
return interceptor(ctx, in, info, handler)
}
// BuildService_ServiceDesc is the grpc.ServiceDesc for BuildService service.
// It's only intended for direct use with grpc.RegisterService,
// and not to be introspected or modified (even as a copy)
var BuildService_ServiceDesc = grpc.ServiceDesc{
ServiceName: "snix.build.v1.BuildService",
HandlerType: (*BuildServiceServer)(nil),
Methods: []grpc.MethodDesc{
{
MethodName: "DoBuild",
Handler: _BuildService_DoBuild_Handler,
},
},
Streams: []grpc.StreamDesc{},
Metadata: "snix/build/protos/rpc_build.proto",
}

45
snix/build/Cargo.toml Normal file
View file

@ -0,0 +1,45 @@
[package]
name = "snix-build"
version = "0.1.0"
edition = "2021"
[dependencies]
bytes.workspace = true
clap = { workspace = true, features = ["derive", "env"] }
itertools.workspace = true
prost.workspace = true
thiserror.workspace = true
tokio = { workspace = true, features = ["process"] }
tokio-listener = { workspace = true, features = ["tonic012"] }
tonic = { workspace = true, features = ["tls", "tls-roots"] }
# TODO: put the fuse dep behind a feature flag?
snix-castore = { path = "../castore", features = ["fuse"]}
tracing.workspace = true
url.workspace = true
mimalloc.workspace = true
tonic-reflection = { workspace = true, optional = true }
anyhow = "1.0.79"
blake3 = "1.5.0"
bstr = "1.6.0"
data-encoding = "2.5.0"
futures = "0.3.30"
oci-spec = "0.7.0"
serde_json = "1.0.111"
snix-tracing = { path = "../tracing" }
uuid = { version = "1.7.0", features = ["v4"] }
[build-dependencies]
prost-build.workspace = true
tonic-build.workspace = true
[features]
default = []
tonic-reflection = ["dep:tonic-reflection", "snix-castore/tonic-reflection"]
[dev-dependencies]
rstest.workspace = true
tempfile = "3.3.0"
[lints]
workspace = true

36
snix/build/build.rs Normal file
View file

@ -0,0 +1,36 @@
use std::io::Result;
fn main() -> Result<()> {
#[allow(unused_mut)]
let mut builder = tonic_build::configure();
#[cfg(feature = "tonic-reflection")]
{
let out_dir = std::path::PathBuf::from(std::env::var("OUT_DIR").unwrap());
let descriptor_path = out_dir.join("snix.build.v1.bin");
builder = builder.file_descriptor_set_path(descriptor_path);
};
builder
.build_server(true)
.build_client(true)
.emit_rerun_if_changed(false)
.bytes(["."])
.extern_path(".snix.castore.v1", "::snix_castore::proto")
.compile_protos(
&[
"snix/build/protos/build.proto",
"snix/build/protos/rpc_build.proto",
],
// If we are in running `cargo build` manually, using `../..` works fine,
// but in case we run inside a nix build, we need to instead point PROTO_ROOT
// to a custom tree containing that structure.
&[match std::env::var_os("PROTO_ROOT") {
Some(proto_root) => proto_root.to_str().unwrap().to_owned(),
None => "../..".to_string(),
}],
)?;
Ok(())
}

11
snix/build/default.nix Normal file
View file

@ -0,0 +1,11 @@
{ depot, lib, ... }:
(depot.snix.crates.workspaceMembers.snix-build.build.override {
runTests = true;
}).overrideAttrs (old: rec {
meta.ci.targets = lib.filter (x: lib.hasPrefix "with-features" x || x == "no-features") (lib.attrNames passthru);
passthru = old.passthru // (depot.snix.utils.mkFeaturePowerset {
inherit (old) crateName;
features = [ "tonic-reflection" ];
});
})

22
snix/build/protos/LICENSE Normal file
View file

@ -0,0 +1,22 @@
Copyright © The Tvix Authors
Copyright © The Snix Project
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
“Software”), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View file

@ -0,0 +1,178 @@
// SPDX-License-Identifier: MIT
// Copyright © 2022 The Tvix Authors
// Copyright © 2025 The Snix Project
syntax = "proto3";
package snix.build.v1;
import "snix/castore/protos/castore.proto";
option go_package = "snix.dev/build/proto;buildv1";
// A BuildRequest describes the request of something to be run on the builder.
// It is distinct from an actual [Build] that has already happened, or might be
// currently ongoing.
//
// A BuildRequest can be seen as a more normalized version of a Derivation
// (parsed from A-Term), "writing out" some of the Nix-internal details about
// how e.g. environment variables in the build are set.
//
// Nix has some impurities when building a Derivation, for example the --cores option
// ends up as an environment variable in the build, that's not part of the ATerm.
//
// As of now, we serialize this into the BuildRequest, so builders can stay dumb.
// This might change in the future.
//
// There's also a big difference when it comes to how inputs are modelled:
// - Nix only uses store path (strings) to describe the inputs.
// As store paths can be input-addressed, a certain store path can contain
// different contents (as not all store paths are binary reproducible).
// This requires that for every input-addressed input, the builder has access
// to either the input's deriver (and needs to build it) or else a trusted
// source for the built input.
// to upload input-addressed paths, requiring the trusted users concept.
// - snix-build records a list of snix.castore.v1.Node as inputs.
// These map from the store path base name to their contents, relieving the
// builder from having to "trust" any input-addressed paths, contrary to Nix.
//
// While this approach gives a better hermeticity, it has one downside:
// A BuildRequest can only be sent once the contents of all its inputs are known.
//
// As of now, we're okay to accept this, but it prevents uploading an
// entirely-non-IFD subgraph of BuildRequests eagerly.
//
// FUTUREWORK: We might be introducing another way to refer to inputs, to
// support "send all BuildRequest for a nixpkgs eval to a remote builder and put
// the laptop to sleep" usecases later.
message BuildRequest {
// The list of all root nodes that should be visible in `inputs_dir` at the
// time of the build.
// As all references are content-addressed, no additional signatures are
// needed to substitute / make these available in the build environment.
// Inputs MUST be sorted by their names.
repeated snix.castore.v1.Node inputs = 1;
// The command (and its args) executed as the build script.
// In the case of a Nix derivation, this is usually
// ["/path/to/some-bash/bin/bash", "-e", "/path/to/some/builder.sh"].
repeated string command_args = 2;
// The working dir of the command, relative to the build root.
// "build", in the case of Nix.
// This MUST be a clean relative path, without any ".", "..", or superfluous
// slashes.
string working_dir = 3;
// A list of "scratch" paths, relative to the build root.
// These will be write-able during the build.
// [build, nix/store] in the case of Nix.
// These MUST be clean relative paths, without any ".", "..", or superfluous
// slashes, and sorted.
repeated string scratch_paths = 4;
// The path where the castore input nodes will be located at,
// "nix/store" in case of Nix.
// Builds might also write into here (Nix builds do that).
// This MUST be a clean relative path, without any ".", "..", or superfluous
// slashes.
string inputs_dir = 5;
// The list of output paths the build is expected to produce,
// relative to the root.
// If the path is not produced, the build is considered to have failed.
// These MUST be clean relative paths, without any ".", "..", or superfluous
// slashes, and sorted.
repeated string outputs = 6;
// The list of environment variables and their values that should be set
// inside the build environment.
// This includes both environment vars set inside the derivation, as well as
// more "ephemeral" ones like NIX_BUILD_CORES, controlled by the `--cores`
// CLI option of `nix-build`.
// For now, we consume this as an option when turning a Derivation into a BuildRequest,
// similar to how Nix has a `--cores` option.
// We don't want to bleed these very nix-specific sandbox impl details into
// (dumber) builders if we don't have to.
// Environment variables are sorted by their keys.
repeated EnvVar environment_vars = 7;
message EnvVar {
// name of the environment variable. Must not contain =.
string key = 1;
bytes value = 2;
}
// A set of constraints that need to be satisfied on a build host before a
// Build can be started.
BuildConstraints constraints = 8;
// BuildConstraints represents certain conditions that must be fulfilled
// inside the build environment to be able to build this.
// Constraints can be things like required architecture and minimum amount of memory.
// The required input paths are *not* represented in here, because it
// wouldn't be hermetic enough - see the comment around inputs too.
message BuildConstraints {
// The system that's needed to execute the build.
// Must not be empty.
string system = 1;
// The amount of memory required to be available for the build, in bytes.
uint64 min_memory = 2;
// A list of (absolute) paths that need to be available in the build
// environment, like `/dev/kvm`.
// This is distinct from the castore nodes in inputs.
// TODO: check if these should be individual constraints instead.
// These MUST be clean absolute paths, without any ".", "..", or superfluous
// slashes, and sorted.
repeated string available_ro_paths = 3;
// Whether the build should be able to access the network,
bool network_access = 4;
// Whether to provide a /bin/sh inside the build environment, usually a static bash.
bool provide_bin_sh = 5;
}
// Additional (small) files and their contents that should be placed into the
// build environment, but outside inputs_dir.
// Used for passAsFile and structuredAttrs in Nix.
repeated AdditionalFile additional_files = 9;
message AdditionalFile {
string path = 1;
bytes contents = 2;
}
// If this is an non-empty list, all paths in `outputs` are scanned for these.
// For Nix, `refscan_needles` would be populated with the nixbase32 hash parts of
// every input store path and output store path. The latter is necessary to scan
// for references between multi-output derivations.
repeated string refscan_needles = 10;
// TODO: allow describing something like "preferLocal", to influence composition?
}
// A Build is (one possible) outcome of executing a [BuildRequest].
message Build {
// The orginal build request producing the build.
BuildRequest build_request = 1; // <- TODO: define hashing scheme for BuildRequest, refer to it by hash?
// The outputs that were produced after successfully building.
// They are sorted by their names.
repeated snix.castore.v1.Node outputs = 2;
message OutputNeedles {
// The numbers are indexing into `refscan_needles` originally specified in the BuildRequest.
repeated uint64 needles = 1;
}
// Contains the same number of elements as the `outputs` field.
repeated OutputNeedles outputs_needles = 3;
// TODO: where did this run, how long, logs,
}
/// TODO: check remarkable notes on constraints again
/// TODO: https://github.com/adisbladis/go-nix/commit/603df5db86ab97ba29f6f94d74f4e51642c56834

View file

@ -0,0 +1,51 @@
{ depot, pkgs, lib, ... }:
let
protos = lib.sourceByRegex depot.path.origSrc [
"buf.yaml"
"buf.gen.yaml"
# We need to include castore.proto (only), as it's referred.
"^snix(/castore(/protos(/castore\.proto)?)?)?$"
"^snix(/build(/protos(/.*\.proto)?)?)?$"
];
in
depot.nix.readTree.drvTargets {
inherit protos;
# Lints and ensures formatting of the proto files.
check = pkgs.stdenv.mkDerivation {
name = "proto-check";
src = protos;
nativeBuildInputs = [
pkgs.buf
];
buildPhase = ''
export HOME=$TMPDIR
buf lint
buf format -d --exit-code
touch $out
'';
};
# Produces the golang bindings.
go-bindings = pkgs.stdenv.mkDerivation {
name = "go-bindings";
src = protos;
nativeBuildInputs = [
pkgs.buf
pkgs.protoc-gen-go
pkgs.protoc-gen-go-grpc
];
buildPhase = ''
export HOME=$TMPDIR
buf generate
mkdir -p $out
cp snix/build/protos/*.pb.go $out/
'';
};
}

View file

@ -0,0 +1,14 @@
// SPDX-License-Identifier: MIT
// Copyright © 2022 The Tvix Authors
// Copyright © 2025 The Snix Project
syntax = "proto3";
package snix.build.v1;
import "snix/build/protos/build.proto";
option go_package = "snix.dev/build/proto;buildv1";
service BuildService {
rpc DoBuild(BuildRequest) returns (Build);
}

View file

@ -0,0 +1,114 @@
use clap::Parser;
use clap::Subcommand;
use snix_build::{
buildservice,
proto::{build_service_server::BuildServiceServer, GRPCBuildServiceWrapper},
};
use snix_castore::blobservice;
use snix_castore::directoryservice;
use tokio_listener::Listener;
use tokio_listener::SystemOptions;
use tokio_listener::UserOptions;
use tonic::{self, transport::Server};
use tracing::info;
#[cfg(feature = "tonic-reflection")]
use snix_build::proto::FILE_DESCRIPTOR_SET;
#[cfg(feature = "tonic-reflection")]
use snix_castore::proto::FILE_DESCRIPTOR_SET as CASTORE_FILE_DESCRIPTOR_SET;
use mimalloc::MiMalloc;
#[global_allocator]
static GLOBAL: MiMalloc = MiMalloc;
#[derive(Parser)]
#[command(author, version, about, long_about = None)]
struct Cli {
#[command(subcommand)]
command: Commands,
}
#[derive(Subcommand)]
enum Commands {
/// Runs the snix-build daemon.
Daemon {
#[arg(long, short = 'l')]
listen_address: Option<String>,
#[arg(long, env, default_value = "grpc+http://[::1]:8000")]
blob_service_addr: String,
#[arg(long, env, default_value = "grpc+http://[::1]:8000")]
directory_service_addr: String,
#[arg(long, env, default_value = "dummy://")]
build_service_addr: String,
},
}
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let cli = Cli::parse();
snix_tracing::TracingBuilder::default()
.enable_progressbar()
.build()?;
match cli.command {
Commands::Daemon {
listen_address,
blob_service_addr,
directory_service_addr,
build_service_addr,
} => {
// initialize stores
let blob_service = blobservice::from_addr(&blob_service_addr).await?;
let directory_service = directoryservice::from_addr(&directory_service_addr).await?;
let build_service =
buildservice::from_addr(&build_service_addr, blob_service, directory_service)
.await?;
let listen_address = listen_address
.unwrap_or_else(|| "[::]:8000".to_string())
.parse()
.unwrap();
let mut server = Server::builder();
#[allow(unused_mut)]
let mut router = server.add_service(BuildServiceServer::new(
GRPCBuildServiceWrapper::new(build_service),
));
#[cfg(feature = "tonic-reflection")]
{
router = router.add_service(
tonic_reflection::server::Builder::configure()
.register_encoded_file_descriptor_set(CASTORE_FILE_DESCRIPTOR_SET)
.register_encoded_file_descriptor_set(FILE_DESCRIPTOR_SET)
.build_v1alpha()?,
);
router = router.add_service(
tonic_reflection::server::Builder::configure()
.register_encoded_file_descriptor_set(CASTORE_FILE_DESCRIPTOR_SET)
.register_encoded_file_descriptor_set(FILE_DESCRIPTOR_SET)
.build_v1()?,
);
}
info!(listen_address=%listen_address, "listening");
let listener = Listener::bind(
&listen_address,
&SystemOptions::default(),
&UserOptions::default(),
)
.await?;
router.serve_with_incoming(listener).await?;
}
}
Ok(())
}

View file

@ -0,0 +1,131 @@
use std::collections::{BTreeMap, HashSet};
use std::path::PathBuf;
use bytes::Bytes;
use snix_castore::{Node, PathComponent};
/// A BuildRequest describes the request of something to be run on the builder.
/// It is distinct from an actual \[Build\] that has already happened, or might be
/// currently ongoing.
///
/// A BuildRequest can be seen as a more normalized version of a Derivation
/// (parsed from A-Term), "writing out" some of the Nix-internal details about
/// how e.g. environment variables in the build are set.
///
/// Nix has some impurities when building a Derivation, for example the --cores option
/// ends up as an environment variable in the build, that's not part of the ATerm.
///
/// As of now, we serialize this into the BuildRequest, so builders can stay dumb.
/// This might change in the future.
///
/// There's also a big difference when it comes to how inputs are modelled:
///
/// * Nix only uses store path (strings) to describe the inputs.
/// As store paths can be input-addressed, a certain store path can contain
/// different contents (as not all store paths are binary reproducible).
/// This requires that for every input-addressed input, the builder has access
/// to either the input's deriver (and needs to build it) or else a trusted
/// source for the built input.
/// to upload input-addressed paths, requiring the trusted users concept.
/// * snix-build records a list of snix.castore.v1.Node as inputs.
/// These map from the store path base name to their contents, relieving the
/// builder from having to "trust" any input-addressed paths, contrary to Nix.
///
/// While this approach gives a better hermeticity, it has one downside:
/// A BuildRequest can only be sent once the contents of all its inputs are known.
///
/// As of now, we're okay to accept this, but it prevents uploading an
/// entirely-non-IFD subgraph of BuildRequests eagerly.
#[derive(Default, Debug, Clone, PartialEq)]
pub struct BuildRequest {
/// The list of all root nodes that should be visible in `inputs_dir` at the
/// time of the build.
/// As all references are content-addressed, no additional signatures are
/// needed to substitute / make these available in the build environment.
pub inputs: BTreeMap<PathComponent, Node>,
/// The command (and its args) executed as the build script.
/// In the case of a Nix derivation, this is usually
/// \["/path/to/some-bash/bin/bash", "-e", "/path/to/some/builder.sh"\].
pub command_args: Vec<String>,
/// The working dir of the command, relative to the build root.
/// "build", in the case of Nix.
/// This MUST be a clean relative path, without any ".", "..", or superfluous
/// slashes.
pub working_dir: PathBuf,
/// A list of "scratch" paths, relative to the build root.
/// These will be write-able during the build.
/// \[build, nix/store\] in the case of Nix.
/// These MUST be clean relative paths, without any ".", "..", or superfluous
/// slashes, and sorted.
pub scratch_paths: Vec<PathBuf>,
/// The path where the castore input nodes will be located at,
/// "nix/store" in case of Nix.
/// Builds might also write into here (Nix builds do that).
/// This MUST be a clean relative path, without any ".", "..", or superfluous
/// slashes.
pub inputs_dir: PathBuf,
/// The list of output paths the build is expected to produce,
/// relative to the root.
/// If the path is not produced, the build is considered to have failed.
/// These MUST be clean relative paths, without any ".", "..", or superfluous
/// slashes, and sorted.
pub outputs: Vec<PathBuf>,
/// The list of environment variables and their values that should be set
/// inside the build environment.
/// This includes both environment vars set inside the derivation, as well as
/// more "ephemeral" ones like NIX_BUILD_CORES, controlled by the `--cores`
/// CLI option of `nix-build`.
/// For now, we consume this as an option when turning a Derivation into a BuildRequest,
/// similar to how Nix has a `--cores` option.
/// We don't want to bleed these very nix-specific sandbox impl details into
/// (dumber) builders if we don't have to.
/// Environment variables are sorted by their keys.
pub environment_vars: Vec<EnvVar>,
/// A set of constraints that need to be satisfied on a build host before a
/// Build can be started.
pub constraints: HashSet<BuildConstraints>,
/// Additional (small) files and their contents that should be placed into the
/// build environment, but outside inputs_dir.
/// Used for passAsFile and structuredAttrs in Nix.
pub additional_files: Vec<AdditionalFile>,
/// If this is an non-empty list, all paths in `outputs` are scanned for these.
/// For Nix, `refscan_needles` would be populated with the nixbase32 hash parts of
/// every input store path and output store path. The latter is necessary to scan
/// for references between multi-output derivations.
pub refscan_needles: Vec<String>,
}
#[derive(Debug, Clone, PartialEq)]
pub struct EnvVar {
/// name of the environment variable. Must not contain =.
pub key: String,
pub value: Bytes,
}
/// BuildConstraints represents certain conditions that must be fulfilled
/// inside the build environment to be able to build this.
/// Constraints can be things like required architecture and minimum amount of memory.
/// The required input paths are *not* represented in here, because it
/// wouldn't be hermetic enough - see the comment around inputs too.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum BuildConstraints {
/// The system that's needed to execute the build.
/// Must not be empty.
System(String),
/// The amount of memory required to be available for the build, in bytes.
MinMemory(u64),
/// An absolute path that need to be available in the build
/// environment, like `/dev/kvm`.
/// This is distinct from the castore nodes in inputs.
/// These MUST be clean absolute paths, without any ".", "..", or superfluous
/// slashes, and sorted.
AvailableReadOnlyPath(PathBuf),
/// Whether the build should be able to access the network.
NetworkAccess,
/// Whether to provide a /bin/sh inside the build environment, usually a static bash.
ProvideBinSh,
}
#[derive(Debug, Clone, PartialEq)]
pub struct AdditionalFile {
pub path: PathBuf,
pub contents: Bytes,
}

View file

@ -0,0 +1,20 @@
use tonic::async_trait;
use tracing::instrument;
use super::BuildService;
use crate::buildservice::BuildRequest;
use crate::proto;
#[derive(Default)]
pub struct DummyBuildService {}
#[async_trait]
impl BuildService for DummyBuildService {
#[instrument(skip(self), ret, err)]
async fn do_build(&self, _request: BuildRequest) -> std::io::Result<proto::Build> {
Err(std::io::Error::new(
std::io::ErrorKind::Other,
"builds are not supported with DummyBuildService",
))
}
}

View file

@ -0,0 +1,120 @@
use super::{grpc::GRPCBuildService, BuildService, DummyBuildService};
use snix_castore::{blobservice::BlobService, directoryservice::DirectoryService};
use url::Url;
#[cfg(target_os = "linux")]
use super::oci::OCIBuildService;
/// Constructs a new instance of a [BuildService] from an URI.
///
/// The following schemes are supported by the following services:
/// - `dummy://` ([DummyBuildService])
/// - `oci://` ([OCIBuildService])
/// - `grpc+*://` ([GRPCBuildService])
///
/// As some of these [BuildService] need to talk to a [BlobService] and
/// [DirectoryService], these also need to be passed in.
#[cfg_attr(target_os = "macos", allow(unused_variables))]
pub async fn from_addr<BS, DS>(
uri: &str,
blob_service: BS,
directory_service: DS,
) -> std::io::Result<Box<dyn BuildService>>
where
BS: BlobService + Send + Sync + Clone + 'static,
DS: DirectoryService + Send + Sync + Clone + 'static,
{
let url = Url::parse(uri)
.map_err(|e| std::io::Error::other(format!("unable to parse url: {}", e)))?;
Ok(match url.scheme() {
// dummy doesn't care about parameters.
"dummy" => Box::<DummyBuildService>::default(),
#[cfg(target_os = "linux")]
"oci" => {
// oci wants a path in which it creates bundles.
if url.path().is_empty() {
Err(std::io::Error::other("oci needs a bundle dir as path"))?
}
// TODO: make sandbox shell and rootless_uid_gid
Box::new(OCIBuildService::new(
url.path().into(),
blob_service,
directory_service,
))
}
scheme => {
if scheme.starts_with("grpc+") {
let client = crate::proto::build_service_client::BuildServiceClient::new(
snix_castore::tonic::channel_from_url(&url)
.await
.map_err(std::io::Error::other)?,
);
// FUTUREWORK: also allow responding to {blob,directory}_service
// requests from the remote BuildService?
Box::new(GRPCBuildService::from_client(client))
} else {
Err(std::io::Error::other(format!(
"unknown scheme: {}",
url.scheme()
)))?
}
}
})
}
#[cfg(test)]
mod tests {
use super::from_addr;
use rstest::rstest;
use snix_castore::{
blobservice::{BlobService, MemoryBlobService},
directoryservice::{DirectoryService, MemoryDirectoryService},
};
use std::sync::Arc;
#[cfg(target_os = "linux")]
use std::sync::LazyLock;
#[cfg(target_os = "linux")]
use tempfile::TempDir;
#[cfg(target_os = "linux")]
static TMPDIR_OCI_1: LazyLock<TempDir> = LazyLock::new(|| TempDir::new().unwrap());
#[rstest]
/// This uses an unsupported scheme.
#[case::unsupported_scheme("http://foo.example/test", false)]
/// This configures dummy
#[case::valid_dummy("dummy://", true)]
/// Correct scheme to connect to a unix socket.
#[case::grpc_valid_unix_socket("grpc+unix:///path/to/somewhere", true)]
/// Correct scheme for unix socket, but setting a host too, which is invalid.
#[case::grpc_invalid_unix_socket_and_host("grpc+unix://host.example/path/to/somewhere", false)]
/// Correct scheme to connect to localhost, with port 12345
#[case::grpc_valid_ipv6_localhost_port_12345("grpc+http://[::1]:12345", true)]
/// Correct scheme to connect to localhost over http, without specifying a port.
#[case::grpc_valid_http_host_without_port("grpc+http://localhost", true)]
/// Correct scheme to connect to localhost over http, without specifying a port.
#[case::grpc_valid_https_host_without_port("grpc+https://localhost", true)]
/// Correct scheme to connect to localhost over http, but with additional path, which is invalid.
#[case::grpc_invalid_host_and_path("grpc+http://localhost/some-path", false)]
/// This configures OCI, but doesn't specify the bundle path
#[cfg_attr(target_os = "linux", case::oci_missing_bundle_dir("oci://", false))]
/// This configures OCI, specifying the bundle path
#[cfg_attr(target_os = "linux", case::oci_bundle_path(&format!("oci://{}", TMPDIR_OCI_1.path().to_str().unwrap()), true))]
#[tokio::test]
async fn test_from_addr(#[case] uri_str: &str, #[case] exp_succeed: bool) {
let blob_service: Arc<dyn BlobService> = Arc::from(MemoryBlobService::default());
let directory_service: Arc<dyn DirectoryService> =
Arc::from(MemoryDirectoryService::default());
let resp = from_addr(uri_str, blob_service, directory_service).await;
if exp_succeed {
resp.expect("should succeed");
} else {
assert!(resp.is_err(), "should fail");
}
}
}

View file

@ -0,0 +1,29 @@
use tonic::{async_trait, transport::Channel};
use crate::buildservice::BuildRequest;
use crate::proto::{self, build_service_client::BuildServiceClient};
use super::BuildService;
pub struct GRPCBuildService {
client: BuildServiceClient<Channel>,
}
impl GRPCBuildService {
#[allow(dead_code)]
pub fn from_client(client: BuildServiceClient<Channel>) -> Self {
Self { client }
}
}
#[async_trait]
impl BuildService for GRPCBuildService {
async fn do_build(&self, request: BuildRequest) -> std::io::Result<proto::Build> {
let mut client = self.client.clone();
client
.do_build(Into::<proto::BuildRequest>::into(request))
.await
.map(|resp| resp.into_inner())
.map_err(std::io::Error::other)
}
}

View file

@ -0,0 +1,21 @@
use tonic::async_trait;
use crate::proto;
pub mod build_request;
pub use crate::buildservice::build_request::*;
mod dummy;
mod from_addr;
mod grpc;
#[cfg(target_os = "linux")]
mod oci;
pub use dummy::DummyBuildService;
pub use from_addr::from_addr;
#[async_trait]
pub trait BuildService: Send + Sync {
/// TODO: document
async fn do_build(&self, request: BuildRequest) -> std::io::Result<proto::Build>;
}

View file

@ -0,0 +1,266 @@
use anyhow::Context;
use bstr::BStr;
use oci_spec::runtime::{LinuxIdMapping, LinuxIdMappingBuilder};
use snix_castore::{
blobservice::BlobService,
directoryservice::DirectoryService,
fs::fuse::FuseDaemon,
import::fs::ingest_path,
refscan::{ReferencePattern, ReferenceScanner},
};
use tokio::process::{Child, Command};
use tonic::async_trait;
use tracing::{debug, instrument, warn, Span};
use uuid::Uuid;
use crate::buildservice::BuildRequest;
use crate::{
oci::{get_host_output_paths, make_bundle, make_spec},
proto::{self, build::OutputNeedles},
};
use std::{ffi::OsStr, path::PathBuf, process::Stdio};
use super::BuildService;
const SANDBOX_SHELL: &str = env!("SNIX_BUILD_SANDBOX_SHELL");
const MAX_CONCURRENT_BUILDS: usize = 2; // TODO: make configurable
pub struct OCIBuildService<BS, DS> {
/// Root path in which all bundles are created in
bundle_root: PathBuf,
/// uid mappings to set up for the workloads
uid_mappings: Vec<LinuxIdMapping>,
/// uid mappings to set up for the workloads
gid_mappings: Vec<LinuxIdMapping>,
/// Handle to a [BlobService], used by filesystems spawned during builds.
blob_service: BS,
/// Handle to a [DirectoryService], used by filesystems spawned during builds.
directory_service: DS,
// semaphore to track number of concurrently running builds.
// this is necessary, as otherwise we very quickly run out of open file handles.
concurrent_builds: tokio::sync::Semaphore,
}
impl<BS, DS> OCIBuildService<BS, DS> {
pub fn new(bundle_root: PathBuf, blob_service: BS, directory_service: DS) -> Self {
// We map root inside the container to the uid/gid this is running at,
// and allocate one for uid 1000 into the container from the range we
// got in /etc/sub{u,g}id.
// TODO: actually read uid, and /etc/subuid. Maybe only when we try to build?
// FUTUREWORK: use different uids?
Self {
bundle_root,
blob_service,
directory_service,
uid_mappings: vec![
LinuxIdMappingBuilder::default()
.host_id(1000_u32)
.container_id(0_u32)
.size(1_u32)
.build()
.unwrap(),
LinuxIdMappingBuilder::default()
.host_id(100000_u32)
.container_id(1000_u32)
.size(1_u32)
.build()
.unwrap(),
],
gid_mappings: vec![
LinuxIdMappingBuilder::default()
.host_id(100_u32)
.container_id(0_u32)
.size(1_u32)
.build()
.unwrap(),
LinuxIdMappingBuilder::default()
.host_id(100000_u32)
.container_id(100_u32)
.size(1_u32)
.build()
.unwrap(),
],
concurrent_builds: tokio::sync::Semaphore::new(MAX_CONCURRENT_BUILDS),
}
}
}
#[async_trait]
impl<BS, DS> BuildService for OCIBuildService<BS, DS>
where
BS: BlobService + Clone + 'static,
DS: DirectoryService + Clone + 'static,
{
#[instrument(skip_all, err)]
async fn do_build(&self, request: BuildRequest) -> std::io::Result<proto::Build> {
let _permit = self.concurrent_builds.acquire().await.unwrap();
let bundle_name = Uuid::new_v4();
let bundle_path = self.bundle_root.join(bundle_name.to_string());
let span = Span::current();
span.record("bundle_name", bundle_name.to_string());
let mut runtime_spec = make_spec(&request, true, SANDBOX_SHELL)
.context("failed to create spec")
.map_err(std::io::Error::other)?;
let mut linux = runtime_spec.linux().clone().unwrap();
// edit the spec, we need to setup uid/gid mappings.
linux.set_uid_mappings(Some(self.uid_mappings.clone()));
linux.set_gid_mappings(Some(self.gid_mappings.clone()));
runtime_spec.set_linux(Some(linux));
make_bundle(&request, &runtime_spec, &bundle_path)
.context("failed to produce bundle")
.map_err(std::io::Error::other)?;
// pre-calculate the locations we want to later ingest, in the order of
// the original outputs.
// If we can't find calculate that path, don't start the build in first place.
let host_output_paths = get_host_output_paths(&request, &bundle_path)
.context("failed to calculate host output paths")
.map_err(std::io::Error::other)?;
// assemble a BTreeMap of Nodes to pass into SnixStoreFs.
let patterns = ReferencePattern::new(request.refscan_needles.clone());
// NOTE: impl Drop for FuseDaemon unmounts, so if the call is cancelled, umount.
let _fuse_daemon = tokio::task::spawn_blocking({
let blob_service = self.blob_service.clone();
let directory_service = self.directory_service.clone();
let dest = bundle_path.join("inputs");
let root_nodes = Box::new(request.inputs.clone());
move || {
let fs = snix_castore::fs::SnixStoreFs::new(
blob_service,
directory_service,
root_nodes,
true,
false,
);
// mount the filesystem and wait for it to be unmounted.
// FUTUREWORK: make fuse daemon threads configurable?
FuseDaemon::new(fs, dest, 4, true).context("failed to start fuse daemon")
}
})
.await?
.context("mounting")
.map_err(std::io::Error::other)?;
debug!(bundle.path=?bundle_path, bundle.name=%bundle_name, "about to spawn bundle");
// start the bundle as another process.
let child = spawn_bundle(bundle_path, &bundle_name.to_string())?;
// wait for the process to exit
// FUTUREWORK: change the trait to allow reporting progress / logs…
let child_output = child
.wait_with_output()
.await
.context("failed to run process")
.map_err(std::io::Error::other)?;
// Check the exit code
if !child_output.status.success() {
let stdout = BStr::new(&child_output.stdout);
let stderr = BStr::new(&child_output.stderr);
warn!(stdout=%stdout, stderr=%stderr, exit_code=%child_output.status, "build failed");
return Err(std::io::Error::new(
std::io::ErrorKind::Other,
"nonzero exit code".to_string(),
));
}
// Ingest build outputs into the castore.
// We use try_join_all here. No need to spawn new tasks, as this is
// mostly IO bound.
let (outputs, outputs_needles) = futures::future::try_join_all(
host_output_paths.into_iter().enumerate().map(|(i, p)| {
let output_path = request.outputs[i].clone();
let patterns = patterns.clone();
async move {
debug!(host.path=?p, output.path=?output_path, "ingesting path");
let scanner = ReferenceScanner::new(patterns);
let output_node = ingest_path(
self.blob_service.clone(),
&self.directory_service,
p,
Some(&scanner),
)
.await
.map_err(|e| {
std::io::Error::new(
std::io::ErrorKind::InvalidData,
format!("Unable to ingest output: {}", e),
)
})?;
let needles = OutputNeedles {
needles: scanner
.matches()
.into_iter()
.enumerate()
.filter(|(_, val)| *val)
.map(|(idx, _)| idx as u64)
.collect(),
};
Ok::<_, std::io::Error>((
snix_castore::proto::Node::from_name_and_node(
output_path
.file_name()
.and_then(|s| s.to_str())
.map(|s| s.to_string())
.unwrap_or("".into())
.into(),
output_node,
),
needles,
))
}
}),
)
.await?
.into_iter()
.unzip();
Ok(proto::Build {
build_request: Some(request.into()),
outputs,
outputs_needles,
})
}
}
/// Spawns runc with the bundle at bundle_path.
/// On success, returns the child.
#[instrument(err)]
fn spawn_bundle(
bundle_path: impl AsRef<OsStr> + std::fmt::Debug,
bundle_name: &str,
) -> std::io::Result<Child> {
let mut command = Command::new("runc");
command
.args(&[
"run".into(),
"--bundle".into(),
bundle_path.as_ref().to_os_string(),
bundle_name.into(),
])
.stderr(Stdio::piped())
.stdout(Stdio::piped())
.stdin(Stdio::null());
command.spawn()
}

4
snix/build/src/lib.rs Normal file
View file

@ -0,0 +1,4 @@
pub mod buildservice;
#[cfg(target_os = "linux")]
mod oci;
pub mod proto;

View file

@ -0,0 +1,144 @@
//! Module to create an OCI runtime bundle for a given [BuildRequest].
use std::{
fs,
path::{Path, PathBuf},
};
use super::scratch_name;
use crate::buildservice::BuildRequest;
use anyhow::{bail, Context};
use tracing::{debug, instrument};
/// Produce an OCI bundle in a given path.
/// Check [make_spec] for a description about the paths produced.
#[instrument(err)]
pub(crate) fn make_bundle<'a>(
request: &BuildRequest,
runtime_spec: &oci_spec::runtime::Spec,
path: &Path,
) -> anyhow::Result<()> {
fs::create_dir_all(path).context("failed to create bundle path")?;
let spec_json = serde_json::to_string(runtime_spec).context("failed to render spec to json")?;
fs::write(path.join("config.json"), spec_json).context("failed to write config.json")?;
fs::create_dir_all(path.join("inputs")).context("failed to create inputs dir")?;
let root_path = path.join("root");
fs::create_dir_all(&root_path).context("failed to create root path dir")?;
fs::create_dir_all(root_path.join("etc")).context("failed to create root/etc dir")?;
// TODO: populate /etc/{group,passwd}. It's a mess?
let scratch_root = path.join("scratch");
fs::create_dir_all(&scratch_root).context("failed to create scratch/ dir")?;
// for each scratch path, calculate its name inside scratch, and ensure the
// directory exists.
for p in request.scratch_paths.iter() {
let scratch_path = scratch_root.join(scratch_name(p));
debug!(scratch_path=?scratch_path, path=?p, "about to create scratch dir");
fs::create_dir_all(scratch_path).context("Unable to create scratch dir")?;
}
Ok(())
}
/// Determine the path of all outputs specified in a [BuildRequest]
/// as seen from the host, for post-build ingestion.
/// This lookup needs to take scratch paths into consideration, as the build
/// root is not writable on its own.
/// If a path can't be determined, an error is returned.
pub(crate) fn get_host_output_paths(
request: &BuildRequest,
bundle_path: &Path,
) -> anyhow::Result<Vec<PathBuf>> {
let scratch_root = bundle_path.join("scratch");
let mut host_output_paths: Vec<PathBuf> = Vec::with_capacity(request.outputs.len());
for output_path in request.outputs.iter() {
// calculate the location of the path.
if let Some((mp, relpath)) = find_path_in_scratchs(output_path, &request.scratch_paths) {
host_output_paths.push(scratch_root.join(scratch_name(mp)).join(relpath));
} else {
bail!("unable to find path {output_path:?}");
}
}
Ok(host_output_paths)
}
/// For a given list of mountpoints (sorted) and a search_path, find the
/// specific mountpoint parenting that search_path and return it, as well as the
/// relative path from there to the search_path.
/// mountpoints must be sorted, so we can iterate over the list from the back
/// and match on the prefix.
fn find_path_in_scratchs<'a, 'b, I>(
search_path: &'a Path,
mountpoints: I,
) -> Option<(&'b Path, &'a Path)>
where
I: IntoIterator<Item = &'b PathBuf>,
I::IntoIter: DoubleEndedIterator,
{
mountpoints
.into_iter()
.rev()
.find_map(|mp| Some((mp.as_path(), search_path.strip_prefix(mp).ok()?)))
}
#[cfg(test)]
mod tests {
use std::path::{Path, PathBuf};
use rstest::rstest;
use crate::{buildservice::BuildRequest, oci::scratch_name};
use super::{find_path_in_scratchs, get_host_output_paths};
#[rstest]
#[case::simple("nix/store/aaaa", &["nix/store".into()], Some(("nix/store", "aaaa")))]
#[case::prefix_no_sep("nix/store/aaaa", &["nix/sto".into()], None)]
#[case::not_found("nix/store/aaaa", &["build".into()], None)]
fn test_test_find_path_in_scratchs(
#[case] search_path: &str,
#[case] mountpoints: &[String],
#[case] expected: Option<(&str, &str)>,
) {
let expected = expected.map(|e| (Path::new(e.0), Path::new(e.1)));
assert_eq!(
find_path_in_scratchs(
Path::new(search_path),
mountpoints
.iter()
.map(PathBuf::from)
.collect::<Vec<_>>()
.as_slice()
),
expected
);
}
#[test]
fn test_get_host_output_paths_simple() {
let request = BuildRequest {
outputs: vec!["nix/store/fhaj6gmwns62s6ypkcldbaj2ybvkhx3p-foo".into()],
scratch_paths: vec!["build".into(), "nix/store".into()],
..Default::default()
};
let paths =
get_host_output_paths(&request, Path::new("bundle-root")).expect("must succeed");
let mut expected_path = PathBuf::new();
expected_path.push("bundle-root");
expected_path.push("scratch");
expected_path.push(scratch_name(Path::new("nix/store")));
expected_path.push("fhaj6gmwns62s6ypkcldbaj2ybvkhx3p-foo");
assert_eq!(vec![expected_path], paths)
}
}

16
snix/build/src/oci/mod.rs Normal file
View file

@ -0,0 +1,16 @@
mod bundle;
mod spec;
pub(crate) use bundle::get_host_output_paths;
pub(crate) use bundle::make_bundle;
pub(crate) use spec::make_spec;
use std::path::Path;
/// For a given scratch path, return the scratch_name that's allocated.
// We currently use use lower hex encoding of the b3 digest of the scratch
// path, so we don't need to globally allocate and pass down some uuids.
pub(crate) fn scratch_name(scratch_path: &Path) -> String {
data_encoding::BASE32
.encode(blake3::hash(scratch_path.as_os_str().as_encoded_bytes()).as_bytes())
}

307
snix/build/src/oci/spec.rs Normal file
View file

@ -0,0 +1,307 @@
//! Module to create a OCI runtime spec for a given [BuildRequest].
use crate::buildservice::{BuildConstraints, BuildRequest};
use oci_spec::{
runtime::{Capability, LinuxNamespace, LinuxNamespaceBuilder, LinuxNamespaceType},
OciSpecError,
};
use std::{collections::HashSet, path::Path};
use super::scratch_name;
/// For a given [BuildRequest], return an OCI runtime spec.
///
/// While there's no IO occuring in this function, the generated spec contains
/// path references relative to the "bundle location".
/// Due to overlayfs requiring its layers to be absolute paths, we also need a
/// [bundle_dir] parameter, pointing to the location of the bundle dir itself.
///
/// The paths used in the spec are the following (relative to a "bundle root"):
///
/// - `inputs`, a directory where the castore nodes specified the build request
/// inputs are supposed to be populated.
/// - `outputs`, a directory where all writes to the store_dir during the build
/// are directed to.
/// - `root`, a minimal skeleton of files that'll be present at /.
/// - `scratch`, a directory containing other directories which will be
/// bind-mounted read-write into the container and used as scratch space
/// during the build.
/// No assumptions should be made about what's inside this directory.
///
/// Generating these paths, and populating contents, like a skeleton root
/// is up to another function, this function doesn't do filesystem IO.
pub(crate) fn make_spec(
request: &BuildRequest,
rootless: bool,
sandbox_shell: &str,
) -> Result<oci_spec::runtime::Spec, oci_spec::OciSpecError> {
let allow_network = request
.constraints
.contains(&BuildConstraints::NetworkAccess);
// Assemble ro_host_mounts. Start with constraints.available_ro_paths.
let mut ro_host_mounts: Vec<_> = request
.constraints
.iter()
.filter_map(|constraint| match constraint {
BuildConstraints::AvailableReadOnlyPath(path) => Some((path.as_path(), path.as_path())),
_ => None,
})
.collect();
// If provide_bin_sh is set, mount sandbox_shell to /bin/sh
if request
.constraints
.contains(&BuildConstraints::ProvideBinSh)
{
ro_host_mounts.push((Path::new(sandbox_shell), Path::new("/bin/sh")))
}
oci_spec::runtime::SpecBuilder::default()
.process(configure_process(
&request.command_args,
&request.working_dir,
request
.environment_vars
.iter()
.map(|e| {
(
e.key.as_str(),
// TODO: decide what to do with non-bytes env values
String::from_utf8(e.value.to_vec()).expect("invalid string in env"),
)
})
.collect::<Vec<_>>(),
rootless,
)?)
.linux(configure_linux(allow_network, rootless)?)
.root(
oci_spec::runtime::RootBuilder::default()
.path("root")
.readonly(true)
.build()?,
)
.hostname("localhost")
.mounts(configure_mounts(
rootless,
allow_network,
request.scratch_paths.iter().map(|e| e.as_path()),
request.inputs.iter(),
&request.inputs_dir,
ro_host_mounts,
)?)
.build()
}
/// Return the Process part of the OCI Runtime spec.
/// This configures the command, it's working dir, env and terminal setup.
/// It also takes care of setting rlimits and capabilities.
/// Capabilities are a bit more complicated in case rootless building is requested.
fn configure_process<'a>(
command_args: &[String],
cwd: &Path,
env: impl IntoIterator<Item = (&'a str, String)>,
rootless: bool,
) -> Result<oci_spec::runtime::Process, oci_spec::OciSpecError> {
let spec_builder = oci_spec::runtime::ProcessBuilder::default()
.args(command_args)
.env(
env.into_iter()
.map(|(k, v)| format!("{}={}", k, v))
.collect::<Vec<_>>(),
)
.terminal(true)
.user(
oci_spec::runtime::UserBuilder::default()
.uid(1000u32)
.gid(100u32)
.build()?,
)
.cwd(Path::new("/").join(cwd)) // relative to the bundle root, but at least runc wants it to also be absolute.
.capabilities({
let caps: HashSet<Capability> = if !rootless {
HashSet::from([Capability::AuditWrite, Capability::Kill])
} else {
HashSet::from([
Capability::AuditWrite,
Capability::Chown,
Capability::DacOverride,
Capability::Fowner,
Capability::Fsetid,
Capability::Kill,
Capability::Mknod,
Capability::NetBindService,
Capability::NetRaw,
Capability::Setfcap,
Capability::Setgid,
Capability::Setpcap,
Capability::Setuid,
Capability::SysChroot,
])
};
oci_spec::runtime::LinuxCapabilitiesBuilder::default()
.bounding(caps.clone())
.effective(caps.clone())
.inheritable(caps.clone())
.permitted(caps.clone())
.ambient(caps)
.build()?
})
.rlimits([oci_spec::runtime::PosixRlimitBuilder::default()
.typ(oci_spec::runtime::PosixRlimitType::RlimitNofile)
.hard(1024_u64)
.soft(1024_u64)
.build()?])
.no_new_privileges(true);
spec_builder.build()
}
/// Return the Linux part of the OCI Runtime spec.
/// This configures various namespaces, masked and read-only paths.
fn configure_linux(
allow_network: bool,
rootless: bool,
) -> Result<oci_spec::runtime::Linux, OciSpecError> {
let mut linux = oci_spec::runtime::Linux::default();
// explicitly set namespaces, depending on allow_network.
linux.set_namespaces(Some({
let mut namespace_types = vec![
LinuxNamespaceType::Pid,
LinuxNamespaceType::Ipc,
LinuxNamespaceType::Uts,
LinuxNamespaceType::Mount,
LinuxNamespaceType::Cgroup,
];
if !allow_network {
namespace_types.push(LinuxNamespaceType::Network)
}
if rootless {
namespace_types.push(LinuxNamespaceType::User)
}
namespace_types
.into_iter()
.map(|e| LinuxNamespaceBuilder::default().typ(e).build())
.collect::<Result<Vec<LinuxNamespace>, _>>()?
}));
linux.set_masked_paths(Some(
[
"/proc/kcore",
"/proc/latency_stats",
"/proc/timer_list",
"/proc/timer_stats",
"/proc/sched_debug",
"/sys/firmware",
]
.into_iter()
.map(|e| e.to_string())
.collect::<Vec<_>>(),
));
linux.set_readonly_paths(Some(
[
"/proc/asound",
"/proc/bus",
"/proc/fs",
"/proc/irq",
"/proc/sys",
"/proc/sysrq-trigger",
]
.into_iter()
.map(|e| e.to_string())
.collect::<Vec<_>>(),
));
Ok(linux)
}
/// Return the Mounts part of the OCI Runtime spec.
/// It first sets up the standard mounts, then scratch paths, bind mounts for
/// all inputs, and finally read-only paths from the hosts.
fn configure_mounts<'a>(
rootless: bool,
allow_network: bool,
scratch_paths: impl IntoIterator<Item = &'a Path>,
inputs: impl Iterator<Item = (&'a snix_castore::PathComponent, &'a snix_castore::Node)>,
inputs_dir: &Path,
ro_host_mounts: impl IntoIterator<Item = (&'a Path, &'a Path)>,
) -> Result<Vec<oci_spec::runtime::Mount>, oci_spec::OciSpecError> {
let mut mounts: Vec<_> = if rootless {
oci_spec::runtime::get_rootless_mounts()
} else {
oci_spec::runtime::get_default_mounts()
};
mounts.push(configure_mount(
Path::new("tmpfs"),
Path::new("/tmp"),
"tmpfs",
&["nosuid", "noatime", "mode=700"],
)?);
// For each scratch path, create a bind mount entry.
let scratch_root = Path::new("scratch"); // relative path
for scratch_path in scratch_paths.into_iter() {
let src = scratch_root.join(scratch_name(scratch_path));
mounts.push(configure_mount(
&src,
&Path::new("/").join(scratch_path),
"none",
&["rbind", "rw"],
)?);
}
// For each input, create a bind mount from inputs/$name into $inputs_dir/$name.
for (input_name, _input) in inputs {
let input_name = std::str::from_utf8(input_name.as_ref()).expect("invalid input name");
mounts.push(configure_mount(
&Path::new("inputs").join(input_name),
&Path::new("/").join(inputs_dir).join(input_name),
"none",
&[
"rbind", "ro",
// "nosuid" is required, otherwise mounting will just fail with
// a generic permission error.
// See https://github.com/wllenyj/containerd/commit/42a386c8164bef16d59590c61ab00806f854d8fd
"nosuid", "nodev",
],
)?);
}
// Process ro_host_mounts
for (src, dst) in ro_host_mounts.into_iter() {
mounts.push(configure_mount(src, dst, "none", &["rbind", "ro"])?);
}
// In case network is enabled, also mount in /etc/{resolv.conf,services,hosts}
if allow_network {
for p in [
Path::new("/etc/resolv.conf"),
Path::new("/etc/services"),
Path::new("/etc/hosts"),
] {
mounts.push(configure_mount(p, p, "none", &["rbind", "ro"])?);
}
}
Ok(mounts)
}
/// Helper function to produce a mount.
fn configure_mount(
source: &Path,
destination: &Path,
typ: &str,
options: &[&str],
) -> Result<oci_spec::runtime::Mount, oci_spec::OciSpecError> {
oci_spec::runtime::MountBuilder::default()
.destination(destination)
.typ(typ.to_string())
.source(source)
.options(options.iter().map(|e| e.to_string()).collect::<Vec<_>>())
.build()
}

View file

@ -0,0 +1,37 @@
use crate::buildservice::BuildService;
use std::ops::Deref;
use tonic::async_trait;
use super::{Build, BuildRequest};
/// Implements the gRPC server trait ([crate::proto::build_service_server::BuildService]
/// for anything implementing [BuildService].
pub struct GRPCBuildServiceWrapper<BUILD> {
inner: BUILD,
}
impl<BUILD> GRPCBuildServiceWrapper<BUILD> {
pub fn new(build_service: BUILD) -> Self {
Self {
inner: build_service,
}
}
}
#[async_trait]
impl<BUILD> crate::proto::build_service_server::BuildService for GRPCBuildServiceWrapper<BUILD>
where
BUILD: Deref<Target = dyn BuildService> + Send + Sync + 'static,
{
async fn do_build(
&self,
request: tonic::Request<BuildRequest>,
) -> Result<tonic::Response<Build>, tonic::Status> {
let request = TryInto::<crate::buildservice::BuildRequest>::try_into(request.into_inner())
.map_err(|err| tonic::Status::new(tonic::Code::InvalidArgument, err.to_string()))?;
match self.inner.do_build(request).await {
Ok(resp) => Ok(tonic::Response::new(resp)),
Err(e) => Err(tonic::Status::internal(e.to_string())),
}
}
}

391
snix/build/src/proto/mod.rs Normal file
View file

@ -0,0 +1,391 @@
use std::collections::{BTreeMap, HashSet};
use std::path::{Path, PathBuf};
use itertools::Itertools;
use snix_castore::{DirectoryError, Node, PathComponent};
mod grpc_buildservice_wrapper;
pub use grpc_buildservice_wrapper::GRPCBuildServiceWrapper;
tonic::include_proto!("snix.build.v1");
#[cfg(feature = "tonic-reflection")]
/// Compiled file descriptors for implementing [gRPC
/// reflection](https://github.com/grpc/grpc/blob/master/doc/server-reflection.md) with e.g.
/// [`tonic_reflection`](https://docs.rs/tonic-reflection).
pub const FILE_DESCRIPTOR_SET: &[u8] = tonic::include_file_descriptor_set!("snix.build.v1");
/// Errors that occur during the validation of [BuildRequest] messages.
#[derive(Debug, thiserror::Error)]
pub enum ValidateBuildRequestError {
#[error("invalid input node at position {0}: {1}")]
InvalidInputNode(usize, DirectoryError),
#[error("input nodes are not sorted by name")]
InputNodesNotSorted,
#[error("invalid working_dir")]
InvalidWorkingDir,
#[error("scratch_paths not sorted")]
ScratchPathsNotSorted,
#[error("invalid scratch path at position {0}")]
InvalidScratchPath(usize),
#[error("invalid inputs_dir")]
InvalidInputsDir,
#[error("invalid output path at position {0}")]
InvalidOutputPath(usize),
#[error("outputs not sorted")]
OutputsNotSorted,
#[error("invalid environment variable at position {0}")]
InvalidEnvVar(usize),
#[error("EnvVar not sorted by their keys")]
EnvVarNotSorted,
#[error("invalid build constraints: {0}")]
InvalidBuildConstraints(ValidateBuildConstraintsError),
#[error("invalid additional file path at position: {0}")]
InvalidAdditionalFilePath(usize),
#[error("additional_files not sorted")]
AdditionalFilesNotSorted,
}
/// Checks a path to be without any '..' components, and clean (no superfluous
/// slashes).
fn is_clean_path<P: AsRef<Path>>(p: P) -> bool {
let p = p.as_ref();
// Look at all components, bail in case of ".", ".." and empty normal
// segments (superfluous slashes)
// We still need to assemble a cleaned PathBuf, and compare the OsString
// later, as .components() already does do some normalization before
// yielding.
let mut cleaned_p = PathBuf::new();
for component in p.components() {
match component {
std::path::Component::Prefix(_) => {}
std::path::Component::RootDir => {}
std::path::Component::CurDir => return false,
std::path::Component::ParentDir => return false,
std::path::Component::Normal(a) => {
if a.is_empty() {
return false;
}
}
}
cleaned_p.push(component);
}
// if cleaned_p looks like p, we're good.
if cleaned_p.as_os_str() != p.as_os_str() {
return false;
}
true
}
fn is_clean_relative_path<P: AsRef<Path>>(p: P) -> bool {
if p.as_ref().is_absolute() {
return false;
}
is_clean_path(p)
}
fn is_clean_absolute_path<P: AsRef<Path>>(p: P) -> bool {
if !p.as_ref().is_absolute() {
return false;
}
is_clean_path(p)
}
/// Checks if a given list is sorted.
fn is_sorted<I>(data: I) -> bool
where
I: Iterator,
I::Item: Ord + Clone,
{
data.tuple_windows().all(|(a, b)| a <= b)
}
fn path_to_string(path: &Path) -> String {
path.to_str()
.expect("Snix Bug: unable to convert Path to String")
.to_string()
}
impl From<crate::buildservice::BuildRequest> for BuildRequest {
fn from(value: crate::buildservice::BuildRequest) -> Self {
let constraints = if value.constraints.is_empty() {
None
} else {
let mut constraints = build_request::BuildConstraints::default();
for constraint in value.constraints {
use crate::buildservice::BuildConstraints;
match constraint {
BuildConstraints::System(system) => constraints.system = system,
BuildConstraints::MinMemory(min_memory) => constraints.min_memory = min_memory,
BuildConstraints::AvailableReadOnlyPath(path) => {
constraints.available_ro_paths.push(path_to_string(&path))
}
BuildConstraints::ProvideBinSh => constraints.provide_bin_sh = true,
BuildConstraints::NetworkAccess => constraints.network_access = true,
}
}
Some(constraints)
};
Self {
inputs: value
.inputs
.into_iter()
.map(|(name, node)| {
snix_castore::proto::Node::from_name_and_node(name.into(), node)
})
.collect(),
command_args: value.command_args,
working_dir: path_to_string(&value.working_dir),
scratch_paths: value
.scratch_paths
.iter()
.map(|p| path_to_string(p))
.collect(),
inputs_dir: path_to_string(&value.inputs_dir),
outputs: value.outputs.iter().map(|p| path_to_string(p)).collect(),
environment_vars: value.environment_vars.into_iter().map(Into::into).collect(),
constraints,
additional_files: value.additional_files.into_iter().map(Into::into).collect(),
refscan_needles: value.refscan_needles,
}
}
}
impl TryFrom<BuildRequest> for crate::buildservice::BuildRequest {
type Error = ValidateBuildRequestError;
fn try_from(value: BuildRequest) -> Result<Self, Self::Error> {
// validate input names. Make sure they're sorted
let mut last_name: bytes::Bytes = "".into();
let mut inputs: BTreeMap<PathComponent, Node> = BTreeMap::new();
for (i, node) in value.inputs.iter().enumerate() {
let (name, node) = node
.clone()
.try_into_name_and_node()
.map_err(|e| ValidateBuildRequestError::InvalidInputNode(i, e))?;
if name.as_ref() <= last_name.as_ref() {
return Err(ValidateBuildRequestError::InputNodesNotSorted);
} else {
inputs.insert(name.clone(), node);
last_name = name.into();
}
}
// validate working_dir
if !is_clean_relative_path(&value.working_dir) {
Err(ValidateBuildRequestError::InvalidWorkingDir)?;
}
// validate scratch paths
for (i, p) in value.scratch_paths.iter().enumerate() {
if !is_clean_relative_path(p) {
Err(ValidateBuildRequestError::InvalidScratchPath(i))?
}
}
if !is_sorted(value.scratch_paths.iter().map(|e| e.as_bytes())) {
Err(ValidateBuildRequestError::ScratchPathsNotSorted)?;
}
// validate inputs_dir
if !is_clean_relative_path(&value.inputs_dir) {
Err(ValidateBuildRequestError::InvalidInputsDir)?;
}
// validate outputs
for (i, p) in value.outputs.iter().enumerate() {
if !is_clean_relative_path(p) {
Err(ValidateBuildRequestError::InvalidOutputPath(i))?
}
}
if !is_sorted(value.outputs.iter().map(|e| e.as_bytes())) {
Err(ValidateBuildRequestError::OutputsNotSorted)?;
}
// validate environment_vars.
for (i, e) in value.environment_vars.iter().enumerate() {
if e.key.is_empty() || e.key.contains('=') {
Err(ValidateBuildRequestError::InvalidEnvVar(i))?
}
}
if !is_sorted(value.environment_vars.iter().map(|e| e.key.as_bytes())) {
Err(ValidateBuildRequestError::EnvVarNotSorted)?;
}
// validate build constraints
let constraints = value
.constraints
.map_or(Ok(HashSet::new()), |constraints| {
constraints
.try_into()
.map_err(ValidateBuildRequestError::InvalidBuildConstraints)
})?;
// validate additional_files
for (i, additional_file) in value.additional_files.iter().enumerate() {
if !is_clean_relative_path(&additional_file.path) {
Err(ValidateBuildRequestError::InvalidAdditionalFilePath(i))?
}
}
if !is_sorted(value.additional_files.iter().map(|e| e.path.as_bytes())) {
Err(ValidateBuildRequestError::AdditionalFilesNotSorted)?;
}
Ok(Self {
inputs,
command_args: value.command_args,
working_dir: PathBuf::from(value.working_dir),
scratch_paths: value.scratch_paths.iter().map(PathBuf::from).collect(),
inputs_dir: PathBuf::from(value.inputs_dir),
outputs: value.outputs.iter().map(PathBuf::from).collect(),
environment_vars: value.environment_vars.into_iter().map(Into::into).collect(),
constraints,
additional_files: value.additional_files.into_iter().map(Into::into).collect(),
refscan_needles: value.refscan_needles,
})
}
}
/// Errors that occur during the validation of
/// [build_request::BuildConstraints] messages.
#[derive(Debug, thiserror::Error)]
pub enum ValidateBuildConstraintsError {
#[error("invalid system")]
InvalidSystem,
#[error("invalid available_ro_paths at position {0}")]
InvalidAvailableRoPaths(usize),
#[error("available_ro_paths not sorted")]
AvailableRoPathsNotSorted,
}
impl From<build_request::EnvVar> for crate::buildservice::EnvVar {
fn from(value: build_request::EnvVar) -> Self {
Self {
key: value.key,
value: value.value,
}
}
}
impl From<crate::buildservice::EnvVar> for build_request::EnvVar {
fn from(value: crate::buildservice::EnvVar) -> Self {
Self {
key: value.key,
value: value.value,
}
}
}
impl From<build_request::AdditionalFile> for crate::buildservice::AdditionalFile {
fn from(value: build_request::AdditionalFile) -> Self {
Self {
path: PathBuf::from(value.path),
contents: value.contents,
}
}
}
impl From<crate::buildservice::AdditionalFile> for build_request::AdditionalFile {
fn from(value: crate::buildservice::AdditionalFile) -> Self {
Self {
path: value
.path
.to_str()
.expect("Snix bug: expected a valid path")
.to_string(),
contents: value.contents,
}
}
}
impl TryFrom<build_request::BuildConstraints> for HashSet<crate::buildservice::BuildConstraints> {
type Error = ValidateBuildConstraintsError;
fn try_from(value: build_request::BuildConstraints) -> Result<Self, Self::Error> {
use crate::buildservice::BuildConstraints;
// validate system
if value.system.is_empty() {
Err(ValidateBuildConstraintsError::InvalidSystem)?;
}
let mut build_constraints = HashSet::from([
BuildConstraints::System(value.system),
BuildConstraints::MinMemory(value.min_memory),
]);
// validate available_ro_paths
for (i, p) in value.available_ro_paths.iter().enumerate() {
if !is_clean_absolute_path(p) {
Err(ValidateBuildConstraintsError::InvalidAvailableRoPaths(i))?
} else {
build_constraints.insert(BuildConstraints::AvailableReadOnlyPath(PathBuf::from(p)));
}
}
if !is_sorted(value.available_ro_paths.iter().map(|e| e.as_bytes())) {
Err(ValidateBuildConstraintsError::AvailableRoPathsNotSorted)?;
}
if value.network_access {
build_constraints.insert(BuildConstraints::NetworkAccess);
}
if value.provide_bin_sh {
build_constraints.insert(BuildConstraints::ProvideBinSh);
}
Ok(build_constraints)
}
}
#[cfg(test)]
// TODO: add testcases for constraints special cases. The default cases in the protos
// should result in the constraints not being added. For example min_memory 0 can be omitted.
// Also interesting testcases are "merging semantics". MimMemory(1) and MinMemory(100) will
// result in mim_memory 100, multiple AvailableReadOnlyPaths need to be merged. Contradicting
// system constraints need to fail somewhere (maybe an assertion, as only buggy code can construct it)
mod tests {
use super::{is_clean_path, is_clean_relative_path};
use rstest::rstest;
#[rstest]
#[case::fail_trailing_slash("foo/bar/", false)]
#[case::fail_dotdot("foo/../bar", false)]
#[case::fail_singledot("foo/./bar", false)]
#[case::fail_unnecessary_slashes("foo//bar", false)]
#[case::fail_absolute_unnecessary_slashes("//foo/bar", false)]
#[case::ok_empty("", true)]
#[case::ok_relative("foo/bar", true)]
#[case::ok_absolute("/", true)]
#[case::ok_absolute2("/foo/bar", true)]
fn test_is_clean_path(#[case] s: &str, #[case] expected: bool) {
assert_eq!(is_clean_path(s), expected);
}
#[rstest]
#[case::fail_absolute("/", false)]
#[case::ok_relative("foo/bar", true)]
fn test_is_clean_relative_path(#[case] s: &str, #[case] expected: bool) {
assert_eq!(is_clean_relative_path(s), expected);
}
// TODO: add tests for BuildRequest validation itself
}

22
snix/castore-go/LICENSE Normal file
View file

@ -0,0 +1,22 @@
Copyright © The Tvix Authors
Copyright © The Snix Project
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
“Software”), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

10
snix/castore-go/README.md Normal file
View file

@ -0,0 +1,10 @@
# castore-go
This directory contains generated golang bindings, both for the `snix-castore`
data models, as well as the gRPC bindings.
They are generated with `mg run //snix/castore-go:regenerate`.
These files end with `.pb.go`, and are ensured to be up to date by a CI check.
Additionally, code useful when interacting with these data structures
(ending just with `.go`) is provided.

212
snix/castore-go/castore.go Normal file
View file

@ -0,0 +1,212 @@
package castorev1
import (
"bytes"
"encoding/base64"
"fmt"
"google.golang.org/protobuf/proto"
"lukechampine.com/blake3"
)
// The size of a directory is calculated by summing up the numbers of
// `directories`, `files` and `symlinks`, and for each directory, its size
// field.
func (d *Directory) Size() uint64 {
var size uint64
size = uint64(len(d.Files) + len(d.Symlinks))
for _, d := range d.Directories {
size += 1 + d.Size
}
return size
}
func (d *Directory) Digest() ([]byte, error) {
b, err := proto.MarshalOptions{
Deterministic: true,
}.Marshal(d)
if err != nil {
return nil, fmt.Errorf("error while marshalling directory: %w", err)
}
h := blake3.New(32, nil)
_, err = h.Write(b)
if err != nil {
return nil, fmt.Errorf("error writing to hasher: %w", err)
}
return h.Sum(nil), nil
}
// isValidName checks a name for validity.
// We disallow slashes, null bytes, '.', '..' and the empty string.
// Depending on the context, a *Node message with an empty string as name is
// allowed, but they don't occur inside a Directory message.
func isValidName(n []byte) bool {
if len(n) == 0 || bytes.Equal(n, []byte("..")) || bytes.Equal(n, []byte{'.'}) || bytes.Contains(n, []byte{'\x00'}) || bytes.Contains(n, []byte{'/'}) {
return false
}
return true
}
// Validate ensures a DirectoryNode has a valid name and correct digest len.
func (n *DirectoryNode) Validate() error {
if len(n.Digest) != 32 {
return fmt.Errorf("invalid digest length for %s, expected %d, got %d", n.Name, 32, len(n.Digest))
}
if !isValidName(n.Name) {
return fmt.Errorf("invalid node name: %s", n.Name)
}
return nil
}
// Validate ensures a FileNode has a valid name and correct digest len.
func (n *FileNode) Validate() error {
if len(n.Digest) != 32 {
return fmt.Errorf("invalid digest length for %s, expected %d, got %d", n.Name, 32, len(n.Digest))
}
if !isValidName(n.Name) {
return fmt.Errorf("invalid node name: %s", n.Name)
}
return nil
}
// Validate ensures a SymlinkNode has a valid name and target.
func (n *SymlinkNode) Validate() error {
if len(n.Target) == 0 || bytes.Contains(n.Target, []byte{0}) {
return fmt.Errorf("invalid symlink target: %s", n.Target)
}
if !isValidName(n.Name) {
return fmt.Errorf("invalid node name: %s", n.Name)
}
return nil
}
// Validate ensures a node is valid, by dispatching to the per-type validation functions.
func (n *Node) Validate() error {
if node := n.GetDirectory(); node != nil {
if err := node.Validate(); err != nil {
return fmt.Errorf("SymlinkNode failed validation: %w", err)
}
} else if node := n.GetFile(); node != nil {
if err := node.Validate(); err != nil {
return fmt.Errorf("FileNode failed validation: %w", err)
}
} else if node := n.GetSymlink(); node != nil {
if err := node.Validate(); err != nil {
return fmt.Errorf("SymlinkNode failed validation: %w", err)
}
} else {
// this would only happen if we introduced a new type
return fmt.Errorf("no specific node found")
}
return nil
}
// Validate thecks the Directory message for invalid data, such as:
// - violations of name restrictions
// - invalid digest lengths
// - not properly sorted lists
// - duplicate names in the three lists
func (d *Directory) Validate() error {
// seenNames contains all seen names so far.
// We populate this to ensure node names are unique across all three lists.
seenNames := make(map[string]interface{})
// We also track the last seen name in each of the three lists,
// to ensure nodes are sorted by their names.
var lastDirectoryName, lastFileName, lastSymlinkName []byte
// helper function to only insert in sorted order.
// used with the three lists above.
// Note this consumes a *pointer to* a string, as it mutates it.
insertIfGt := func(lastName *[]byte, name []byte) error {
// update if it's greater than the previous name
if bytes.Compare(name, *lastName) == 1 {
*lastName = name
return nil
} else {
return fmt.Errorf("%v is not in sorted order", name)
}
}
// insertOnce inserts into seenNames if the key doesn't exist yet.
insertOnce := func(name []byte) error {
encoded := base64.StdEncoding.EncodeToString(name)
if _, found := seenNames[encoded]; found {
return fmt.Errorf("duplicate name: %v", string(name))
}
seenNames[encoded] = nil
return nil
}
// Loop over all Directories, Files and Symlinks individually,
// check them for validity, then check for sorting in the current list, and
// uniqueness across all three lists.
for _, directoryNode := range d.Directories {
directoryName := directoryNode.GetName()
if err := directoryNode.Validate(); err != nil {
return fmt.Errorf("DirectoryNode %s failed validation: %w", directoryName, err)
}
// ensure names are sorted
if err := insertIfGt(&lastDirectoryName, directoryName); err != nil {
return err
}
// add to seenNames
if err := insertOnce(directoryName); err != nil {
return err
}
}
for _, fileNode := range d.Files {
fileName := fileNode.GetName()
if err := fileNode.Validate(); err != nil {
return fmt.Errorf("FileNode %s failed validation: %w", fileName, err)
}
// ensure names are sorted
if err := insertIfGt(&lastFileName, fileName); err != nil {
return err
}
// add to seenNames
if err := insertOnce(fileName); err != nil {
return err
}
}
for _, symlinkNode := range d.Symlinks {
symlinkName := symlinkNode.GetName()
if err := symlinkNode.Validate(); err != nil {
return fmt.Errorf("SymlinkNode %s failed validation: %w", symlinkName, err)
}
// ensure names are sorted
if err := insertIfGt(&lastSymlinkName, symlinkName); err != nil {
return err
}
// add to seenNames
if err := insertOnce(symlinkName); err != nil {
return err
}
}
return nil
}

View file

@ -0,0 +1,509 @@
// SPDX-FileCopyrightText: edef <edef@unfathomable.blue>
// SPDX-License-Identifier: OSL-3.0 OR MIT OR Apache-2.0
// Copyright © 2025 The Snix Project
// Code generated by protoc-gen-go. DO NOT EDIT.
// versions:
// protoc-gen-go v1.36.5
// protoc (unknown)
// source: snix/castore/protos/castore.proto
package castorev1
import (
protoreflect "google.golang.org/protobuf/reflect/protoreflect"
protoimpl "google.golang.org/protobuf/runtime/protoimpl"
reflect "reflect"
sync "sync"
unsafe "unsafe"
)
const (
// Verify that this generated code is sufficiently up-to-date.
_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
// Verify that runtime/protoimpl is sufficiently up-to-date.
_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
)
// A Directory can contain Directory, File or Symlink nodes.
// Each of these nodes have a name attribute, which is the basename in that
// directory and node type specific attributes.
// The name attribute:
// - MUST not contain slashes or null bytes
// - MUST not be '.' or '..'
// - MUST be unique across all three lists
//
// Elements in each list need to be lexicographically ordered by the name
// attribute.
type Directory struct {
state protoimpl.MessageState `protogen:"open.v1"`
Directories []*DirectoryNode `protobuf:"bytes,1,rep,name=directories,proto3" json:"directories,omitempty"`
Files []*FileNode `protobuf:"bytes,2,rep,name=files,proto3" json:"files,omitempty"`
Symlinks []*SymlinkNode `protobuf:"bytes,3,rep,name=symlinks,proto3" json:"symlinks,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *Directory) Reset() {
*x = Directory{}
mi := &file_snix_castore_protos_castore_proto_msgTypes[0]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *Directory) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*Directory) ProtoMessage() {}
func (x *Directory) ProtoReflect() protoreflect.Message {
mi := &file_snix_castore_protos_castore_proto_msgTypes[0]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use Directory.ProtoReflect.Descriptor instead.
func (*Directory) Descriptor() ([]byte, []int) {
return file_snix_castore_protos_castore_proto_rawDescGZIP(), []int{0}
}
func (x *Directory) GetDirectories() []*DirectoryNode {
if x != nil {
return x.Directories
}
return nil
}
func (x *Directory) GetFiles() []*FileNode {
if x != nil {
return x.Files
}
return nil
}
func (x *Directory) GetSymlinks() []*SymlinkNode {
if x != nil {
return x.Symlinks
}
return nil
}
// A DirectoryNode represents a directory in a Directory.
type DirectoryNode struct {
state protoimpl.MessageState `protogen:"open.v1"`
// The (base)name of the directory
Name []byte `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
// The blake3 hash of a Directory message, serialized in protobuf canonical form.
Digest []byte `protobuf:"bytes,2,opt,name=digest,proto3" json:"digest,omitempty"`
// Number of child elements in the Directory referred to by `digest`.
// Calculated by summing up the numbers of `directories`, `files` and
// `symlinks`, and for each directory, its size field. Used for inode number
// calculation.
// This field is precisely as verifiable as any other Merkle tree edge.
// Resolve `digest`, and you can compute it incrementally. Resolve the entire
// tree, and you can fully compute it from scratch.
// A credulous implementation won't reject an excessive size, but this is
// harmless: you'll have some ordinals without nodes. Undersizing is obvious
// and easy to reject: you won't have an ordinal for some nodes.
Size uint64 `protobuf:"varint,3,opt,name=size,proto3" json:"size,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *DirectoryNode) Reset() {
*x = DirectoryNode{}
mi := &file_snix_castore_protos_castore_proto_msgTypes[1]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *DirectoryNode) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*DirectoryNode) ProtoMessage() {}
func (x *DirectoryNode) ProtoReflect() protoreflect.Message {
mi := &file_snix_castore_protos_castore_proto_msgTypes[1]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use DirectoryNode.ProtoReflect.Descriptor instead.
func (*DirectoryNode) Descriptor() ([]byte, []int) {
return file_snix_castore_protos_castore_proto_rawDescGZIP(), []int{1}
}
func (x *DirectoryNode) GetName() []byte {
if x != nil {
return x.Name
}
return nil
}
func (x *DirectoryNode) GetDigest() []byte {
if x != nil {
return x.Digest
}
return nil
}
func (x *DirectoryNode) GetSize() uint64 {
if x != nil {
return x.Size
}
return 0
}
// A FileNode represents a regular or executable file in a Directory.
type FileNode struct {
state protoimpl.MessageState `protogen:"open.v1"`
// The (base)name of the file
Name []byte `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
// The blake3 digest of the file contents
Digest []byte `protobuf:"bytes,2,opt,name=digest,proto3" json:"digest,omitempty"`
// The file content size
Size uint64 `protobuf:"varint,3,opt,name=size,proto3" json:"size,omitempty"`
// Whether the file is executable
Executable bool `protobuf:"varint,4,opt,name=executable,proto3" json:"executable,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *FileNode) Reset() {
*x = FileNode{}
mi := &file_snix_castore_protos_castore_proto_msgTypes[2]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *FileNode) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*FileNode) ProtoMessage() {}
func (x *FileNode) ProtoReflect() protoreflect.Message {
mi := &file_snix_castore_protos_castore_proto_msgTypes[2]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use FileNode.ProtoReflect.Descriptor instead.
func (*FileNode) Descriptor() ([]byte, []int) {
return file_snix_castore_protos_castore_proto_rawDescGZIP(), []int{2}
}
func (x *FileNode) GetName() []byte {
if x != nil {
return x.Name
}
return nil
}
func (x *FileNode) GetDigest() []byte {
if x != nil {
return x.Digest
}
return nil
}
func (x *FileNode) GetSize() uint64 {
if x != nil {
return x.Size
}
return 0
}
func (x *FileNode) GetExecutable() bool {
if x != nil {
return x.Executable
}
return false
}
// A SymlinkNode represents a symbolic link in a Directory.
type SymlinkNode struct {
state protoimpl.MessageState `protogen:"open.v1"`
// The (base)name of the symlink
Name []byte `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
// The target of the symlink.
Target []byte `protobuf:"bytes,2,opt,name=target,proto3" json:"target,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *SymlinkNode) Reset() {
*x = SymlinkNode{}
mi := &file_snix_castore_protos_castore_proto_msgTypes[3]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *SymlinkNode) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*SymlinkNode) ProtoMessage() {}
func (x *SymlinkNode) ProtoReflect() protoreflect.Message {
mi := &file_snix_castore_protos_castore_proto_msgTypes[3]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use SymlinkNode.ProtoReflect.Descriptor instead.
func (*SymlinkNode) Descriptor() ([]byte, []int) {
return file_snix_castore_protos_castore_proto_rawDescGZIP(), []int{3}
}
func (x *SymlinkNode) GetName() []byte {
if x != nil {
return x.Name
}
return nil
}
func (x *SymlinkNode) GetTarget() []byte {
if x != nil {
return x.Target
}
return nil
}
// A Node is either a DirectoryNode, FileNode or SymlinkNode.
type Node struct {
state protoimpl.MessageState `protogen:"open.v1"`
// Types that are valid to be assigned to Node:
//
// *Node_Directory
// *Node_File
// *Node_Symlink
Node isNode_Node `protobuf_oneof:"node"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *Node) Reset() {
*x = Node{}
mi := &file_snix_castore_protos_castore_proto_msgTypes[4]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *Node) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*Node) ProtoMessage() {}
func (x *Node) ProtoReflect() protoreflect.Message {
mi := &file_snix_castore_protos_castore_proto_msgTypes[4]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use Node.ProtoReflect.Descriptor instead.
func (*Node) Descriptor() ([]byte, []int) {
return file_snix_castore_protos_castore_proto_rawDescGZIP(), []int{4}
}
func (x *Node) GetNode() isNode_Node {
if x != nil {
return x.Node
}
return nil
}
func (x *Node) GetDirectory() *DirectoryNode {
if x != nil {
if x, ok := x.Node.(*Node_Directory); ok {
return x.Directory
}
}
return nil
}
func (x *Node) GetFile() *FileNode {
if x != nil {
if x, ok := x.Node.(*Node_File); ok {
return x.File
}
}
return nil
}
func (x *Node) GetSymlink() *SymlinkNode {
if x != nil {
if x, ok := x.Node.(*Node_Symlink); ok {
return x.Symlink
}
}
return nil
}
type isNode_Node interface {
isNode_Node()
}
type Node_Directory struct {
Directory *DirectoryNode `protobuf:"bytes,1,opt,name=directory,proto3,oneof"`
}
type Node_File struct {
File *FileNode `protobuf:"bytes,2,opt,name=file,proto3,oneof"`
}
type Node_Symlink struct {
Symlink *SymlinkNode `protobuf:"bytes,3,opt,name=symlink,proto3,oneof"`
}
func (*Node_Directory) isNode_Node() {}
func (*Node_File) isNode_Node() {}
func (*Node_Symlink) isNode_Node() {}
var File_snix_castore_protos_castore_proto protoreflect.FileDescriptor
var file_snix_castore_protos_castore_proto_rawDesc = string([]byte{
0x0a, 0x21, 0x73, 0x6e, 0x69, 0x78, 0x2f, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2f, 0x70,
0x72, 0x6f, 0x74, 0x6f, 0x73, 0x2f, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x70, 0x72,
0x6f, 0x74, 0x6f, 0x12, 0x0f, 0x73, 0x6e, 0x69, 0x78, 0x2e, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72,
0x65, 0x2e, 0x76, 0x31, 0x22, 0xb8, 0x01, 0x0a, 0x09, 0x44, 0x69, 0x72, 0x65, 0x63, 0x74, 0x6f,
0x72, 0x79, 0x12, 0x40, 0x0a, 0x0b, 0x64, 0x69, 0x72, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x69, 0x65,
0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1e, 0x2e, 0x73, 0x6e, 0x69, 0x78, 0x2e, 0x63,
0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x44, 0x69, 0x72, 0x65, 0x63, 0x74,
0x6f, 0x72, 0x79, 0x4e, 0x6f, 0x64, 0x65, 0x52, 0x0b, 0x64, 0x69, 0x72, 0x65, 0x63, 0x74, 0x6f,
0x72, 0x69, 0x65, 0x73, 0x12, 0x2f, 0x0a, 0x05, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x18, 0x02, 0x20,
0x03, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x73, 0x6e, 0x69, 0x78, 0x2e, 0x63, 0x61, 0x73, 0x74, 0x6f,
0x72, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x46, 0x69, 0x6c, 0x65, 0x4e, 0x6f, 0x64, 0x65, 0x52, 0x05,
0x66, 0x69, 0x6c, 0x65, 0x73, 0x12, 0x38, 0x0a, 0x08, 0x73, 0x79, 0x6d, 0x6c, 0x69, 0x6e, 0x6b,
0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x73, 0x6e, 0x69, 0x78, 0x2e, 0x63,
0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x79, 0x6d, 0x6c, 0x69, 0x6e,
0x6b, 0x4e, 0x6f, 0x64, 0x65, 0x52, 0x08, 0x73, 0x79, 0x6d, 0x6c, 0x69, 0x6e, 0x6b, 0x73, 0x22,
0x4f, 0x0a, 0x0d, 0x44, 0x69, 0x72, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x79, 0x4e, 0x6f, 0x64, 0x65,
0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04,
0x6e, 0x61, 0x6d, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x64, 0x69, 0x67, 0x65, 0x73, 0x74, 0x18, 0x02,
0x20, 0x01, 0x28, 0x0c, 0x52, 0x06, 0x64, 0x69, 0x67, 0x65, 0x73, 0x74, 0x12, 0x12, 0x0a, 0x04,
0x73, 0x69, 0x7a, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, 0x52, 0x04, 0x73, 0x69, 0x7a, 0x65,
0x22, 0x6a, 0x0a, 0x08, 0x46, 0x69, 0x6c, 0x65, 0x4e, 0x6f, 0x64, 0x65, 0x12, 0x12, 0x0a, 0x04,
0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65,
0x12, 0x16, 0x0a, 0x06, 0x64, 0x69, 0x67, 0x65, 0x73, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c,
0x52, 0x06, 0x64, 0x69, 0x67, 0x65, 0x73, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x69, 0x7a, 0x65,
0x18, 0x03, 0x20, 0x01, 0x28, 0x04, 0x52, 0x04, 0x73, 0x69, 0x7a, 0x65, 0x12, 0x1e, 0x0a, 0x0a,
0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x08,
0x52, 0x0a, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x22, 0x39, 0x0a, 0x0b,
0x53, 0x79, 0x6d, 0x6c, 0x69, 0x6e, 0x6b, 0x4e, 0x6f, 0x64, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x6e,
0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12,
0x16, 0x0a, 0x06, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52,
0x06, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x22, 0xb9, 0x01, 0x0a, 0x04, 0x4e, 0x6f, 0x64, 0x65,
0x12, 0x3e, 0x0a, 0x09, 0x64, 0x69, 0x72, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x79, 0x18, 0x01, 0x20,
0x01, 0x28, 0x0b, 0x32, 0x1e, 0x2e, 0x73, 0x6e, 0x69, 0x78, 0x2e, 0x63, 0x61, 0x73, 0x74, 0x6f,
0x72, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x44, 0x69, 0x72, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x79, 0x4e,
0x6f, 0x64, 0x65, 0x48, 0x00, 0x52, 0x09, 0x64, 0x69, 0x72, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x79,
0x12, 0x2f, 0x0a, 0x04, 0x66, 0x69, 0x6c, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19,
0x2e, 0x73, 0x6e, 0x69, 0x78, 0x2e, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x76, 0x31,
0x2e, 0x46, 0x69, 0x6c, 0x65, 0x4e, 0x6f, 0x64, 0x65, 0x48, 0x00, 0x52, 0x04, 0x66, 0x69, 0x6c,
0x65, 0x12, 0x38, 0x0a, 0x07, 0x73, 0x79, 0x6d, 0x6c, 0x69, 0x6e, 0x6b, 0x18, 0x03, 0x20, 0x01,
0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x73, 0x6e, 0x69, 0x78, 0x2e, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72,
0x65, 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x79, 0x6d, 0x6c, 0x69, 0x6e, 0x6b, 0x4e, 0x6f, 0x64, 0x65,
0x48, 0x00, 0x52, 0x07, 0x73, 0x79, 0x6d, 0x6c, 0x69, 0x6e, 0x6b, 0x42, 0x06, 0x0a, 0x04, 0x6e,
0x6f, 0x64, 0x65, 0x42, 0x22, 0x5a, 0x20, 0x73, 0x6e, 0x69, 0x78, 0x2e, 0x64, 0x65, 0x76, 0x2f,
0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x3b, 0x63, 0x61,
0x73, 0x74, 0x6f, 0x72, 0x65, 0x76, 0x31, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
})
var (
file_snix_castore_protos_castore_proto_rawDescOnce sync.Once
file_snix_castore_protos_castore_proto_rawDescData []byte
)
func file_snix_castore_protos_castore_proto_rawDescGZIP() []byte {
file_snix_castore_protos_castore_proto_rawDescOnce.Do(func() {
file_snix_castore_protos_castore_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_snix_castore_protos_castore_proto_rawDesc), len(file_snix_castore_protos_castore_proto_rawDesc)))
})
return file_snix_castore_protos_castore_proto_rawDescData
}
var file_snix_castore_protos_castore_proto_msgTypes = make([]protoimpl.MessageInfo, 5)
var file_snix_castore_protos_castore_proto_goTypes = []any{
(*Directory)(nil), // 0: snix.castore.v1.Directory
(*DirectoryNode)(nil), // 1: snix.castore.v1.DirectoryNode
(*FileNode)(nil), // 2: snix.castore.v1.FileNode
(*SymlinkNode)(nil), // 3: snix.castore.v1.SymlinkNode
(*Node)(nil), // 4: snix.castore.v1.Node
}
var file_snix_castore_protos_castore_proto_depIdxs = []int32{
1, // 0: snix.castore.v1.Directory.directories:type_name -> snix.castore.v1.DirectoryNode
2, // 1: snix.castore.v1.Directory.files:type_name -> snix.castore.v1.FileNode
3, // 2: snix.castore.v1.Directory.symlinks:type_name -> snix.castore.v1.SymlinkNode
1, // 3: snix.castore.v1.Node.directory:type_name -> snix.castore.v1.DirectoryNode
2, // 4: snix.castore.v1.Node.file:type_name -> snix.castore.v1.FileNode
3, // 5: snix.castore.v1.Node.symlink:type_name -> snix.castore.v1.SymlinkNode
6, // [6:6] is the sub-list for method output_type
6, // [6:6] is the sub-list for method input_type
6, // [6:6] is the sub-list for extension type_name
6, // [6:6] is the sub-list for extension extendee
0, // [0:6] is the sub-list for field type_name
}
func init() { file_snix_castore_protos_castore_proto_init() }
func file_snix_castore_protos_castore_proto_init() {
if File_snix_castore_protos_castore_proto != nil {
return
}
file_snix_castore_protos_castore_proto_msgTypes[4].OneofWrappers = []any{
(*Node_Directory)(nil),
(*Node_File)(nil),
(*Node_Symlink)(nil),
}
type x struct{}
out := protoimpl.TypeBuilder{
File: protoimpl.DescBuilder{
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
RawDescriptor: unsafe.Slice(unsafe.StringData(file_snix_castore_protos_castore_proto_rawDesc), len(file_snix_castore_protos_castore_proto_rawDesc)),
NumEnums: 0,
NumMessages: 5,
NumExtensions: 0,
NumServices: 0,
},
GoTypes: file_snix_castore_protos_castore_proto_goTypes,
DependencyIndexes: file_snix_castore_protos_castore_proto_depIdxs,
MessageInfos: file_snix_castore_protos_castore_proto_msgTypes,
}.Build()
File_snix_castore_protos_castore_proto = out.File
file_snix_castore_protos_castore_proto_goTypes = nil
file_snix_castore_protos_castore_proto_depIdxs = nil
}

View file

@ -0,0 +1,298 @@
package castorev1_test
import (
"testing"
"github.com/stretchr/testify/assert"
castorev1pb "snix.dev/castore/proto"
)
var (
dummyDigest = []byte{
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
}
)
func TestDirectorySize(t *testing.T) {
t.Run("empty", func(t *testing.T) {
d := castorev1pb.Directory{
Directories: []*castorev1pb.DirectoryNode{},
Files: []*castorev1pb.FileNode{},
Symlinks: []*castorev1pb.SymlinkNode{},
}
assert.Equal(t, uint64(0), d.Size())
})
t.Run("containing single empty directory", func(t *testing.T) {
d := castorev1pb.Directory{
Directories: []*castorev1pb.DirectoryNode{{
Name: []byte([]byte("foo")),
Digest: dummyDigest,
Size: 0,
}},
Files: []*castorev1pb.FileNode{},
Symlinks: []*castorev1pb.SymlinkNode{},
}
assert.Equal(t, uint64(1), d.Size())
})
t.Run("containing single non-empty directory", func(t *testing.T) {
d := castorev1pb.Directory{
Directories: []*castorev1pb.DirectoryNode{{
Name: []byte("foo"),
Digest: dummyDigest,
Size: 4,
}},
Files: []*castorev1pb.FileNode{},
Symlinks: []*castorev1pb.SymlinkNode{},
}
assert.Equal(t, uint64(5), d.Size())
})
t.Run("containing single file", func(t *testing.T) {
d := castorev1pb.Directory{
Directories: []*castorev1pb.DirectoryNode{},
Files: []*castorev1pb.FileNode{{
Name: []byte("foo"),
Digest: dummyDigest,
Size: 42,
Executable: false,
}},
Symlinks: []*castorev1pb.SymlinkNode{},
}
assert.Equal(t, uint64(1), d.Size())
})
t.Run("containing single symlink", func(t *testing.T) {
d := castorev1pb.Directory{
Directories: []*castorev1pb.DirectoryNode{},
Files: []*castorev1pb.FileNode{},
Symlinks: []*castorev1pb.SymlinkNode{{
Name: []byte("foo"),
Target: []byte("bar"),
}},
}
assert.Equal(t, uint64(1), d.Size())
})
}
func TestDirectoryDigest(t *testing.T) {
d := castorev1pb.Directory{
Directories: []*castorev1pb.DirectoryNode{},
Files: []*castorev1pb.FileNode{},
Symlinks: []*castorev1pb.SymlinkNode{},
}
dgst, err := d.Digest()
assert.NoError(t, err, "calling Digest() on a directory shouldn't error")
assert.Equal(t, []byte{
0xaf, 0x13, 0x49, 0xb9, 0xf5, 0xf9, 0xa1, 0xa6, 0xa0, 0x40, 0x4d, 0xea, 0x36, 0xdc,
0xc9, 0x49, 0x9b, 0xcb, 0x25, 0xc9, 0xad, 0xc1, 0x12, 0xb7, 0xcc, 0x9a, 0x93, 0xca,
0xe4, 0x1f, 0x32, 0x62,
}, dgst)
}
func TestDirectoryValidate(t *testing.T) {
t.Run("empty", func(t *testing.T) {
d := castorev1pb.Directory{
Directories: []*castorev1pb.DirectoryNode{},
Files: []*castorev1pb.FileNode{},
Symlinks: []*castorev1pb.SymlinkNode{},
}
assert.NoError(t, d.Validate())
})
t.Run("invalid names", func(t *testing.T) {
{
d := castorev1pb.Directory{
Directories: []*castorev1pb.DirectoryNode{{
Name: []byte{},
Digest: dummyDigest,
Size: 42,
}},
Files: []*castorev1pb.FileNode{},
Symlinks: []*castorev1pb.SymlinkNode{},
}
assert.ErrorContains(t, d.Validate(), "invalid node name")
}
{
d := castorev1pb.Directory{
Directories: []*castorev1pb.DirectoryNode{{
Name: []byte("."),
Digest: dummyDigest,
Size: 42,
}},
Files: []*castorev1pb.FileNode{},
Symlinks: []*castorev1pb.SymlinkNode{},
}
assert.ErrorContains(t, d.Validate(), "invalid node name")
}
{
d := castorev1pb.Directory{
Directories: []*castorev1pb.DirectoryNode{},
Files: []*castorev1pb.FileNode{{
Name: []byte(".."),
Digest: dummyDigest,
Size: 42,
Executable: false,
}},
Symlinks: []*castorev1pb.SymlinkNode{},
}
assert.ErrorContains(t, d.Validate(), "invalid node name")
}
{
d := castorev1pb.Directory{
Directories: []*castorev1pb.DirectoryNode{},
Files: []*castorev1pb.FileNode{},
Symlinks: []*castorev1pb.SymlinkNode{{
Name: []byte("\x00"),
Target: []byte("foo"),
}},
}
assert.ErrorContains(t, d.Validate(), "invalid node name")
}
{
d := castorev1pb.Directory{
Directories: []*castorev1pb.DirectoryNode{},
Files: []*castorev1pb.FileNode{},
Symlinks: []*castorev1pb.SymlinkNode{{
Name: []byte("foo/bar"),
Target: []byte("foo"),
}},
}
assert.ErrorContains(t, d.Validate(), "invalid node name")
}
})
t.Run("invalid digest", func(t *testing.T) {
d := castorev1pb.Directory{
Directories: []*castorev1pb.DirectoryNode{{
Name: []byte("foo"),
Digest: nil,
Size: 42,
}},
Files: []*castorev1pb.FileNode{},
Symlinks: []*castorev1pb.SymlinkNode{},
}
assert.ErrorContains(t, d.Validate(), "invalid digest length")
})
t.Run("invalid symlink targets", func(t *testing.T) {
{
d := castorev1pb.Directory{
Directories: []*castorev1pb.DirectoryNode{},
Files: []*castorev1pb.FileNode{},
Symlinks: []*castorev1pb.SymlinkNode{{
Name: []byte("foo"),
Target: []byte{},
}},
}
assert.ErrorContains(t, d.Validate(), "invalid symlink target")
}
{
d := castorev1pb.Directory{
Directories: []*castorev1pb.DirectoryNode{},
Files: []*castorev1pb.FileNode{},
Symlinks: []*castorev1pb.SymlinkNode{{
Name: []byte("foo"),
Target: []byte{0x66, 0x6f, 0x6f, 0},
}},
}
assert.ErrorContains(t, d.Validate(), "invalid symlink target")
}
})
t.Run("sorting", func(t *testing.T) {
// "b" comes before "a", bad.
{
d := castorev1pb.Directory{
Directories: []*castorev1pb.DirectoryNode{{
Name: []byte("b"),
Digest: dummyDigest,
Size: 42,
}, {
Name: []byte("a"),
Digest: dummyDigest,
Size: 42,
}},
Files: []*castorev1pb.FileNode{},
Symlinks: []*castorev1pb.SymlinkNode{},
}
assert.ErrorContains(t, d.Validate(), "is not in sorted order")
}
// "a" exists twice, bad.
{
d := castorev1pb.Directory{
Directories: []*castorev1pb.DirectoryNode{{
Name: []byte("a"),
Digest: dummyDigest,
Size: 42,
}},
Files: []*castorev1pb.FileNode{{
Name: []byte("a"),
Digest: dummyDigest,
Size: 42,
Executable: false,
}},
Symlinks: []*castorev1pb.SymlinkNode{},
}
assert.ErrorContains(t, d.Validate(), "duplicate name")
}
// "a" comes before "b", all good.
{
d := castorev1pb.Directory{
Directories: []*castorev1pb.DirectoryNode{{
Name: []byte("a"),
Digest: dummyDigest,
Size: 42,
}, {
Name: []byte("b"),
Digest: dummyDigest,
Size: 42,
}},
Files: []*castorev1pb.FileNode{},
Symlinks: []*castorev1pb.SymlinkNode{},
}
assert.NoError(t, d.Validate(), "shouldn't error")
}
// [b, c] and [a] are both properly sorted.
{
d := castorev1pb.Directory{
Directories: []*castorev1pb.DirectoryNode{{
Name: []byte("b"),
Digest: dummyDigest,
Size: 42,
}, {
Name: []byte("c"),
Digest: dummyDigest,
Size: 42,
}},
Files: []*castorev1pb.FileNode{},
Symlinks: []*castorev1pb.SymlinkNode{{
Name: []byte("a"),
Target: []byte("foo"),
}},
}
assert.NoError(t, d.Validate(), "shouldn't error")
}
})
}

View file

@ -0,0 +1,31 @@
{ depot, pkgs, ... }:
let
regenerate = pkgs.writeShellScript "regenerate" ''
(cd $(git rev-parse --show-toplevel)/snix/castore-go && rm *.pb.go && cp ${depot.snix.castore.protos.go-bindings}/*.pb.go . && chmod +w *.pb.go)
'';
in
(pkgs.buildGoModule {
name = "castore-go";
src = depot.third_party.gitignoreSource ./.;
vendorHash = "sha256:03wwzk7irlb05y0zjfmpp5c2dxhcpnmfc169g05sn6d3ni07aly8";
}).overrideAttrs (_: {
meta.ci.extraSteps = {
check = {
label = ":water_buffalo: ensure generated protobuf files match";
needsOutput = true;
command = pkgs.writeShellScript "pb-go-check" ''
${regenerate}
if [[ -n "$(git status --porcelain -unormal)" ]]; then
echo "-----------------------------"
echo ".pb.go files need to be updated, mg run //snix/castore-go/regenerate"
echo "-----------------------------"
git status -unormal
exit 1
fi
'';
alwaysRun = true;
};
};
passthru.regenerate = regenerate;
})

24
snix/castore-go/go.mod Normal file
View file

@ -0,0 +1,24 @@
module snix.dev/castore/proto
go 1.22
toolchain go1.23.4
require (
github.com/stretchr/testify v1.8.1
google.golang.org/grpc v1.69.2
google.golang.org/protobuf v1.36.1
lukechampine.com/blake3 v1.1.7
)
require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/golang/protobuf v1.5.4 // indirect
github.com/klauspost/cpuid/v2 v2.0.9 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
golang.org/x/net v0.30.0 // indirect
golang.org/x/sys v0.26.0 // indirect
golang.org/x/text v0.19.0 // indirect
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

111
snix/castore-go/go.sum Normal file
View file

@ -0,0 +1,111 @@
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4=
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
go.opentelemetry.io/otel v1.31.0 h1:NsJcKPIW0D0H3NgzPDHmo0WW6SptzPdqg/L1zsIm2hY=
go.opentelemetry.io/otel v1.31.0/go.mod h1:O0C14Yl9FgkjqcCZAsE053C13OaddMYr/hz6clDkEJE=
go.opentelemetry.io/otel/metric v1.31.0 h1:FSErL0ATQAmYHUIzSezZibnyVlft1ybhy4ozRPcF2fE=
go.opentelemetry.io/otel/metric v1.31.0/go.mod h1:C3dEloVbLuYoX41KpmAhOqNriGbA+qqH6PQ5E5mUfnY=
go.opentelemetry.io/otel/sdk v1.31.0 h1:xLY3abVHYZ5HSfOg3l2E5LUj2Cwva5Y7yGxnSW9H5Gk=
go.opentelemetry.io/otel/sdk v1.31.0/go.mod h1:TfRbMdhvxIIr/B2N2LQW2S5v9m3gOQ/08KsbbO5BPT0=
go.opentelemetry.io/otel/sdk/metric v1.31.0 h1:i9hxxLJF/9kkvfHppyLL55aW7iIJz4JjxTeYusH7zMc=
go.opentelemetry.io/otel/sdk/metric v1.31.0/go.mod h1:CRInTMVvNhUKgSAMbKyTMxqOBC0zgyxzW55lZzX43Y8=
go.opentelemetry.io/otel/trace v1.31.0 h1:ffjsj1aRouKewfr85U2aGagJ46+MvodynlQ1HYdmJys=
go.opentelemetry.io/otel/trace v1.31.0/go.mod h1:TXZkRk7SM2ZQLtR6eoAWQFIHPvzQ06FJAsO1tJg480A=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4=
golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo=
golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM=
golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013 h1:+kGHl1aib/qcwaRi1CbqBZ1rk19r85MNUf8HaBghugY=
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
google.golang.org/grpc v1.69.2 h1:U3S9QEtbXC0bYNvRtcoklF3xGtLViumSYxWykJS+7AU=
google.golang.org/grpc v1.69.2/go.mod h1:vyjdE6jLBI76dgpDojsFGNaHlxdjXN9ghpnd2o7JGZ4=
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.36.1 h1:yBPeRvTftaleIgM3PZ/WBIZ7XM/eEYAaEyCwvyjq/gk=
google.golang.org/protobuf v1.36.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
lukechampine.com/blake3 v1.1.7 h1:GgRMhmdsuK8+ii6UZFDL8Nb+VyMwadAgcJyfYHxG6n0=
lukechampine.com/blake3 v1.1.7/go.mod h1:tkKEOtDkNtklkXtLNEOGNq5tcV90tJiA1vAA12R78LA=

View file

@ -0,0 +1,38 @@
package castorev1
// RenamedNode returns a node with a new name.
func RenamedNode(node *Node, name string) *Node {
if directoryNode := node.GetDirectory(); directoryNode != nil {
return &Node{
Node: &Node_Directory{
Directory: &DirectoryNode{
Name: []byte(name),
Digest: directoryNode.GetDigest(),
Size: directoryNode.GetSize(),
},
},
}
} else if fileNode := node.GetFile(); fileNode != nil {
return &Node{
Node: &Node_File{
File: &FileNode{
Name: []byte(name),
Digest: fileNode.GetDigest(),
Size: fileNode.GetSize(),
Executable: fileNode.GetExecutable(),
},
},
}
} else if symlinkNode := node.GetSymlink(); symlinkNode != nil {
return &Node{
Node: &Node_Symlink{
Symlink: &SymlinkNode{
Name: []byte(name),
Target: symlinkNode.GetTarget(),
},
},
}
} else {
panic("unreachable")
}
}

View file

@ -0,0 +1,447 @@
// SPDX-License-Identifier: MIT
// Copyright © 2022 The Tvix Authors
// Copyright © 2025 The Snix Project
// Code generated by protoc-gen-go. DO NOT EDIT.
// versions:
// protoc-gen-go v1.36.5
// protoc (unknown)
// source: snix/castore/protos/rpc_blobstore.proto
package castorev1
import (
protoreflect "google.golang.org/protobuf/reflect/protoreflect"
protoimpl "google.golang.org/protobuf/runtime/protoimpl"
reflect "reflect"
sync "sync"
unsafe "unsafe"
)
const (
// Verify that this generated code is sufficiently up-to-date.
_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
// Verify that runtime/protoimpl is sufficiently up-to-date.
_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
)
type StatBlobRequest struct {
state protoimpl.MessageState `protogen:"open.v1"`
// The blake3 digest of the blob requested
Digest []byte `protobuf:"bytes,1,opt,name=digest,proto3" json:"digest,omitempty"`
// Whether the server should reply with a list of more granular chunks.
SendChunks bool `protobuf:"varint,2,opt,name=send_chunks,json=sendChunks,proto3" json:"send_chunks,omitempty"`
// Whether the server should reply with a bao.
SendBao bool `protobuf:"varint,3,opt,name=send_bao,json=sendBao,proto3" json:"send_bao,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *StatBlobRequest) Reset() {
*x = StatBlobRequest{}
mi := &file_snix_castore_protos_rpc_blobstore_proto_msgTypes[0]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *StatBlobRequest) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*StatBlobRequest) ProtoMessage() {}
func (x *StatBlobRequest) ProtoReflect() protoreflect.Message {
mi := &file_snix_castore_protos_rpc_blobstore_proto_msgTypes[0]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use StatBlobRequest.ProtoReflect.Descriptor instead.
func (*StatBlobRequest) Descriptor() ([]byte, []int) {
return file_snix_castore_protos_rpc_blobstore_proto_rawDescGZIP(), []int{0}
}
func (x *StatBlobRequest) GetDigest() []byte {
if x != nil {
return x.Digest
}
return nil
}
func (x *StatBlobRequest) GetSendChunks() bool {
if x != nil {
return x.SendChunks
}
return false
}
func (x *StatBlobRequest) GetSendBao() bool {
if x != nil {
return x.SendBao
}
return false
}
type StatBlobResponse struct {
state protoimpl.MessageState `protogen:"open.v1"`
// If `send_chunks` was set to true, this MAY contain a list of more
// granular chunks, which then may be read individually via the `Read`
// method.
Chunks []*StatBlobResponse_ChunkMeta `protobuf:"bytes,2,rep,name=chunks,proto3" json:"chunks,omitempty"`
// If `send_bao` was set to true, this MAY contain a outboard bao.
// The exact format and message types here will still be fleshed out.
Bao []byte `protobuf:"bytes,3,opt,name=bao,proto3" json:"bao,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *StatBlobResponse) Reset() {
*x = StatBlobResponse{}
mi := &file_snix_castore_protos_rpc_blobstore_proto_msgTypes[1]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *StatBlobResponse) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*StatBlobResponse) ProtoMessage() {}
func (x *StatBlobResponse) ProtoReflect() protoreflect.Message {
mi := &file_snix_castore_protos_rpc_blobstore_proto_msgTypes[1]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use StatBlobResponse.ProtoReflect.Descriptor instead.
func (*StatBlobResponse) Descriptor() ([]byte, []int) {
return file_snix_castore_protos_rpc_blobstore_proto_rawDescGZIP(), []int{1}
}
func (x *StatBlobResponse) GetChunks() []*StatBlobResponse_ChunkMeta {
if x != nil {
return x.Chunks
}
return nil
}
func (x *StatBlobResponse) GetBao() []byte {
if x != nil {
return x.Bao
}
return nil
}
type ReadBlobRequest struct {
state protoimpl.MessageState `protogen:"open.v1"`
// The blake3 digest of the blob or chunk requested
Digest []byte `protobuf:"bytes,1,opt,name=digest,proto3" json:"digest,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *ReadBlobRequest) Reset() {
*x = ReadBlobRequest{}
mi := &file_snix_castore_protos_rpc_blobstore_proto_msgTypes[2]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *ReadBlobRequest) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*ReadBlobRequest) ProtoMessage() {}
func (x *ReadBlobRequest) ProtoReflect() protoreflect.Message {
mi := &file_snix_castore_protos_rpc_blobstore_proto_msgTypes[2]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use ReadBlobRequest.ProtoReflect.Descriptor instead.
func (*ReadBlobRequest) Descriptor() ([]byte, []int) {
return file_snix_castore_protos_rpc_blobstore_proto_rawDescGZIP(), []int{2}
}
func (x *ReadBlobRequest) GetDigest() []byte {
if x != nil {
return x.Digest
}
return nil
}
// This represents some bytes of a blob.
// Blobs are sent in smaller chunks to keep message sizes manageable.
type BlobChunk struct {
state protoimpl.MessageState `protogen:"open.v1"`
Data []byte `protobuf:"bytes,1,opt,name=data,proto3" json:"data,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *BlobChunk) Reset() {
*x = BlobChunk{}
mi := &file_snix_castore_protos_rpc_blobstore_proto_msgTypes[3]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *BlobChunk) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*BlobChunk) ProtoMessage() {}
func (x *BlobChunk) ProtoReflect() protoreflect.Message {
mi := &file_snix_castore_protos_rpc_blobstore_proto_msgTypes[3]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use BlobChunk.ProtoReflect.Descriptor instead.
func (*BlobChunk) Descriptor() ([]byte, []int) {
return file_snix_castore_protos_rpc_blobstore_proto_rawDescGZIP(), []int{3}
}
func (x *BlobChunk) GetData() []byte {
if x != nil {
return x.Data
}
return nil
}
type PutBlobResponse struct {
state protoimpl.MessageState `protogen:"open.v1"`
// The blake3 digest of the data that was sent.
Digest []byte `protobuf:"bytes,1,opt,name=digest,proto3" json:"digest,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *PutBlobResponse) Reset() {
*x = PutBlobResponse{}
mi := &file_snix_castore_protos_rpc_blobstore_proto_msgTypes[4]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *PutBlobResponse) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*PutBlobResponse) ProtoMessage() {}
func (x *PutBlobResponse) ProtoReflect() protoreflect.Message {
mi := &file_snix_castore_protos_rpc_blobstore_proto_msgTypes[4]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use PutBlobResponse.ProtoReflect.Descriptor instead.
func (*PutBlobResponse) Descriptor() ([]byte, []int) {
return file_snix_castore_protos_rpc_blobstore_proto_rawDescGZIP(), []int{4}
}
func (x *PutBlobResponse) GetDigest() []byte {
if x != nil {
return x.Digest
}
return nil
}
type StatBlobResponse_ChunkMeta struct {
state protoimpl.MessageState `protogen:"open.v1"`
// Digest of that specific chunk
Digest []byte `protobuf:"bytes,1,opt,name=digest,proto3" json:"digest,omitempty"`
// Length of that chunk, in bytes.
Size uint64 `protobuf:"varint,2,opt,name=size,proto3" json:"size,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *StatBlobResponse_ChunkMeta) Reset() {
*x = StatBlobResponse_ChunkMeta{}
mi := &file_snix_castore_protos_rpc_blobstore_proto_msgTypes[5]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *StatBlobResponse_ChunkMeta) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*StatBlobResponse_ChunkMeta) ProtoMessage() {}
func (x *StatBlobResponse_ChunkMeta) ProtoReflect() protoreflect.Message {
mi := &file_snix_castore_protos_rpc_blobstore_proto_msgTypes[5]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use StatBlobResponse_ChunkMeta.ProtoReflect.Descriptor instead.
func (*StatBlobResponse_ChunkMeta) Descriptor() ([]byte, []int) {
return file_snix_castore_protos_rpc_blobstore_proto_rawDescGZIP(), []int{1, 0}
}
func (x *StatBlobResponse_ChunkMeta) GetDigest() []byte {
if x != nil {
return x.Digest
}
return nil
}
func (x *StatBlobResponse_ChunkMeta) GetSize() uint64 {
if x != nil {
return x.Size
}
return 0
}
var File_snix_castore_protos_rpc_blobstore_proto protoreflect.FileDescriptor
var file_snix_castore_protos_rpc_blobstore_proto_rawDesc = string([]byte{
0x0a, 0x27, 0x73, 0x6e, 0x69, 0x78, 0x2f, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2f, 0x70,
0x72, 0x6f, 0x74, 0x6f, 0x73, 0x2f, 0x72, 0x70, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x62, 0x73, 0x74,
0x6f, 0x72, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x0f, 0x73, 0x6e, 0x69, 0x78, 0x2e,
0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x76, 0x31, 0x22, 0x65, 0x0a, 0x0f, 0x53, 0x74,
0x61, 0x74, 0x42, 0x6c, 0x6f, 0x62, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x16, 0x0a,
0x06, 0x64, 0x69, 0x67, 0x65, 0x73, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x06, 0x64,
0x69, 0x67, 0x65, 0x73, 0x74, 0x12, 0x1f, 0x0a, 0x0b, 0x73, 0x65, 0x6e, 0x64, 0x5f, 0x63, 0x68,
0x75, 0x6e, 0x6b, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, 0x73, 0x65, 0x6e, 0x64,
0x43, 0x68, 0x75, 0x6e, 0x6b, 0x73, 0x12, 0x19, 0x0a, 0x08, 0x73, 0x65, 0x6e, 0x64, 0x5f, 0x62,
0x61, 0x6f, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x73, 0x65, 0x6e, 0x64, 0x42, 0x61,
0x6f, 0x22, 0xa2, 0x01, 0x0a, 0x10, 0x53, 0x74, 0x61, 0x74, 0x42, 0x6c, 0x6f, 0x62, 0x52, 0x65,
0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x43, 0x0a, 0x06, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x73,
0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2b, 0x2e, 0x73, 0x6e, 0x69, 0x78, 0x2e, 0x63, 0x61,
0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x42, 0x6c, 0x6f,
0x62, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x43, 0x68, 0x75, 0x6e, 0x6b, 0x4d,
0x65, 0x74, 0x61, 0x52, 0x06, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x73, 0x12, 0x10, 0x0a, 0x03, 0x62,
0x61, 0x6f, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x03, 0x62, 0x61, 0x6f, 0x1a, 0x37, 0x0a,
0x09, 0x43, 0x68, 0x75, 0x6e, 0x6b, 0x4d, 0x65, 0x74, 0x61, 0x12, 0x16, 0x0a, 0x06, 0x64, 0x69,
0x67, 0x65, 0x73, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x06, 0x64, 0x69, 0x67, 0x65,
0x73, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x04,
0x52, 0x04, 0x73, 0x69, 0x7a, 0x65, 0x22, 0x29, 0x0a, 0x0f, 0x52, 0x65, 0x61, 0x64, 0x42, 0x6c,
0x6f, 0x62, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x64, 0x69, 0x67,
0x65, 0x73, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x06, 0x64, 0x69, 0x67, 0x65, 0x73,
0x74, 0x22, 0x1f, 0x0a, 0x09, 0x42, 0x6c, 0x6f, 0x62, 0x43, 0x68, 0x75, 0x6e, 0x6b, 0x12, 0x12,
0x0a, 0x04, 0x64, 0x61, 0x74, 0x61, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04, 0x64, 0x61,
0x74, 0x61, 0x22, 0x29, 0x0a, 0x0f, 0x50, 0x75, 0x74, 0x42, 0x6c, 0x6f, 0x62, 0x52, 0x65, 0x73,
0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x64, 0x69, 0x67, 0x65, 0x73, 0x74, 0x18,
0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x06, 0x64, 0x69, 0x67, 0x65, 0x73, 0x74, 0x32, 0xe9, 0x01,
0x0a, 0x0b, 0x42, 0x6c, 0x6f, 0x62, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, 0x4b, 0x0a,
0x04, 0x53, 0x74, 0x61, 0x74, 0x12, 0x20, 0x2e, 0x73, 0x6e, 0x69, 0x78, 0x2e, 0x63, 0x61, 0x73,
0x74, 0x6f, 0x72, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x42, 0x6c, 0x6f, 0x62,
0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x21, 0x2e, 0x73, 0x6e, 0x69, 0x78, 0x2e, 0x63,
0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x42, 0x6c,
0x6f, 0x62, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x46, 0x0a, 0x04, 0x52, 0x65,
0x61, 0x64, 0x12, 0x20, 0x2e, 0x73, 0x6e, 0x69, 0x78, 0x2e, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72,
0x65, 0x2e, 0x76, 0x31, 0x2e, 0x52, 0x65, 0x61, 0x64, 0x42, 0x6c, 0x6f, 0x62, 0x52, 0x65, 0x71,
0x75, 0x65, 0x73, 0x74, 0x1a, 0x1a, 0x2e, 0x73, 0x6e, 0x69, 0x78, 0x2e, 0x63, 0x61, 0x73, 0x74,
0x6f, 0x72, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x42, 0x6c, 0x6f, 0x62, 0x43, 0x68, 0x75, 0x6e, 0x6b,
0x30, 0x01, 0x12, 0x45, 0x0a, 0x03, 0x50, 0x75, 0x74, 0x12, 0x1a, 0x2e, 0x73, 0x6e, 0x69, 0x78,
0x2e, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x42, 0x6c, 0x6f, 0x62,
0x43, 0x68, 0x75, 0x6e, 0x6b, 0x1a, 0x20, 0x2e, 0x73, 0x6e, 0x69, 0x78, 0x2e, 0x63, 0x61, 0x73,
0x74, 0x6f, 0x72, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x50, 0x75, 0x74, 0x42, 0x6c, 0x6f, 0x62, 0x52,
0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x28, 0x01, 0x42, 0x22, 0x5a, 0x20, 0x73, 0x6e, 0x69,
0x78, 0x2e, 0x64, 0x65, 0x76, 0x2f, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2f, 0x70, 0x72,
0x6f, 0x74, 0x6f, 0x3b, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x76, 0x31, 0x62, 0x06, 0x70,
0x72, 0x6f, 0x74, 0x6f, 0x33,
})
var (
file_snix_castore_protos_rpc_blobstore_proto_rawDescOnce sync.Once
file_snix_castore_protos_rpc_blobstore_proto_rawDescData []byte
)
func file_snix_castore_protos_rpc_blobstore_proto_rawDescGZIP() []byte {
file_snix_castore_protos_rpc_blobstore_proto_rawDescOnce.Do(func() {
file_snix_castore_protos_rpc_blobstore_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_snix_castore_protos_rpc_blobstore_proto_rawDesc), len(file_snix_castore_protos_rpc_blobstore_proto_rawDesc)))
})
return file_snix_castore_protos_rpc_blobstore_proto_rawDescData
}
var file_snix_castore_protos_rpc_blobstore_proto_msgTypes = make([]protoimpl.MessageInfo, 6)
var file_snix_castore_protos_rpc_blobstore_proto_goTypes = []any{
(*StatBlobRequest)(nil), // 0: snix.castore.v1.StatBlobRequest
(*StatBlobResponse)(nil), // 1: snix.castore.v1.StatBlobResponse
(*ReadBlobRequest)(nil), // 2: snix.castore.v1.ReadBlobRequest
(*BlobChunk)(nil), // 3: snix.castore.v1.BlobChunk
(*PutBlobResponse)(nil), // 4: snix.castore.v1.PutBlobResponse
(*StatBlobResponse_ChunkMeta)(nil), // 5: snix.castore.v1.StatBlobResponse.ChunkMeta
}
var file_snix_castore_protos_rpc_blobstore_proto_depIdxs = []int32{
5, // 0: snix.castore.v1.StatBlobResponse.chunks:type_name -> snix.castore.v1.StatBlobResponse.ChunkMeta
0, // 1: snix.castore.v1.BlobService.Stat:input_type -> snix.castore.v1.StatBlobRequest
2, // 2: snix.castore.v1.BlobService.Read:input_type -> snix.castore.v1.ReadBlobRequest
3, // 3: snix.castore.v1.BlobService.Put:input_type -> snix.castore.v1.BlobChunk
1, // 4: snix.castore.v1.BlobService.Stat:output_type -> snix.castore.v1.StatBlobResponse
3, // 5: snix.castore.v1.BlobService.Read:output_type -> snix.castore.v1.BlobChunk
4, // 6: snix.castore.v1.BlobService.Put:output_type -> snix.castore.v1.PutBlobResponse
4, // [4:7] is the sub-list for method output_type
1, // [1:4] is the sub-list for method input_type
1, // [1:1] is the sub-list for extension type_name
1, // [1:1] is the sub-list for extension extendee
0, // [0:1] is the sub-list for field type_name
}
func init() { file_snix_castore_protos_rpc_blobstore_proto_init() }
func file_snix_castore_protos_rpc_blobstore_proto_init() {
if File_snix_castore_protos_rpc_blobstore_proto != nil {
return
}
type x struct{}
out := protoimpl.TypeBuilder{
File: protoimpl.DescBuilder{
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
RawDescriptor: unsafe.Slice(unsafe.StringData(file_snix_castore_protos_rpc_blobstore_proto_rawDesc), len(file_snix_castore_protos_rpc_blobstore_proto_rawDesc)),
NumEnums: 0,
NumMessages: 6,
NumExtensions: 0,
NumServices: 1,
},
GoTypes: file_snix_castore_protos_rpc_blobstore_proto_goTypes,
DependencyIndexes: file_snix_castore_protos_rpc_blobstore_proto_depIdxs,
MessageInfos: file_snix_castore_protos_rpc_blobstore_proto_msgTypes,
}.Build()
File_snix_castore_protos_rpc_blobstore_proto = out.File
file_snix_castore_protos_rpc_blobstore_proto_goTypes = nil
file_snix_castore_protos_rpc_blobstore_proto_depIdxs = nil
}

View file

@ -0,0 +1,248 @@
// SPDX-License-Identifier: MIT
// Copyright © 2022 The Tvix Authors
// Copyright © 2025 The Snix Project
// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
// versions:
// - protoc-gen-go-grpc v1.5.1
// - protoc (unknown)
// source: snix/castore/protos/rpc_blobstore.proto
package castorev1
import (
context "context"
grpc "google.golang.org/grpc"
codes "google.golang.org/grpc/codes"
status "google.golang.org/grpc/status"
)
// This is a compile-time assertion to ensure that this generated file
// is compatible with the grpc package it is being compiled against.
// Requires gRPC-Go v1.64.0 or later.
const _ = grpc.SupportPackageIsVersion9
const (
BlobService_Stat_FullMethodName = "/snix.castore.v1.BlobService/Stat"
BlobService_Read_FullMethodName = "/snix.castore.v1.BlobService/Read"
BlobService_Put_FullMethodName = "/snix.castore.v1.BlobService/Put"
)
// BlobServiceClient is the client API for BlobService service.
//
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
//
// BlobService allows reading (or uploading) content-addressed blobs of data.
// BLAKE3 is used as a hashing function for the data. Uploading a blob will
// return the BLAKE3 digest of it, and that's the identifier used to Read/Stat
// them too.
type BlobServiceClient interface {
// Stat can be used to check for the existence of a blob, as well as
// gathering more data about it, like more granular chunking information
// or baos.
// Server implementations are not required to provide more granular chunking
// information, especially if the digest specified in `StatBlobRequest` is
// already a chunk of a blob.
Stat(ctx context.Context, in *StatBlobRequest, opts ...grpc.CallOption) (*StatBlobResponse, error)
// Read allows reading (all) data of a blob/chunk by the BLAKE3 digest of
// its contents.
// If the backend communicated more granular chunks in the `Stat` request,
// this can also be used to read chunks.
// This request returns a stream of BlobChunk, which is just a container for
// a stream of bytes.
// The server may decide on whatever chunking it may seem fit as a size for
// the individual BlobChunk sent in the response stream, this is mostly to
// keep individual messages at a manageable size.
Read(ctx context.Context, in *ReadBlobRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[BlobChunk], error)
// Put uploads a Blob, by reading a stream of bytes.
//
// The way the data is chunked up in individual BlobChunk messages sent in
// the stream has no effect on how the server ends up chunking blobs up, if
// it does at all.
Put(ctx context.Context, opts ...grpc.CallOption) (grpc.ClientStreamingClient[BlobChunk, PutBlobResponse], error)
}
type blobServiceClient struct {
cc grpc.ClientConnInterface
}
func NewBlobServiceClient(cc grpc.ClientConnInterface) BlobServiceClient {
return &blobServiceClient{cc}
}
func (c *blobServiceClient) Stat(ctx context.Context, in *StatBlobRequest, opts ...grpc.CallOption) (*StatBlobResponse, error) {
cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
out := new(StatBlobResponse)
err := c.cc.Invoke(ctx, BlobService_Stat_FullMethodName, in, out, cOpts...)
if err != nil {
return nil, err
}
return out, nil
}
func (c *blobServiceClient) Read(ctx context.Context, in *ReadBlobRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[BlobChunk], error) {
cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
stream, err := c.cc.NewStream(ctx, &BlobService_ServiceDesc.Streams[0], BlobService_Read_FullMethodName, cOpts...)
if err != nil {
return nil, err
}
x := &grpc.GenericClientStream[ReadBlobRequest, BlobChunk]{ClientStream: stream}
if err := x.ClientStream.SendMsg(in); err != nil {
return nil, err
}
if err := x.ClientStream.CloseSend(); err != nil {
return nil, err
}
return x, nil
}
// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name.
type BlobService_ReadClient = grpc.ServerStreamingClient[BlobChunk]
func (c *blobServiceClient) Put(ctx context.Context, opts ...grpc.CallOption) (grpc.ClientStreamingClient[BlobChunk, PutBlobResponse], error) {
cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
stream, err := c.cc.NewStream(ctx, &BlobService_ServiceDesc.Streams[1], BlobService_Put_FullMethodName, cOpts...)
if err != nil {
return nil, err
}
x := &grpc.GenericClientStream[BlobChunk, PutBlobResponse]{ClientStream: stream}
return x, nil
}
// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name.
type BlobService_PutClient = grpc.ClientStreamingClient[BlobChunk, PutBlobResponse]
// BlobServiceServer is the server API for BlobService service.
// All implementations must embed UnimplementedBlobServiceServer
// for forward compatibility.
//
// BlobService allows reading (or uploading) content-addressed blobs of data.
// BLAKE3 is used as a hashing function for the data. Uploading a blob will
// return the BLAKE3 digest of it, and that's the identifier used to Read/Stat
// them too.
type BlobServiceServer interface {
// Stat can be used to check for the existence of a blob, as well as
// gathering more data about it, like more granular chunking information
// or baos.
// Server implementations are not required to provide more granular chunking
// information, especially if the digest specified in `StatBlobRequest` is
// already a chunk of a blob.
Stat(context.Context, *StatBlobRequest) (*StatBlobResponse, error)
// Read allows reading (all) data of a blob/chunk by the BLAKE3 digest of
// its contents.
// If the backend communicated more granular chunks in the `Stat` request,
// this can also be used to read chunks.
// This request returns a stream of BlobChunk, which is just a container for
// a stream of bytes.
// The server may decide on whatever chunking it may seem fit as a size for
// the individual BlobChunk sent in the response stream, this is mostly to
// keep individual messages at a manageable size.
Read(*ReadBlobRequest, grpc.ServerStreamingServer[BlobChunk]) error
// Put uploads a Blob, by reading a stream of bytes.
//
// The way the data is chunked up in individual BlobChunk messages sent in
// the stream has no effect on how the server ends up chunking blobs up, if
// it does at all.
Put(grpc.ClientStreamingServer[BlobChunk, PutBlobResponse]) error
mustEmbedUnimplementedBlobServiceServer()
}
// UnimplementedBlobServiceServer must be embedded to have
// forward compatible implementations.
//
// NOTE: this should be embedded by value instead of pointer to avoid a nil
// pointer dereference when methods are called.
type UnimplementedBlobServiceServer struct{}
func (UnimplementedBlobServiceServer) Stat(context.Context, *StatBlobRequest) (*StatBlobResponse, error) {
return nil, status.Errorf(codes.Unimplemented, "method Stat not implemented")
}
func (UnimplementedBlobServiceServer) Read(*ReadBlobRequest, grpc.ServerStreamingServer[BlobChunk]) error {
return status.Errorf(codes.Unimplemented, "method Read not implemented")
}
func (UnimplementedBlobServiceServer) Put(grpc.ClientStreamingServer[BlobChunk, PutBlobResponse]) error {
return status.Errorf(codes.Unimplemented, "method Put not implemented")
}
func (UnimplementedBlobServiceServer) mustEmbedUnimplementedBlobServiceServer() {}
func (UnimplementedBlobServiceServer) testEmbeddedByValue() {}
// UnsafeBlobServiceServer may be embedded to opt out of forward compatibility for this service.
// Use of this interface is not recommended, as added methods to BlobServiceServer will
// result in compilation errors.
type UnsafeBlobServiceServer interface {
mustEmbedUnimplementedBlobServiceServer()
}
func RegisterBlobServiceServer(s grpc.ServiceRegistrar, srv BlobServiceServer) {
// If the following call pancis, it indicates UnimplementedBlobServiceServer was
// embedded by pointer and is nil. This will cause panics if an
// unimplemented method is ever invoked, so we test this at initialization
// time to prevent it from happening at runtime later due to I/O.
if t, ok := srv.(interface{ testEmbeddedByValue() }); ok {
t.testEmbeddedByValue()
}
s.RegisterService(&BlobService_ServiceDesc, srv)
}
func _BlobService_Stat_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(StatBlobRequest)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(BlobServiceServer).Stat(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: BlobService_Stat_FullMethodName,
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(BlobServiceServer).Stat(ctx, req.(*StatBlobRequest))
}
return interceptor(ctx, in, info, handler)
}
func _BlobService_Read_Handler(srv interface{}, stream grpc.ServerStream) error {
m := new(ReadBlobRequest)
if err := stream.RecvMsg(m); err != nil {
return err
}
return srv.(BlobServiceServer).Read(m, &grpc.GenericServerStream[ReadBlobRequest, BlobChunk]{ServerStream: stream})
}
// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name.
type BlobService_ReadServer = grpc.ServerStreamingServer[BlobChunk]
func _BlobService_Put_Handler(srv interface{}, stream grpc.ServerStream) error {
return srv.(BlobServiceServer).Put(&grpc.GenericServerStream[BlobChunk, PutBlobResponse]{ServerStream: stream})
}
// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name.
type BlobService_PutServer = grpc.ClientStreamingServer[BlobChunk, PutBlobResponse]
// BlobService_ServiceDesc is the grpc.ServiceDesc for BlobService service.
// It's only intended for direct use with grpc.RegisterService,
// and not to be introspected or modified (even as a copy)
var BlobService_ServiceDesc = grpc.ServiceDesc{
ServiceName: "snix.castore.v1.BlobService",
HandlerType: (*BlobServiceServer)(nil),
Methods: []grpc.MethodDesc{
{
MethodName: "Stat",
Handler: _BlobService_Stat_Handler,
},
},
Streams: []grpc.StreamDesc{
{
StreamName: "Read",
Handler: _BlobService_Read_Handler,
ServerStreams: true,
},
{
StreamName: "Put",
Handler: _BlobService_Put_Handler,
ClientStreams: true,
},
},
Metadata: "snix/castore/protos/rpc_blobstore.proto",
}

View file

@ -0,0 +1,243 @@
// SPDX-License-Identifier: MIT
// Copyright © 2022 The Tvix Authors
// Copyright © 2025 The Snix Project
// Code generated by protoc-gen-go. DO NOT EDIT.
// versions:
// protoc-gen-go v1.36.5
// protoc (unknown)
// source: snix/castore/protos/rpc_directory.proto
package castorev1
import (
protoreflect "google.golang.org/protobuf/reflect/protoreflect"
protoimpl "google.golang.org/protobuf/runtime/protoimpl"
reflect "reflect"
sync "sync"
unsafe "unsafe"
)
const (
// Verify that this generated code is sufficiently up-to-date.
_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
// Verify that runtime/protoimpl is sufficiently up-to-date.
_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
)
type GetDirectoryRequest struct {
state protoimpl.MessageState `protogen:"open.v1"`
// Types that are valid to be assigned to ByWhat:
//
// *GetDirectoryRequest_Digest
ByWhat isGetDirectoryRequest_ByWhat `protobuf_oneof:"by_what"`
// If set to true, recursively resolve all child Directory messages.
// Directory messages SHOULD be streamed in a recursive breadth-first walk,
// but other orders are also fine, as long as Directory messages are only
// sent after they are referred to from previously sent Directory messages.
Recursive bool `protobuf:"varint,2,opt,name=recursive,proto3" json:"recursive,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *GetDirectoryRequest) Reset() {
*x = GetDirectoryRequest{}
mi := &file_snix_castore_protos_rpc_directory_proto_msgTypes[0]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *GetDirectoryRequest) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*GetDirectoryRequest) ProtoMessage() {}
func (x *GetDirectoryRequest) ProtoReflect() protoreflect.Message {
mi := &file_snix_castore_protos_rpc_directory_proto_msgTypes[0]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use GetDirectoryRequest.ProtoReflect.Descriptor instead.
func (*GetDirectoryRequest) Descriptor() ([]byte, []int) {
return file_snix_castore_protos_rpc_directory_proto_rawDescGZIP(), []int{0}
}
func (x *GetDirectoryRequest) GetByWhat() isGetDirectoryRequest_ByWhat {
if x != nil {
return x.ByWhat
}
return nil
}
func (x *GetDirectoryRequest) GetDigest() []byte {
if x != nil {
if x, ok := x.ByWhat.(*GetDirectoryRequest_Digest); ok {
return x.Digest
}
}
return nil
}
func (x *GetDirectoryRequest) GetRecursive() bool {
if x != nil {
return x.Recursive
}
return false
}
type isGetDirectoryRequest_ByWhat interface {
isGetDirectoryRequest_ByWhat()
}
type GetDirectoryRequest_Digest struct {
// The blake3 hash of the (root) Directory message, serialized in
// protobuf canonical form.
// Keep in mind this can be a subtree of another root.
Digest []byte `protobuf:"bytes,1,opt,name=digest,proto3,oneof"`
}
func (*GetDirectoryRequest_Digest) isGetDirectoryRequest_ByWhat() {}
type PutDirectoryResponse struct {
state protoimpl.MessageState `protogen:"open.v1"`
RootDigest []byte `protobuf:"bytes,1,opt,name=root_digest,json=rootDigest,proto3" json:"root_digest,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *PutDirectoryResponse) Reset() {
*x = PutDirectoryResponse{}
mi := &file_snix_castore_protos_rpc_directory_proto_msgTypes[1]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *PutDirectoryResponse) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*PutDirectoryResponse) ProtoMessage() {}
func (x *PutDirectoryResponse) ProtoReflect() protoreflect.Message {
mi := &file_snix_castore_protos_rpc_directory_proto_msgTypes[1]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use PutDirectoryResponse.ProtoReflect.Descriptor instead.
func (*PutDirectoryResponse) Descriptor() ([]byte, []int) {
return file_snix_castore_protos_rpc_directory_proto_rawDescGZIP(), []int{1}
}
func (x *PutDirectoryResponse) GetRootDigest() []byte {
if x != nil {
return x.RootDigest
}
return nil
}
var File_snix_castore_protos_rpc_directory_proto protoreflect.FileDescriptor
var file_snix_castore_protos_rpc_directory_proto_rawDesc = string([]byte{
0x0a, 0x27, 0x73, 0x6e, 0x69, 0x78, 0x2f, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2f, 0x70,
0x72, 0x6f, 0x74, 0x6f, 0x73, 0x2f, 0x72, 0x70, 0x63, 0x5f, 0x64, 0x69, 0x72, 0x65, 0x63, 0x74,
0x6f, 0x72, 0x79, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x0f, 0x73, 0x6e, 0x69, 0x78, 0x2e,
0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x76, 0x31, 0x1a, 0x21, 0x73, 0x6e, 0x69, 0x78,
0x2f, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x73, 0x2f,
0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0x58, 0x0a,
0x13, 0x47, 0x65, 0x74, 0x44, 0x69, 0x72, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x79, 0x52, 0x65, 0x71,
0x75, 0x65, 0x73, 0x74, 0x12, 0x18, 0x0a, 0x06, 0x64, 0x69, 0x67, 0x65, 0x73, 0x74, 0x18, 0x01,
0x20, 0x01, 0x28, 0x0c, 0x48, 0x00, 0x52, 0x06, 0x64, 0x69, 0x67, 0x65, 0x73, 0x74, 0x12, 0x1c,
0x0a, 0x09, 0x72, 0x65, 0x63, 0x75, 0x72, 0x73, 0x69, 0x76, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28,
0x08, 0x52, 0x09, 0x72, 0x65, 0x63, 0x75, 0x72, 0x73, 0x69, 0x76, 0x65, 0x42, 0x09, 0x0a, 0x07,
0x62, 0x79, 0x5f, 0x77, 0x68, 0x61, 0x74, 0x22, 0x37, 0x0a, 0x14, 0x50, 0x75, 0x74, 0x44, 0x69,
0x72, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12,
0x1f, 0x0a, 0x0b, 0x72, 0x6f, 0x6f, 0x74, 0x5f, 0x64, 0x69, 0x67, 0x65, 0x73, 0x74, 0x18, 0x01,
0x20, 0x01, 0x28, 0x0c, 0x52, 0x0a, 0x72, 0x6f, 0x6f, 0x74, 0x44, 0x69, 0x67, 0x65, 0x73, 0x74,
0x32, 0xa9, 0x01, 0x0a, 0x10, 0x44, 0x69, 0x72, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x79, 0x53, 0x65,
0x72, 0x76, 0x69, 0x63, 0x65, 0x12, 0x49, 0x0a, 0x03, 0x47, 0x65, 0x74, 0x12, 0x24, 0x2e, 0x73,
0x6e, 0x69, 0x78, 0x2e, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x47,
0x65, 0x74, 0x44, 0x69, 0x72, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x79, 0x52, 0x65, 0x71, 0x75, 0x65,
0x73, 0x74, 0x1a, 0x1a, 0x2e, 0x73, 0x6e, 0x69, 0x78, 0x2e, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72,
0x65, 0x2e, 0x76, 0x31, 0x2e, 0x44, 0x69, 0x72, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x79, 0x30, 0x01,
0x12, 0x4a, 0x0a, 0x03, 0x50, 0x75, 0x74, 0x12, 0x1a, 0x2e, 0x73, 0x6e, 0x69, 0x78, 0x2e, 0x63,
0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x44, 0x69, 0x72, 0x65, 0x63, 0x74,
0x6f, 0x72, 0x79, 0x1a, 0x25, 0x2e, 0x73, 0x6e, 0x69, 0x78, 0x2e, 0x63, 0x61, 0x73, 0x74, 0x6f,
0x72, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x50, 0x75, 0x74, 0x44, 0x69, 0x72, 0x65, 0x63, 0x74, 0x6f,
0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x28, 0x01, 0x42, 0x22, 0x5a, 0x20,
0x73, 0x6e, 0x69, 0x78, 0x2e, 0x64, 0x65, 0x76, 0x2f, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65,
0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x3b, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x76, 0x31,
0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
})
var (
file_snix_castore_protos_rpc_directory_proto_rawDescOnce sync.Once
file_snix_castore_protos_rpc_directory_proto_rawDescData []byte
)
func file_snix_castore_protos_rpc_directory_proto_rawDescGZIP() []byte {
file_snix_castore_protos_rpc_directory_proto_rawDescOnce.Do(func() {
file_snix_castore_protos_rpc_directory_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_snix_castore_protos_rpc_directory_proto_rawDesc), len(file_snix_castore_protos_rpc_directory_proto_rawDesc)))
})
return file_snix_castore_protos_rpc_directory_proto_rawDescData
}
var file_snix_castore_protos_rpc_directory_proto_msgTypes = make([]protoimpl.MessageInfo, 2)
var file_snix_castore_protos_rpc_directory_proto_goTypes = []any{
(*GetDirectoryRequest)(nil), // 0: snix.castore.v1.GetDirectoryRequest
(*PutDirectoryResponse)(nil), // 1: snix.castore.v1.PutDirectoryResponse
(*Directory)(nil), // 2: snix.castore.v1.Directory
}
var file_snix_castore_protos_rpc_directory_proto_depIdxs = []int32{
0, // 0: snix.castore.v1.DirectoryService.Get:input_type -> snix.castore.v1.GetDirectoryRequest
2, // 1: snix.castore.v1.DirectoryService.Put:input_type -> snix.castore.v1.Directory
2, // 2: snix.castore.v1.DirectoryService.Get:output_type -> snix.castore.v1.Directory
1, // 3: snix.castore.v1.DirectoryService.Put:output_type -> snix.castore.v1.PutDirectoryResponse
2, // [2:4] is the sub-list for method output_type
0, // [0:2] is the sub-list for method input_type
0, // [0:0] is the sub-list for extension type_name
0, // [0:0] is the sub-list for extension extendee
0, // [0:0] is the sub-list for field type_name
}
func init() { file_snix_castore_protos_rpc_directory_proto_init() }
func file_snix_castore_protos_rpc_directory_proto_init() {
if File_snix_castore_protos_rpc_directory_proto != nil {
return
}
file_snix_castore_protos_castore_proto_init()
file_snix_castore_protos_rpc_directory_proto_msgTypes[0].OneofWrappers = []any{
(*GetDirectoryRequest_Digest)(nil),
}
type x struct{}
out := protoimpl.TypeBuilder{
File: protoimpl.DescBuilder{
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
RawDescriptor: unsafe.Slice(unsafe.StringData(file_snix_castore_protos_rpc_directory_proto_rawDesc), len(file_snix_castore_protos_rpc_directory_proto_rawDesc)),
NumEnums: 0,
NumMessages: 2,
NumExtensions: 0,
NumServices: 1,
},
GoTypes: file_snix_castore_protos_rpc_directory_proto_goTypes,
DependencyIndexes: file_snix_castore_protos_rpc_directory_proto_depIdxs,
MessageInfos: file_snix_castore_protos_rpc_directory_proto_msgTypes,
}.Build()
File_snix_castore_protos_rpc_directory_proto = out.File
file_snix_castore_protos_rpc_directory_proto_goTypes = nil
file_snix_castore_protos_rpc_directory_proto_depIdxs = nil
}

View file

@ -0,0 +1,197 @@
// SPDX-License-Identifier: MIT
// Copyright © 2022 The Tvix Authors
// Copyright © 2025 The Snix Project
// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
// versions:
// - protoc-gen-go-grpc v1.5.1
// - protoc (unknown)
// source: snix/castore/protos/rpc_directory.proto
package castorev1
import (
context "context"
grpc "google.golang.org/grpc"
codes "google.golang.org/grpc/codes"
status "google.golang.org/grpc/status"
)
// This is a compile-time assertion to ensure that this generated file
// is compatible with the grpc package it is being compiled against.
// Requires gRPC-Go v1.64.0 or later.
const _ = grpc.SupportPackageIsVersion9
const (
DirectoryService_Get_FullMethodName = "/snix.castore.v1.DirectoryService/Get"
DirectoryService_Put_FullMethodName = "/snix.castore.v1.DirectoryService/Put"
)
// DirectoryServiceClient is the client API for DirectoryService service.
//
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
type DirectoryServiceClient interface {
// Get retrieves a stream of Directory messages, by using the lookup
// parameters in GetDirectoryRequest.
// Keep in mind multiple DirectoryNodes in different parts of the graph might
// have the same digest if they have the same underlying contents,
// so sending subsequent ones can be omitted.
//
// It is okay for certain implementations to only allow retrieval of
// Directory digests that are at the "root", aka the last element that's
// sent in a Put. This makes sense for implementations bundling closures of
// directories together in batches.
Get(ctx context.Context, in *GetDirectoryRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[Directory], error)
// Put uploads a graph of Directory messages.
// Individual Directory messages need to be send in an order walking up
// from the leaves to the root - a Directory message can only refer to
// Directory messages previously sent in the same stream.
// Keep in mind multiple DirectoryNodes in different parts of the graph might
// have the same digest if they have the same underlying contents,
// so sending subsequent ones can be omitted.
// We might add a separate method, allowing to send partial graphs at a later
// time, if requiring to send the full graph turns out to be a problem.
Put(ctx context.Context, opts ...grpc.CallOption) (grpc.ClientStreamingClient[Directory, PutDirectoryResponse], error)
}
type directoryServiceClient struct {
cc grpc.ClientConnInterface
}
func NewDirectoryServiceClient(cc grpc.ClientConnInterface) DirectoryServiceClient {
return &directoryServiceClient{cc}
}
func (c *directoryServiceClient) Get(ctx context.Context, in *GetDirectoryRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[Directory], error) {
cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
stream, err := c.cc.NewStream(ctx, &DirectoryService_ServiceDesc.Streams[0], DirectoryService_Get_FullMethodName, cOpts...)
if err != nil {
return nil, err
}
x := &grpc.GenericClientStream[GetDirectoryRequest, Directory]{ClientStream: stream}
if err := x.ClientStream.SendMsg(in); err != nil {
return nil, err
}
if err := x.ClientStream.CloseSend(); err != nil {
return nil, err
}
return x, nil
}
// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name.
type DirectoryService_GetClient = grpc.ServerStreamingClient[Directory]
func (c *directoryServiceClient) Put(ctx context.Context, opts ...grpc.CallOption) (grpc.ClientStreamingClient[Directory, PutDirectoryResponse], error) {
cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
stream, err := c.cc.NewStream(ctx, &DirectoryService_ServiceDesc.Streams[1], DirectoryService_Put_FullMethodName, cOpts...)
if err != nil {
return nil, err
}
x := &grpc.GenericClientStream[Directory, PutDirectoryResponse]{ClientStream: stream}
return x, nil
}
// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name.
type DirectoryService_PutClient = grpc.ClientStreamingClient[Directory, PutDirectoryResponse]
// DirectoryServiceServer is the server API for DirectoryService service.
// All implementations must embed UnimplementedDirectoryServiceServer
// for forward compatibility.
type DirectoryServiceServer interface {
// Get retrieves a stream of Directory messages, by using the lookup
// parameters in GetDirectoryRequest.
// Keep in mind multiple DirectoryNodes in different parts of the graph might
// have the same digest if they have the same underlying contents,
// so sending subsequent ones can be omitted.
//
// It is okay for certain implementations to only allow retrieval of
// Directory digests that are at the "root", aka the last element that's
// sent in a Put. This makes sense for implementations bundling closures of
// directories together in batches.
Get(*GetDirectoryRequest, grpc.ServerStreamingServer[Directory]) error
// Put uploads a graph of Directory messages.
// Individual Directory messages need to be send in an order walking up
// from the leaves to the root - a Directory message can only refer to
// Directory messages previously sent in the same stream.
// Keep in mind multiple DirectoryNodes in different parts of the graph might
// have the same digest if they have the same underlying contents,
// so sending subsequent ones can be omitted.
// We might add a separate method, allowing to send partial graphs at a later
// time, if requiring to send the full graph turns out to be a problem.
Put(grpc.ClientStreamingServer[Directory, PutDirectoryResponse]) error
mustEmbedUnimplementedDirectoryServiceServer()
}
// UnimplementedDirectoryServiceServer must be embedded to have
// forward compatible implementations.
//
// NOTE: this should be embedded by value instead of pointer to avoid a nil
// pointer dereference when methods are called.
type UnimplementedDirectoryServiceServer struct{}
func (UnimplementedDirectoryServiceServer) Get(*GetDirectoryRequest, grpc.ServerStreamingServer[Directory]) error {
return status.Errorf(codes.Unimplemented, "method Get not implemented")
}
func (UnimplementedDirectoryServiceServer) Put(grpc.ClientStreamingServer[Directory, PutDirectoryResponse]) error {
return status.Errorf(codes.Unimplemented, "method Put not implemented")
}
func (UnimplementedDirectoryServiceServer) mustEmbedUnimplementedDirectoryServiceServer() {}
func (UnimplementedDirectoryServiceServer) testEmbeddedByValue() {}
// UnsafeDirectoryServiceServer may be embedded to opt out of forward compatibility for this service.
// Use of this interface is not recommended, as added methods to DirectoryServiceServer will
// result in compilation errors.
type UnsafeDirectoryServiceServer interface {
mustEmbedUnimplementedDirectoryServiceServer()
}
func RegisterDirectoryServiceServer(s grpc.ServiceRegistrar, srv DirectoryServiceServer) {
// If the following call pancis, it indicates UnimplementedDirectoryServiceServer was
// embedded by pointer and is nil. This will cause panics if an
// unimplemented method is ever invoked, so we test this at initialization
// time to prevent it from happening at runtime later due to I/O.
if t, ok := srv.(interface{ testEmbeddedByValue() }); ok {
t.testEmbeddedByValue()
}
s.RegisterService(&DirectoryService_ServiceDesc, srv)
}
func _DirectoryService_Get_Handler(srv interface{}, stream grpc.ServerStream) error {
m := new(GetDirectoryRequest)
if err := stream.RecvMsg(m); err != nil {
return err
}
return srv.(DirectoryServiceServer).Get(m, &grpc.GenericServerStream[GetDirectoryRequest, Directory]{ServerStream: stream})
}
// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name.
type DirectoryService_GetServer = grpc.ServerStreamingServer[Directory]
func _DirectoryService_Put_Handler(srv interface{}, stream grpc.ServerStream) error {
return srv.(DirectoryServiceServer).Put(&grpc.GenericServerStream[Directory, PutDirectoryResponse]{ServerStream: stream})
}
// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name.
type DirectoryService_PutServer = grpc.ClientStreamingServer[Directory, PutDirectoryResponse]
// DirectoryService_ServiceDesc is the grpc.ServiceDesc for DirectoryService service.
// It's only intended for direct use with grpc.RegisterService,
// and not to be introspected or modified (even as a copy)
var DirectoryService_ServiceDesc = grpc.ServiceDesc{
ServiceName: "snix.castore.v1.DirectoryService",
HandlerType: (*DirectoryServiceServer)(nil),
Methods: []grpc.MethodDesc{},
Streams: []grpc.StreamDesc{
{
StreamName: "Get",
Handler: _DirectoryService_Get_Handler,
ServerStreams: true,
},
{
StreamName: "Put",
Handler: _DirectoryService_Put_Handler,
ClientStreams: true,
},
},
Metadata: "snix/castore/protos/rpc_directory.proto",
}

106
snix/castore/Cargo.toml Normal file
View file

@ -0,0 +1,106 @@
[package]
name = "snix-castore"
version = "0.1.0"
edition = "2021"
[dependencies]
async-compression = { workspace = true, features = ["tokio", "zstd"] }
async-stream.workspace = true
async-tempfile.workspace = true
blake3 = { workspace = true, features = ["rayon", "std", "traits-preview"] }
bstr.workspace = true
bytes.workspace = true
clap = { workspace = true, features = ["derive", "env"] }
data-encoding.workspace = true
digest.workspace = true
fastcdc = { workspace = true, features = ["tokio"] }
futures.workspace = true
object_store = { workspace = true, features = ["http"] }
parking_lot.workspace = true
pin-project-lite.workspace = true
prost.workspace = true
thiserror.workspace = true
tokio-stream = { workspace = true, features = ["fs", "net"] }
tokio-util = { workspace = true, features = ["io", "io-util", "codec"] }
tokio-tar.workspace = true
tokio = { workspace = true, features = ["fs", "macros", "net", "rt", "rt-multi-thread", "signal"] }
toml = { version = "0.8.19", optional = true }
tonic.workspace = true
tower.workspace = true
tracing.workspace = true
tracing-indicatif.workspace = true
snix-tracing = { path = "../tracing", features = ["tonic"] }
url.workspace = true
walkdir.workspace = true
zstd.workspace = true
serde = { workspace = true, features = ["derive"] }
serde_with.workspace = true
serde_qs.workspace = true
petgraph.workspace = true
pin-project.workspace = true
erased-serde.workspace = true
serde_tagged.workspace = true
hyper-util.workspace = true
redb = { workspace = true, features = ["logging"] }
bigtable_rs = { workspace = true, optional = true }
fuse-backend-rs = { workspace = true, optional = true }
libc = { workspace = true, optional = true }
threadpool = { workspace = true, optional = true }
tonic-reflection = { workspace = true, optional = true }
vhost = { workspace = true, optional = true }
vhost-user-backend = { workspace = true, optional = true }
virtio-queue = { workspace = true, optional = true }
vm-memory = { workspace = true, optional = true }
vmm-sys-util = { workspace = true, optional = true }
virtio-bindings = { workspace = true, optional = true }
wu-manber.workspace = true
auto_impl = "1.2.0"
[build-dependencies]
prost-build.workspace = true
tonic-build.workspace = true
[dev-dependencies]
async-process.workspace = true
rstest.workspace = true
tempfile.workspace = true
tokio-retry.workspace = true
hex-literal.workspace = true
rstest_reuse.workspace = true
xattr.workspace = true
serde_json.workspace = true
tokio-test.workspace = true
[features]
default = ["cloud"]
cloud = [
"dep:bigtable_rs",
"object_store/aws",
"object_store/azure",
"object_store/gcp",
]
fs = ["dep:fuse-backend-rs", "dep:threadpool", "dep:libc"]
virtiofs = [
"fs",
"dep:vhost",
"dep:vhost-user-backend",
"dep:virtio-queue",
"dep:vm-memory",
"dep:vmm-sys-util",
"dep:virtio-bindings",
"fuse-backend-rs?/vhost-user-fs", # impl FsCacheReqHandler for SlaveFsCacheReq
"fuse-backend-rs?/virtiofs",
]
fuse = ["fs"]
tonic-reflection = ["dep:tonic-reflection"]
xp-composition-cli = ["toml", "xp-composition-url-refs"]
# This feature enables anonymous url syntax which might inherently expose
# arbitrary composition possibilities to the user.
xp-composition-url-refs = []
# Whether to run the integration tests.
# Requires the following packages in $PATH:
# cbtemulator, google-cloud-bigtable-tool
integration = []
[lints]
workspace = true

35
snix/castore/build.rs Normal file
View file

@ -0,0 +1,35 @@
use std::io::Result;
fn main() -> Result<()> {
#[allow(unused_mut)]
let mut builder = tonic_build::configure();
#[cfg(feature = "tonic-reflection")]
{
let out_dir = std::path::PathBuf::from(std::env::var("OUT_DIR").unwrap());
let descriptor_path = out_dir.join("snix.castore.v1.bin");
builder = builder.file_descriptor_set_path(descriptor_path);
};
builder
.build_server(true)
.build_client(true)
.emit_rerun_if_changed(false)
.bytes(["."])
.type_attribute(".", "#[derive(Eq, Hash)]")
.compile_protos(
&[
"snix/castore/protos/castore.proto",
"snix/castore/protos/rpc_blobstore.proto",
"snix/castore/protos/rpc_directory.proto",
],
// If we are in running `cargo build` manually, using `../..` works fine,
// but in case we run inside a nix build, we need to instead point PROTO_ROOT
// to a custom tree containing that structure.
&[match std::env::var_os("PROTO_ROOT") {
Some(proto_root) => proto_root.to_str().unwrap().to_owned(),
None => "../..".to_string(),
}],
)
}

28
snix/castore/default.nix Normal file
View file

@ -0,0 +1,28 @@
{ depot, pkgs, lib, ... }:
(depot.snix.crates.workspaceMembers.snix-castore.build.override {
runTests = true;
testPreRun = ''
export SSL_CERT_FILE=/dev/null
'';
}).overrideAttrs (old: rec {
meta.ci.targets = [ "integration-tests" ] ++ lib.filter (x: lib.hasPrefix "with-features" x || x == "no-features") (lib.attrNames passthru);
passthru = (depot.snix.utils.mkFeaturePowerset {
inherit (old) crateName;
features = ([ "cloud" "fuse" "tonic-reflection" "xp-composition-url-refs" ]
# virtiofs feature currently fails to build on Darwin
++ lib.optional pkgs.stdenv.isLinux "virtiofs");
override.testPreRun = ''
export SSL_CERT_FILE=/dev/null
'';
}) // {
integration-tests = depot.snix.crates.workspaceMembers.${old.crateName}.build.override (old: {
runTests = true;
testPreRun = ''
export SSL_CERT_FILE=/dev/null
export PATH="$PATH:${pkgs.lib.makeBinPath [ pkgs.cbtemulator pkgs.google-cloud-bigtable-tool ]}"
'';
features = old.features ++ [ "integration" ];
});
};
})

View file

@ -0,0 +1,22 @@
Copyright © The Tvix Authors
Copyright © The Snix Project
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
“Software”), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View file

@ -0,0 +1,72 @@
// SPDX-FileCopyrightText: edef <edef@unfathomable.blue>
// SPDX-License-Identifier: OSL-3.0 OR MIT OR Apache-2.0
// Copyright © 2025 The Snix Project
syntax = "proto3";
package snix.castore.v1;
option go_package = "snix.dev/castore/proto;castorev1";
// A Directory can contain Directory, File or Symlink nodes.
// Each of these nodes have a name attribute, which is the basename in that
// directory and node type specific attributes.
// The name attribute:
// - MUST not contain slashes or null bytes
// - MUST not be '.' or '..'
// - MUST be unique across all three lists
// Elements in each list need to be lexicographically ordered by the name
// attribute.
message Directory {
repeated DirectoryNode directories = 1;
repeated FileNode files = 2;
repeated SymlinkNode symlinks = 3;
}
// A DirectoryNode represents a directory in a Directory.
message DirectoryNode {
// The (base)name of the directory
bytes name = 1;
// The blake3 hash of a Directory message, serialized in protobuf canonical form.
bytes digest = 2;
// Number of child elements in the Directory referred to by `digest`.
// Calculated by summing up the numbers of `directories`, `files` and
// `symlinks`, and for each directory, its size field. Used for inode number
// calculation.
// This field is precisely as verifiable as any other Merkle tree edge.
// Resolve `digest`, and you can compute it incrementally. Resolve the entire
// tree, and you can fully compute it from scratch.
// A credulous implementation won't reject an excessive size, but this is
// harmless: you'll have some ordinals without nodes. Undersizing is obvious
// and easy to reject: you won't have an ordinal for some nodes.
uint64 size = 3;
}
// A FileNode represents a regular or executable file in a Directory.
message FileNode {
// The (base)name of the file
bytes name = 1;
// The blake3 digest of the file contents
bytes digest = 2;
// The file content size
uint64 size = 3;
// Whether the file is executable
bool executable = 4;
}
// A SymlinkNode represents a symbolic link in a Directory.
message SymlinkNode {
// The (base)name of the symlink
bytes name = 1;
// The target of the symlink.
bytes target = 2;
}
// A Node is either a DirectoryNode, FileNode or SymlinkNode.
message Node {
oneof node {
DirectoryNode directory = 1;
FileNode file = 2;
SymlinkNode symlink = 3;
}
}

View file

@ -0,0 +1,48 @@
{ depot, pkgs, lib, ... }:
let
protos = lib.sourceByRegex depot.path.origSrc [
"buf.yaml"
"buf.gen.yaml"
"^snix(/castore(/protos(/.*\.proto)?)?)?$"
];
in
depot.nix.readTree.drvTargets {
inherit protos;
# Lints and ensures formatting of the proto files.
check = pkgs.stdenv.mkDerivation {
name = "proto-check";
src = protos;
nativeBuildInputs = [
pkgs.buf
];
buildPhase = ''
export HOME=$TMPDIR
buf lint
buf format -d --exit-code
touch $out
'';
};
# Produces the golang bindings.
go-bindings = pkgs.stdenv.mkDerivation {
name = "go-bindings";
src = protos;
nativeBuildInputs = [
pkgs.buf
pkgs.protoc-gen-go
pkgs.protoc-gen-go-grpc
];
buildPhase = ''
export HOME=$TMPDIR
buf generate
mkdir -p $out
cp snix/castore/protos/*.pb.go $out/
'';
};
}

View file

@ -0,0 +1,86 @@
// SPDX-License-Identifier: MIT
// Copyright © 2022 The Tvix Authors
// Copyright © 2025 The Snix Project
syntax = "proto3";
package snix.castore.v1;
option go_package = "snix.dev/castore/proto;castorev1";
// BlobService allows reading (or uploading) content-addressed blobs of data.
// BLAKE3 is used as a hashing function for the data. Uploading a blob will
// return the BLAKE3 digest of it, and that's the identifier used to Read/Stat
// them too.
service BlobService {
// Stat can be used to check for the existence of a blob, as well as
// gathering more data about it, like more granular chunking information
// or baos.
// Server implementations are not required to provide more granular chunking
// information, especially if the digest specified in `StatBlobRequest` is
// already a chunk of a blob.
rpc Stat(StatBlobRequest) returns (StatBlobResponse);
// Read allows reading (all) data of a blob/chunk by the BLAKE3 digest of
// its contents.
// If the backend communicated more granular chunks in the `Stat` request,
// this can also be used to read chunks.
// This request returns a stream of BlobChunk, which is just a container for
// a stream of bytes.
// The server may decide on whatever chunking it may seem fit as a size for
// the individual BlobChunk sent in the response stream, this is mostly to
// keep individual messages at a manageable size.
rpc Read(ReadBlobRequest) returns (stream BlobChunk);
// Put uploads a Blob, by reading a stream of bytes.
//
// The way the data is chunked up in individual BlobChunk messages sent in
// the stream has no effect on how the server ends up chunking blobs up, if
// it does at all.
rpc Put(stream BlobChunk) returns (PutBlobResponse);
}
message StatBlobRequest {
// The blake3 digest of the blob requested
bytes digest = 1;
// Whether the server should reply with a list of more granular chunks.
bool send_chunks = 2;
// Whether the server should reply with a bao.
bool send_bao = 3;
}
message StatBlobResponse {
// If `send_chunks` was set to true, this MAY contain a list of more
// granular chunks, which then may be read individually via the `Read`
// method.
repeated ChunkMeta chunks = 2;
message ChunkMeta {
// Digest of that specific chunk
bytes digest = 1;
// Length of that chunk, in bytes.
uint64 size = 2;
}
// If `send_bao` was set to true, this MAY contain a outboard bao.
// The exact format and message types here will still be fleshed out.
bytes bao = 3;
}
message ReadBlobRequest {
// The blake3 digest of the blob or chunk requested
bytes digest = 1;
}
// This represents some bytes of a blob.
// Blobs are sent in smaller chunks to keep message sizes manageable.
message BlobChunk {
bytes data = 1;
}
message PutBlobResponse {
// The blake3 digest of the data that was sent.
bytes digest = 1;
}

View file

@ -0,0 +1,54 @@
// SPDX-License-Identifier: MIT
// Copyright © 2022 The Tvix Authors
// Copyright © 2025 The Snix Project
syntax = "proto3";
package snix.castore.v1;
import "snix/castore/protos/castore.proto";
option go_package = "snix.dev/castore/proto;castorev1";
service DirectoryService {
// Get retrieves a stream of Directory messages, by using the lookup
// parameters in GetDirectoryRequest.
// Keep in mind multiple DirectoryNodes in different parts of the graph might
// have the same digest if they have the same underlying contents,
// so sending subsequent ones can be omitted.
//
// It is okay for certain implementations to only allow retrieval of
// Directory digests that are at the "root", aka the last element that's
// sent in a Put. This makes sense for implementations bundling closures of
// directories together in batches.
rpc Get(GetDirectoryRequest) returns (stream Directory);
// Put uploads a graph of Directory messages.
// Individual Directory messages need to be send in an order walking up
// from the leaves to the root - a Directory message can only refer to
// Directory messages previously sent in the same stream.
// Keep in mind multiple DirectoryNodes in different parts of the graph might
// have the same digest if they have the same underlying contents,
// so sending subsequent ones can be omitted.
// We might add a separate method, allowing to send partial graphs at a later
// time, if requiring to send the full graph turns out to be a problem.
rpc Put(stream Directory) returns (PutDirectoryResponse);
}
message GetDirectoryRequest {
oneof by_what {
// The blake3 hash of the (root) Directory message, serialized in
// protobuf canonical form.
// Keep in mind this can be a subtree of another root.
bytes digest = 1;
}
// If set to true, recursively resolve all child Directory messages.
// Directory messages SHOULD be streamed in a recursive breadth-first walk,
// but other orders are also fine, as long as Directory messages are only
// sent after they are referred to from previously sent Directory messages.
bool recursive = 2;
}
message PutDirectoryResponse {
bytes root_digest = 1;
}

View file

@ -0,0 +1,503 @@
use futures::{ready, TryStreamExt};
use pin_project_lite::pin_project;
use tokio::io::{AsyncRead, AsyncSeekExt};
use tokio_stream::StreamExt;
use tokio_util::io::{ReaderStream, StreamReader};
use tracing::{instrument, trace, warn};
use crate::B3Digest;
use std::{cmp::Ordering, pin::Pin};
use super::{BlobReader, BlobService};
pin_project! {
/// ChunkedReader provides a chunk-aware [BlobReader], so allows reading and
/// seeking into a blob.
/// It internally holds a [ChunkedBlob], which is storing chunk information
/// able to emit a reader seeked to a specific position whenever we need to seek.
pub struct ChunkedReader<BS> {
chunked_blob: ChunkedBlob<BS>,
#[pin]
r: Box<dyn AsyncRead + Unpin + Send>,
pos: u64,
}
}
impl<BS> ChunkedReader<BS>
where
BS: AsRef<dyn BlobService> + Clone + 'static + Send,
{
/// Construct a new [ChunkedReader], by retrieving a list of chunks (their
/// blake3 digests and chunk sizes)
pub fn from_chunks(chunks_it: impl Iterator<Item = (B3Digest, u64)>, blob_service: BS) -> Self {
let chunked_blob = ChunkedBlob::from_iter(chunks_it, blob_service);
let r = chunked_blob.reader_skipped_offset(0);
Self {
chunked_blob,
r,
pos: 0,
}
}
}
/// ChunkedReader implements BlobReader.
impl<BS> BlobReader for ChunkedReader<BS> where BS: Send + Clone + 'static + AsRef<dyn BlobService> {}
impl<BS> tokio::io::AsyncRead for ChunkedReader<BS>
where
BS: AsRef<dyn BlobService> + Clone + 'static,
{
fn poll_read(
self: std::pin::Pin<&mut Self>,
cx: &mut std::task::Context<'_>,
buf: &mut tokio::io::ReadBuf<'_>,
) -> std::task::Poll<std::io::Result<()>> {
// The amount of data read can be determined by the increase
// in the length of the slice returned by `ReadBuf::filled`.
let filled_before = buf.filled().len();
let this = self.project();
ready!(this.r.poll_read(cx, buf))?;
let bytes_read = buf.filled().len() - filled_before;
*this.pos += bytes_read as u64;
Ok(()).into()
}
}
impl<BS> tokio::io::AsyncSeek for ChunkedReader<BS>
where
BS: AsRef<dyn BlobService> + Clone + Send + 'static,
{
#[instrument(skip(self), err(Debug))]
fn start_seek(self: Pin<&mut Self>, position: std::io::SeekFrom) -> std::io::Result<()> {
let total_len = self.chunked_blob.blob_length();
let mut this = self.project();
let absolute_offset: u64 = match position {
std::io::SeekFrom::Start(from_start) => from_start,
std::io::SeekFrom::End(from_end) => {
// note from_end is i64, not u64, so this is usually negative.
total_len.checked_add_signed(from_end).ok_or_else(|| {
std::io::Error::new(
std::io::ErrorKind::InvalidInput,
"over/underflow while seeking",
)
})?
}
std::io::SeekFrom::Current(from_current) => {
// note from_end is i64, not u64, so this can be positive or negative.
(*this.pos)
.checked_add_signed(from_current)
.ok_or_else(|| {
std::io::Error::new(
std::io::ErrorKind::InvalidInput,
"over/underflow while seeking",
)
})?
}
};
// check if the position actually did change.
if absolute_offset != *this.pos {
// ensure the new position still is inside the file.
if absolute_offset > total_len {
Err(std::io::Error::new(
std::io::ErrorKind::InvalidInput,
"seeked beyond EOF",
))?
}
// Update the position and the internal reader.
*this.pos = absolute_offset;
// FUTUREWORK: if we can seek forward, avoid re-assembling.
// At least if it's still in the same chunk?
*this.r = this.chunked_blob.reader_skipped_offset(absolute_offset);
}
Ok(())
}
fn poll_complete(
self: Pin<&mut Self>,
_cx: &mut std::task::Context<'_>,
) -> std::task::Poll<std::io::Result<u64>> {
std::task::Poll::Ready(Ok(self.pos))
}
}
/// Holds a list of blake3 digest for individual chunks (and their sizes).
/// Is able to construct a Reader that seeked to a certain offset, which
/// is useful to construct a BlobReader (that implements AsyncSeek).
/// - the current chunk index, and a Custor<Vec<u8>> holding the data of that chunk.
struct ChunkedBlob<BS> {
blob_service: BS,
chunks: Vec<(u64, u64, B3Digest)>,
}
impl<BS> ChunkedBlob<BS>
where
BS: AsRef<dyn BlobService> + Clone + 'static + Send,
{
/// Constructs [Self] from a list of blake3 digests of chunks and their
/// sizes, and a reference to a blob service.
/// Initializing it with an empty list is disallowed.
fn from_iter(chunks_it: impl Iterator<Item = (B3Digest, u64)>, blob_service: BS) -> Self {
let mut chunks = Vec::new();
let mut offset: u64 = 0;
for (chunk_digest, chunk_size) in chunks_it {
chunks.push((offset, chunk_size, chunk_digest));
offset += chunk_size;
}
assert!(
!chunks.is_empty(),
"Chunks must be provided, don't use this for blobs without chunks"
);
Self {
blob_service,
chunks,
}
}
/// Returns the length of the blob.
fn blob_length(&self) -> u64 {
self.chunks
.last()
.map(|(chunk_offset, chunk_size, _)| chunk_offset + chunk_size)
.unwrap_or(0)
}
/// For a given position pos, return the chunk containing the data.
/// In case this would range outside the blob, None is returned.
#[instrument(level = "trace", skip(self), ret)]
fn get_chunk_idx_for_position(&self, pos: u64) -> Option<usize> {
// FUTUREWORK: benchmark when to use linear search, binary_search and BTreeSet
self.chunks
.binary_search_by(|(chunk_start_pos, chunk_size, _)| {
if chunk_start_pos + chunk_size <= pos {
Ordering::Less
} else if *chunk_start_pos > pos {
Ordering::Greater
} else {
Ordering::Equal
}
})
.ok()
}
/// Returns a stream of bytes of the data in that blob.
/// It internally assembles a stream reading from each chunk (skipping over
/// chunks containing irrelevant data).
/// From the first relevant chunk, the irrelevant bytes are skipped too.
/// The returned boxed thing does not implement AsyncSeek on its own, but
/// ChunkedReader does.
#[instrument(level = "trace", skip(self))]
fn reader_skipped_offset(&self, offset: u64) -> Box<dyn tokio::io::AsyncRead + Send + Unpin> {
if offset == self.blob_length() {
return Box::new(std::io::Cursor::new(vec![]));
}
// construct a stream of all chunks starting with the given offset
let start_chunk_idx = self
.get_chunk_idx_for_position(offset)
.expect("outside of blob");
// It's ok to panic here, we can only reach this by seeking, and seeking should already reject out-of-file seeking.
let skip_first_chunk_bytes = (offset - self.chunks[start_chunk_idx].0) as usize;
let blob_service = self.blob_service.clone();
let chunks: Vec<_> = self.chunks[start_chunk_idx..].to_vec();
let readers_stream = tokio_stream::iter(chunks.into_iter().enumerate()).map(
move |(nth_chunk, (_chunk_start_offset, chunk_size, chunk_digest))| {
let chunk_digest = chunk_digest.to_owned();
let blob_service = blob_service.clone();
async move {
trace!(chunk_size=%chunk_size, chunk_digest=%chunk_digest, "open_read on chunk in stream");
let mut blob_reader = blob_service
.as_ref()
.open_read(&chunk_digest.to_owned())
.await?
.ok_or_else(|| {
warn!(chunk.digest = %chunk_digest, "chunk not found");
std::io::Error::new(std::io::ErrorKind::NotFound, "chunk not found")
})?;
// iff this is the first chunk in the stream, skip by skip_first_chunk_bytes
if nth_chunk == 0 && skip_first_chunk_bytes > 0 {
blob_reader
.seek(std::io::SeekFrom::Start(skip_first_chunk_bytes as u64))
.await?;
}
Ok::<_, std::io::Error>(blob_reader)
}
},
);
// convert the stream of readers to a stream of streams of byte chunks
let bytes_streams = readers_stream.then(|elem| async { elem.await.map(ReaderStream::new) });
// flatten into one stream of byte chunks
let bytes_stream = bytes_streams.try_flatten();
// convert into AsyncRead
Box::new(StreamReader::new(Box::pin(bytes_stream)))
}
}
#[cfg(test)]
mod test {
use std::{
io::SeekFrom,
sync::{Arc, LazyLock},
};
use crate::{
blobservice::{chunked_reader::ChunkedReader, BlobService, MemoryBlobService},
B3Digest,
};
use hex_literal::hex;
use tokio::io::{AsyncReadExt, AsyncSeekExt};
const CHUNK_1: [u8; 2] = hex!("0001");
const CHUNK_2: [u8; 4] = hex!("02030405");
const CHUNK_3: [u8; 1] = hex!("06");
const CHUNK_4: [u8; 2] = hex!("0708");
const CHUNK_5: [u8; 7] = hex!("090a0b0c0d0e0f");
// `[ 0 1 ] [ 2 3 4 5 ] [ 6 ] [ 7 8 ] [ 9 10 11 12 13 14 15 ]`
pub static CHUNK_1_DIGEST: LazyLock<B3Digest> =
LazyLock::new(|| blake3::hash(&CHUNK_1).as_bytes().into());
pub static CHUNK_2_DIGEST: LazyLock<B3Digest> =
LazyLock::new(|| blake3::hash(&CHUNK_2).as_bytes().into());
pub static CHUNK_3_DIGEST: LazyLock<B3Digest> =
LazyLock::new(|| blake3::hash(&CHUNK_3).as_bytes().into());
pub static CHUNK_4_DIGEST: LazyLock<B3Digest> =
LazyLock::new(|| blake3::hash(&CHUNK_4).as_bytes().into());
pub static CHUNK_5_DIGEST: LazyLock<B3Digest> =
LazyLock::new(|| blake3::hash(&CHUNK_5).as_bytes().into());
pub static BLOB_1_LIST: LazyLock<[(B3Digest, u64); 5]> = LazyLock::new(|| {
[
(CHUNK_1_DIGEST.clone(), 2),
(CHUNK_2_DIGEST.clone(), 4),
(CHUNK_3_DIGEST.clone(), 1),
(CHUNK_4_DIGEST.clone(), 2),
(CHUNK_5_DIGEST.clone(), 7),
]
});
use super::ChunkedBlob;
/// ensure the start offsets are properly calculated.
#[test]
fn from_iter() {
let cb = ChunkedBlob::from_iter(
BLOB_1_LIST.clone().into_iter(),
Arc::new(MemoryBlobService::default()) as Arc<dyn BlobService>,
);
assert_eq!(
cb.chunks,
Vec::from_iter([
(0, 2, CHUNK_1_DIGEST.clone()),
(2, 4, CHUNK_2_DIGEST.clone()),
(6, 1, CHUNK_3_DIGEST.clone()),
(7, 2, CHUNK_4_DIGEST.clone()),
(9, 7, CHUNK_5_DIGEST.clone()),
])
);
}
/// ensure ChunkedBlob can't be used with an empty list of chunks
#[test]
#[should_panic]
fn from_iter_empty() {
ChunkedBlob::from_iter(
[].into_iter(),
Arc::new(MemoryBlobService::default()) as Arc<dyn BlobService>,
);
}
/// ensure the right chunk is selected
#[test]
fn chunk_idx_for_position() {
let cb = ChunkedBlob::from_iter(
BLOB_1_LIST.clone().into_iter(),
Arc::new(MemoryBlobService::default()) as Arc<dyn BlobService>,
);
assert_eq!(Some(0), cb.get_chunk_idx_for_position(0), "start of blob");
assert_eq!(
Some(0),
cb.get_chunk_idx_for_position(1),
"middle of first chunk"
);
assert_eq!(
Some(1),
cb.get_chunk_idx_for_position(2),
"beginning of second chunk"
);
assert_eq!(
Some(4),
cb.get_chunk_idx_for_position(15),
"right before the end of the blob"
);
assert_eq!(
None,
cb.get_chunk_idx_for_position(16),
"right outside the blob"
);
assert_eq!(
None,
cb.get_chunk_idx_for_position(100),
"way outside the blob"
);
}
/// returns a blobservice with all chunks in BLOB_1 present.
async fn gen_blobservice_blob1() -> Arc<dyn BlobService> {
let blob_service = Arc::new(MemoryBlobService::default()) as Arc<dyn BlobService>;
// seed blob service with all chunks
for blob_contents in [
CHUNK_1.to_vec(),
CHUNK_2.to_vec(),
CHUNK_3.to_vec(),
CHUNK_4.to_vec(),
CHUNK_5.to_vec(),
] {
let mut bw = blob_service.open_write().await;
tokio::io::copy(&mut std::io::Cursor::new(blob_contents), &mut bw)
.await
.expect("writing blob");
bw.close().await.expect("close blobwriter");
}
blob_service
}
#[tokio::test]
async fn test_read() {
let blob_service = gen_blobservice_blob1().await;
let mut chunked_reader =
ChunkedReader::from_chunks(BLOB_1_LIST.clone().into_iter(), blob_service);
// read all data
let mut buf = Vec::new();
tokio::io::copy(&mut chunked_reader, &mut buf)
.await
.expect("copy");
assert_eq!(
hex!("000102030405060708090a0b0c0d0e0f").to_vec(),
buf,
"read data must match"
);
}
#[tokio::test]
async fn test_seek() {
let blob_service = gen_blobservice_blob1().await;
let mut chunked_reader =
ChunkedReader::from_chunks(BLOB_1_LIST.clone().into_iter(), blob_service);
// seek to the end
// expect to read 0 bytes
{
chunked_reader
.seek(SeekFrom::End(0))
.await
.expect("seek to end");
let mut buf = Vec::new();
chunked_reader
.read_to_end(&mut buf)
.await
.expect("read to end");
assert_eq!(hex!("").to_vec(), buf);
}
// seek one bytes before the end
{
chunked_reader.seek(SeekFrom::End(-1)).await.expect("seek");
let mut buf = Vec::new();
chunked_reader
.read_to_end(&mut buf)
.await
.expect("read to end");
assert_eq!(hex!("0f").to_vec(), buf);
}
// seek back three bytes, but using relative positioning
// read two bytes
{
chunked_reader
.seek(SeekFrom::Current(-3))
.await
.expect("seek");
let mut buf = [0b0; 2];
chunked_reader
.read_exact(&mut buf)
.await
.expect("read exact");
assert_eq!(hex!("0d0e"), buf);
}
}
// seeds a blob service with only the first two chunks, reads a bit in the
// front (which succeeds), but then tries to seek past and read more (which
// should fail).
#[tokio::test]
async fn test_read_missing_chunks() {
let blob_service = Arc::new(MemoryBlobService::default()) as Arc<dyn BlobService>;
for blob_contents in [CHUNK_1.to_vec(), CHUNK_2.to_vec()] {
let mut bw = blob_service.open_write().await;
tokio::io::copy(&mut std::io::Cursor::new(blob_contents), &mut bw)
.await
.expect("writing blob");
bw.close().await.expect("close blobwriter");
}
let mut chunked_reader =
ChunkedReader::from_chunks(BLOB_1_LIST.clone().into_iter(), blob_service);
// read a bit from the front (5 bytes out of 6 available)
let mut buf = [0b0; 5];
chunked_reader
.read_exact(&mut buf)
.await
.expect("read exact");
assert_eq!(hex!("0001020304"), buf);
// seek 2 bytes forward, into an area where we don't have chunks
chunked_reader
.seek(SeekFrom::Current(2))
.await
.expect("seek");
let mut buf = Vec::new();
chunked_reader
.read_to_end(&mut buf)
.await
.expect_err("must fail");
// FUTUREWORK: check semantics on errorkinds. Should this be InvalidData
// or NotFound?
}
}

View file

@ -0,0 +1,131 @@
use std::sync::Arc;
use tonic::async_trait;
use tracing::instrument;
use crate::composition::{CompositionContext, ServiceBuilder};
use crate::{B3Digest, Error};
use super::{BlobReader, BlobService, BlobWriter, ChunkedReader};
/// Combinator for a BlobService, using a "near" and "far" blobservice.
/// Requests are tried in (and returned from) the near store first, only if
/// things are not present there, the far BlobService is queried.
/// In case the near blobservice doesn't have the blob, we ask the remote
/// blobservice for chunks, and try to read each of these chunks from the near
/// blobservice again, before falling back to the far one.
/// The far BlobService is never written to.
pub struct CombinedBlobService<BL, BR> {
instance_name: String,
near: BL,
far: BR,
}
impl<BL, BR> Clone for CombinedBlobService<BL, BR>
where
BL: Clone,
BR: Clone,
{
fn clone(&self) -> Self {
Self {
instance_name: self.instance_name.clone(),
near: self.near.clone(),
far: self.far.clone(),
}
}
}
#[async_trait]
impl<BL, BR> BlobService for CombinedBlobService<BL, BR>
where
BL: AsRef<dyn BlobService> + Clone + Send + Sync + 'static,
BR: AsRef<dyn BlobService> + Clone + Send + Sync + 'static,
{
#[instrument(skip(self, digest), fields(blob.digest=%digest, instance_name=%self.instance_name))]
async fn has(&self, digest: &B3Digest) -> std::io::Result<bool> {
Ok(self.near.as_ref().has(digest).await? || self.far.as_ref().has(digest).await?)
}
#[instrument(skip(self, digest), fields(blob.digest=%digest, instance_name=%self.instance_name), err)]
async fn open_read(&self, digest: &B3Digest) -> std::io::Result<Option<Box<dyn BlobReader>>> {
if self.near.as_ref().has(digest).await? {
// near store has the blob, so we can assume it also has all chunks.
self.near.as_ref().open_read(digest).await
} else {
// near store doesn't have the blob.
// Ask the remote one for the list of chunks,
// and create a chunked reader that uses self.open_read() for
// individual chunks. There's a chance we already have some chunks
// in near, meaning we don't need to fetch them all from the far
// BlobService.
match self.far.as_ref().chunks(digest).await? {
// blob doesn't exist on the near side either, nothing we can do.
None => Ok(None),
Some(remote_chunks) => {
// if there's no more granular chunks, or the far
// blobservice doesn't support chunks, read the blob from
// the far blobservice directly.
if remote_chunks.is_empty() {
return self.far.as_ref().open_read(digest).await;
}
// otherwise, a chunked reader, which will always try the
// near backend first.
let chunked_reader = ChunkedReader::from_chunks(
remote_chunks.into_iter().map(|chunk| {
(
chunk.digest.try_into().expect("invalid b3 digest"),
chunk.size,
)
}),
Arc::new(self.clone()) as Arc<dyn BlobService>,
);
Ok(Some(Box::new(chunked_reader)))
}
}
}
}
#[instrument(skip_all, fields(instance_name=%self.instance_name))]
async fn open_write(&self) -> Box<dyn BlobWriter> {
// direct writes to the near one.
self.near.as_ref().open_write().await
}
}
#[derive(serde::Deserialize, Debug, Clone)]
#[serde(deny_unknown_fields)]
pub struct CombinedBlobServiceConfig {
near: String,
far: String,
}
impl TryFrom<url::Url> for CombinedBlobServiceConfig {
type Error = Box<dyn std::error::Error + Send + Sync>;
fn try_from(_url: url::Url) -> Result<Self, Self::Error> {
Err(Error::StorageError(
"Instantiating a CombinedBlobService from a url is not supported".into(),
)
.into())
}
}
#[async_trait]
impl ServiceBuilder for CombinedBlobServiceConfig {
type Output = dyn BlobService;
async fn build<'a>(
&'a self,
instance_name: &str,
context: &CompositionContext,
) -> Result<Arc<dyn BlobService>, Box<dyn std::error::Error + Send + Sync>> {
let (local, remote) = futures::join!(
context.resolve(self.near.clone()),
context.resolve(self.far.clone())
);
Ok(Arc::new(CombinedBlobService {
instance_name: instance_name.to_string(),
near: local?,
far: remote?,
}))
}
}

View file

@ -0,0 +1,88 @@
use std::sync::Arc;
use url::Url;
use crate::composition::{
with_registry, CompositionContext, DeserializeWithRegistry, ServiceBuilder, REG,
};
use super::BlobService;
/// Constructs a new instance of a [BlobService] from an URI.
///
/// The following schemes are supported by the following services:
/// - `memory://` ([super::MemoryBlobService])
/// - `grpc+*://` ([super::GRPCBlobService])
/// - `objectstore+*://` ([super::ObjectStoreBlobService])
///
/// See their `from_url` methods for more details about their syntax.
pub async fn from_addr(
uri: &str,
) -> Result<Arc<dyn BlobService>, Box<dyn std::error::Error + Send + Sync>> {
let url = Url::parse(uri)
.map_err(|e| crate::Error::StorageError(format!("unable to parse url: {}", e)))?;
let blob_service_config = with_registry(&REG, || {
<DeserializeWithRegistry<Box<dyn ServiceBuilder<Output = dyn BlobService>>>>::try_from(url)
})?
.0;
let blob_service = blob_service_config
.build("anonymous", &CompositionContext::blank(&REG))
.await?;
Ok(blob_service)
}
#[cfg(test)]
mod tests {
use super::from_addr;
use rstest::rstest;
#[rstest]
/// This uses an unsupported scheme.
#[case::unsupported_scheme("http://foo.example/test", false)]
/// This correctly sets the scheme, and doesn't set a path.
#[case::memory_valid("memory://", true)]
/// This sets a memory url host to `foo`
#[case::memory_invalid_host("memory://foo", false)]
/// This sets a memory url path to "/", which is invalid.
#[case::memory_invalid_root_path("memory:///", false)]
/// This sets a memory url path to "/foo", which is invalid.
#[case::memory_invalid_root_path_foo("memory:///foo", false)]
/// Correct scheme to connect to a unix socket.
#[case::grpc_valid_unix_socket("grpc+unix:///path/to/somewhere", true)]
/// Correct scheme for unix socket, but setting a host too, which is invalid.
#[case::grpc_invalid_unix_socket_and_host("grpc+unix://host.example/path/to/somewhere", false)]
/// Correct scheme to connect to localhost, with port 12345
#[case::grpc_valid_ipv6_localhost_port_12345("grpc+http://[::1]:12345", true)]
/// Correct scheme to connect to localhost over http, without specifying a port.
#[case::grpc_valid_http_host_without_port("grpc+http://localhost", true)]
/// Correct scheme to connect to localhost over http, without specifying a port.
#[case::grpc_valid_https_host_without_port("grpc+https://localhost", true)]
/// Correct scheme to connect to localhost over http, but with additional path, which is invalid.
#[case::grpc_invalid_has_path("grpc+http://localhost/some-path", false)]
/// An example for object store (InMemory)
#[case::objectstore_valid_memory("objectstore+memory:///", true)]
/// An example for object store (LocalFileSystem)
#[case::objectstore_valid_file("objectstore+file:///foo/bar", true)]
// An example for object store (HTTP / WebDAV)
#[case::objectstore_valid_http_url("objectstore+https://localhost:8080/some-path", true)]
/// An example for object store (S3)
#[cfg_attr(
feature = "cloud",
case::objectstore_valid_s3_url("objectstore+s3://bucket/path", true)
)]
/// An example for object store (GCS)
#[cfg_attr(
feature = "cloud",
case::objectstore_valid_gcs_url("objectstore+gs://bucket/path", true)
)]
#[tokio::test]
async fn test_from_addr_tokio(#[case] uri_str: &str, #[case] exp_succeed: bool) {
if exp_succeed {
from_addr(uri_str).await.expect("should succeed");
} else {
assert!(from_addr(uri_str).await.is_err(), "should fail");
}
}
}

View file

@ -0,0 +1,398 @@
use super::{BlobReader, BlobService, BlobWriter, ChunkedReader};
use crate::composition::{CompositionContext, ServiceBuilder};
use crate::{
proto::{self, stat_blob_response::ChunkMeta},
B3Digest,
};
use futures::sink::SinkExt;
use std::{
io::{self, Cursor},
pin::pin,
sync::Arc,
task::Poll,
};
use tokio::io::AsyncWriteExt;
use tokio::task::JoinHandle;
use tokio_stream::{wrappers::ReceiverStream, StreamExt};
use tokio_util::{
io::{CopyToBytes, SinkWriter},
sync::PollSender,
};
use tonic::{async_trait, Code, Status};
use tracing::{instrument, Instrument as _};
/// Connects to a (remote) snix-store BlobService over gRPC.
#[derive(Clone)]
pub struct GRPCBlobService<T> {
instance_name: String,
/// The internal reference to a gRPC client.
/// Cloning it is cheap, and it internally handles concurrent requests.
grpc_client: proto::blob_service_client::BlobServiceClient<T>,
}
impl<T> GRPCBlobService<T> {
/// construct a [GRPCBlobService] from a [proto::blob_service_client::BlobServiceClient].
pub fn from_client(
instance_name: String,
grpc_client: proto::blob_service_client::BlobServiceClient<T>,
) -> Self {
Self {
instance_name,
grpc_client,
}
}
}
#[async_trait]
impl<T> BlobService for GRPCBlobService<T>
where
T: tonic::client::GrpcService<tonic::body::BoxBody> + Send + Sync + Clone + 'static,
T::ResponseBody: tonic::codegen::Body<Data = tonic::codegen::Bytes> + Send + 'static,
<T::ResponseBody as tonic::codegen::Body>::Error: Into<tonic::codegen::StdError> + Send,
T::Future: Send,
{
#[instrument(skip(self, digest), fields(blob.digest=%digest, instance_name=%self.instance_name))]
async fn has(&self, digest: &B3Digest) -> io::Result<bool> {
match self
.grpc_client
.clone()
.stat(proto::StatBlobRequest {
digest: digest.clone().into(),
..Default::default()
})
.await
{
Ok(_blob_meta) => Ok(true),
Err(e) if e.code() == Code::NotFound => Ok(false),
Err(e) => Err(io::Error::new(io::ErrorKind::Other, e)),
}
}
#[instrument(skip(self, digest), fields(blob.digest=%digest, instance_name=%self.instance_name), err)]
async fn open_read(&self, digest: &B3Digest) -> io::Result<Option<Box<dyn BlobReader>>> {
// First try to get a list of chunks. In case there's only one chunk returned,
// buffer its data into a Vec, otherwise use a ChunkedReader.
// We previously used NaiveSeeker here, but userland likes to seek backwards too often,
// and without store composition this will get very noisy.
// FUTUREWORK: use CombinedBlobService and store composition.
match self.chunks(digest).await {
Ok(None) => Ok(None),
Ok(Some(chunks)) => {
if chunks.is_empty() || chunks.len() == 1 {
// No more granular chunking info, treat this as an individual chunk.
// Get a stream of [proto::BlobChunk], or return an error if the blob
// doesn't exist.
return match self
.grpc_client
.clone()
.read(proto::ReadBlobRequest {
digest: digest.clone().into(),
})
.await
{
Ok(stream) => {
let data_stream = stream.into_inner().map(|e| {
e.map(|c| c.data)
.map_err(|s| std::io::Error::new(io::ErrorKind::InvalidData, s))
});
// Use StreamReader::new to convert to an AsyncRead.
let mut data_reader = tokio_util::io::StreamReader::new(data_stream);
let mut buf = Vec::new();
// TODO: only do this up to a certain limit.
tokio::io::copy(&mut data_reader, &mut buf).await?;
Ok(Some(Box::new(Cursor::new(buf))))
}
Err(e) if e.code() == Code::NotFound => Ok(None),
Err(e) => Err(io::Error::new(io::ErrorKind::Other, e)),
};
}
// The chunked case. Let ChunkedReader do individual reads.
// TODO: we should store the chunking data in some local cache,
// so `ChunkedReader` doesn't call `self.chunks` *again* for every chunk.
// Think about how store composition will fix this.
let chunked_reader = ChunkedReader::from_chunks(
chunks.into_iter().map(|chunk| {
(
chunk.digest.try_into().expect("invalid b3 digest"),
chunk.size,
)
}),
Arc::new(self.clone()) as Arc<dyn BlobService>,
);
Ok(Some(Box::new(chunked_reader)))
}
Err(e) => Err(e)?,
}
}
/// Returns a BlobWriter, that'll internally wrap each write in a
/// [proto::BlobChunk], which is send to the gRPC server.
#[instrument(skip_all, fields(instance_name=%self.instance_name))]
async fn open_write(&self) -> Box<dyn BlobWriter> {
// set up an mpsc channel passing around Bytes.
let (tx, rx) = tokio::sync::mpsc::channel::<bytes::Bytes>(10);
// bytes arriving on the RX side are wrapped inside a
// [proto::BlobChunk], and a [ReceiverStream] is constructed.
let blobchunk_stream = ReceiverStream::new(rx).map(|x| proto::BlobChunk { data: x });
// spawn the gRPC put request, which will read from blobchunk_stream.
let task = tokio::spawn({
let mut grpc_client = self.grpc_client.clone();
async move { Ok::<_, Status>(grpc_client.put(blobchunk_stream).await?.into_inner()) }
// instrument the task with the current span, this is not done by default
.in_current_span()
});
// The tx part of the channel is converted to a sink of byte chunks.
let sink = PollSender::new(tx)
.sink_map_err(|e| std::io::Error::new(std::io::ErrorKind::BrokenPipe, e));
// … which is turned into an [tokio::io::AsyncWrite].
let writer = SinkWriter::new(CopyToBytes::new(sink));
Box::new(GRPCBlobWriter {
task_and_writer: Some((task, writer)),
digest: None,
})
}
#[instrument(skip(self, digest), fields(blob.digest=%digest, instance_name=%self.instance_name), err)]
async fn chunks(&self, digest: &B3Digest) -> io::Result<Option<Vec<ChunkMeta>>> {
let resp = self
.grpc_client
.clone()
.stat(proto::StatBlobRequest {
digest: digest.clone().into(),
send_chunks: true,
..Default::default()
})
.await;
match resp {
Err(e) if e.code() == Code::NotFound => Ok(None),
Err(e) => Err(io::Error::new(io::ErrorKind::Other, e)),
Ok(resp) => {
let resp = resp.into_inner();
resp.validate()
.map_err(|e| std::io::Error::new(io::ErrorKind::InvalidData, e))?;
Ok(Some(resp.chunks))
}
}
}
}
#[derive(serde::Deserialize, Debug)]
#[serde(deny_unknown_fields)]
pub struct GRPCBlobServiceConfig {
url: String,
}
impl TryFrom<url::Url> for GRPCBlobServiceConfig {
type Error = Box<dyn std::error::Error + Send + Sync>;
fn try_from(url: url::Url) -> Result<Self, Self::Error> {
// normally grpc+unix for unix sockets, and grpc+http(s) for the HTTP counterparts.
// - In the case of unix sockets, there must be a path, but may not be a host.
// - In the case of non-unix sockets, there must be a host, but no path.
// Constructing the channel is handled by snix_castore::channel::from_url.
Ok(GRPCBlobServiceConfig {
url: url.to_string(),
})
}
}
#[async_trait]
impl ServiceBuilder for GRPCBlobServiceConfig {
type Output = dyn BlobService;
async fn build<'a>(
&'a self,
instance_name: &str,
_context: &CompositionContext,
) -> Result<Arc<dyn BlobService>, Box<dyn std::error::Error + Send + Sync + 'static>> {
let client = proto::blob_service_client::BlobServiceClient::new(
crate::tonic::channel_from_url(&self.url.parse()?).await?,
);
Ok(Arc::new(GRPCBlobService::from_client(
instance_name.to_string(),
client,
)))
}
}
pub struct GRPCBlobWriter<W: tokio::io::AsyncWrite> {
/// The task containing the put request, and the inner writer, if we're still writing.
task_and_writer: Option<(JoinHandle<Result<proto::PutBlobResponse, Status>>, W)>,
/// The digest that has been returned, if we successfully closed.
digest: Option<B3Digest>,
}
#[async_trait]
impl<W: tokio::io::AsyncWrite + Send + Sync + Unpin + 'static> BlobWriter for GRPCBlobWriter<W> {
async fn close(&mut self) -> io::Result<B3Digest> {
if self.task_and_writer.is_none() {
// if we're already closed, return the b3 digest, which must exist.
// If it doesn't, we already closed and failed once, and didn't handle the error.
match &self.digest {
Some(digest) => Ok(digest.clone()),
None => Err(io::Error::new(io::ErrorKind::BrokenPipe, "already closed")),
}
} else {
let (task, mut writer) = self.task_and_writer.take().unwrap();
// invoke shutdown, so the inner writer closes its internal tx side of
// the channel.
writer.shutdown().await?;
// block on the RPC call to return.
// This ensures all chunks are sent out, and have been received by the
// backend.
match task.await? {
Ok(resp) => {
// return the digest from the response, and store it in self.digest for subsequent closes.
let digest_len = resp.digest.len();
let digest: B3Digest = resp.digest.try_into().map_err(|_| {
io::Error::new(
io::ErrorKind::Other,
format!("invalid root digest length {} in response", digest_len),
)
})?;
self.digest = Some(digest.clone());
Ok(digest)
}
Err(e) => Err(io::Error::new(io::ErrorKind::Other, e.to_string())),
}
}
}
}
impl<W: tokio::io::AsyncWrite + Unpin> tokio::io::AsyncWrite for GRPCBlobWriter<W> {
fn poll_write(
mut self: std::pin::Pin<&mut Self>,
cx: &mut std::task::Context<'_>,
buf: &[u8],
) -> std::task::Poll<Result<usize, io::Error>> {
match &mut self.task_and_writer {
None => Poll::Ready(Err(io::Error::new(
io::ErrorKind::NotConnected,
"already closed",
))),
Some((_, ref mut writer)) => {
let pinned_writer = pin!(writer);
pinned_writer.poll_write(cx, buf)
}
}
}
fn poll_flush(
mut self: std::pin::Pin<&mut Self>,
cx: &mut std::task::Context<'_>,
) -> std::task::Poll<Result<(), io::Error>> {
match &mut self.task_and_writer {
None => Poll::Ready(Err(io::Error::new(
io::ErrorKind::NotConnected,
"already closed",
))),
Some((_, ref mut writer)) => {
let pinned_writer = pin!(writer);
pinned_writer.poll_flush(cx)
}
}
}
fn poll_shutdown(
self: std::pin::Pin<&mut Self>,
_cx: &mut std::task::Context<'_>,
) -> std::task::Poll<Result<(), io::Error>> {
// TODO(raitobezarius): this might not be a graceful shutdown of the
// channel inside the gRPC connection.
Poll::Ready(Ok(()))
}
}
#[cfg(test)]
mod tests {
use std::time::Duration;
use tempfile::TempDir;
use tokio::net::UnixListener;
use tokio_retry::strategy::ExponentialBackoff;
use tokio_retry::Retry;
use tokio_stream::wrappers::UnixListenerStream;
use crate::blobservice::MemoryBlobService;
use crate::fixtures;
use crate::proto::blob_service_client::BlobServiceClient;
use crate::proto::GRPCBlobServiceWrapper;
use super::BlobService;
use super::GRPCBlobService;
/// This ensures connecting via gRPC works as expected.
#[tokio::test]
async fn test_valid_unix_path_ping_pong() {
let tmpdir = TempDir::new().unwrap();
let socket_path = tmpdir.path().join("daemon");
let path_clone = socket_path.clone();
// Spin up a server
tokio::spawn(async {
let uds = UnixListener::bind(path_clone).unwrap();
let uds_stream = UnixListenerStream::new(uds);
// spin up a new server
let mut server = tonic::transport::Server::builder();
let router =
server.add_service(crate::proto::blob_service_server::BlobServiceServer::new(
GRPCBlobServiceWrapper::new(
Box::<MemoryBlobService>::default() as Box<dyn BlobService>
),
));
router.serve_with_incoming(uds_stream).await
});
// wait for the socket to be created
Retry::spawn(
ExponentialBackoff::from_millis(20).max_delay(Duration::from_secs(10)),
|| async {
if socket_path.exists() {
Ok(())
} else {
Err(())
}
},
)
.await
.expect("failed to wait for socket");
// prepare a client
let grpc_client = {
let url = url::Url::parse(&format!(
"grpc+unix://{}?wait-connect=1",
socket_path.display()
))
.expect("must parse");
let client = BlobServiceClient::new(
crate::tonic::channel_from_url(&url)
.await
.expect("must succeed"),
);
GRPCBlobService::from_client("root".into(), client)
};
let has = grpc_client
.has(&fixtures::BLOB_A_DIGEST)
.await
.expect("must not be err");
assert!(!has);
}
}

View file

@ -0,0 +1,159 @@
use parking_lot::RwLock;
use std::io::{self, Cursor, Write};
use std::task::Poll;
use std::{collections::HashMap, sync::Arc};
use tonic::async_trait;
use tracing::instrument;
use super::{BlobReader, BlobService, BlobWriter};
use crate::composition::{CompositionContext, ServiceBuilder};
use crate::{B3Digest, Error};
#[derive(Clone, Default)]
pub struct MemoryBlobService {
instance_name: String,
db: Arc<RwLock<HashMap<B3Digest, Vec<u8>>>>,
}
#[async_trait]
impl BlobService for MemoryBlobService {
#[instrument(skip_all, ret, err, fields(blob.digest=%digest, instance_name=%self.instance_name))]
async fn has(&self, digest: &B3Digest) -> io::Result<bool> {
let db = self.db.read();
Ok(db.contains_key(digest))
}
#[instrument(skip_all, err, fields(blob.digest=%digest, instance_name=%self.instance_name))]
async fn open_read(&self, digest: &B3Digest) -> io::Result<Option<Box<dyn BlobReader>>> {
let db = self.db.read();
match db.get(digest).map(|x| Cursor::new(x.clone())) {
Some(result) => Ok(Some(Box::new(result))),
None => Ok(None),
}
}
#[instrument(skip_all, fields(instance_name=%self.instance_name))]
async fn open_write(&self) -> Box<dyn BlobWriter> {
Box::new(MemoryBlobWriter::new(self.db.clone()))
}
}
#[derive(serde::Deserialize, Debug)]
#[serde(deny_unknown_fields)]
pub struct MemoryBlobServiceConfig {}
impl TryFrom<url::Url> for MemoryBlobServiceConfig {
type Error = Box<dyn std::error::Error + Send + Sync>;
fn try_from(url: url::Url) -> Result<Self, Self::Error> {
// memory doesn't support host or path in the URL.
if url.has_host() || !url.path().is_empty() {
return Err(Error::StorageError("invalid url".to_string()).into());
}
Ok(MemoryBlobServiceConfig {})
}
}
#[async_trait]
impl ServiceBuilder for MemoryBlobServiceConfig {
type Output = dyn BlobService;
async fn build<'a>(
&'a self,
instance_name: &str,
_context: &CompositionContext,
) -> Result<Arc<dyn BlobService>, Box<dyn std::error::Error + Send + Sync + 'static>> {
Ok(Arc::new(MemoryBlobService {
instance_name: instance_name.to_string(),
db: Default::default(),
}))
}
}
pub struct MemoryBlobWriter {
db: Arc<RwLock<HashMap<B3Digest, Vec<u8>>>>,
/// Contains the buffer Vec and hasher, or None if already closed
writers: Option<(Vec<u8>, blake3::Hasher)>,
/// The digest that has been returned, if we successfully closed.
digest: Option<B3Digest>,
}
impl MemoryBlobWriter {
fn new(db: Arc<RwLock<HashMap<B3Digest, Vec<u8>>>>) -> Self {
Self {
db,
writers: Some((Vec::new(), blake3::Hasher::new())),
digest: None,
}
}
}
impl tokio::io::AsyncWrite for MemoryBlobWriter {
fn poll_write(
mut self: std::pin::Pin<&mut Self>,
_cx: &mut std::task::Context<'_>,
b: &[u8],
) -> std::task::Poll<Result<usize, io::Error>> {
Poll::Ready(match &mut self.writers {
None => Err(io::Error::new(
io::ErrorKind::NotConnected,
"already closed",
)),
Some((ref mut buf, ref mut hasher)) => {
let bytes_written = buf.write(b)?;
hasher.write(&b[..bytes_written])
}
})
}
fn poll_flush(
self: std::pin::Pin<&mut Self>,
_cx: &mut std::task::Context<'_>,
) -> std::task::Poll<Result<(), io::Error>> {
Poll::Ready(match self.writers {
None => Err(io::Error::new(
io::ErrorKind::NotConnected,
"already closed",
)),
Some(_) => Ok(()),
})
}
fn poll_shutdown(
self: std::pin::Pin<&mut Self>,
_cx: &mut std::task::Context<'_>,
) -> std::task::Poll<Result<(), io::Error>> {
// shutdown is "instantaneous", we only write to memory.
Poll::Ready(Ok(()))
}
}
#[async_trait]
impl BlobWriter for MemoryBlobWriter {
async fn close(&mut self) -> io::Result<B3Digest> {
if self.writers.is_none() {
match &self.digest {
Some(digest) => Ok(digest.clone()),
None => Err(io::Error::new(io::ErrorKind::BrokenPipe, "already closed")),
}
} else {
let (buf, hasher) = self.writers.take().unwrap();
let digest: B3Digest = hasher.finalize().as_bytes().into();
// Only insert if the blob doesn't already exist.
let mut db = self.db.upgradable_read();
if !db.contains_key(&digest) {
// open the database for writing.
db.with_upgraded(|db| {
// and put buf in there. This will move buf out.
db.insert(digest.clone(), buf);
});
}
self.digest = Some(digest.clone());
Ok(digest)
}
}
}

View file

@ -0,0 +1,92 @@
use std::io;
use auto_impl::auto_impl;
use tonic::async_trait;
use crate::composition::{Registry, ServiceBuilder};
use crate::proto::stat_blob_response::ChunkMeta;
use crate::B3Digest;
mod chunked_reader;
mod combinator;
mod from_addr;
mod grpc;
mod memory;
mod object_store;
#[cfg(test)]
pub mod tests;
pub use self::chunked_reader::ChunkedReader;
pub use self::combinator::{CombinedBlobService, CombinedBlobServiceConfig};
pub use self::from_addr::from_addr;
pub use self::grpc::{GRPCBlobService, GRPCBlobServiceConfig};
pub use self::memory::{MemoryBlobService, MemoryBlobServiceConfig};
pub use self::object_store::{ObjectStoreBlobService, ObjectStoreBlobServiceConfig};
/// The base trait all BlobService services need to implement.
/// It provides functions to check whether a given blob exists,
/// a way to read (and seek) a blob, and a method to create a blobwriter handle,
/// which will implement a writer interface, and also provides a close funtion,
/// to finalize a blob and get its digest.
#[async_trait]
#[auto_impl(&, &mut, Arc, Box)]
pub trait BlobService: Send + Sync {
/// Check if the service has the blob, by its content hash.
/// On implementations returning chunks, this must also work for chunks.
async fn has(&self, digest: &B3Digest) -> io::Result<bool>;
/// Request a blob from the store, by its content hash.
/// On implementations returning chunks, this must also work for chunks.
async fn open_read(&self, digest: &B3Digest) -> io::Result<Option<Box<dyn BlobReader>>>;
/// Insert a new blob into the store. Returns a [BlobWriter], which
/// implements [tokio::io::AsyncWrite] and a [BlobWriter::close] to finalize
/// the blob and get its digest.
async fn open_write(&self) -> Box<dyn BlobWriter>;
/// Return a list of chunks for a given blob.
/// There's a distinction between returning Ok(None) and Ok(Some(vec![])).
/// The former return value is sent in case the blob is not present at all,
/// while the second one is sent in case there's no more granular chunks (or
/// the backend does not support chunking).
/// A default implementation checking for existence and then returning it
/// does not have more granular chunks available is provided.
async fn chunks(&self, digest: &B3Digest) -> io::Result<Option<Vec<ChunkMeta>>> {
if !self.has(digest).await? {
return Ok(None);
}
// default implementation, signalling the backend does not have more
// granular chunks available.
Ok(Some(vec![]))
}
}
/// A [tokio::io::AsyncWrite] that the user needs to close() afterwards for persist.
/// On success, it returns the digest of the written blob.
#[async_trait]
pub trait BlobWriter: tokio::io::AsyncWrite + Send + Unpin {
/// Signal there's no more data to be written, and return the digest of the
/// contents written.
///
/// Closing a already-closed BlobWriter is a no-op.
async fn close(&mut self) -> io::Result<B3Digest>;
}
/// BlobReader is a [tokio::io::AsyncRead] that also allows seeking.
pub trait BlobReader: tokio::io::AsyncRead + tokio::io::AsyncSeek + Send + Unpin + 'static {}
/// A [`io::Cursor<Vec<u8>>`] can be used as a BlobReader.
impl BlobReader for io::Cursor<&'static [u8]> {}
impl BlobReader for io::Cursor<&'static [u8; 0]> {}
impl BlobReader for io::Cursor<Vec<u8>> {}
impl BlobReader for io::Cursor<bytes::Bytes> {}
impl BlobReader for tokio::fs::File {}
/// Registers the builtin BlobService implementations with the registry
pub(crate) fn register_blob_services(reg: &mut Registry) {
reg.register::<Box<dyn ServiceBuilder<Output = dyn BlobService>>, super::blobservice::ObjectStoreBlobServiceConfig>("objectstore");
reg.register::<Box<dyn ServiceBuilder<Output = dyn BlobService>>, super::blobservice::MemoryBlobServiceConfig>("memory");
reg.register::<Box<dyn ServiceBuilder<Output = dyn BlobService>>, super::blobservice::CombinedBlobServiceConfig>("combined");
reg.register::<Box<dyn ServiceBuilder<Output = dyn BlobService>>, super::blobservice::GRPCBlobServiceConfig>("grpc");
}

View file

@ -0,0 +1,634 @@
use std::{
collections::{hash_map, HashMap},
io::{self, Cursor},
pin::pin,
sync::Arc,
task::Poll,
};
use data_encoding::HEXLOWER;
use fastcdc::v2020::AsyncStreamCDC;
use futures::Future;
use object_store::{path::Path, ObjectStore};
use pin_project_lite::pin_project;
use prost::Message;
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
use tokio_stream::StreamExt;
use tonic::async_trait;
use tracing::{debug, instrument, trace, Level};
use url::Url;
use crate::{
composition::{CompositionContext, ServiceBuilder},
proto::{stat_blob_response::ChunkMeta, StatBlobResponse},
B3Digest, B3HashingReader, Error,
};
use super::{BlobReader, BlobService, BlobWriter, ChunkedReader};
/// Uses any object storage supported by the [object_store] crate to provide a
/// snix-castore [BlobService].
///
/// # Data format
/// Data is organized in "blobs" and "chunks".
/// Blobs don't hold the actual data, but instead contain a list of more
/// granular chunks that assemble to the contents requested.
/// This allows clients to seek, and not download chunks they already have
/// locally, as it's referred to from other files.
/// Check `rpc_blobstore` and more general BlobStore docs on that.
///
/// ## Blobs
/// Stored at `${base_path}/blobs/b3/$digest_key`. They contains the serialized
/// StatBlobResponse for the blob with the digest.
///
/// ## Chunks
/// Chunks are stored at `${base_path}/chunks/b3/$digest_key`. They contain
/// the literal contents of the chunk, but are zstd-compressed.
///
/// ## Digest key sharding
/// The blake3 digest encoded in lower hex, and sharded after the second
/// character.
/// The blob for "Hello World" is stored at
/// `${base_path}/blobs/b3/41/41f8394111eb713a22165c46c90ab8f0fd9399c92028fd6d288944b23ff5bf76`.
///
/// This reduces the number of files in the same directory, which would be a
/// problem at least when using [object_store::local::LocalFileSystem].
///
/// # Future changes
/// There's no guarantees about this being a final format yet.
/// Once object_store gets support for additional metadata / content-types,
/// we can eliminate some requests (small blobs only consisting of a single
/// chunk can be stored as-is, without the blob index file).
/// It also allows signalling any compression of chunks in the content-type.
/// Migration *should* be possible by simply adding the right content-types to
/// all keys stored so far, but no promises ;-)
#[derive(Clone)]
pub struct ObjectStoreBlobService {
instance_name: String,
object_store: Arc<dyn ObjectStore>,
base_path: Path,
/// Average chunk size for FastCDC, in bytes.
/// min value is half, max value double of that number.
avg_chunk_size: u32,
}
#[instrument(level=Level::TRACE, skip_all,fields(base_path=%base_path,blob.digest=%digest),ret(Display))]
fn derive_blob_path(base_path: &Path, digest: &B3Digest) -> Path {
base_path
.child("blobs")
.child("b3")
.child(HEXLOWER.encode(&digest[..2]))
.child(HEXLOWER.encode(&digest[..]))
}
#[instrument(level=Level::TRACE, skip_all,fields(base_path=%base_path,chunk.digest=%digest),ret(Display))]
fn derive_chunk_path(base_path: &Path, digest: &B3Digest) -> Path {
base_path
.child("chunks")
.child("b3")
.child(HEXLOWER.encode(&digest[..2]))
.child(HEXLOWER.encode(&digest[..]))
}
#[async_trait]
impl BlobService for ObjectStoreBlobService {
#[instrument(skip_all, ret(level = Level::TRACE), err, fields(blob.digest=%digest, instance_name=%self.instance_name))]
async fn has(&self, digest: &B3Digest) -> io::Result<bool> {
// TODO: clarify if this should work for chunks or not, and explicitly
// document in the proto docs.
let p = derive_blob_path(&self.base_path, digest);
match self.object_store.head(&p).await {
Ok(_) => Ok(true),
Err(object_store::Error::NotFound { .. }) => {
let p = derive_chunk_path(&self.base_path, digest);
match self.object_store.head(&p).await {
Ok(_) => Ok(true),
Err(object_store::Error::NotFound { .. }) => Ok(false),
Err(e) => Err(e)?,
}
}
Err(e) => Err(e)?,
}
}
#[instrument(skip_all, err, fields(blob.digest=%digest, instance_name=%self.instance_name))]
async fn open_read(&self, digest: &B3Digest) -> io::Result<Option<Box<dyn BlobReader>>> {
// handle reading the empty blob.
if digest.as_slice() == blake3::hash(b"").as_bytes() {
return Ok(Some(Box::new(Cursor::new(b"")) as Box<dyn BlobReader>));
}
match self
.object_store
.get(&derive_chunk_path(&self.base_path, digest))
.await
{
Ok(res) => {
// handle reading blobs that are small enough to fit inside a single chunk:
// fetch the entire chunk into memory, decompress, ensure the b3 digest matches,
// and return a io::Cursor over that data.
// FUTUREWORK: use zstd::bulk to prevent decompression bombs
let chunk_raw_bytes = res.bytes().await?;
let chunk_contents = zstd::stream::decode_all(Cursor::new(chunk_raw_bytes))?;
if *digest != blake3::hash(&chunk_contents).as_bytes().into() {
Err(io::Error::other("chunk contents invalid"))?;
}
Ok(Some(Box::new(Cursor::new(chunk_contents))))
}
Err(object_store::Error::NotFound { .. }) => {
// NOTE: For public-facing things, we would want to stop here.
// Clients should fetch granularly, so they can make use of
// chunks they have locally.
// However, if this is used directly, without any caches, do the
// assembly here.
// This is subject to change, once we have store composition.
// TODO: make this configurable, and/or clarify behaviour for
// the gRPC server surface (explicitly document behaviour in the
// proto docs)
if let Some(chunks) = self.chunks(digest).await? {
let chunked_reader = ChunkedReader::from_chunks(
chunks.into_iter().map(|chunk| {
(
chunk.digest.try_into().expect("invalid b3 digest"),
chunk.size,
)
}),
Arc::new(self.clone()) as Arc<dyn BlobService>,
);
Ok(Some(Box::new(chunked_reader)))
} else {
// This is neither a chunk nor a blob, return None.
Ok(None)
}
}
Err(e) => Err(e.into()),
}
}
#[instrument(skip_all, fields(instance_name=%self.instance_name))]
async fn open_write(&self) -> Box<dyn BlobWriter> {
// ObjectStoreBlobWriter implements AsyncWrite, but all the chunking
// needs an AsyncRead, so we create a pipe here.
// In its `AsyncWrite` implementation, `ObjectStoreBlobWriter` delegates
// writes to w. It periodically polls the future that's reading from the
// other side.
let (w, r) = tokio::io::duplex(self.avg_chunk_size as usize * 10);
Box::new(ObjectStoreBlobWriter {
writer: Some(w),
fut: Some(Box::pin(chunk_and_upload(
r,
self.object_store.clone(),
self.base_path.clone(),
self.avg_chunk_size / 2,
self.avg_chunk_size,
self.avg_chunk_size * 2,
))),
fut_output: None,
})
}
#[instrument(skip_all, err, fields(blob.digest=%digest, instance_name=%self.instance_name))]
async fn chunks(&self, digest: &B3Digest) -> io::Result<Option<Vec<ChunkMeta>>> {
match self
.object_store
.get(&derive_blob_path(&self.base_path, digest))
.await
{
Ok(get_result) => {
// fetch the data at the blob path
let blob_data = get_result.bytes().await?;
// parse into StatBlobResponse
let stat_blob_response: StatBlobResponse = StatBlobResponse::decode(blob_data)?;
debug!(
chunk.count = stat_blob_response.chunks.len(),
blob.size = stat_blob_response
.chunks
.iter()
.map(|x| x.size)
.sum::<u64>(),
"found more granular chunks"
);
Ok(Some(stat_blob_response.chunks))
}
Err(object_store::Error::NotFound { .. }) => {
// If there's only a chunk, we must return the empty vec here, rather than None.
match self
.object_store
.head(&derive_chunk_path(&self.base_path, digest))
.await
{
Ok(_) => {
// present, but no more chunks available
debug!("found a single chunk");
Ok(Some(vec![]))
}
Err(object_store::Error::NotFound { .. }) => {
// Neither blob nor single chunk found
debug!("not found");
Ok(None)
}
// error checking for chunk
Err(e) => Err(e.into()),
}
}
// error checking for blob
Err(err) => Err(err.into()),
}
}
}
fn default_avg_chunk_size() -> u32 {
256 * 1024
}
#[derive(serde::Deserialize)]
#[serde(deny_unknown_fields)]
pub struct ObjectStoreBlobServiceConfig {
object_store_url: String,
#[serde(default = "default_avg_chunk_size")]
avg_chunk_size: u32,
object_store_options: HashMap<String, String>,
}
impl TryFrom<url::Url> for ObjectStoreBlobServiceConfig {
type Error = Box<dyn std::error::Error + Send + Sync>;
/// Constructs a new [ObjectStoreBlobService] from a [Url] supported by
/// [object_store].
/// Any path suffix becomes the base path of the object store.
/// additional options, the same as in [object_store::parse_url_opts] can
/// be passed.
fn try_from(url: url::Url) -> Result<Self, Self::Error> {
// We need to convert the URL to string, strip the prefix there, and then
// parse it back as url, as Url::set_scheme() rejects some of the transitions we want to do.
let trimmed_url = {
let s = url.to_string();
let mut url = Url::parse(
s.strip_prefix("objectstore+")
.ok_or(Error::StorageError("Missing objectstore uri".into()))?,
)?;
// trim the query pairs, they might contain credentials or local settings we don't want to send as-is.
url.set_query(None);
url
};
Ok(ObjectStoreBlobServiceConfig {
object_store_url: trimmed_url.into(),
object_store_options: url
.query_pairs()
.into_iter()
.map(|(k, v)| (k.to_string(), v.to_string()))
.collect(),
avg_chunk_size: 256 * 1024,
})
}
}
#[async_trait]
impl ServiceBuilder for ObjectStoreBlobServiceConfig {
type Output = dyn BlobService;
async fn build<'a>(
&'a self,
instance_name: &str,
_context: &CompositionContext,
) -> Result<Arc<dyn BlobService>, Box<dyn std::error::Error + Send + Sync + 'static>> {
let opts = {
let mut opts: HashMap<&str, _> = self
.object_store_options
.iter()
.map(|(k, v)| (k.as_str(), v.as_str()))
.collect();
if let hash_map::Entry::Vacant(e) =
opts.entry(object_store::ClientConfigKey::UserAgent.as_ref())
{
e.insert(crate::USER_AGENT);
}
opts
};
let (object_store, path) =
object_store::parse_url_opts(&self.object_store_url.parse()?, opts)?;
Ok(Arc::new(ObjectStoreBlobService {
instance_name: instance_name.to_string(),
object_store: Arc::new(object_store),
base_path: path,
avg_chunk_size: self.avg_chunk_size,
}))
}
}
/// Reads blob contents from a AsyncRead, chunks and uploads them.
/// On success, returns a [StatBlobResponse] pointing to the individual chunks.
#[instrument(skip_all, fields(base_path=%base_path, min_chunk_size, avg_chunk_size, max_chunk_size), err)]
async fn chunk_and_upload<R: AsyncRead + Unpin>(
r: R,
object_store: Arc<dyn ObjectStore>,
base_path: Path,
min_chunk_size: u32,
avg_chunk_size: u32,
max_chunk_size: u32,
) -> io::Result<B3Digest> {
// wrap reader with something calculating the blake3 hash of all data read.
let mut b3_r = B3HashingReader::from(r);
// set up a fastcdc chunker
let mut chunker =
AsyncStreamCDC::new(&mut b3_r, min_chunk_size, avg_chunk_size, max_chunk_size);
/// This really should just belong into the closure at
/// `chunker.as_stream().then(|_| { … })``, but if we try to, rustc spits
/// higher-ranked lifetime errors at us.
async fn fastcdc_chunk_uploader(
resp: Result<fastcdc::v2020::ChunkData, fastcdc::v2020::Error>,
base_path: Path,
object_store: Arc<dyn ObjectStore>,
) -> std::io::Result<ChunkMeta> {
let chunk_data = resp?;
let chunk_digest: B3Digest = blake3::hash(&chunk_data.data).as_bytes().into();
let chunk_path = derive_chunk_path(&base_path, &chunk_digest);
upload_chunk(object_store, chunk_digest, chunk_path, chunk_data.data).await
}
// Use the fastcdc chunker to produce a stream of chunks, and upload these
// that don't exist to the backend.
let chunks = chunker
.as_stream()
.then(|resp| fastcdc_chunk_uploader(resp, base_path.clone(), object_store.clone()))
.collect::<io::Result<Vec<ChunkMeta>>>()
.await?;
let chunks = if chunks.len() < 2 {
// The chunker returned only one chunk, which is the entire blob.
// According to the protocol, we must return an empty list of chunks
// when the blob is not split up further.
vec![]
} else {
chunks
};
let stat_blob_response = StatBlobResponse {
chunks,
bao: "".into(), // still todo
};
// check for Blob, if it doesn't exist, persist.
let blob_digest: B3Digest = b3_r.digest().into();
let blob_path = derive_blob_path(&base_path, &blob_digest);
match object_store.head(&blob_path).await {
// blob already exists, nothing to do
Ok(_) => {
trace!(
blob.digest = %blob_digest,
blob.path = %blob_path,
"blob already exists on backend"
);
}
// chunk does not yet exist, upload first
Err(object_store::Error::NotFound { .. }) => {
debug!(
blob.digest = %blob_digest,
blob.path = %blob_path,
"uploading blob"
);
object_store
.put(&blob_path, stat_blob_response.encode_to_vec().into())
.await?;
}
Err(err) => {
// other error
Err(err)?
}
}
Ok(blob_digest)
}
/// upload chunk if it doesn't exist yet.
#[instrument(skip_all, fields(chunk.digest = %chunk_digest, chunk.size = chunk_data.len(), chunk.path = %chunk_path), err)]
async fn upload_chunk(
object_store: Arc<dyn ObjectStore>,
chunk_digest: B3Digest,
chunk_path: Path,
chunk_data: Vec<u8>,
) -> std::io::Result<ChunkMeta> {
let chunk_size = chunk_data.len();
match object_store.head(&chunk_path).await {
// chunk already exists, nothing to do
Ok(_) => {
debug!("chunk already exists");
}
// chunk does not yet exist, compress and upload.
Err(object_store::Error::NotFound { .. }) => {
let chunk_data_compressed =
zstd::encode_all(Cursor::new(chunk_data), zstd::DEFAULT_COMPRESSION_LEVEL)?;
debug!(chunk.compressed_size=%chunk_data_compressed.len(), "uploading chunk");
object_store
.as_ref()
.put(&chunk_path, chunk_data_compressed.into())
.await?;
}
// other error
Err(err) => Err(err)?,
}
Ok(ChunkMeta {
digest: chunk_digest.into(),
size: chunk_size as u64,
})
}
pin_project! {
/// Takes care of blob uploads.
/// All writes are relayed to self.writer, and we continuously poll the
/// future (which will internally read from the other side of the pipe and
/// upload chunks).
/// Our BlobWriter::close() needs to drop self.writer, so the other side
/// will read EOF and can finalize the blob.
/// The future should then resolve and return the blob digest.
pub struct ObjectStoreBlobWriter<W, Fut>
where
W: AsyncWrite,
Fut: Future,
{
#[pin]
writer: Option<W>,
#[pin]
fut: Option<Fut>,
fut_output: Option<io::Result<B3Digest>>
}
}
impl<W, Fut> tokio::io::AsyncWrite for ObjectStoreBlobWriter<W, Fut>
where
W: AsyncWrite + Send + Unpin,
Fut: Future,
{
fn poll_write(
self: std::pin::Pin<&mut Self>,
cx: &mut std::task::Context<'_>,
buf: &[u8],
) -> std::task::Poll<Result<usize, io::Error>> {
let this = self.project();
// poll the future.
let fut = this.fut.as_pin_mut().expect("not future");
let fut_p = fut.poll(cx);
// if it's ready, the only way this could have happened is that the
// upload failed, because we're only closing `self.writer` after all
// writes happened.
if fut_p.is_ready() {
return Poll::Ready(Err(io::Error::other("upload failed")));
}
// write to the underlying writer
this.writer
.as_pin_mut()
.expect("writer must be some")
.poll_write(cx, buf)
}
fn poll_flush(
self: std::pin::Pin<&mut Self>,
cx: &mut std::task::Context<'_>,
) -> std::task::Poll<Result<(), io::Error>> {
let this = self.project();
// poll the future.
let fut = this.fut.as_pin_mut().expect("not future");
let fut_p = fut.poll(cx);
// if it's ready, the only way this could have happened is that the
// upload failed, because we're only closing `self.writer` after all
// writes happened.
if fut_p.is_ready() {
return Poll::Ready(Err(io::Error::other("upload failed")));
}
// Call poll_flush on the writer
this.writer
.as_pin_mut()
.expect("writer must be some")
.poll_flush(cx)
}
fn poll_shutdown(
self: std::pin::Pin<&mut Self>,
_cx: &mut std::task::Context<'_>,
) -> std::task::Poll<Result<(), io::Error>> {
// There's nothing to do on shutdown. We might have written some chunks
// that are nowhere else referenced, but cleaning them up here would be racy.
std::task::Poll::Ready(Ok(()))
}
}
#[async_trait]
impl<W, Fut> BlobWriter for ObjectStoreBlobWriter<W, Fut>
where
W: AsyncWrite + Send + Unpin,
Fut: Future<Output = io::Result<B3Digest>> + Send + Unpin,
{
async fn close(&mut self) -> io::Result<B3Digest> {
match self.writer.take() {
Some(mut writer) => {
// shut down the writer, so the other side will read EOF.
writer.shutdown().await?;
// take out the future.
let fut = self.fut.take().expect("fut must be some");
// await it.
let resp = pin!(fut).await;
match resp.as_ref() {
// In the case of an Ok value, we store it in self.fut_output,
// so future calls to close can return that.
Ok(b3_digest) => {
self.fut_output = Some(Ok(b3_digest.clone()));
}
Err(e) => {
// for the error type, we need to cheat a bit, as
// they're not clone-able.
// Simply store a sloppy clone, with the same ErrorKind and message there.
self.fut_output = Some(Err(std::io::Error::new(e.kind(), e.to_string())))
}
}
resp
}
None => {
// called a second time, return self.fut_output.
match self.fut_output.as_ref().unwrap() {
Ok(ref b3_digest) => Ok(b3_digest.clone()),
Err(e) => Err(std::io::Error::new(e.kind(), e.to_string())),
}
}
}
}
}
#[cfg(test)]
mod test {
use super::{chunk_and_upload, default_avg_chunk_size};
use crate::{
blobservice::{BlobService, ObjectStoreBlobService},
fixtures::{BLOB_A, BLOB_A_DIGEST, BLOB_B, BLOB_B_DIGEST},
};
use std::{io::Cursor, sync::Arc};
use url::Url;
/// Tests chunk_and_upload directly, bypassing the BlobWriter at open_write().
#[rstest::rstest]
#[case::a(&BLOB_A, &BLOB_A_DIGEST)]
#[case::b(&BLOB_B, &BLOB_B_DIGEST)]
#[tokio::test]
async fn test_chunk_and_upload(
#[case] blob: &bytes::Bytes,
#[case] blob_digest: &crate::B3Digest,
) {
let (object_store, base_path) =
object_store::parse_url(&Url::parse("memory:///").unwrap()).unwrap();
let object_store: Arc<dyn object_store::ObjectStore> = Arc::from(object_store);
let blobsvc = Arc::new(ObjectStoreBlobService {
instance_name: "test".into(),
object_store: object_store.clone(),
avg_chunk_size: default_avg_chunk_size(),
base_path,
});
let inserted_blob_digest = chunk_and_upload(
&mut Cursor::new(blob.to_vec()),
object_store,
object_store::path::Path::from("/"),
1024 / 2,
1024,
1024 * 2,
)
.await
.expect("chunk_and_upload succeeds");
assert_eq!(blob_digest.clone(), inserted_blob_digest);
// Now we should have the blob
assert!(blobsvc.has(blob_digest).await.unwrap());
// Check if it was chunked correctly
let chunks = blobsvc.chunks(blob_digest).await.unwrap().unwrap();
if blob.len() < 1024 / 2 {
// The blob is smaller than the min chunk size, it should have been inserted as a whole
assert!(chunks.is_empty());
} else if blob.len() > 1024 * 2 {
// The blob is larger than the max chunk size, make sure it was split up into at least
// two chunks
assert!(chunks.len() >= 2);
}
}
}

View file

@ -0,0 +1,253 @@
//! This contains test scenarios that a given [BlobService] needs to pass.
//! We use [rstest] and [rstest_reuse] to provide all services we want to test
//! against, and then apply this template to all test functions.
use rstest::*;
use rstest_reuse::{self, *};
use std::io;
use tokio::io::AsyncReadExt;
use tokio::io::AsyncSeekExt;
use super::BlobService;
use crate::blobservice;
use crate::fixtures::BLOB_A;
use crate::fixtures::BLOB_A_DIGEST;
use crate::fixtures::BLOB_B;
use crate::fixtures::BLOB_B_DIGEST;
mod utils;
use self::utils::make_grpc_blob_service_client;
/// This produces a template, which will be applied to all individual test functions.
/// See https://github.com/la10736/rstest/issues/130#issuecomment-968864832
#[template]
#[rstest]
#[case::grpc(make_grpc_blob_service_client().await)]
#[case::memory(blobservice::from_addr("memory://").await.unwrap())]
#[case::objectstore_memory(blobservice::from_addr("objectstore+memory://").await.unwrap())]
pub fn blob_services(#[case] blob_service: impl BlobService) {}
/// Using [BlobService::has] on a non-existing blob should return false.
#[apply(blob_services)]
#[tokio::test]
async fn has_nonexistent_false(blob_service: impl BlobService) {
assert!(!blob_service
.has(&BLOB_A_DIGEST)
.await
.expect("must not fail"));
}
/// Using [BlobService::chunks] on a non-existing blob should return Ok(None)
#[apply(blob_services)]
#[tokio::test]
async fn chunks_nonexistent_false(blob_service: impl BlobService) {
assert!(blob_service
.chunks(&BLOB_A_DIGEST)
.await
.expect("must be ok")
.is_none());
}
// TODO: do tests with `chunks`
/// Trying to read a non-existing blob should return a None instead of a reader.
#[apply(blob_services)]
#[tokio::test]
async fn not_found_read(blob_service: impl BlobService) {
assert!(blob_service
.open_read(&BLOB_A_DIGEST)
.await
.expect("must not fail")
.is_none())
}
/// Put a blob in the store, check has, get it back.
#[apply(blob_services)]
// #[case::small(&fixtures::BLOB_A, &fixtures::BLOB_A_DIGEST)]
// #[case::big(&fixtures::BLOB_B, &fixtures::BLOB_B_DIGEST)]
#[tokio::test]
async fn put_has_get(blob_service: impl BlobService) {
// TODO: figure out how to instantiate this with BLOB_A and BLOB_B, as two separate cases
for (blob_contents, blob_digest) in &[
(&*BLOB_A, BLOB_A_DIGEST.clone()),
(&*BLOB_B, BLOB_B_DIGEST.clone()),
] {
let mut w = blob_service.open_write().await;
let l = tokio::io::copy(&mut io::Cursor::new(blob_contents), &mut w)
.await
.expect("copy must succeed");
assert_eq!(
blob_contents.len(),
l as usize,
"written bytes must match blob length"
);
let digest = w.close().await.expect("close must succeed");
assert_eq!(*blob_digest, digest, "returned digest must be correct");
assert!(
blob_service.has(blob_digest).await.expect("must not fail"),
"blob service should now have the blob"
);
let mut r = blob_service
.open_read(blob_digest)
.await
.expect("open_read must succeed")
.expect("must be some");
let mut buf: Vec<u8> = Vec::new();
let mut pinned_reader = std::pin::pin!(r);
let l = tokio::io::copy(&mut pinned_reader, &mut buf)
.await
.expect("copy must succeed");
assert_eq!(
blob_contents.len(),
l as usize,
"read bytes must match blob length"
);
assert_eq!(&blob_contents[..], &buf, "read blob contents must match");
}
}
/// Put a blob in the store, and seek inside it a bit.
#[apply(blob_services)]
#[tokio::test]
async fn put_seek(blob_service: impl BlobService) {
let mut w = blob_service.open_write().await;
tokio::io::copy(&mut io::Cursor::new(&BLOB_B.to_vec()), &mut w)
.await
.expect("copy must succeed");
w.close().await.expect("close must succeed");
// open a blob for reading
let mut r = blob_service
.open_read(&BLOB_B_DIGEST)
.await
.expect("open_read must succeed")
.expect("must be some");
let mut pos: u64 = 0;
// read the first 10 bytes, they must match the data in the fixture.
{
let mut buf = [0; 10];
r.read_exact(&mut buf).await.expect("must succeed");
assert_eq!(
&BLOB_B[pos as usize..pos as usize + buf.len()],
buf,
"expected first 10 bytes to match"
);
pos += buf.len() as u64;
}
// seek by 0 bytes, using SeekFrom::Start.
let p = r
.seek(io::SeekFrom::Start(pos))
.await
.expect("must not fail");
assert_eq!(pos, p);
// read the next 10 bytes, they must match the data in the fixture.
{
let mut buf = [0; 10];
r.read_exact(&mut buf).await.expect("must succeed");
assert_eq!(
&BLOB_B[pos as usize..pos as usize + buf.len()],
buf,
"expected data to match"
);
pos += buf.len() as u64;
}
// seek by 5 bytes, using SeekFrom::Start.
let p = r
.seek(io::SeekFrom::Start(pos + 5))
.await
.expect("must not fail");
pos += 5;
assert_eq!(pos, p);
// read the next 10 bytes, they must match the data in the fixture.
{
let mut buf = [0; 10];
r.read_exact(&mut buf).await.expect("must succeed");
assert_eq!(
&BLOB_B[pos as usize..pos as usize + buf.len()],
buf,
"expected data to match"
);
pos += buf.len() as u64;
}
// seek by 12345 bytes, using SeekFrom::
let p = r
.seek(io::SeekFrom::Current(12345))
.await
.expect("must not fail");
pos += 12345;
assert_eq!(pos, p);
// read the next 10 bytes, they must match the data in the fixture.
{
let mut buf = [0; 10];
r.read_exact(&mut buf).await.expect("must succeed");
assert_eq!(
&BLOB_B[pos as usize..pos as usize + buf.len()],
buf,
"expected data to match"
);
#[allow(unused_assignments)]
{
pos += buf.len() as u64;
}
}
// seeking to the end is okay…
let p = r
.seek(io::SeekFrom::Start(BLOB_B.len() as u64))
.await
.expect("must not fail");
pos = BLOB_B.len() as u64;
assert_eq!(pos, p);
{
// but it returns no more data.
let mut buf: Vec<u8> = Vec::new();
r.read_to_end(&mut buf).await.expect("must not fail");
assert!(buf.is_empty(), "expected no more data to be read");
}
// seeking past the end…
// should either be ok, but then return 0 bytes.
// this matches the behaviour or a Cursor<Vec<u8>>.
if let Ok(_pos) = r.seek(io::SeekFrom::Start(BLOB_B.len() as u64 + 1)).await {
let mut buf: Vec<u8> = Vec::new();
r.read_to_end(&mut buf).await.expect("must not fail");
assert!(buf.is_empty(), "expected no more data to be read");
}
// or not be okay.
// TODO: this is only broken for the gRPC version
// We expect seeking backwards or relative to the end to fail.
// r.seek(io::SeekFrom::Current(-1))
// .expect_err("SeekFrom::Current(-1) expected to fail");
// r.seek(io::SeekFrom::Start(pos - 1))
// .expect_err("SeekFrom::Start(pos-1) expected to fail");
// r.seek(io::SeekFrom::End(0))
// .expect_err("SeekFrom::End(_) expected to fail");
}

View file

@ -0,0 +1,45 @@
use crate::blobservice::{BlobService, MemoryBlobService};
use crate::proto::blob_service_client::BlobServiceClient;
use crate::proto::GRPCBlobServiceWrapper;
use crate::{blobservice::GRPCBlobService, proto::blob_service_server::BlobServiceServer};
use hyper_util::rt::TokioIo;
use tonic::transport::{Endpoint, Server, Uri};
/// Constructs and returns a gRPC BlobService.
/// The server part is a [MemoryBlobService], exposed via the
/// [GRPCBlobServiceWrapper], and connected through a DuplexStream
pub async fn make_grpc_blob_service_client() -> Box<dyn BlobService> {
let (left, right) = tokio::io::duplex(64);
// spin up a server, which will only connect once, to the left side.
tokio::spawn(async {
let blob_service = Box::<MemoryBlobService>::default() as Box<dyn BlobService>;
// spin up a new DirectoryService
let mut server = Server::builder();
let router = server.add_service(BlobServiceServer::new(GRPCBlobServiceWrapper::new(
blob_service,
)));
router
.serve_with_incoming(tokio_stream::once(Ok::<_, std::io::Error>(left)))
.await
});
// Create a client, connecting to the right side. The URI is unused.
let mut maybe_right = Some(right);
Box::new(GRPCBlobService::from_client(
"root".into(),
BlobServiceClient::new(
Endpoint::try_from("http://[::]:50051")
.unwrap()
.connect_with_connector(tower::service_fn(move |_: Uri| {
let right = maybe_right.take().unwrap();
async move { Ok::<_, std::io::Error>(TokioIo::new(right)) }
}))
.await
.unwrap(),
),
))
}

View file

@ -0,0 +1,582 @@
//! The composition module allows composing different kinds of services based on a set of service
//! configurations _at runtime_.
//!
//! Store configs are deserialized with serde. The registry provides a stateful mapping from the
//! `type` tag of an internally tagged enum on the serde side to a Config struct which is
//! deserialized and then returned as a `Box<dyn ServiceBuilder<Output = dyn BlobService>>`
//! (the same for DirectoryService instead of BlobService etc).
//!
//! ### Example 1.: Implementing a new BlobService
//!
//! You need a Config struct which implements `DeserializeOwned` and
//! `ServiceBuilder<Output = dyn BlobService>`.
//! Provide the user with a function to call with
//! their registry. You register your new type as:
//!
//! ```
//! use std::sync::Arc;
//!
//! use snix_castore::composition::*;
//! use snix_castore::blobservice::BlobService;
//!
//! #[derive(serde::Deserialize)]
//! struct MyBlobServiceConfig {
//! }
//!
//! #[tonic::async_trait]
//! impl ServiceBuilder for MyBlobServiceConfig {
//! type Output = dyn BlobService;
//! async fn build(&self, _: &str, _: &CompositionContext) -> Result<Arc<Self::Output>, Box<dyn std::error::Error + Send + Sync + 'static>> {
//! todo!()
//! }
//! }
//!
//! impl TryFrom<url::Url> for MyBlobServiceConfig {
//! type Error = Box<dyn std::error::Error + Send + Sync>;
//! fn try_from(url: url::Url) -> Result<Self, Self::Error> {
//! todo!()
//! }
//! }
//!
//! pub fn add_my_service(reg: &mut Registry) {
//! reg.register::<Box<dyn ServiceBuilder<Output = dyn BlobService>>, MyBlobServiceConfig>("myblobservicetype");
//! }
//! ```
//!
//! Now, when a user deserializes a store config with the type tag "myblobservicetype" into a
//! `Box<dyn ServiceBuilder<Output = Arc<dyn BlobService>>>`, it will be done via `MyBlobServiceConfig`.
//!
//! ### Example 2.: Composing stores to get one store
//!
//! ```
//! use std::sync::Arc;
//! use snix_castore::composition::*;
//! use snix_castore::blobservice::BlobService;
//!
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! # tokio::runtime::Builder::new_current_thread().enable_all().build().unwrap().block_on(async move {
//! let blob_services_configs_json = serde_json::json!({
//! "blobstore1": {
//! "type": "memory"
//! },
//! "blobstore2": {
//! "type": "memory"
//! },
//! "root": {
//! "type": "combined",
//! "near": "blobstore1",
//! "far": "blobstore2"
//! }
//! });
//!
//! let blob_services_configs = with_registry(&REG, || serde_json::from_value(blob_services_configs_json))?;
//! let mut blob_service_composition = Composition::new(&REG);
//! blob_service_composition.extend_with_configs::<dyn BlobService>(blob_services_configs);
//! let blob_service: Arc<dyn BlobService> = blob_service_composition.build("root").await?;
//! # Ok(())
//! # })
//! # }
//! ```
//!
//! ### Example 3.: Creating another registry extending the default registry with third-party types
//!
//! ```
//! # pub fn add_my_service(reg: &mut snix_castore::composition::Registry) {}
//! let mut my_registry = snix_castore::composition::Registry::default();
//! snix_castore::composition::add_default_services(&mut my_registry);
//! add_my_service(&mut my_registry);
//! ```
//!
//! Continue with Example 2, with my_registry instead of REG
//!
//! EXPERIMENTAL: If the xp-composition-url-refs feature is enabled,
//! entrypoints can also be URL strings, which are created as
//! anonymous stores. Instantiations of the same URL will
//! result in a new, distinct anonymous store each time, so creating
//! two `memory://` stores with this method will not share the same view.
//! This behavior might change in the future.
use erased_serde::deserialize;
use futures::future::BoxFuture;
use futures::FutureExt;
use serde::de::DeserializeOwned;
use serde_tagged::de::{BoxFnSeed, SeedFactory};
use serde_tagged::util::TagString;
use std::any::{Any, TypeId};
use std::cell::Cell;
use std::collections::BTreeMap;
use std::collections::HashMap;
use std::marker::PhantomData;
use std::sync::{Arc, LazyLock};
use tonic::async_trait;
/// Resolves tag names to the corresponding Config type.
// Registry implementation details:
// This is really ugly. Really we would want to store this as a generic static field:
//
// ```
// struct Registry<T>(BTreeMap<(&'static str), RegistryEntry<T>);
// static REG<T>: Registry<T>;
// ```
//
// so that one version of the static is generated for each Type that the registry is accessed for.
// However, this is not possible, because generics are only a thing in functions, and even there
// they will not interact with static items:
// https://doc.rust-lang.org/reference/items/static-items.html#statics--generics
//
// So instead, we make this lookup at runtime by putting the TypeId into the key.
// But now we can no longer store the `BoxFnSeed<T>` because we are lacking the generic parameter
// T, so instead store it as `Box<dyn Any>` and downcast to `&BoxFnSeed<T>` when performing the
// lookup.
// I said it was ugly...
#[derive(Default)]
pub struct Registry(BTreeMap<(TypeId, &'static str), Box<dyn Any + Sync>>);
pub type FromUrlSeed<T> =
Box<dyn Fn(url::Url) -> Result<T, Box<dyn std::error::Error + Send + Sync>> + Sync>;
pub struct RegistryEntry<T> {
serde_deserialize_seed: BoxFnSeed<DeserializeWithRegistry<T>>,
from_url_seed: FromUrlSeed<DeserializeWithRegistry<T>>,
}
struct RegistryWithFakeType<'r, T>(&'r Registry, PhantomData<T>);
impl<'r, 'de: 'r, T: 'static> SeedFactory<'de, TagString<'de>> for RegistryWithFakeType<'r, T> {
type Value = DeserializeWithRegistry<T>;
type Seed = &'r BoxFnSeed<Self::Value>;
// Required method
fn seed<E>(self, tag: TagString<'de>) -> Result<Self::Seed, E>
where
E: serde::de::Error,
{
// using find() and not get() because of https://github.com/rust-lang/rust/issues/80389
let seed: &Box<dyn Any + Sync> = self
.0
.0
.iter()
.find(|(k, _)| *k == &(TypeId::of::<T>(), tag.as_ref()))
.ok_or_else(|| serde::de::Error::custom(format!("Unknown type: {}", tag)))?
.1;
let entry: &RegistryEntry<T> = <dyn Any>::downcast_ref(&**seed).unwrap();
Ok(&entry.serde_deserialize_seed)
}
}
/// Wrapper type which implements Deserialize using the registry
///
/// Wrap your type in this in order to deserialize it using a registry, e.g.
/// `RegistryWithFakeType<Box<dyn MyTrait>>`, then the types registered for `Box<dyn MyTrait>`
/// will be used.
pub struct DeserializeWithRegistry<T>(pub T);
impl Registry {
/// Registers a mapping from type tag to a concrete type into the registry.
///
/// The type parameters are very important:
/// After calling `register::<Box<dyn FooTrait>, FooStruct>("footype")`, when a user
/// deserializes into an input with the type tag "myblobservicetype" into a
/// `Box<dyn FooTrait>`, it will first call the Deserialize imple of `FooStruct` and
/// then convert it into a `Box<dyn FooTrait>` using From::from.
pub fn register<
T: 'static,
C: DeserializeOwned
+ TryFrom<url::Url, Error = Box<dyn std::error::Error + Send + Sync>>
+ Into<T>,
>(
&mut self,
type_name: &'static str,
) {
self.0.insert(
(TypeId::of::<T>(), type_name),
Box::new(RegistryEntry {
serde_deserialize_seed: BoxFnSeed::new(|x| {
deserialize::<C>(x)
.map(Into::into)
.map(DeserializeWithRegistry)
}),
from_url_seed: Box::new(|url| {
C::try_from(url)
.map(Into::into)
.map(DeserializeWithRegistry)
}),
}),
);
}
}
impl<'de, T: 'static> serde::Deserialize<'de> for DeserializeWithRegistry<T> {
fn deserialize<D>(de: D) -> std::result::Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
serde_tagged::de::internal::deserialize(
de,
"type",
RegistryWithFakeType(ACTIVE_REG.get().unwrap(), PhantomData::<T>),
)
}
}
#[derive(Debug, thiserror::Error)]
enum TryFromUrlError {
#[error("Unknown type: {0}")]
UnknownTag(String),
}
impl<T: 'static> TryFrom<url::Url> for DeserializeWithRegistry<T> {
type Error = Box<dyn std::error::Error + Send + Sync>;
fn try_from(url: url::Url) -> Result<Self, Self::Error> {
let tag = url.scheme().split('+').next().unwrap();
// same as in the SeedFactory impl: using find() and not get() because of https://github.com/rust-lang/rust/issues/80389
let seed = ACTIVE_REG
.get()
.unwrap()
.0
.iter()
.find(|(k, _)| *k == &(TypeId::of::<T>(), tag))
.ok_or_else(|| Box::new(TryFromUrlError::UnknownTag(tag.into())))?
.1;
let entry: &RegistryEntry<T> = <dyn Any>::downcast_ref(&**seed).unwrap();
(entry.from_url_seed)(url)
}
}
thread_local! {
/// The active Registry is global state, because there is no convenient and universal way to pass state
/// into the functions usually used for deserialization, e.g. `serde_json::from_str`, `toml::from_str`,
/// `serde_qs::from_str`.
static ACTIVE_REG: Cell<Option<&'static Registry>> = panic!("reg was accessed before initialization");
}
/// Run the provided closure with a registry context.
/// Any serde deserialize calls within the closure will use the registry to resolve tag names to
/// the corresponding Config type.
pub fn with_registry<R>(reg: &'static Registry, f: impl FnOnce() -> R) -> R {
ACTIVE_REG.set(Some(reg));
let result = f();
ACTIVE_REG.set(None);
result
}
/// The provided registry of snix_castore, with all builtin BlobStore/DirectoryStore implementations
pub static REG: LazyLock<&'static Registry> = LazyLock::new(|| {
let mut reg = Default::default();
add_default_services(&mut reg);
// explicitly leak to get an &'static, so that we gain `&Registry: Send` from `Registry: Sync`
Box::leak(Box::new(reg))
});
// ---------- End of generic registry code --------- //
/// Register the builtin services of snix_castore (blob services and directory
/// services) with the given registry.
/// This can be used outside to create your own registry with the builtin types
/// _and_ extra third party types.
pub fn add_default_services(reg: &mut Registry) {
crate::blobservice::register_blob_services(reg);
crate::directoryservice::register_directory_services(reg);
}
pub struct CompositionContext<'a> {
// The stack used to detect recursive instantiations and prevent deadlocks
// The TypeId of the trait object is included to distinguish e.g. the
// BlobService "root" and the DirectoryService "root".
stack: Vec<(TypeId, String)>,
registry: &'static Registry,
composition: Option<&'a Composition>,
}
impl CompositionContext<'_> {
/// Get a composition context for one-off store creation.
pub fn blank(registry: &'static Registry) -> Self {
Self {
registry,
stack: Default::default(),
composition: None,
}
}
pub async fn resolve<T: ?Sized + Send + Sync + 'static>(
&self,
entrypoint: String,
) -> Result<Arc<T>, Box<dyn std::error::Error + Send + Sync + 'static>> {
// disallow recursion
if self
.stack
.contains(&(TypeId::of::<T>(), entrypoint.clone()))
{
return Err(CompositionError::Recursion(
self.stack.iter().map(|(_, n)| n.clone()).collect(),
)
.into());
}
Ok(self.build_internal(entrypoint).await?)
}
#[cfg(feature = "xp-composition-url-refs")]
async fn build_anonymous<T: ?Sized + Send + Sync + 'static>(
&self,
entrypoint: String,
) -> Result<Arc<T>, Box<dyn std::error::Error + Send + Sync>> {
let url = url::Url::parse(&entrypoint)?;
let config: DeserializeWithRegistry<Box<dyn ServiceBuilder<Output = T>>> =
with_registry(self.registry, || url.try_into())?;
config.0.build("anonymous", self).await
}
fn build_internal<T: ?Sized + Send + Sync + 'static>(
&self,
entrypoint: String,
) -> BoxFuture<'_, Result<Arc<T>, CompositionError>> {
#[cfg(feature = "xp-composition-url-refs")]
if entrypoint.contains("://") {
// There is a chance this is a url. we are building an anonymous store
return Box::pin(async move {
self.build_anonymous(entrypoint.clone())
.await
.map_err(|e| CompositionError::Failed(entrypoint, Arc::from(e)))
});
}
let mut stores = match self.composition {
Some(comp) => comp.stores.lock().unwrap(),
None => return Box::pin(futures::future::err(CompositionError::NotFound(entrypoint))),
};
let entry = match stores.get_mut(&(TypeId::of::<T>(), entrypoint.clone())) {
Some(v) => v,
None => return Box::pin(futures::future::err(CompositionError::NotFound(entrypoint))),
};
// for lifetime reasons, we put a placeholder value in the hashmap while we figure out what
// the new value should be. the Mutex stays locked the entire time, so nobody will ever see
// this temporary value.
let prev_val = std::mem::replace(
entry,
Box::new(InstantiationState::<T>::Done(Err(
CompositionError::Poisoned(entrypoint.clone()),
))),
);
let (new_val, ret) = match *prev_val.downcast::<InstantiationState<T>>().unwrap() {
InstantiationState::Done(service) => (
InstantiationState::Done(service.clone()),
futures::future::ready(service).boxed(),
),
// the construction of the store has not started yet.
InstantiationState::Config(config) => {
let (tx, rx) = tokio::sync::watch::channel(None);
(
InstantiationState::InProgress(rx),
(async move {
let mut new_context = CompositionContext {
composition: self.composition,
registry: self.registry,
stack: self.stack.clone(),
};
new_context
.stack
.push((TypeId::of::<T>(), entrypoint.clone()));
let res =
config.build(&entrypoint, &new_context).await.map_err(|e| {
match e.downcast() {
Ok(e) => *e,
Err(e) => CompositionError::Failed(entrypoint, e.into()),
}
});
tx.send(Some(res.clone())).unwrap();
res
})
.boxed(),
)
}
// there is already a task driving forward the construction of this store, wait for it
// to notify us via the provided channel
InstantiationState::InProgress(mut recv) => {
(InstantiationState::InProgress(recv.clone()), {
(async move {
loop {
if let Some(v) =
recv.borrow_and_update().as_ref().map(|res| res.clone())
{
break v;
}
recv.changed().await.unwrap();
}
})
.boxed()
})
}
};
*entry = Box::new(new_val);
ret
}
}
#[async_trait]
/// This is the trait usually implemented on a per-store-type Config struct and
/// used to instantiate it.
pub trait ServiceBuilder: Send + Sync {
type Output: ?Sized;
async fn build(
&self,
instance_name: &str,
context: &CompositionContext,
) -> Result<Arc<Self::Output>, Box<dyn std::error::Error + Send + Sync + 'static>>;
}
impl<T: ?Sized, S: ServiceBuilder<Output = T> + 'static> From<S>
for Box<dyn ServiceBuilder<Output = T>>
{
fn from(t: S) -> Self {
Box::new(t)
}
}
enum InstantiationState<T: ?Sized> {
Config(Box<dyn ServiceBuilder<Output = T>>),
InProgress(tokio::sync::watch::Receiver<Option<Result<Arc<T>, CompositionError>>>),
Done(Result<Arc<T>, CompositionError>),
}
pub struct Composition {
registry: &'static Registry,
stores: std::sync::Mutex<HashMap<(TypeId, String), Box<dyn Any + Send + Sync>>>,
}
#[derive(thiserror::Error, Clone, Debug)]
pub enum CompositionError {
#[error("store not found: {0}")]
NotFound(String),
#[error("recursion not allowed {0:?}")]
Recursion(Vec<String>),
#[error("store construction panicked {0}")]
Poisoned(String),
#[error("instantiation of service {0} failed: {1}")]
Failed(String, Arc<dyn std::error::Error + Send + Sync>),
}
impl<T: ?Sized + Send + Sync + 'static>
Extend<(
String,
DeserializeWithRegistry<Box<dyn ServiceBuilder<Output = T>>>,
)> for Composition
{
fn extend<I>(&mut self, configs: I)
where
I: IntoIterator<
Item = (
String,
DeserializeWithRegistry<Box<dyn ServiceBuilder<Output = T>>>,
),
>,
{
self.stores
.lock()
.unwrap()
.extend(configs.into_iter().map(|(k, v)| {
(
(TypeId::of::<T>(), k),
Box::new(InstantiationState::Config(v.0)) as Box<dyn Any + Send + Sync>,
)
}))
}
}
impl Composition {
/// The given registry will be used for creation of anonymous stores during composition
pub fn new(registry: &'static Registry) -> Self {
Self {
registry,
stores: Default::default(),
}
}
pub fn extend_with_configs<T: ?Sized + Send + Sync + 'static>(
&mut self,
// Keep the concrete `HashMap` type here since it allows for type
// inference of what type is previously being deserialized.
configs: HashMap<String, DeserializeWithRegistry<Box<dyn ServiceBuilder<Output = T>>>>,
) {
self.extend(configs);
}
/// Looks up the entrypoint name in the composition and returns an instantiated service.
pub async fn build<T: ?Sized + Send + Sync + 'static>(
&self,
entrypoint: &str,
) -> Result<Arc<T>, CompositionError> {
self.context().build_internal(entrypoint.to_string()).await
}
pub fn context(&self) -> CompositionContext {
CompositionContext {
registry: self.registry,
stack: vec![],
composition: Some(self),
}
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::blobservice::BlobService;
use std::sync::Arc;
/// Test that we return a reference to the same instance of MemoryBlobService (via ptr_eq)
/// when instantiating the same entrypoint twice. By instantiating concurrently, we also
/// test the channels notifying the second consumer when the store has been instantiated.
#[tokio::test]
async fn concurrent() {
let blob_services_configs_json = serde_json::json!({
"root": {
"type": "memory",
}
});
let blob_services_configs =
with_registry(&REG, || serde_json::from_value(blob_services_configs_json)).unwrap();
let mut blob_service_composition = Composition::new(&REG);
blob_service_composition.extend_with_configs::<dyn BlobService>(blob_services_configs);
let (blob_service1, blob_service2) = tokio::join!(
blob_service_composition.build::<dyn BlobService>("root"),
blob_service_composition.build::<dyn BlobService>("root")
);
assert!(Arc::ptr_eq(
&blob_service1.unwrap(),
&blob_service2.unwrap()
));
}
/// Test that we throw the correct error when an instantiation would recurse (deadlock)
#[tokio::test]
async fn reject_recursion() {
let blob_services_configs_json = serde_json::json!({
"root": {
"type": "combined",
"near": "other",
"far": "other"
},
"other": {
"type": "combined",
"near": "root",
"far": "root"
}
});
let blob_services_configs =
with_registry(&REG, || serde_json::from_value(blob_services_configs_json)).unwrap();
let mut blob_service_composition = Composition::new(&REG);
blob_service_composition.extend_with_configs::<dyn BlobService>(blob_services_configs);
match blob_service_composition
.build::<dyn BlobService>("root")
.await
{
Err(CompositionError::Recursion(stack)) => {
assert_eq!(stack, vec!["root".to_string(), "other".to_string()])
}
other => panic!("should have returned an error, returned: {:?}", other.err()),
}
}
}

111
snix/castore/src/digests.rs Normal file
View file

@ -0,0 +1,111 @@
use bytes::Bytes;
use data_encoding::BASE64;
use thiserror::Error;
#[derive(PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct B3Digest([u8; Self::LENGTH]);
impl B3Digest {
pub const LENGTH: usize = blake3::OUT_LEN;
}
// TODO: allow converting these errors to crate::Error
#[derive(Error, Debug, PartialEq)]
pub enum Error {
#[error("invalid digest length: {0}")]
InvalidDigestLen(usize),
}
impl AsRef<[u8; B3Digest::LENGTH]> for B3Digest {
fn as_ref(&self) -> &[u8; Self::LENGTH] {
&self.0
}
}
impl std::ops::Deref for B3Digest {
type Target = [u8; Self::LENGTH];
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl From<B3Digest> for bytes::Bytes {
fn from(val: B3Digest) -> Self {
Bytes::copy_from_slice(&val.0)
}
}
impl From<blake3::Hash> for B3Digest {
fn from(value: blake3::Hash) -> Self {
Self(*value.as_bytes())
}
}
impl From<digest::Output<blake3::Hasher>> for B3Digest {
fn from(value: digest::Output<blake3::Hasher>) -> Self {
Self(value.into())
}
}
impl TryFrom<&[u8]> for B3Digest {
type Error = Error;
// constructs a [B3Digest] from a &[u8].
// Returns an error if the digest has the wrong length.
fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
Ok(Self(
value
.try_into()
.map_err(|_e| Error::InvalidDigestLen(value.len()))?,
))
}
}
impl TryFrom<bytes::Bytes> for B3Digest {
type Error = Error;
fn try_from(value: bytes::Bytes) -> Result<Self, Self::Error> {
value[..].try_into()
}
}
impl TryFrom<Vec<u8>> for B3Digest {
type Error = Error;
fn try_from(value: Vec<u8>) -> Result<Self, Self::Error> {
value[..].try_into()
}
}
impl From<&[u8; B3Digest::LENGTH]> for B3Digest {
fn from(value: &[u8; B3Digest::LENGTH]) -> Self {
Self(*value)
}
}
impl From<B3Digest> for [u8; B3Digest::LENGTH] {
fn from(value: B3Digest) -> Self {
value.0
}
}
impl Clone for B3Digest {
fn clone(&self) -> Self {
Self(self.0.to_owned())
}
}
impl std::fmt::Display for B3Digest {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str("blake3-").unwrap();
BASE64.encode_write(&self.0, f)
}
}
impl std::fmt::Debug for B3Digest {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str("blake3-").unwrap();
BASE64.encode_write(&self.0, f)
}
}

View file

@ -0,0 +1,395 @@
use bigtable_rs::{bigtable, google::bigtable::v2 as bigtable_v2};
use bytes::Bytes;
use data_encoding::HEXLOWER;
use futures::stream::BoxStream;
use prost::Message;
use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DurationSeconds};
use std::sync::Arc;
use tonic::async_trait;
use tracing::{instrument, trace, warn};
use super::{
utils::traverse_directory, Directory, DirectoryPutter, DirectoryService, SimplePutter,
};
use crate::composition::{CompositionContext, ServiceBuilder};
use crate::{proto, B3Digest, Error};
/// There should not be more than 10 MiB in a single cell.
/// https://cloud.google.com/bigtable/docs/schema-design#cells
const CELL_SIZE_LIMIT: u64 = 10 * 1024 * 1024;
/// Provides a [DirectoryService] implementation using
/// [Bigtable](https://cloud.google.com/bigtable/docs/)
/// as an underlying K/V store.
///
/// # Data format
/// We use Bigtable as a plain K/V store.
/// The row key is the digest of the directory, in hexlower.
/// Inside the row, we currently have a single column/cell, again using the
/// hexlower directory digest.
/// Its value is the Directory message, serialized in canonical protobuf.
/// We currently only populate this column.
///
/// In the future, we might want to introduce "bucketing", essentially storing
/// all directories inserted via `put_multiple_start` in a batched form.
/// This will prevent looking up intermediate Directories, which are not
/// directly at the root, so rely on store composition.
#[derive(Clone)]
pub struct BigtableDirectoryService {
instance_name: String,
client: bigtable::BigTable,
params: BigtableParameters,
#[cfg(test)]
#[allow(dead_code)]
/// Holds the temporary directory containing the unix socket, and the
/// spawned emulator process.
emulator: std::sync::Arc<(tempfile::TempDir, async_process::Child)>,
}
impl BigtableDirectoryService {
#[cfg(not(test))]
pub async fn connect(
instance_name: String,
params: BigtableParameters,
) -> Result<Self, bigtable::Error> {
let connection = bigtable::BigTableConnection::new(
&params.project_id,
&params.instance_name,
params.is_read_only,
params.channel_size,
params.timeout,
)
.await?;
Ok(Self {
instance_name,
client: connection.client(),
params,
})
}
#[cfg(test)]
pub async fn connect(
instance_name: String,
params: BigtableParameters,
) -> Result<Self, bigtable::Error> {
use std::time::Duration;
use async_process::{Command, Stdio};
use tempfile::TempDir;
use tokio_retry::{strategy::ExponentialBackoff, Retry};
let tmpdir = TempDir::new().unwrap();
let socket_path = tmpdir.path().join("cbtemulator.sock");
let emulator_process = Command::new("cbtemulator")
.arg("-address")
.arg(socket_path.clone())
.stderr(Stdio::piped())
.stdout(Stdio::piped())
.kill_on_drop(true)
.spawn()
.expect("failed to spawn emulator");
Retry::spawn(
ExponentialBackoff::from_millis(20)
.max_delay(Duration::from_secs(1))
.take(3),
|| async {
if socket_path.exists() {
Ok(())
} else {
Err(())
}
},
)
.await
.expect("failed to wait for socket");
// populate the emulator
for cmd in &[
vec!["createtable", &params.table_name],
vec!["createfamily", &params.table_name, &params.family_name],
] {
Command::new("cbt")
.args({
let mut args = vec![
"-instance",
&params.instance_name,
"-project",
&params.project_id,
];
args.extend_from_slice(cmd);
args
})
.env(
"BIGTABLE_EMULATOR_HOST",
format!("unix://{}", socket_path.to_string_lossy()),
)
.output()
.await
.expect("failed to run cbt setup command");
}
let connection = bigtable_rs::bigtable::BigTableConnection::new_with_emulator(
&format!("unix://{}", socket_path.to_string_lossy()),
&params.project_id,
&params.instance_name,
params.is_read_only,
params.timeout,
)?;
Ok(Self {
instance_name,
client: connection.client(),
params,
emulator: (tmpdir, emulator_process).into(),
})
}
}
/// Derives the row/column key for a given blake3 digest.
/// We use hexlower encoding, also because it can't be misinterpreted as RE2.
fn derive_directory_key(digest: &B3Digest) -> String {
HEXLOWER.encode(digest.as_slice())
}
#[async_trait]
impl DirectoryService for BigtableDirectoryService {
#[instrument(skip(self, digest), err, fields(directory.digest = %digest, instance_name=%self.instance_name))]
async fn get(&self, digest: &B3Digest) -> Result<Option<Directory>, Error> {
let mut client = self.client.clone();
let directory_key = derive_directory_key(digest);
let request = bigtable_v2::ReadRowsRequest {
app_profile_id: self.params.app_profile_id.to_string(),
table_name: client.get_full_table_name(&self.params.table_name),
rows_limit: 1,
rows: Some(bigtable_v2::RowSet {
row_keys: vec![directory_key.clone().into()],
row_ranges: vec![],
}),
// Filter selected family name, and column qualifier matching our digest.
// This is to ensure we don't fail once we start bucketing.
filter: Some(bigtable_v2::RowFilter {
filter: Some(bigtable_v2::row_filter::Filter::Chain(
bigtable_v2::row_filter::Chain {
filters: vec![
bigtable_v2::RowFilter {
filter: Some(
bigtable_v2::row_filter::Filter::FamilyNameRegexFilter(
self.params.family_name.to_string(),
),
),
},
bigtable_v2::RowFilter {
filter: Some(
bigtable_v2::row_filter::Filter::ColumnQualifierRegexFilter(
directory_key.clone().into(),
),
),
},
],
},
)),
}),
..Default::default()
};
let mut response = client
.read_rows(request)
.await
.map_err(|e| Error::StorageError(format!("unable to read rows: {}", e)))?;
if response.len() != 1 {
if response.len() > 1 {
// This shouldn't happen, we limit number of rows to 1
return Err(Error::StorageError(
"got more than one row from bigtable".into(),
));
}
// else, this is simply a "not found".
return Ok(None);
}
let (row_key, mut row_cells) = response.pop().unwrap();
if row_key != directory_key.as_bytes() {
// This shouldn't happen, we requested this row key.
return Err(Error::StorageError(
"got wrong row key from bigtable".into(),
));
}
let row_cell = row_cells
.pop()
.ok_or_else(|| Error::StorageError("found no cells".into()))?;
// Ensure there's only one cell (so no more left after the pop())
// This shouldn't happen, We filter out other cells in our query.
if !row_cells.is_empty() {
return Err(Error::StorageError(
"more than one cell returned from bigtable".into(),
));
}
// We also require the qualifier to be correct in the filter above,
// so this shouldn't happen.
if directory_key.as_bytes() != row_cell.qualifier {
return Err(Error::StorageError("unexpected cell qualifier".into()));
}
// For the data in that cell, ensure the digest matches what's requested, before parsing.
let got_digest = B3Digest::from(blake3::hash(&row_cell.value).as_bytes());
if got_digest != *digest {
return Err(Error::StorageError(format!(
"invalid digest: {}",
got_digest
)));
}
// Try to parse the value into a Directory message.
let directory = proto::Directory::decode(Bytes::from(row_cell.value))
.map_err(|e| Error::StorageError(format!("unable to decode directory proto: {}", e)))?
.try_into()
.map_err(|e| Error::StorageError(format!("invalid Directory message: {}", e)))?;
Ok(Some(directory))
}
#[instrument(skip(self, directory), err, fields(directory.digest = %directory.digest(), instance_name=%self.instance_name))]
async fn put(&self, directory: Directory) -> Result<B3Digest, Error> {
let directory_digest = directory.digest();
let mut client = self.client.clone();
let directory_key = derive_directory_key(&directory_digest);
let data = proto::Directory::from(directory).encode_to_vec();
if data.len() as u64 > CELL_SIZE_LIMIT {
return Err(Error::StorageError(
"Directory exceeds cell limit on Bigtable".into(),
));
}
let resp = client
.check_and_mutate_row(bigtable_v2::CheckAndMutateRowRequest {
table_name: client.get_full_table_name(&self.params.table_name),
app_profile_id: self.params.app_profile_id.to_string(),
authorized_view_name: "".to_string(),
row_key: directory_key.clone().into(),
predicate_filter: Some(bigtable_v2::RowFilter {
filter: Some(bigtable_v2::row_filter::Filter::ColumnQualifierRegexFilter(
directory_key.clone().into(),
)),
}),
// If the column was already found, do nothing.
true_mutations: vec![],
// Else, do the insert.
false_mutations: vec![
// https://cloud.google.com/bigtable/docs/writes
bigtable_v2::Mutation {
mutation: Some(bigtable_v2::mutation::Mutation::SetCell(
bigtable_v2::mutation::SetCell {
family_name: self.params.family_name.to_string(),
column_qualifier: directory_key.clone().into(),
timestamp_micros: -1, // use server time to fill timestamp
value: data,
},
)),
},
],
})
.await
.map_err(|e| Error::StorageError(format!("unable to mutate rows: {}", e)))?;
if resp.predicate_matched {
trace!("already existed")
}
Ok(directory_digest)
}
#[instrument(skip_all, fields(directory.digest = %root_directory_digest, instance_name=%self.instance_name))]
fn get_recursive(
&self,
root_directory_digest: &B3Digest,
) -> BoxStream<'static, Result<Directory, Error>> {
traverse_directory(self.clone(), root_directory_digest)
}
#[instrument(skip_all, fields(instance_name=%self.instance_name))]
fn put_multiple_start(&self) -> Box<(dyn DirectoryPutter + '_)> {
Box::new(SimplePutter::new(self))
}
}
/// Represents configuration of [BigtableDirectoryService].
/// This currently conflates both connect parameters and data model/client
/// behaviour parameters.
#[serde_as]
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct BigtableParameters {
project_id: String,
instance_name: String,
#[serde(default)]
is_read_only: bool,
#[serde(default = "default_channel_size")]
channel_size: usize,
#[serde_as(as = "Option<DurationSeconds<String>>")]
#[serde(default = "default_timeout")]
timeout: Option<std::time::Duration>,
table_name: String,
family_name: String,
#[serde(default = "default_app_profile_id")]
app_profile_id: String,
}
#[async_trait]
impl ServiceBuilder for BigtableParameters {
type Output = dyn DirectoryService;
async fn build<'a>(
&'a self,
instance_name: &str,
_context: &CompositionContext,
) -> Result<Arc<dyn DirectoryService>, Box<dyn std::error::Error + Send + Sync>> {
Ok(Arc::new(
BigtableDirectoryService::connect(instance_name.to_string(), self.clone()).await?,
))
}
}
impl TryFrom<url::Url> for BigtableParameters {
type Error = Box<dyn std::error::Error + Send + Sync>;
fn try_from(mut url: url::Url) -> Result<Self, Self::Error> {
// parse the instance name from the hostname.
let instance_name = url
.host_str()
.ok_or_else(|| Error::StorageError("instance name missing".into()))?
.to_string();
// … but add it to the query string now, so we just need to parse that.
url.query_pairs_mut()
.append_pair("instance_name", &instance_name);
let params: BigtableParameters = serde_qs::from_str(url.query().unwrap_or_default())
.map_err(|e| Error::InvalidRequest(format!("failed to parse parameters: {}", e)))?;
Ok(params)
}
}
fn default_app_profile_id() -> String {
"default".to_owned()
}
fn default_channel_size() -> usize {
4
}
fn default_timeout() -> Option<std::time::Duration> {
Some(std::time::Duration::from_secs(4))
}

View file

@ -0,0 +1,186 @@
use std::sync::Arc;
use futures::stream::BoxStream;
use futures::StreamExt;
use futures::TryFutureExt;
use futures::TryStreamExt;
use tonic::async_trait;
use tracing::{instrument, trace};
use super::{Directory, DirectoryGraph, DirectoryService, RootToLeavesValidator, SimplePutter};
use crate::composition::{CompositionContext, ServiceBuilder};
use crate::directoryservice::DirectoryPutter;
use crate::B3Digest;
use crate::Error;
/// Asks near first, if not found, asks far.
/// If found in there, returns it, and *inserts* it into
/// near.
/// Specifically, it always obtains the entire directory closure from far and inserts it into near,
/// which is useful when far does not support accessing intermediate directories (but near does).
/// There is no negative cache.
/// Inserts and listings are not implemented for now.
pub struct Cache<DS1, DS2> {
instance_name: String,
near: DS1,
far: DS2,
}
impl<DS1, DS2> Cache<DS1, DS2> {
pub fn new(instance_name: String, near: DS1, far: DS2) -> Self {
Self {
instance_name,
near,
far,
}
}
}
#[async_trait]
impl<DS1, DS2> DirectoryService for Cache<DS1, DS2>
where
DS1: DirectoryService + Clone + 'static,
DS2: DirectoryService + Clone + 'static,
{
#[instrument(skip(self, digest), fields(directory.digest = %digest, instance_name = %self.instance_name))]
async fn get(&self, digest: &B3Digest) -> Result<Option<Directory>, Error> {
match self.near.get(digest).await? {
Some(directory) => {
trace!("serving from cache");
Ok(Some(directory))
}
None => {
trace!("not found in near, asking remote…");
let mut copy = DirectoryGraph::with_order(
RootToLeavesValidator::new_with_root_digest(digest.clone()),
);
let mut stream = self.far.get_recursive(digest);
let root = stream.try_next().await?;
if let Some(root) = root.clone() {
copy.add(root)
.map_err(|e| Error::StorageError(e.to_string()))?;
}
while let Some(dir) = stream.try_next().await? {
copy.add(dir)
.map_err(|e| Error::StorageError(e.to_string()))?;
}
let copy = copy
.validate()
.map_err(|e| Error::StorageError(e.to_string()))?;
let mut put = self.near.put_multiple_start();
for dir in copy.drain_leaves_to_root() {
put.put(dir).await?;
}
put.close().await?;
Ok(root)
}
}
}
#[instrument(skip_all, fields(instance_name = %self.instance_name))]
async fn put(&self, _directory: Directory) -> Result<B3Digest, Error> {
Err(Error::StorageError("unimplemented".to_string()))
}
#[instrument(skip_all, fields(directory.digest = %root_directory_digest, instance_name = %self.instance_name))]
fn get_recursive(
&self,
root_directory_digest: &B3Digest,
) -> BoxStream<'static, Result<Directory, Error>> {
let near = self.near.clone();
let far = self.far.clone();
let digest = root_directory_digest.clone();
Box::pin(
(async move {
let mut stream = near.get_recursive(&digest);
match stream.try_next().await? {
Some(first) => {
trace!("serving from cache");
Ok(futures::stream::once(async { Ok(first) })
.chain(stream)
.left_stream())
}
None => {
trace!("not found in near, asking remote…");
let mut copy_for_near = DirectoryGraph::with_order(
RootToLeavesValidator::new_with_root_digest(digest.clone()),
);
let mut copy_for_client = vec![];
let mut stream = far.get_recursive(&digest);
while let Some(dir) = stream.try_next().await? {
copy_for_near
.add(dir.clone())
.map_err(|e| Error::StorageError(e.to_string()))?;
copy_for_client.push(dir);
}
let copy_for_near = copy_for_near
.validate()
.map_err(|e| Error::StorageError(e.to_string()))?;
let mut put = near.put_multiple_start();
for dir in copy_for_near.drain_leaves_to_root() {
put.put(dir).await?;
}
put.close().await?;
Ok(futures::stream::iter(copy_for_client.into_iter().map(Ok))
.right_stream())
}
}
})
.try_flatten_stream(),
)
}
#[instrument(skip_all)]
fn put_multiple_start(&self) -> Box<(dyn DirectoryPutter + '_)> {
Box::new(SimplePutter::new(self))
}
}
#[derive(serde::Deserialize, Debug)]
#[serde(deny_unknown_fields)]
pub struct CacheConfig {
near: String,
far: String,
}
impl TryFrom<url::Url> for CacheConfig {
type Error = Box<dyn std::error::Error + Send + Sync>;
fn try_from(url: url::Url) -> Result<Self, Self::Error> {
// cache doesn't support host or path in the URL.
if url.has_host() || !url.path().is_empty() {
return Err(Error::StorageError("invalid url".to_string()).into());
}
Ok(serde_qs::from_str(url.query().unwrap_or_default())?)
}
}
#[async_trait]
impl ServiceBuilder for CacheConfig {
type Output = dyn DirectoryService;
async fn build<'a>(
&'a self,
instance_name: &str,
context: &CompositionContext,
) -> Result<Arc<dyn DirectoryService>, Box<dyn std::error::Error + Send + Sync + 'static>> {
let (near, far) = futures::join!(
context.resolve::<Self::Output>(self.near.clone()),
context.resolve::<Self::Output>(self.far.clone())
);
Ok(Arc::new(Cache {
instance_name: instance_name.to_string(),
near: near?,
far: far?,
}))
}
}

View file

@ -0,0 +1,404 @@
use std::collections::HashMap;
use petgraph::{
graph::{DiGraph, NodeIndex},
visit::{Bfs, DfsPostOrder, EdgeRef, IntoNodeIdentifiers, Walker},
Direction, Incoming,
};
use tracing::instrument;
use super::order_validator::{LeavesToRootValidator, OrderValidator, RootToLeavesValidator};
use crate::{path::PathComponent, B3Digest, Directory, Node};
#[derive(thiserror::Error, Debug)]
pub enum Error {
#[error("{0}")]
ValidationError(String),
}
struct EdgeWeight {
name: PathComponent,
size: u64,
}
/// This can be used to validate and/or re-order a Directory closure (DAG of
/// connected Directories), and their insertion order.
///
/// The DirectoryGraph is parametrized on the insertion order, and can be
/// constructed using the Default trait, or using `with_order` if the
/// OrderValidator needs to be customized.
///
/// If the user is receiving directories from canonical protobuf encoding in
/// root-to-leaves order, and parsing them, she can call `digest_allowed`
/// _before_ parsing the protobuf record and then add it with `add_unchecked`.
/// All other users insert the directories via `add`, in their specified order.
/// During insertion, we validate as much as we can at that time:
///
/// - individual validation of Directory messages
/// - validation of insertion order
/// - validation of size fields of referred Directories
///
/// Internally it keeps all received Directories in a directed graph,
/// with node weights being the Directories and edges pointing to child/parent
/// directories.
///
/// Once all Directories have been inserted, a validate function can be
/// called to perform a check for graph connectivity and ensure there's no
/// disconnected components or missing nodes.
/// Finally, the `drain_leaves_to_root` or `drain_root_to_leaves` can be
/// _chained_ on validate to get an iterator over the (deduplicated and)
/// validated list of directories in either order.
#[derive(Default)]
pub struct DirectoryGraph<O> {
// A directed graph, using Directory as node weight.
// Edges point from parents to children.
//
// Nodes with None weigths might exist when a digest has been referred to but the directory
// with this digest has not yet been sent.
//
// The option in the edge weight tracks the pending validation state of the respective edge, for example if
// the child has not been added yet.
graph: DiGraph<Option<Directory>, Option<EdgeWeight>>,
// A lookup table from directory digest to node index.
digest_to_node_ix: HashMap<B3Digest, NodeIndex>,
order_validator: O,
}
pub struct ValidatedDirectoryGraph {
graph: DiGraph<Option<Directory>, Option<EdgeWeight>>,
root: Option<NodeIndex>,
}
fn check_edge(edge: &EdgeWeight, child: &Directory) -> Result<(), Error> {
// Ensure the size specified in the child node matches our records.
if edge.size != child.size() {
return Err(Error::ValidationError(format!(
"'{}' has wrong size, specified {}, recorded {}",
edge.name,
edge.size,
child.size(),
)));
}
Ok(())
}
impl DirectoryGraph<LeavesToRootValidator> {
/// Insert a new Directory into the closure
#[instrument(level = "trace", skip_all, fields(directory.digest=%directory.digest(), directory.size=%directory.size()), err)]
pub fn add(&mut self, directory: Directory) -> Result<(), Error> {
if !self.order_validator.add_directory(&directory) {
return Err(Error::ValidationError(
"unknown directory was referenced".into(),
));
}
self.add_order_unchecked(directory)
}
}
impl DirectoryGraph<RootToLeavesValidator> {
/// If the user is parsing directories from canonical protobuf encoding, she can
/// call `digest_allowed` _before_ parsing the protobuf record and then add it
/// with `add_unchecked`.
pub fn digest_allowed(&self, digest: B3Digest) -> bool {
self.order_validator.digest_allowed(&digest)
}
/// Insert a new Directory into the closure
#[instrument(level = "trace", skip_all, fields(directory.digest=%directory.digest(), directory.size=%directory.size()), err)]
pub fn add(&mut self, directory: Directory) -> Result<(), Error> {
let digest = directory.digest();
if !self.order_validator.digest_allowed(&digest) {
return Err(Error::ValidationError("unexpected digest".into()));
}
self.order_validator.add_directory_unchecked(&directory);
self.add_order_unchecked(directory)
}
}
impl<O: OrderValidator> DirectoryGraph<O> {
/// Customize the ordering, i.e. for pre-setting the root of the RootToLeavesValidator
pub fn with_order(order_validator: O) -> Self {
Self {
graph: Default::default(),
digest_to_node_ix: Default::default(),
order_validator,
}
}
/// Adds a directory which has already been confirmed to be in-order to the graph
pub fn add_order_unchecked(&mut self, directory: Directory) -> Result<(), Error> {
let digest = directory.digest();
// Teach the graph about the existence of a node with this digest
let ix = *self
.digest_to_node_ix
.entry(digest)
.or_insert_with(|| self.graph.add_node(None));
if self.graph[ix].is_some() {
// The node is already in the graph, there is nothing to do here.
return Ok(());
}
// set up edges to all child directories
for (name, node) in directory.nodes() {
if let Node::Directory { digest, size } = node {
let child_ix = *self
.digest_to_node_ix
.entry(digest.clone())
.or_insert_with(|| self.graph.add_node(None));
let pending_edge_check = match &self.graph[child_ix] {
Some(child) => {
// child is already available, validate the edge now
check_edge(
&EdgeWeight {
name: name.clone(),
size: *size,
},
child,
)?;
None
}
None => Some(EdgeWeight {
name: name.clone(),
size: *size,
}), // pending validation
};
self.graph.add_edge(ix, child_ix, pending_edge_check);
}
}
// validate the edges from parents to this node
// this collects edge ids in a Vec because there is no edges_directed_mut :'c
for edge_id in self
.graph
.edges_directed(ix, Direction::Incoming)
.map(|edge_ref| edge_ref.id())
.collect::<Vec<_>>()
.into_iter()
{
let edge_weight = self
.graph
.edge_weight_mut(edge_id)
.expect("edge not found")
.take()
.expect("edge is already validated");
check_edge(&edge_weight, &directory)?;
}
// finally, store the directory information in the node weight
self.graph[ix] = Some(directory);
Ok(())
}
#[instrument(level = "trace", skip_all, err)]
pub fn validate(self) -> Result<ValidatedDirectoryGraph, Error> {
// find all initial nodes (nodes without incoming edges)
let mut roots = self
.graph
.node_identifiers()
.filter(|&a| self.graph.neighbors_directed(a, Incoming).next().is_none());
let root = roots.next();
if roots.next().is_some() {
return Err(Error::ValidationError(
"graph has disconnected roots".into(),
));
}
// test that the graph is complete
if self.graph.raw_nodes().iter().any(|n| n.weight.is_none()) {
return Err(Error::ValidationError("graph is incomplete".into()));
}
Ok(ValidatedDirectoryGraph {
graph: self.graph,
root,
})
}
}
impl ValidatedDirectoryGraph {
/// Return the list of directories in from-root-to-leaves order.
/// In case no elements have been inserted, returns an empty list.
///
/// panics if the specified root is not in the graph
#[instrument(level = "trace", skip_all)]
pub fn drain_root_to_leaves(self) -> impl Iterator<Item = Directory> {
let order = match self.root {
Some(root) => {
// do a BFS traversal of the graph, starting with the root node
Bfs::new(&self.graph, root)
.iter(&self.graph)
.collect::<Vec<_>>()
}
None => vec![], // No nodes have been inserted, do not traverse
};
let (mut nodes, _edges) = self.graph.into_nodes_edges();
order
.into_iter()
.filter_map(move |i| nodes[i.index()].weight.take())
}
/// Return the list of directories in from-leaves-to-root order.
/// In case no elements have been inserted, returns an empty list.
///
/// panics when the specified root is not in the graph
#[instrument(level = "trace", skip_all)]
pub fn drain_leaves_to_root(self) -> impl Iterator<Item = Directory> {
let order = match self.root {
Some(root) => {
// do a DFS Post-Order traversal of the graph, starting with the root node
DfsPostOrder::new(&self.graph, root)
.iter(&self.graph)
.collect::<Vec<_>>()
}
None => vec![], // No nodes have been inserted, do not traverse
};
let (mut nodes, _edges) = self.graph.into_nodes_edges();
order
.into_iter()
.filter_map(move |i| nodes[i.index()].weight.take())
}
}
#[cfg(test)]
mod tests {
use crate::fixtures::{DIRECTORY_A, DIRECTORY_B, DIRECTORY_C};
use crate::{Directory, Node};
use rstest::rstest;
use std::sync::LazyLock;
use super::{DirectoryGraph, LeavesToRootValidator, RootToLeavesValidator};
pub static BROKEN_PARENT_DIRECTORY: LazyLock<Directory> = LazyLock::new(|| {
Directory::try_from_iter([(
"foo".try_into().unwrap(),
Node::Directory {
digest: DIRECTORY_A.digest(),
size: DIRECTORY_A.size() + 42, // wrong!
},
)])
.unwrap()
});
#[rstest]
/// Uploading an empty directory should succeed.
#[case::empty_directory(&[&*DIRECTORY_A], false, Some(vec![&*DIRECTORY_A]))]
/// Uploading A, then B (referring to A) should succeed.
#[case::simple_closure(&[&*DIRECTORY_A, &*DIRECTORY_B], false, Some(vec![&*DIRECTORY_A, &*DIRECTORY_B]))]
/// Uploading A, then A, then C (referring to A twice) should succeed.
/// We pretend to be a dumb client not deduping directories.
#[case::same_child(&[&*DIRECTORY_A, &*DIRECTORY_A, &*DIRECTORY_C], false, Some(vec![&*DIRECTORY_A, &*DIRECTORY_C]))]
/// Uploading A, then C (referring to A twice) should succeed.
#[case::same_child_dedup(&[&*DIRECTORY_A, &*DIRECTORY_C], false, Some(vec![&*DIRECTORY_A, &*DIRECTORY_C]))]
/// Uploading A, then C (referring to A twice), then B (itself referring to A) should fail during close,
/// as B itself would be left unconnected.
#[case::unconnected_node(&[&*DIRECTORY_A, &*DIRECTORY_C, &*DIRECTORY_B], false, None)]
/// Uploading B (referring to A) should fail immediately, because A was never uploaded.
#[case::dangling_pointer(&[&*DIRECTORY_B], true, None)]
/// Uploading a directory which refers to another Directory with a wrong size should fail.
#[case::wrong_size_in_parent(&[&*DIRECTORY_A, &*BROKEN_PARENT_DIRECTORY], true, None)]
fn test_uploads(
#[case] directories_to_upload: &[&Directory],
#[case] exp_fail_upload_last: bool,
#[case] exp_finalize: Option<Vec<&Directory>>, // Some(_) if finalize successful, None if not.
) {
let mut dcv = DirectoryGraph::<LeavesToRootValidator>::default();
let len_directories_to_upload = directories_to_upload.len();
for (i, d) in directories_to_upload.iter().enumerate() {
let resp = dcv.add((*d).clone());
if i == len_directories_to_upload - 1 && exp_fail_upload_last {
assert!(resp.is_err(), "expect last put to fail");
// We don't really care anymore what finalize() would return, as
// the add() failed.
return;
} else {
assert!(resp.is_ok(), "expect put to succeed");
}
}
// everything was uploaded successfully. Test finalize().
let resp = dcv
.validate()
.map(|validated| validated.drain_leaves_to_root().collect::<Vec<_>>());
match exp_finalize {
Some(directories) => {
assert_eq!(
Vec::from_iter(directories.iter().map(|e| (*e).to_owned())),
resp.expect("drain should succeed")
);
}
None => {
resp.expect_err("drain should fail");
}
}
}
#[rstest]
/// Downloading an empty directory should succeed.
#[case::empty_directory(&*DIRECTORY_A, &[&*DIRECTORY_A], false, Some(vec![&*DIRECTORY_A]))]
/// Downlading B, then A (referenced by B) should succeed.
#[case::simple_closure(&*DIRECTORY_B, &[&*DIRECTORY_B, &*DIRECTORY_A], false, Some(vec![&*DIRECTORY_A, &*DIRECTORY_B]))]
/// Downloading C (referring to A twice), then A should succeed.
#[case::same_child_dedup(&*DIRECTORY_C, &[&*DIRECTORY_C, &*DIRECTORY_A], false, Some(vec![&*DIRECTORY_A, &*DIRECTORY_C]))]
/// Downloading C, then B (both referring to A but not referring to each other) should fail immediately as B has no connection to C (the root)
#[case::unconnected_node(&*DIRECTORY_C, &[&*DIRECTORY_C, &*DIRECTORY_B], true, None)]
/// Downloading B (specified as the root) but receiving A instead should fail immediately, because A has no connection to B (the root).
#[case::dangling_pointer(&*DIRECTORY_B, &[&*DIRECTORY_A], true, None)]
/// Downloading a directory which refers to another Directory with a wrong size should fail.
#[case::wrong_size_in_parent(&*BROKEN_PARENT_DIRECTORY, &[&*BROKEN_PARENT_DIRECTORY, &*DIRECTORY_A], true, None)]
fn test_downloads(
#[case] root: &Directory,
#[case] directories_to_upload: &[&Directory],
#[case] exp_fail_upload_last: bool,
#[case] exp_finalize: Option<Vec<&Directory>>, // Some(_) if finalize successful, None if not.
) {
let mut dcv =
DirectoryGraph::with_order(RootToLeavesValidator::new_with_root_digest(root.digest()));
let len_directories_to_upload = directories_to_upload.len();
for (i, d) in directories_to_upload.iter().enumerate() {
let resp = dcv.add((*d).clone());
if i == len_directories_to_upload - 1 && exp_fail_upload_last {
assert!(resp.is_err(), "expect last put to fail");
// We don't really care anymore what finalize() would return, as
// the add() failed.
return;
} else {
assert!(resp.is_ok(), "expect put to succeed");
}
}
// everything was uploaded successfully. Test finalize().
let resp = dcv
.validate()
.map(|validated| validated.drain_leaves_to_root().collect::<Vec<_>>());
match exp_finalize {
Some(directories) => {
assert_eq!(
Vec::from_iter(directories.iter().map(|e| (*e).to_owned())),
resp.expect("drain should succeed")
);
}
None => {
resp.expect_err("drain should fail");
}
}
}
}

View file

@ -0,0 +1,129 @@
use std::sync::Arc;
use url::Url;
use crate::composition::{
with_registry, CompositionContext, DeserializeWithRegistry, ServiceBuilder, REG,
};
use super::DirectoryService;
/// Constructs a new instance of a [DirectoryService] from an URI.
///
/// The following URIs are supported:
/// - `memory:`
/// Uses a in-memory implementation.
/// from one process at the same time.
/// - `redb:`
/// Uses a in-memory redb implementation.
/// - `redb:///absolute/path/to/somewhere`
/// Uses redb, using a path on the disk for persistency. Can be only opened
/// from one process at the same time.
/// - `grpc+unix:///absolute/path/to/somewhere`
/// Connects to a local snix-store gRPC service via Unix socket.
/// - `grpc+http://host:port`, `grpc+https://host:port`
/// Connects to a (remote) snix-store gRPC service.
pub async fn from_addr(
uri: &str,
) -> Result<Arc<dyn DirectoryService>, Box<dyn std::error::Error + Send + Sync>> {
#[allow(unused_mut)]
let mut url = Url::parse(uri)
.map_err(|e| crate::Error::StorageError(format!("unable to parse url: {}", e)))?;
let directory_service_config = with_registry(&REG, || {
<DeserializeWithRegistry<Box<dyn ServiceBuilder<Output = dyn DirectoryService>>>>::try_from(
url,
)
})?
.0;
let directory_service = directory_service_config
.build("anonymous", &CompositionContext::blank(&REG))
.await?;
Ok(directory_service)
}
#[cfg(test)]
mod tests {
use std::sync::LazyLock;
use super::from_addr;
use rstest::rstest;
use tempfile::TempDir;
static TMPDIR_REDB_1: LazyLock<TempDir> = LazyLock::new(|| TempDir::new().unwrap());
static TMPDIR_REDB_2: LazyLock<TempDir> = LazyLock::new(|| TempDir::new().unwrap());
#[rstest]
/// This uses an unsupported scheme.
#[case::unsupported_scheme("http://foo.example/test", false)]
/// This correctly sets the scheme, and doesn't set a path.
#[case::memory_valid("memory://", true)]
/// This sets a memory url host to `foo`
#[case::memory_invalid_host("memory://foo", false)]
/// This sets a memory url path to "/", which is invalid.
#[case::memory_invalid_root_path("memory:///", false)]
/// This sets a memory url path to "/foo", which is invalid.
#[case::memory_invalid_root_path_foo("memory:///foo", false)]
/// This configures redb in temporary mode.
#[case::redb_valid_temporary("redb://", true)]
/// This configures redb with /, which should fail.
#[case::redb_invalid_root("redb:///", false)]
/// This configures redb with a host, not path, which should fail.
#[case::redb_invalid_host("redb://foo.example", false)]
/// This configures redb with a valid path, which should succeed.
#[case::redb_valid_path(&format!("redb://{}", &TMPDIR_REDB_1.path().join("foo").to_str().unwrap()), true)]
/// This configures redb with a host, and a valid path path, which should fail.
#[case::redb_invalid_host_with_valid_path(&format!("redb://foo.example{}", &TMPDIR_REDB_2.path().join("bar").to_str().unwrap()), false)]
/// Correct scheme to connect to a unix socket.
#[case::grpc_valid_unix_socket("grpc+unix:///path/to/somewhere", true)]
/// Correct scheme for unix socket, but setting a host too, which is invalid.
#[case::grpc_invalid_unix_socket_and_host("grpc+unix://host.example/path/to/somewhere", false)]
/// Correct scheme to connect to localhost, with port 12345
#[case::grpc_valid_ipv6_localhost_port_12345("grpc+http://[::1]:12345", true)]
/// Correct scheme to connect to localhost over http, without specifying a port.
#[case::grpc_valid_http_host_without_port("grpc+http://localhost", true)]
/// Correct scheme to connect to localhost over http, without specifying a port.
#[case::grpc_valid_https_host_without_port("grpc+https://localhost", true)]
/// Correct scheme to connect to localhost over http, but with additional path, which is invalid.
#[case::grpc_invalid_host_and_path("grpc+http://localhost/some-path", false)]
/// A valid example for store composition using anonymous urls
#[cfg_attr(
feature = "xp-composition-url-refs",
case::anonymous_url_composition("cache://?near=memory://&far=memory://", true)
)]
/// Store composition with anonymous urls should fail if the feature is disabled
#[cfg_attr(
not(feature = "xp-composition-url-refs"),
case::anonymous_url_composition("cache://?near=memory://&far=memory://", false)
)]
/// A valid example for Bigtable
#[cfg_attr(
all(feature = "cloud", feature = "integration"),
case::bigtable_valid_url(
"bigtable://instance-1?project_id=project-1&table_name=table-1&family_name=cf1",
true
)
)]
/// A valid example for Bigtable, specifying a custom channel size and timeout
#[cfg_attr(
all(feature = "cloud", feature = "integration"),
case::bigtable_valid_url(
"bigtable://instance-1?project_id=project-1&table_name=table-1&family_name=cf1&channel_size=10&timeout=10",
true
)
)]
/// A invalid Bigtable example (missing fields)
#[cfg_attr(
all(feature = "cloud", feature = "integration"),
case::bigtable_invalid_url("bigtable://instance-1", false)
)]
#[tokio::test]
async fn test_from_addr_tokio(#[case] uri_str: &str, #[case] exp_succeed: bool) {
if exp_succeed {
from_addr(uri_str).await.expect("should succeed");
} else {
assert!(from_addr(uri_str).await.is_err(), "should fail");
}
}
}

View file

@ -0,0 +1,392 @@
use std::collections::HashSet;
use super::{Directory, DirectoryPutter, DirectoryService};
use crate::composition::{CompositionContext, ServiceBuilder};
use crate::proto::{self, get_directory_request::ByWhat};
use crate::{B3Digest, DirectoryError, Error};
use async_stream::try_stream;
use futures::stream::BoxStream;
use std::sync::Arc;
use tokio::spawn;
use tokio::sync::mpsc::UnboundedSender;
use tokio::task::JoinHandle;
use tokio_stream::wrappers::UnboundedReceiverStream;
use tonic::{async_trait, Code, Status};
use tracing::{instrument, warn, Instrument as _};
/// Connects to a (remote) snix-store DirectoryService over gRPC.
#[derive(Clone)]
pub struct GRPCDirectoryService<T> {
instance_name: String,
/// The internal reference to a gRPC client.
/// Cloning it is cheap, and it internally handles concurrent requests.
grpc_client: proto::directory_service_client::DirectoryServiceClient<T>,
}
impl<T> GRPCDirectoryService<T> {
/// construct a [GRPCDirectoryService] from a [proto::directory_service_client::DirectoryServiceClient].
/// panics if called outside the context of a tokio runtime.
pub fn from_client(
instance_name: String,
grpc_client: proto::directory_service_client::DirectoryServiceClient<T>,
) -> Self {
Self {
instance_name,
grpc_client,
}
}
}
#[async_trait]
impl<T> DirectoryService for GRPCDirectoryService<T>
where
T: tonic::client::GrpcService<tonic::body::BoxBody> + Send + Sync + Clone + 'static,
T::ResponseBody: tonic::codegen::Body<Data = tonic::codegen::Bytes> + Send + 'static,
<T::ResponseBody as tonic::codegen::Body>::Error: Into<tonic::codegen::StdError> + Send,
T::Future: Send,
{
#[instrument(level = "trace", skip_all, fields(directory.digest = %digest, instance_name = %self.instance_name))]
async fn get(&self, digest: &B3Digest) -> Result<Option<Directory>, crate::Error> {
// Get a new handle to the gRPC client, and copy the digest.
let mut grpc_client = self.grpc_client.clone();
let digest_cpy = digest.clone();
let message = async move {
let mut s = grpc_client
.get(proto::GetDirectoryRequest {
recursive: false,
by_what: Some(ByWhat::Digest(digest_cpy.into())),
})
.await?
.into_inner();
// Retrieve the first message only, then close the stream (we set recursive to false)
s.message().await
};
let digest = digest.clone();
match message.await {
Ok(Some(directory)) => {
// Validate the retrieved Directory indeed has the
// digest we expect it to have, to detect corruptions.
let actual_digest = directory.digest();
if actual_digest != digest {
Err(crate::Error::StorageError(format!(
"requested directory with digest {}, but got {}",
digest, actual_digest
)))
} else {
Ok(Some(directory.try_into().map_err(|_| {
Error::StorageError("invalid root digest length in response".to_string())
})?))
}
}
Ok(None) => Ok(None),
Err(e) if e.code() == Code::NotFound => Ok(None),
Err(e) => Err(crate::Error::StorageError(e.to_string())),
}
}
#[instrument(level = "trace", skip_all, fields(directory.digest = %directory.digest(), instance_name = %self.instance_name))]
async fn put(&self, directory: Directory) -> Result<B3Digest, crate::Error> {
let resp = self
.grpc_client
.clone()
.put(tokio_stream::once(proto::Directory::from(directory)))
.await;
match resp {
Ok(put_directory_resp) => Ok(put_directory_resp
.into_inner()
.root_digest
.try_into()
.map_err(|_| {
Error::StorageError("invalid root digest length in response".to_string())
})?),
Err(e) => Err(crate::Error::StorageError(e.to_string())),
}
}
#[instrument(level = "trace", skip_all, fields(directory.digest = %root_directory_digest, instance_name = %self.instance_name))]
fn get_recursive(
&self,
root_directory_digest: &B3Digest,
) -> BoxStream<'static, Result<Directory, Error>> {
let mut grpc_client = self.grpc_client.clone();
let root_directory_digest = root_directory_digest.clone();
let stream = try_stream! {
let mut stream = grpc_client
.get(proto::GetDirectoryRequest {
recursive: true,
by_what: Some(ByWhat::Digest(root_directory_digest.clone().into())),
})
.await
.map_err(|e| crate::Error::StorageError(e.to_string()))?
.into_inner();
// The Directory digests we received so far
let mut received_directory_digests: HashSet<B3Digest> = HashSet::new();
// The Directory digests we're still expecting to get sent.
let mut expected_directory_digests: HashSet<B3Digest> = HashSet::from([root_directory_digest.clone()]);
loop {
match stream.message().await {
Ok(Some(directory)) => {
// validate we actually expected that directory, and move it from expected to received.
let directory_digest = directory.digest();
let was_expected = expected_directory_digests.remove(&directory_digest);
if !was_expected {
// FUTUREWORK: dumb clients might send the same stuff twice.
// as a fallback, we might want to tolerate receiving
// it if it's in received_directory_digests (as that
// means it once was in expected_directory_digests)
Err(crate::Error::StorageError(format!(
"received unexpected directory {}",
directory_digest
)))?;
}
received_directory_digests.insert(directory_digest);
// register all children in expected_directory_digests.
for child_directory in &directory.directories {
// We ran validate() above, so we know these digests must be correct.
let child_directory_digest =
child_directory.digest.clone().try_into().unwrap();
expected_directory_digests
.insert(child_directory_digest);
}
let directory = directory.try_into()
.map_err(|e: DirectoryError| Error::StorageError(e.to_string()))?;
yield directory;
},
Ok(None) if expected_directory_digests.len() == 1 && expected_directory_digests.contains(&root_directory_digest) => {
// The root directory of the requested closure was not found, return an
// empty stream
return
}
Ok(None) => {
// The stream has ended
let diff_len = expected_directory_digests
// Account for directories which have been referenced more than once,
// but only received once since they were deduplicated
.difference(&received_directory_digests)
.count();
// If this is not empty, then the closure is incomplete
if diff_len != 0 {
Err(crate::Error::StorageError(format!(
"still expected {} directories, but got premature end of stream",
diff_len
)))?
} else {
return
}
},
Err(e) => {
Err(crate::Error::StorageError(e.to_string()))?;
},
}
}
};
Box::pin(stream)
}
#[instrument(skip_all)]
fn put_multiple_start(&self) -> Box<(dyn DirectoryPutter + 'static)> {
let (tx, rx) = tokio::sync::mpsc::unbounded_channel();
let task = spawn({
let mut grpc_client = self.grpc_client.clone();
async move {
Ok::<_, Status>(
grpc_client
.put(UnboundedReceiverStream::new(rx))
.await?
.into_inner(),
)
}
// instrument the task with the current span, this is not done by default
.in_current_span()
});
Box::new(GRPCPutter {
rq: Some((task, tx)),
})
}
}
#[derive(serde::Deserialize, Debug)]
#[serde(deny_unknown_fields)]
pub struct GRPCDirectoryServiceConfig {
url: String,
}
impl TryFrom<url::Url> for GRPCDirectoryServiceConfig {
type Error = Box<dyn std::error::Error + Send + Sync>;
fn try_from(url: url::Url) -> Result<Self, Self::Error> {
// This is normally grpc+unix for unix sockets, and grpc+http(s) for the HTTP counterparts.
// - In the case of unix sockets, there must be a path, but may not be a host.
// - In the case of non-unix sockets, there must be a host, but no path.
// Constructing the channel is handled by snix_castore::channel::from_url.
Ok(GRPCDirectoryServiceConfig {
url: url.to_string(),
})
}
}
#[async_trait]
impl ServiceBuilder for GRPCDirectoryServiceConfig {
type Output = dyn DirectoryService;
async fn build<'a>(
&'a self,
instance_name: &str,
_context: &CompositionContext,
) -> Result<Arc<dyn DirectoryService>, Box<dyn std::error::Error + Send + Sync + 'static>> {
let client = proto::directory_service_client::DirectoryServiceClient::new(
crate::tonic::channel_from_url(&self.url.parse()?).await?,
);
Ok(Arc::new(GRPCDirectoryService::from_client(
instance_name.to_string(),
client,
)))
}
}
/// Allows uploading multiple Directory messages in the same gRPC stream.
pub struct GRPCPutter {
/// Data about the current request - a handle to the task, and the tx part
/// of the channel.
/// The tx part of the pipe is used to send [proto::Directory] to the ongoing request.
/// The task will yield a [proto::PutDirectoryResponse] once the stream is closed.
#[allow(clippy::type_complexity)] // lol
rq: Option<(
JoinHandle<Result<proto::PutDirectoryResponse, Status>>,
UnboundedSender<proto::Directory>,
)>,
}
#[async_trait]
impl DirectoryPutter for GRPCPutter {
#[instrument(level = "trace", skip_all, fields(directory.digest=%directory.digest()), err)]
async fn put(&mut self, directory: Directory) -> Result<(), crate::Error> {
match self.rq {
// If we're not already closed, send the directory to directory_sender.
Some((_, ref directory_sender)) => {
if directory_sender.send(directory.into()).is_err() {
// If the channel has been prematurely closed, invoke close (so we can peek at the error code)
// That error code is much more helpful, because it
// contains the error message from the server.
self.close().await?;
}
Ok(())
}
// If self.close() was already called, we can't put again.
None => Err(Error::StorageError(
"DirectoryPutter already closed".to_string(),
)),
}
}
/// Closes the stream for sending, and returns the value.
#[instrument(level = "trace", skip_all, ret, err)]
async fn close(&mut self) -> Result<B3Digest, crate::Error> {
// get self.rq, and replace it with None.
// This ensures we can only close it once.
match std::mem::take(&mut self.rq) {
None => Err(Error::StorageError("already closed".to_string())),
Some((task, directory_sender)) => {
// close directory_sender, so blocking on task will finish.
drop(directory_sender);
let root_digest = task
.await?
.map_err(|e| Error::StorageError(e.to_string()))?
.root_digest;
root_digest.try_into().map_err(|_| {
Error::StorageError("invalid root digest length in response".to_string())
})
}
}
}
}
#[cfg(test)]
mod tests {
use std::time::Duration;
use tempfile::TempDir;
use tokio::net::UnixListener;
use tokio_retry::{strategy::ExponentialBackoff, Retry};
use tokio_stream::wrappers::UnixListenerStream;
use crate::{
directoryservice::{DirectoryService, GRPCDirectoryService, MemoryDirectoryService},
fixtures,
proto::{directory_service_client::DirectoryServiceClient, GRPCDirectoryServiceWrapper},
};
/// This ensures connecting via gRPC works as expected.
#[tokio::test]
async fn test_valid_unix_path_ping_pong() {
let tmpdir = TempDir::new().unwrap();
let socket_path = tmpdir.path().join("daemon");
let path_clone = socket_path.clone();
// Spin up a server
tokio::spawn(async {
let uds = UnixListener::bind(path_clone).unwrap();
let uds_stream = UnixListenerStream::new(uds);
// spin up a new server
let mut server = tonic::transport::Server::builder();
let router = server.add_service(
crate::proto::directory_service_server::DirectoryServiceServer::new(
GRPCDirectoryServiceWrapper::new(
Box::<MemoryDirectoryService>::default() as Box<dyn DirectoryService>
),
),
);
router.serve_with_incoming(uds_stream).await
});
// wait for the socket to be created
Retry::spawn(
ExponentialBackoff::from_millis(20).max_delay(Duration::from_secs(10)),
|| async {
if socket_path.exists() {
Ok(())
} else {
Err(())
}
},
)
.await
.expect("failed to wait for socket");
// prepare a client
let grpc_client = {
let url = url::Url::parse(&format!(
"grpc+unix://{}?wait-connect=1",
socket_path.display()
))
.expect("must parse");
let client = DirectoryServiceClient::new(
crate::tonic::channel_from_url(&url)
.await
.expect("must succeed"),
);
GRPCDirectoryService::from_client("test-instance".into(), client)
};
assert!(grpc_client
.get(&fixtures::DIRECTORY_A.digest())
.await
.expect("must not fail")
.is_none())
}
}

View file

@ -0,0 +1,102 @@
use crate::{B3Digest, Error};
use futures::stream::BoxStream;
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::RwLock;
use tonic::async_trait;
use tracing::{instrument, warn};
use super::utils::traverse_directory;
use super::{Directory, DirectoryPutter, DirectoryService, SimplePutter};
use crate::composition::{CompositionContext, ServiceBuilder};
use crate::proto;
#[derive(Clone, Default)]
pub struct MemoryDirectoryService {
instance_name: String,
db: Arc<RwLock<HashMap<B3Digest, proto::Directory>>>,
}
#[async_trait]
impl DirectoryService for MemoryDirectoryService {
#[instrument(skip(self, digest), err, fields(directory.digest = %digest, instance_name=%self.instance_name))]
async fn get(&self, digest: &B3Digest) -> Result<Option<Directory>, Error> {
let db = self.db.read().await;
match db.get(digest) {
// The directory was not found, return
None => Ok(None),
// The directory was found, try to parse the data as Directory message
Some(directory) => {
// Validate the retrieved Directory indeed has the
// digest we expect it to have, to detect corruptions.
let actual_digest = directory.digest();
if actual_digest != *digest {
return Err(Error::StorageError(format!(
"requested directory with digest {}, but got {}",
digest, actual_digest
)));
}
Ok(Some(directory.clone().try_into().map_err(|e| {
crate::Error::StorageError(format!("corrupted directory: {}", e))
})?))
}
}
}
#[instrument(skip(self, directory), err, fields(directory.digest = %directory.digest(), instance_name=%self.instance_name))]
async fn put(&self, directory: Directory) -> Result<B3Digest, Error> {
let digest = directory.digest();
// store it
let mut db = self.db.write().await;
db.insert(digest.clone(), directory.into());
Ok(digest)
}
#[instrument(skip_all, fields(directory.digest = %root_directory_digest, instance_name=%self.instance_name))]
fn get_recursive(
&self,
root_directory_digest: &B3Digest,
) -> BoxStream<'static, Result<Directory, Error>> {
traverse_directory(self.clone(), root_directory_digest)
}
#[instrument(skip_all, fields(instance_name=%self.instance_name))]
fn put_multiple_start(&self) -> Box<(dyn DirectoryPutter + '_)> {
Box::new(SimplePutter::new(self))
}
}
#[derive(serde::Deserialize, Debug)]
#[serde(deny_unknown_fields)]
pub struct MemoryDirectoryServiceConfig {}
impl TryFrom<url::Url> for MemoryDirectoryServiceConfig {
type Error = Box<dyn std::error::Error + Send + Sync>;
fn try_from(url: url::Url) -> Result<Self, Self::Error> {
// memory doesn't support host or path in the URL.
if url.has_host() || !url.path().is_empty() {
return Err(Error::StorageError("invalid url".to_string()).into());
}
Ok(MemoryDirectoryServiceConfig {})
}
}
#[async_trait]
impl ServiceBuilder for MemoryDirectoryServiceConfig {
type Output = dyn DirectoryService;
async fn build<'a>(
&'a self,
instance_name: &str,
_context: &CompositionContext,
) -> Result<Arc<dyn DirectoryService>, Box<dyn std::error::Error + Send + Sync + 'static>> {
Ok(Arc::new(MemoryDirectoryService {
instance_name: instance_name.to_string(),
db: Default::default(),
}))
}
}

View file

@ -0,0 +1,121 @@
use crate::composition::{Registry, ServiceBuilder};
use crate::{B3Digest, Directory, Error};
use auto_impl::auto_impl;
use futures::stream::BoxStream;
use tonic::async_trait;
mod combinators;
mod directory_graph;
mod from_addr;
mod grpc;
mod memory;
mod object_store;
mod order_validator;
mod redb;
mod simple_putter;
#[cfg(test)]
pub mod tests;
mod traverse;
mod utils;
pub use self::combinators::{Cache, CacheConfig};
pub use self::directory_graph::{DirectoryGraph, ValidatedDirectoryGraph};
pub use self::from_addr::from_addr;
pub use self::grpc::{GRPCDirectoryService, GRPCDirectoryServiceConfig};
pub use self::memory::{MemoryDirectoryService, MemoryDirectoryServiceConfig};
pub use self::object_store::{ObjectStoreDirectoryService, ObjectStoreDirectoryServiceConfig};
pub use self::order_validator::{LeavesToRootValidator, OrderValidator, RootToLeavesValidator};
pub use self::redb::{RedbDirectoryService, RedbDirectoryServiceConfig};
pub use self::simple_putter::SimplePutter;
pub use self::traverse::descend_to;
pub use self::utils::traverse_directory;
#[cfg(feature = "cloud")]
mod bigtable;
#[cfg(feature = "cloud")]
pub use self::bigtable::{BigtableDirectoryService, BigtableParameters};
/// The base trait all Directory services need to implement.
/// This is a simple get and put of [Directory], returning their
/// digest.
#[async_trait]
#[auto_impl(&, &mut, Arc, Box)]
pub trait DirectoryService: Send + Sync {
/// Looks up a single Directory message by its digest.
/// The returned Directory message *must* be valid.
/// In case the directory is not found, Ok(None) is returned.
///
/// It is okay for certain implementations to only allow retrieval of
/// Directory digests that are at the "root", aka the last element that's
/// sent to a DirectoryPutter. This makes sense for implementations bundling
/// closures of directories together in batches.
async fn get(&self, digest: &B3Digest) -> Result<Option<Directory>, Error>;
/// Uploads a single Directory message, and returns the calculated
/// digest, or an error. An error *must* also be returned if the message is
/// not valid.
async fn put(&self, directory: Directory) -> Result<B3Digest, Error>;
/// Looks up a closure of [Directory].
/// Ideally this would be a `impl Stream<Item = Result<Directory, Error>>`,
/// and we'd be able to add a default implementation for it here, but
/// we can't have that yet.
///
/// This returns a pinned, boxed stream. The pinning allows for it to be polled easily,
/// and the box allows different underlying stream implementations to be returned since
/// Rust doesn't support this as a generic in traits yet. This is the same thing that
/// [async_trait] generates, but for streams instead of futures.
///
/// The individually returned Directory messages *must* be valid.
/// Directories are sent in an order from the root to the leaves, so that
/// the receiving side can validate each message to be a connected to the root
/// that has initially been requested.
///
/// In case the directory can not be found, this should return an empty stream.
fn get_recursive(
&self,
root_directory_digest: &B3Digest,
) -> BoxStream<'static, Result<Directory, Error>>;
/// Allows persisting a closure of [Directory], which is a graph of
/// connected Directory messages.
fn put_multiple_start(&self) -> Box<dyn DirectoryPutter + '_>;
}
/// Provides a handle to put a closure of connected [Directory] elements.
///
/// The consumer can periodically call [DirectoryPutter::put], starting from the
/// leaves. Once the root is reached, [DirectoryPutter::close] can be called to
/// retrieve the root digest (or an error).
///
/// DirectoryPutters might be created without a single [DirectoryPutter::put],
/// and then dropped without calling [DirectoryPutter::close],
/// for example when ingesting a path that ends up not pointing to a directory,
/// but a single file or symlink.
#[async_trait]
pub trait DirectoryPutter: Send {
/// Put a individual [Directory] into the store.
/// Error semantics and behaviour is up to the specific implementation of
/// this trait.
/// Due to bursting, the returned error might refer to an object previously
/// sent via `put`.
async fn put(&mut self, directory: Directory) -> Result<(), Error>;
/// Close the stream, and wait for any errors.
/// If there's been any invalid Directory message uploaded, and error *must*
/// be returned.
async fn close(&mut self) -> Result<B3Digest, Error>;
}
/// Registers the builtin DirectoryService implementations with the registry
pub(crate) fn register_directory_services(reg: &mut Registry) {
reg.register::<Box<dyn ServiceBuilder<Output = dyn DirectoryService>>, super::directoryservice::ObjectStoreDirectoryServiceConfig>("objectstore");
reg.register::<Box<dyn ServiceBuilder<Output = dyn DirectoryService>>, super::directoryservice::MemoryDirectoryServiceConfig>("memory");
reg.register::<Box<dyn ServiceBuilder<Output = dyn DirectoryService>>, super::directoryservice::CacheConfig>("cache");
reg.register::<Box<dyn ServiceBuilder<Output = dyn DirectoryService>>, super::directoryservice::GRPCDirectoryServiceConfig>("grpc");
reg.register::<Box<dyn ServiceBuilder<Output = dyn DirectoryService>>, super::directoryservice::RedbDirectoryServiceConfig>("redb");
#[cfg(feature = "cloud")]
{
reg.register::<Box<dyn ServiceBuilder<Output = dyn DirectoryService>>, super::directoryservice::BigtableParameters>("bigtable");
}
}

View file

@ -0,0 +1,353 @@
use std::collections::hash_map;
use std::collections::HashMap;
use std::sync::Arc;
use data_encoding::HEXLOWER;
use futures::future::Either;
use futures::stream::BoxStream;
use futures::SinkExt;
use futures::StreamExt;
use futures::TryFutureExt;
use futures::TryStreamExt;
use object_store::{path::Path, ObjectStore};
use prost::Message;
use tokio::io::AsyncWriteExt;
use tokio_util::codec::LengthDelimitedCodec;
use tonic::async_trait;
use tracing::{instrument, trace, warn, Level};
use url::Url;
use super::{
Directory, DirectoryGraph, DirectoryPutter, DirectoryService, LeavesToRootValidator,
RootToLeavesValidator,
};
use crate::composition::{CompositionContext, ServiceBuilder};
use crate::{proto, B3Digest, Error, Node};
/// Stores directory closures in an object store.
/// Notably, this makes use of the option to disallow accessing child directories except when
/// fetching them recursively via the top-level directory, since all batched writes
/// (using `put_multiple_start`) are stored in a single object.
/// Directories are stored in a length-delimited format with a 1MiB limit. The length field is a
/// u32 and the directories are stored in root-to-leaves topological order, the same way they will
/// be returned to the client in get_recursive.
#[derive(Clone)]
pub struct ObjectStoreDirectoryService {
instance_name: String,
object_store: Arc<dyn ObjectStore>,
base_path: Path,
}
#[instrument(level=Level::TRACE, skip_all,fields(base_path=%base_path,blob.digest=%digest),ret(Display))]
fn derive_dirs_path(base_path: &Path, digest: &B3Digest) -> Path {
base_path
.child("dirs")
.child("b3")
.child(HEXLOWER.encode(&digest.as_slice()[..2]))
.child(HEXLOWER.encode(digest.as_slice()))
}
#[allow(clippy::identity_op)]
const MAX_FRAME_LENGTH: usize = 1 * 1024 * 1024 * 1000; // 1 MiB
//
impl ObjectStoreDirectoryService {
/// Constructs a new [ObjectStoreDirectoryService] from a [Url] supported by
/// [object_store].
/// Any path suffix becomes the base path of the object store.
/// additional options, the same as in [object_store::parse_url_opts] can
/// be passed.
pub fn parse_url_opts<I, K, V>(url: &Url, options: I) -> Result<Self, object_store::Error>
where
I: IntoIterator<Item = (K, V)>,
K: AsRef<str>,
V: Into<String>,
{
let (object_store, path) = object_store::parse_url_opts(url, options)?;
Ok(Self {
instance_name: "root".into(),
object_store: Arc::new(object_store),
base_path: path,
})
}
/// Like [Self::parse_url_opts], except without the options.
pub fn parse_url(url: &Url) -> Result<Self, object_store::Error> {
Self::parse_url_opts(url, Vec::<(String, String)>::new())
}
pub fn new(instance_name: String, object_store: Arc<dyn ObjectStore>, base_path: Path) -> Self {
Self {
instance_name,
object_store,
base_path,
}
}
}
#[async_trait]
impl DirectoryService for ObjectStoreDirectoryService {
/// This is the same steps as for get_recursive anyways, so we just call get_recursive and
/// return the first element of the stream and drop the request.
#[instrument(level = "trace", skip_all, fields(directory.digest = %digest, instance_name = %self.instance_name))]
async fn get(&self, digest: &B3Digest) -> Result<Option<Directory>, Error> {
self.get_recursive(digest).take(1).next().await.transpose()
}
#[instrument(level = "trace", skip_all, fields(directory.digest = %directory.digest(), instance_name = %self.instance_name))]
async fn put(&self, directory: Directory) -> Result<B3Digest, Error> {
// Ensure the directory doesn't contain other directory children
if directory
.nodes()
.any(|(_, e)| matches!(e, Node::Directory { .. }))
{
return Err(Error::InvalidRequest(
"only put_multiple_start is supported by the ObjectStoreDirectoryService for directories with children".into(),
));
}
let mut handle = self.put_multiple_start();
handle.put(directory).await?;
handle.close().await
}
#[instrument(level = "trace", skip_all, fields(directory.digest = %root_directory_digest, instance_name = %self.instance_name))]
fn get_recursive(
&self,
root_directory_digest: &B3Digest,
) -> BoxStream<'static, Result<Directory, Error>> {
// Check that we are not passing on bogus from the object store to the client, and that the
// trust chain from the root digest to the leaves is intact
let mut order_validator =
RootToLeavesValidator::new_with_root_digest(root_directory_digest.clone());
let dir_path = derive_dirs_path(&self.base_path, root_directory_digest);
let object_store = self.object_store.clone();
Box::pin(
(async move {
let stream = match object_store.get(&dir_path).await {
Ok(v) => v.into_stream(),
Err(object_store::Error::NotFound { .. }) => {
return Ok(Either::Left(futures::stream::empty()))
}
Err(e) => return Err(std::io::Error::from(e).into()),
};
// get a reader of the response body.
let r = tokio_util::io::StreamReader::new(stream);
let decompressed_stream = async_compression::tokio::bufread::ZstdDecoder::new(r);
// the subdirectories are stored in a length delimited format
let delimited_stream = LengthDelimitedCodec::builder()
.max_frame_length(MAX_FRAME_LENGTH)
.length_field_type::<u32>()
.new_read(decompressed_stream);
let dirs_stream = delimited_stream.map_err(Error::from).and_then(move |buf| {
futures::future::ready((|| {
let mut hasher = blake3::Hasher::new();
let digest: B3Digest = hasher.update(&buf).finalize().as_bytes().into();
// Ensure to only decode the directory objects whose digests we trust
if !order_validator.digest_allowed(&digest) {
return Err(crate::Error::StorageError(format!(
"received unexpected directory {}",
digest
)));
}
let directory = proto::Directory::decode(&*buf).map_err(|e| {
warn!("unable to parse directory {}: {}", digest, e);
Error::StorageError(e.to_string())
})?;
let directory = Directory::try_from(directory).map_err(|e| {
warn!("unable to convert directory {}: {}", digest, e);
Error::StorageError(e.to_string())
})?;
// Allow the children to appear next
order_validator.add_directory_unchecked(&directory);
Ok(directory)
})())
});
Ok(Either::Right(dirs_stream))
})
.try_flatten_stream(),
)
}
#[instrument(skip_all)]
fn put_multiple_start(&self) -> Box<(dyn DirectoryPutter + '_)>
where
Self: Clone,
{
Box::new(ObjectStoreDirectoryPutter::new(
self.object_store.clone(),
&self.base_path,
))
}
}
#[derive(serde::Deserialize)]
#[serde(deny_unknown_fields)]
pub struct ObjectStoreDirectoryServiceConfig {
object_store_url: String,
#[serde(default)]
object_store_options: HashMap<String, String>,
}
impl TryFrom<url::Url> for ObjectStoreDirectoryServiceConfig {
type Error = Box<dyn std::error::Error + Send + Sync>;
fn try_from(url: url::Url) -> Result<Self, Self::Error> {
// We need to convert the URL to string, strip the prefix there, and then
// parse it back as url, as Url::set_scheme() rejects some of the transitions we want to do.
let trimmed_url = {
let s = url.to_string();
let mut url = Url::parse(
s.strip_prefix("objectstore+")
.ok_or(Error::StorageError("Missing objectstore uri".into()))?,
)?;
// trim the query pairs, they might contain credentials or local settings we don't want to send as-is.
url.set_query(None);
url
};
Ok(ObjectStoreDirectoryServiceConfig {
object_store_url: trimmed_url.into(),
object_store_options: url
.query_pairs()
.into_iter()
.map(|(k, v)| (k.to_string(), v.to_string()))
.collect(),
})
}
}
#[async_trait]
impl ServiceBuilder for ObjectStoreDirectoryServiceConfig {
type Output = dyn DirectoryService;
async fn build<'a>(
&'a self,
instance_name: &str,
_context: &CompositionContext,
) -> Result<Arc<dyn DirectoryService>, Box<dyn std::error::Error + Send + Sync + 'static>> {
let opts = {
let mut opts: HashMap<&str, _> = self
.object_store_options
.iter()
.map(|(k, v)| (k.as_str(), v.as_str()))
.collect();
if let hash_map::Entry::Vacant(e) =
opts.entry(object_store::ClientConfigKey::UserAgent.as_ref())
{
e.insert(crate::USER_AGENT);
}
opts
};
let (object_store, path) =
object_store::parse_url_opts(&self.object_store_url.parse()?, opts)?;
Ok(Arc::new(ObjectStoreDirectoryService::new(
instance_name.to_string(),
Arc::new(object_store),
path,
)))
}
}
struct ObjectStoreDirectoryPutter<'a> {
object_store: Arc<dyn ObjectStore>,
base_path: &'a Path,
directory_validator: Option<DirectoryGraph<LeavesToRootValidator>>,
}
impl<'a> ObjectStoreDirectoryPutter<'a> {
fn new(object_store: Arc<dyn ObjectStore>, base_path: &'a Path) -> Self {
Self {
object_store,
base_path,
directory_validator: Some(Default::default()),
}
}
}
#[async_trait]
impl DirectoryPutter for ObjectStoreDirectoryPutter<'_> {
#[instrument(level = "trace", skip_all, fields(directory.digest=%directory.digest()), err)]
async fn put(&mut self, directory: Directory) -> Result<(), Error> {
match self.directory_validator {
None => return Err(Error::StorageError("already closed".to_string())),
Some(ref mut validator) => {
validator
.add(directory)
.map_err(|e| Error::StorageError(e.to_string()))?;
}
}
Ok(())
}
#[instrument(level = "trace", skip_all, ret, err)]
async fn close(&mut self) -> Result<B3Digest, Error> {
let validator = match self.directory_validator.take() {
None => return Err(Error::InvalidRequest("already closed".to_string())),
Some(validator) => validator,
};
// retrieve the validated directories.
// It is important that they are in topological order (root first),
// since that's how we want to retrieve them from the object store in the end.
let directories = validator
.validate()
.map_err(|e| Error::StorageError(e.to_string()))?
.drain_root_to_leaves()
.collect::<Vec<_>>();
// Get the root digest
let root_digest = directories
.first()
.ok_or_else(|| Error::InvalidRequest("got no directories".to_string()))?
.digest();
let dir_path = derive_dirs_path(self.base_path, &root_digest);
match self.object_store.head(&dir_path).await {
// directory tree already exists, nothing to do
Ok(_) => {
trace!("directory tree already exists");
}
// directory tree does not yet exist, compress and upload.
Err(object_store::Error::NotFound { .. }) => {
trace!("uploading directory tree");
let object_store_writer =
object_store::buffered::BufWriter::new(self.object_store.clone(), dir_path);
let compressed_writer =
async_compression::tokio::write::ZstdEncoder::new(object_store_writer);
let mut directories_sink = LengthDelimitedCodec::builder()
.max_frame_length(MAX_FRAME_LENGTH)
.length_field_type::<u32>()
.new_write(compressed_writer);
for directory in directories {
directories_sink
.send(proto::Directory::from(directory).encode_to_vec().into())
.await?;
}
let mut compressed_writer = directories_sink.into_inner();
compressed_writer.shutdown().await?;
}
// other error
Err(err) => Err(std::io::Error::from(err))?,
}
Ok(root_digest)
}
}

View file

@ -0,0 +1,188 @@
use std::collections::HashSet;
use tracing::warn;
use super::Directory;
use crate::{B3Digest, Node};
pub trait OrderValidator {
/// Update the order validator's state with the directory
/// Returns whether the directory was accepted
fn add_directory(&mut self, directory: &Directory) -> bool;
}
#[derive(Default)]
/// Validates that newly introduced directories are already referenced from
/// the root via existing directories.
/// Commonly used when _receiving_ a directory closure _from_ a store.
pub struct RootToLeavesValidator {
/// Only used to remember the root node, not for validation
expected_digests: HashSet<B3Digest>,
}
impl RootToLeavesValidator {
/// Use to validate the root digest of the closure upon receiving the first
/// directory.
pub fn new_with_root_digest(root_digest: B3Digest) -> Self {
let mut this = Self::default();
this.expected_digests.insert(root_digest);
this
}
/// Checks if a directory is in-order based on its digest.
///
/// Particularly useful when receiving directories in canonical protobuf
/// encoding, so that directories not connected to the root can be rejected
/// without parsing.
///
/// After parsing, the directory must be passed to `add_directory_unchecked`
/// to add its children to the list of expected digests.
pub fn digest_allowed(&self, digest: &B3Digest) -> bool {
self.expected_digests.is_empty() // we don't know the root node; allow any
|| self.expected_digests.contains(digest)
}
/// Update the order validator's state with the directory
pub fn add_directory_unchecked(&mut self, directory: &Directory) {
// No initial root was specified and this is the first directory
if self.expected_digests.is_empty() {
self.expected_digests.insert(directory.digest());
}
// Allow the children to appear next
for (_, node) in directory.nodes() {
if let Node::Directory { digest, .. } = node {
self.expected_digests.insert(digest.clone());
}
}
}
}
impl OrderValidator for RootToLeavesValidator {
fn add_directory(&mut self, directory: &Directory) -> bool {
if !self.digest_allowed(&directory.digest()) {
return false;
}
self.add_directory_unchecked(directory);
true
}
}
#[derive(Default)]
/// Validates that newly uploaded directories only reference directories which
/// have already been introduced.
/// Commonly used when _uploading_ a directory closure _to_ a store.
pub struct LeavesToRootValidator {
/// This is empty in the beginning, and gets filled as leaves and intermediates are
/// inserted
allowed_references: HashSet<B3Digest>,
}
impl OrderValidator for LeavesToRootValidator {
fn add_directory(&mut self, directory: &Directory) -> bool {
let digest = directory.digest();
for (_, node) in directory.nodes() {
if let Node::Directory {
digest: subdir_node_digest,
..
} = node
{
if !self.allowed_references.contains(subdir_node_digest) {
warn!(
directory.digest = %digest,
subdirectory.digest = %subdir_node_digest,
"unexpected directory reference"
);
return false;
}
}
}
self.allowed_references.insert(digest.clone());
true
}
}
#[cfg(test)]
mod tests {
use super::{LeavesToRootValidator, RootToLeavesValidator};
use crate::directoryservice::order_validator::OrderValidator;
use crate::directoryservice::Directory;
use crate::fixtures::{DIRECTORY_A, DIRECTORY_B, DIRECTORY_C};
use rstest::rstest;
#[rstest]
/// Uploading an empty directory should succeed.
#[case::empty_directory(&[&*DIRECTORY_A], false)]
/// Uploading A, then B (referring to A) should succeed.
#[case::simple_closure(&[&*DIRECTORY_A, &*DIRECTORY_B], false)]
/// Uploading A, then A, then C (referring to A twice) should succeed.
/// We pretend to be a dumb client not deduping directories.
#[case::same_child(&[&*DIRECTORY_A, &*DIRECTORY_A, &*DIRECTORY_C], false)]
/// Uploading A, then C (referring to A twice) should succeed.
#[case::same_child_dedup(&[&*DIRECTORY_A, &*DIRECTORY_C], false)]
/// Uploading A, then C (referring to A twice), then B (itself referring to A) should fail during close,
/// as B itself would be left unconnected.
#[case::unconnected_node(&[&*DIRECTORY_A, &*DIRECTORY_C, &*DIRECTORY_B], false)]
/// Uploading B (referring to A) should fail immediately, because A was never uploaded.
#[case::dangling_pointer(&[&*DIRECTORY_B], true)]
fn leaves_to_root(
#[case] directories_to_upload: &[&Directory],
#[case] exp_fail_upload_last: bool,
) {
let mut validator = LeavesToRootValidator::default();
let len_directories_to_upload = directories_to_upload.len();
for (i, d) in directories_to_upload.iter().enumerate() {
let resp = validator.add_directory(d);
if i == len_directories_to_upload - 1 && exp_fail_upload_last {
assert!(!resp, "expect last put to fail");
// We don't really care anymore what finalize() would return, as
// the add() failed.
return;
} else {
assert!(resp, "expect put to succeed");
}
}
}
#[rstest]
/// Downloading an empty directory should succeed.
#[case::empty_directory(&*DIRECTORY_A, &[&*DIRECTORY_A], false)]
/// Downlading B, then A (referenced by B) should succeed.
#[case::simple_closure(&*DIRECTORY_B, &[&*DIRECTORY_B, &*DIRECTORY_A], false)]
/// Downloading C (referring to A twice), then A should succeed.
#[case::same_child_dedup(&*DIRECTORY_C, &[&*DIRECTORY_C, &*DIRECTORY_A], false)]
/// Downloading C, then B (both referring to A but not referring to each other) should fail immediately as B has no connection to C (the root)
#[case::unconnected_node(&*DIRECTORY_C, &[&*DIRECTORY_C, &*DIRECTORY_B], true)]
/// Downloading B (specified as the root) but receiving A instead should fail immediately, because A has no connection to B (the root).
#[case::dangling_pointer(&*DIRECTORY_B, &[&*DIRECTORY_A], true)]
fn root_to_leaves(
#[case] root: &Directory,
#[case] directories_to_upload: &[&Directory],
#[case] exp_fail_upload_last: bool,
) {
let mut validator = RootToLeavesValidator::new_with_root_digest(root.digest());
let len_directories_to_upload = directories_to_upload.len();
for (i, d) in directories_to_upload.iter().enumerate() {
let resp1 = validator.digest_allowed(&d.digest());
let resp = validator.add_directory(d);
assert_eq!(
resp1, resp,
"digest_allowed should return the same value as add_directory"
);
if i == len_directories_to_upload - 1 && exp_fail_upload_last {
assert!(!resp, "expect last put to fail");
// We don't really care anymore what finalize() would return, as
// the add() failed.
return;
} else {
assert!(resp, "expect put to succeed");
}
}
}
}

View file

@ -0,0 +1,309 @@
use futures::stream::BoxStream;
use prost::Message;
use redb::{Database, TableDefinition};
use std::{path::PathBuf, sync::Arc};
use tonic::async_trait;
use tracing::{instrument, warn};
use super::{
traverse_directory, Directory, DirectoryGraph, DirectoryPutter, DirectoryService,
LeavesToRootValidator,
};
use crate::{
composition::{CompositionContext, ServiceBuilder},
proto, B3Digest, Error,
};
const DIRECTORY_TABLE: TableDefinition<[u8; B3Digest::LENGTH], Vec<u8>> =
TableDefinition::new("directory");
#[derive(Clone)]
pub struct RedbDirectoryService {
instance_name: String,
// We wrap the db in an Arc to be able to move it into spawn_blocking,
// as discussed in https://github.com/cberner/redb/issues/789
db: Arc<Database>,
}
impl RedbDirectoryService {
/// Constructs a new instance using the specified filesystem path for
/// storage.
pub async fn new(instance_name: String, path: PathBuf) -> Result<Self, Error> {
if path == PathBuf::from("/") {
return Err(Error::StorageError(
"cowardly refusing to open / with redb".to_string(),
));
}
let db = tokio::task::spawn_blocking(|| -> Result<_, redb::Error> {
let db = redb::Database::create(path)?;
create_schema(&db)?;
Ok(db)
})
.await??;
Ok(Self {
instance_name,
db: Arc::new(db),
})
}
/// Constructs a new instance using the in-memory backend.
pub fn new_temporary() -> Result<Self, Error> {
let db =
redb::Database::builder().create_with_backend(redb::backends::InMemoryBackend::new())?;
create_schema(&db)?;
Ok(Self {
instance_name: "root".into(),
db: Arc::new(db),
})
}
}
/// Ensures all tables are present.
/// Opens a write transaction and calls open_table on DIRECTORY_TABLE, which will
/// create it if not present.
fn create_schema(db: &redb::Database) -> Result<(), redb::Error> {
let txn = db.begin_write()?;
txn.open_table(DIRECTORY_TABLE)?;
txn.commit()?;
Ok(())
}
#[async_trait]
impl DirectoryService for RedbDirectoryService {
#[instrument(skip(self, digest), fields(directory.digest = %digest, instance_name = %self.instance_name))]
async fn get(&self, digest: &B3Digest) -> Result<Option<Directory>, Error> {
let db = self.db.clone();
// Retrieves the protobuf-encoded Directory for the corresponding digest.
let db_get_resp = tokio::task::spawn_blocking({
let digest = *digest.as_ref();
move || -> Result<_, redb::Error> {
let txn = db.begin_read()?;
let table = txn.open_table(DIRECTORY_TABLE)?;
Ok(table.get(digest)?)
}
})
.await?
.map_err(|e| {
warn!(err=%e, "failed to retrieve Directory");
Error::StorageError("failed to retrieve Directory".to_string())
})?;
// The Directory was not found, return None.
let directory_data = match db_get_resp {
None => return Ok(None),
Some(d) => d,
};
// We check that the digest of the retrieved Directory matches the expected digest.
let actual_digest = blake3::hash(directory_data.value().as_slice());
if actual_digest.as_bytes() != digest.as_slice() {
warn!(directory.actual_digest=%actual_digest, "requested Directory got the wrong digest");
return Err(Error::StorageError(
"requested Directory got the wrong digest".to_string(),
));
}
// Attempt to decode the retrieved protobuf-encoded Directory, returning a parsing error if
// the decoding failed.
let directory = match proto::Directory::decode(&*directory_data.value()) {
Ok(dir) => {
// The returned Directory must be valid.
dir.try_into().map_err(|e| {
warn!(err=%e, "Directory failed validation");
Error::StorageError("Directory failed validation".to_string())
})?
}
Err(e) => {
warn!(err=%e, "failed to parse Directory");
return Err(Error::StorageError("failed to parse Directory".to_string()));
}
};
Ok(Some(directory))
}
#[instrument(skip(self, directory), fields(directory.digest = %directory.digest(), instance_name = %self.instance_name))]
async fn put(&self, directory: Directory) -> Result<B3Digest, Error> {
tokio::task::spawn_blocking({
let db = self.db.clone();
move || {
let digest = directory.digest();
// Store the directory in the table.
let txn = db.begin_write()?;
{
let mut table = txn.open_table(DIRECTORY_TABLE)?;
table.insert(
digest.as_ref(),
proto::Directory::from(directory).encode_to_vec(),
)?;
}
txn.commit()?;
Ok(digest)
}
})
.await?
}
#[instrument(skip_all, fields(directory.digest = %root_directory_digest, instance_name = %self.instance_name))]
fn get_recursive(
&self,
root_directory_digest: &B3Digest,
) -> BoxStream<'static, Result<Directory, Error>> {
// FUTUREWORK: Ideally we should have all of the directory traversing happen in a single
// redb transaction to avoid constantly closing and opening new transactions for the
// database.
traverse_directory(self.clone(), root_directory_digest)
}
#[instrument(skip_all)]
fn put_multiple_start(&self) -> Box<dyn DirectoryPutter + '_> {
Box::new(RedbDirectoryPutter {
db: &self.db,
directory_validator: Some(Default::default()),
})
}
}
pub struct RedbDirectoryPutter<'a> {
db: &'a Database,
/// The directories (inside the directory validator) that we insert later,
/// or None, if they were already inserted.
directory_validator: Option<DirectoryGraph<LeavesToRootValidator>>,
}
#[async_trait]
impl DirectoryPutter for RedbDirectoryPutter<'_> {
#[instrument(level = "trace", skip_all, fields(directory.digest=%directory.digest()), err)]
async fn put(&mut self, directory: Directory) -> Result<(), Error> {
match self.directory_validator {
None => return Err(Error::StorageError("already closed".to_string())),
Some(ref mut validator) => {
validator
.add(directory)
.map_err(|e| Error::StorageError(e.to_string()))?;
}
}
Ok(())
}
#[instrument(level = "trace", skip_all, ret, err)]
async fn close(&mut self) -> Result<B3Digest, Error> {
match self.directory_validator.take() {
None => Err(Error::StorageError("already closed".to_string())),
Some(validator) => {
// Insert all directories as a batch.
tokio::task::spawn_blocking({
let txn = self.db.begin_write()?;
move || {
// Retrieve the validated directories.
let directories = validator
.validate()
.map_err(|e| Error::StorageError(e.to_string()))?
.drain_leaves_to_root()
.collect::<Vec<_>>();
// Get the root digest, which is at the end (cf. insertion order)
let root_digest = directories
.last()
.ok_or_else(|| Error::StorageError("got no directories".to_string()))?
.digest();
{
let mut table = txn.open_table(DIRECTORY_TABLE)?;
// Looping over all the verified directories, queuing them up for a
// batch insertion.
for directory in directories {
table.insert(
directory.digest().as_ref(),
proto::Directory::from(directory).encode_to_vec(),
)?;
}
}
txn.commit()?;
Ok(root_digest)
}
})
.await?
}
}
}
}
#[derive(serde::Deserialize)]
#[serde(deny_unknown_fields)]
pub struct RedbDirectoryServiceConfig {
is_temporary: bool,
#[serde(default)]
/// required when is_temporary = false
path: Option<PathBuf>,
}
impl TryFrom<url::Url> for RedbDirectoryServiceConfig {
type Error = Box<dyn std::error::Error + Send + Sync>;
fn try_from(url: url::Url) -> Result<Self, Self::Error> {
// redb doesn't support host, and a path can be provided (otherwise
// it'll live in memory only).
if url.has_host() {
return Err(Error::StorageError("no host allowed".to_string()).into());
}
Ok(if url.path().is_empty() {
RedbDirectoryServiceConfig {
is_temporary: true,
path: None,
}
} else {
RedbDirectoryServiceConfig {
is_temporary: false,
path: Some(url.path().into()),
}
})
}
}
#[async_trait]
impl ServiceBuilder for RedbDirectoryServiceConfig {
type Output = dyn DirectoryService;
async fn build<'a>(
&'a self,
instance_name: &str,
_context: &CompositionContext,
) -> Result<Arc<dyn DirectoryService>, Box<dyn std::error::Error + Send + Sync + 'static>> {
match self {
RedbDirectoryServiceConfig {
is_temporary: true,
path: None,
} => Ok(Arc::new(RedbDirectoryService::new_temporary()?)),
RedbDirectoryServiceConfig {
is_temporary: true,
path: Some(_),
} => Err(Error::StorageError(
"Temporary RedbDirectoryService can not have path".into(),
)
.into()),
RedbDirectoryServiceConfig {
is_temporary: false,
path: None,
} => Err(Error::StorageError("RedbDirectoryService is missing path".into()).into()),
RedbDirectoryServiceConfig {
is_temporary: false,
path: Some(path),
} => Ok(Arc::new(
RedbDirectoryService::new(instance_name.to_string(), path.into()).await?,
)),
}
}
}

View file

@ -0,0 +1,83 @@
use super::DirectoryPutter;
use super::DirectoryService;
use super::{Directory, DirectoryGraph, LeavesToRootValidator};
use crate::B3Digest;
use crate::Error;
use tonic::async_trait;
use tracing::instrument;
use tracing::warn;
/// This is an implementation of DirectoryPutter that simply
/// inserts individual Directory messages one by one, on close, after
/// they successfully validated.
pub struct SimplePutter<'a, DS> {
directory_service: &'a DS,
directory_validator: Option<DirectoryGraph<LeavesToRootValidator>>,
}
impl<'a, DS> SimplePutter<'a, DS>
where
DS: DirectoryService,
{
pub fn new(directory_service: &'a DS) -> Self {
Self {
directory_service,
directory_validator: Some(Default::default()),
}
}
}
#[async_trait]
impl<DS: DirectoryService + 'static> DirectoryPutter for SimplePutter<'_, DS> {
#[instrument(level = "trace", skip_all, fields(directory.digest=%directory.digest()), err)]
async fn put(&mut self, directory: Directory) -> Result<(), Error> {
match self.directory_validator {
None => return Err(Error::StorageError("already closed".to_string())),
Some(ref mut validator) => {
validator
.add(directory)
.map_err(|e| Error::StorageError(e.to_string()))?;
}
}
Ok(())
}
#[instrument(level = "trace", skip_all, ret, err)]
async fn close(&mut self) -> Result<B3Digest, Error> {
match self.directory_validator.take() {
None => Err(Error::InvalidRequest("already closed".to_string())),
Some(validator) => {
// retrieve the validated directories.
let directories = validator
.validate()
.map_err(|e| Error::StorageError(e.to_string()))?
.drain_leaves_to_root()
.collect::<Vec<_>>();
// Get the root digest, which is at the end (cf. insertion order)
let root_digest = directories
.last()
.ok_or_else(|| Error::InvalidRequest("got no directories".to_string()))?
.digest();
// call an individual put for each directory and await the insertion.
for directory in directories {
let exp_digest = directory.digest();
let actual_digest = self.directory_service.put(directory).await?;
// ensure the digest the backend told us matches our expectations.
if exp_digest != actual_digest {
warn!(directory.digest_expected=%exp_digest, directory.digest_actual=%actual_digest, "unexpected digest");
return Err(Error::StorageError(
"got unexpected digest from backend during put".into(),
));
}
}
Ok(root_digest)
}
}
}
}

View file

@ -0,0 +1,237 @@
//! This contains test scenarios that a given [DirectoryService] needs to pass.
//! We use [rstest] and [rstest_reuse] to provide all services we want to test
//! against, and then apply this template to all test functions.
use futures::StreamExt;
use rstest::*;
use rstest_reuse::{self, *};
use super::DirectoryService;
use crate::directoryservice;
use crate::fixtures::{DIRECTORY_A, DIRECTORY_B, DIRECTORY_C, DIRECTORY_D};
use crate::{Directory, Node};
mod utils;
use self::utils::make_grpc_directory_service_client;
// TODO: add tests doing individual puts of a closure, then doing a get_recursive
// (and figure out semantics if necessary)
/// This produces a template, which will be applied to all individual test functions.
/// See https://github.com/la10736/rstest/issues/130#issuecomment-968864832
#[template]
#[rstest]
#[case::grpc(make_grpc_directory_service_client().await)]
#[case::memory(directoryservice::from_addr("memory://").await.unwrap())]
#[case::redb(directoryservice::from_addr("redb://").await.unwrap())]
#[case::objectstore(directoryservice::from_addr("objectstore+memory://").await.unwrap())]
#[cfg_attr(all(feature = "cloud", feature = "integration"), case::bigtable(directoryservice::from_addr("bigtable://instance-1?project_id=project-1&table_name=table-1&family_name=cf1").await.unwrap()))]
pub fn directory_services(#[case] directory_service: impl DirectoryService) {}
/// Ensures asking for a directory that doesn't exist returns a Ok(None), and a get_recursive
/// returns an empty stream.
#[apply(directory_services)]
#[tokio::test]
async fn test_non_exist(directory_service: impl DirectoryService) {
// single get
assert_eq!(Ok(None), directory_service.get(&DIRECTORY_A.digest()).await);
// recursive get
assert_eq!(
Vec::<Result<Directory, crate::Error>>::new(),
directory_service
.get_recursive(&DIRECTORY_A.digest())
.collect::<Vec<Result<Directory, crate::Error>>>()
.await
);
}
/// Putting a single directory into the store, and then getting it out both via
/// `.get[_recursive]` should work.
#[apply(directory_services)]
#[tokio::test]
async fn put_get(directory_service: impl DirectoryService) {
// Insert a Directory.
let digest = directory_service.put(DIRECTORY_A.clone()).await.unwrap();
assert_eq!(DIRECTORY_A.digest(), digest, "returned digest must match");
// single get
assert_eq!(
Some(DIRECTORY_A.clone()),
directory_service.get(&DIRECTORY_A.digest()).await.unwrap()
);
// recursive get
assert_eq!(
vec![Ok(DIRECTORY_A.clone())],
directory_service
.get_recursive(&DIRECTORY_A.digest())
.collect::<Vec<_>>()
.await
);
}
/// Putting a directory closure should work, and it should be possible to get
/// back the root node both via .get[_recursive]. We don't check `.get` for the
/// leaf node is possible, as it's Ok for stores to not support that.
#[apply(directory_services)]
#[tokio::test]
async fn put_get_multiple_success(directory_service: impl DirectoryService) {
// Insert a Directory closure.
let mut handle = directory_service.put_multiple_start();
handle.put(DIRECTORY_A.clone()).await.unwrap();
handle.put(DIRECTORY_C.clone()).await.unwrap();
let root_digest = handle.close().await.unwrap();
assert_eq!(
DIRECTORY_C.digest(),
root_digest,
"root digest should match"
);
// Get the root node.
assert_eq!(
Some(DIRECTORY_C.clone()),
directory_service.get(&DIRECTORY_C.digest()).await.unwrap()
);
// Get the closure. Ensure it's sent from the root to the leaves.
assert_eq!(
vec![Ok(DIRECTORY_C.clone()), Ok(DIRECTORY_A.clone())],
directory_service
.get_recursive(&DIRECTORY_C.digest())
.collect::<Vec<_>>()
.await
)
}
/// Puts a directory closure, but simulates a dumb client not deduplicating
/// its list. Ensure we still only get back a deduplicated list.
#[apply(directory_services)]
#[tokio::test]
async fn put_get_multiple_dedup(directory_service: impl DirectoryService) {
// Insert a Directory closure.
let mut handle = directory_service.put_multiple_start();
handle.put(DIRECTORY_A.clone()).await.unwrap();
handle.put(DIRECTORY_A.clone()).await.unwrap();
handle.put(DIRECTORY_C.clone()).await.unwrap();
let root_digest = handle.close().await.unwrap();
assert_eq!(
DIRECTORY_C.digest(),
root_digest,
"root digest should match"
);
// Ensure the returned closure only contains `DIRECTORY_A` once.
assert_eq!(
vec![Ok(DIRECTORY_C.clone()), Ok(DIRECTORY_A.clone())],
directory_service
.get_recursive(&DIRECTORY_C.digest())
.collect::<Vec<_>>()
.await
)
}
/// This tests the insertion and retrieval of a closure which contains a duplicated directory
/// (DIRECTORY_A, which is an empty directory), once in the root, and once in a subdir.
#[apply(directory_services)]
#[tokio::test]
async fn put_get_foo(directory_service: impl DirectoryService) {
let mut handle = directory_service.put_multiple_start();
handle.put(DIRECTORY_A.clone()).await.unwrap();
handle.put(DIRECTORY_B.clone()).await.unwrap();
handle.put(DIRECTORY_D.clone()).await.unwrap();
let root_digest = handle.close().await.unwrap();
assert_eq!(
DIRECTORY_D.digest(),
root_digest,
"root digest should match"
);
// Ensure we can get the closure back out of the service, and it is returned in a valid order
// (there are multiple valid possibilities)
let retrieved_closure = directory_service
.get_recursive(&DIRECTORY_D.digest())
.collect::<Vec<_>>()
.await;
let valid_closures = [
vec![
Ok(DIRECTORY_D.clone()),
Ok(DIRECTORY_B.clone()),
Ok(DIRECTORY_A.clone()),
],
vec![
Ok(DIRECTORY_D.clone()),
Ok(DIRECTORY_A.clone()),
Ok(DIRECTORY_B.clone()),
],
];
if !valid_closures.contains(&retrieved_closure) {
panic!("invalid closure returned: {:?}", retrieved_closure);
}
}
/// Uploading A, then C (referring to A twice), then B (itself referring to A) should fail during close,
/// as B itself would be left unconnected.
#[apply(directory_services)]
#[tokio::test]
async fn upload_reject_unconnected(directory_service: impl DirectoryService) {
let mut handle = directory_service.put_multiple_start();
handle.put(DIRECTORY_A.clone()).await.unwrap();
handle.put(DIRECTORY_C.clone()).await.unwrap();
handle.put(DIRECTORY_B.clone()).await.unwrap();
assert!(
handle.close().await.is_err(),
"closing handle should fail, as B would be left unconnected"
);
}
/// Uploading a directory that refers to another directory not yet uploaded
/// should fail.
#[apply(directory_services)]
#[tokio::test]
async fn upload_reject_dangling_pointer(directory_service: impl DirectoryService) {
let mut handle = directory_service.put_multiple_start();
// We insert DIRECTORY_A on its own, to ensure the check runs for the
// individual put_multiple session, not across the global DirectoryService
// contents.
directory_service.put(DIRECTORY_A.clone()).await.unwrap();
// DIRECTORY_B refers to DIRECTORY_A, which is not uploaded with this handle.
if handle.put(DIRECTORY_B.clone()).await.is_ok() {
assert!(
handle.close().await.is_err(),
"when succeeding put, close must fail"
)
}
}
/// Try uploading a Directory that refers to a previously-uploaded directory.
/// Both pass their isolated validation, but the size field in the parent is wrong.
/// This should be rejected.
#[apply(directory_services)]
#[tokio::test]
async fn upload_reject_wrong_size(directory_service: impl DirectoryService) {
let wrong_parent_directory = Directory::try_from_iter([(
"foo".try_into().unwrap(),
Node::Directory {
digest: DIRECTORY_A.digest(),
size: DIRECTORY_A.size() + 42, // wrong!
},
)])
.unwrap();
// Now upload both. Ensure it either fails during the second put, or during
// the close.
let mut handle = directory_service.put_multiple_start();
handle.put(DIRECTORY_A.clone()).await.unwrap();
if handle.put(wrong_parent_directory).await.is_ok() {
assert!(
handle.close().await.is_err(),
"when second put succeeds, close must fail"
)
}
}

View file

@ -0,0 +1,48 @@
use crate::directoryservice::{DirectoryService, GRPCDirectoryService};
use crate::proto::directory_service_client::DirectoryServiceClient;
use crate::proto::GRPCDirectoryServiceWrapper;
use crate::{
directoryservice::MemoryDirectoryService,
proto::directory_service_server::DirectoryServiceServer,
};
use hyper_util::rt::TokioIo;
use tonic::transport::{Endpoint, Server, Uri};
/// Constructs and returns a gRPC DirectoryService.
/// The server part is a [MemoryDirectoryService], exposed via the
/// [GRPCDirectoryServiceWrapper], and connected through a DuplexStream.
pub async fn make_grpc_directory_service_client() -> Box<dyn DirectoryService> {
let (left, right) = tokio::io::duplex(64);
// spin up a server, which will only connect once, to the left side.
tokio::spawn(async {
let directory_service =
Box::<MemoryDirectoryService>::default() as Box<dyn DirectoryService>;
let mut server = Server::builder();
let router = server.add_service(DirectoryServiceServer::new(
GRPCDirectoryServiceWrapper::new(directory_service),
));
router
.serve_with_incoming(tokio_stream::once(Ok::<_, std::io::Error>(left)))
.await
});
// Create a client, connecting to the right side. The URI is unused.
let mut maybe_right = Some(right);
Box::new(GRPCDirectoryService::from_client(
"root".into(),
DirectoryServiceClient::new(
Endpoint::try_from("http://[::]:50051")
.unwrap()
.connect_with_connector(tower::service_fn(move |_: Uri| {
let right = maybe_right.take().unwrap();
async move { Ok::<_, std::io::Error>(TokioIo::new(right)) }
}))
.await
.unwrap(),
),
))
}

View file

@ -0,0 +1,175 @@
use crate::{directoryservice::DirectoryService, Error, Node, Path};
use tracing::{instrument, warn};
/// This descends from a (root) node to the given (sub)path, returning the Node
/// at that path, or none, if there's nothing at that path.
#[instrument(skip(directory_service, path), fields(%path))]
pub async fn descend_to<DS>(
directory_service: DS,
root_node: Node,
path: impl AsRef<Path> + std::fmt::Display,
) -> Result<Option<Node>, Error>
where
DS: DirectoryService,
{
let mut parent_node = root_node;
for component in path.as_ref().components_bytes() {
match parent_node {
Node::File { .. } | Node::Symlink { .. } => {
// There's still some path left, but the parent node is no directory.
// This means the path doesn't exist, as we can't reach it.
return Ok(None);
}
Node::Directory { digest, .. } => {
// fetch the linked node from the directory_service.
let directory = directory_service.get(&digest).await?.ok_or_else(|| {
// If we didn't get the directory node that's linked, that's a store inconsistency, bail out!
warn!(directory.digest = %digest, "directory does not exist");
Error::StorageError(format!("directory {} does not exist", digest))
})?;
// look for the component in the [Directory].
if let Some((_child_name, child_node)) = directory
.into_nodes()
.find(|(name, _node)| name.as_ref() == component)
{
// child node found, update prev_node to that and continue.
parent_node = child_node.clone();
} else {
// child node not found means there's no such element inside the directory.
return Ok(None);
};
}
}
}
// We traversed the entire path, so this must be the node.
Ok(Some(parent_node))
}
#[cfg(test)]
mod tests {
use crate::{
directoryservice,
fixtures::{DIRECTORY_COMPLICATED, DIRECTORY_WITH_KEEP, EMPTY_BLOB_DIGEST},
Node, PathBuf,
};
use super::descend_to;
#[tokio::test]
async fn test_descend_to() {
let directory_service = directoryservice::from_addr("memory://").await.unwrap();
let mut handle = directory_service.put_multiple_start();
handle
.put(DIRECTORY_WITH_KEEP.clone())
.await
.expect("must succeed");
handle
.put(DIRECTORY_COMPLICATED.clone())
.await
.expect("must succeed");
handle.close().await.expect("must upload");
// construct the node for DIRECTORY_COMPLICATED
let node_directory_complicated = Node::Directory {
digest: DIRECTORY_COMPLICATED.digest(),
size: DIRECTORY_COMPLICATED.size(),
};
// construct the node for DIRECTORY_COMPLICATED
let node_directory_with_keep = Node::Directory {
digest: DIRECTORY_WITH_KEEP.digest(),
size: DIRECTORY_WITH_KEEP.size(),
};
// construct the node for the .keep file
let node_file_keep = Node::File {
digest: EMPTY_BLOB_DIGEST.clone(),
size: 0,
executable: false,
};
// traversal to an empty subpath should return the root node.
{
let resp = descend_to(
&directory_service,
node_directory_complicated.clone(),
"".parse::<PathBuf>().unwrap(),
)
.await
.expect("must succeed");
assert_eq!(Some(node_directory_complicated.clone()), resp);
}
// traversal to `keep` should return the node for DIRECTORY_WITH_KEEP
{
let resp = descend_to(
&directory_service,
node_directory_complicated.clone(),
"keep".parse::<PathBuf>().unwrap(),
)
.await
.expect("must succeed");
assert_eq!(Some(node_directory_with_keep), resp);
}
// traversal to `keep/.keep` should return the node for the .keep file
{
let resp = descend_to(
&directory_service,
node_directory_complicated.clone(),
"keep/.keep".parse::<PathBuf>().unwrap(),
)
.await
.expect("must succeed");
assert_eq!(Some(node_file_keep.clone()), resp);
}
// traversal to `void` should return None (doesn't exist)
{
let resp = descend_to(
&directory_service,
node_directory_complicated.clone(),
"void".parse::<PathBuf>().unwrap(),
)
.await
.expect("must succeed");
assert_eq!(None, resp);
}
// traversal to `v/oid` should return None (doesn't exist)
{
let resp = descend_to(
&directory_service,
node_directory_complicated.clone(),
"v/oid".parse::<PathBuf>().unwrap(),
)
.await
.expect("must succeed");
assert_eq!(None, resp);
}
// traversal to `keep/.keep/404` should return None (the path can't be
// reached, as keep/.keep already is a file)
{
let resp = descend_to(
&directory_service,
node_directory_complicated.clone(),
"keep/.keep/foo".parse::<PathBuf>().unwrap(),
)
.await
.expect("must succeed");
assert_eq!(None, resp);
}
}
}

View file

@ -0,0 +1,75 @@
use super::Directory;
use super::DirectoryService;
use crate::B3Digest;
use crate::Error;
use crate::Node;
use async_stream::try_stream;
use futures::stream::BoxStream;
use std::collections::{HashSet, VecDeque};
use tracing::instrument;
use tracing::warn;
/// Traverses a [Directory] from the root to the children.
///
/// This is mostly BFS, but directories are only returned once.
#[instrument(skip(directory_service))]
pub fn traverse_directory<'a, DS: DirectoryService + 'static>(
directory_service: DS,
root_directory_digest: &B3Digest,
) -> BoxStream<'a, Result<Directory, Error>> {
// The list of all directories that still need to be traversed. The next
// element is picked from the front, new elements are enqueued at the
// back.
let mut worklist_directory_digests: VecDeque<B3Digest> =
VecDeque::from([root_directory_digest.clone()]);
// The list of directory digests already sent to the consumer.
// We omit sending the same directories multiple times.
let mut sent_directory_digests: HashSet<B3Digest> = HashSet::new();
let root_directory_digest = root_directory_digest.clone();
Box::pin(try_stream! {
while let Some(current_directory_digest) = worklist_directory_digests.pop_front() {
let current_directory = match directory_service.get(&current_directory_digest).await.map_err(|e| {
warn!("failed to look up directory");
Error::StorageError(format!(
"unable to look up directory {}: {}",
current_directory_digest, e
))
})? {
// the root node of the requested closure was not found, return an empty list
None if current_directory_digest == root_directory_digest => break,
// if a child directory of the closure is not there, we have an inconsistent store!
None => {
warn!("directory {} does not exist", current_directory_digest);
Err(Error::StorageError(format!(
"directory {} does not exist",
current_directory_digest
)))?;
break;
}
Some(dir) => dir,
};
// We're about to send this directory, so let's avoid sending it again if a
// descendant has it.
sent_directory_digests.insert(current_directory_digest);
// enqueue all child directory digests to the work queue, as
// long as they're not part of the worklist or already sent.
// This panics if the digest looks invalid, it's supposed to be checked first.
for (_, child_directory_node) in current_directory.nodes() {
if let Node::Directory{digest: child_digest, ..} = child_directory_node {
if worklist_directory_digests.contains(child_digest)
|| sent_directory_digests.contains(child_digest)
{
continue;
}
worklist_directory_digests.push_back(child_digest.clone());
}
}
yield current_directory;
}
})
}

142
snix/castore/src/errors.rs Normal file
View file

@ -0,0 +1,142 @@
use bstr::ByteSlice;
use thiserror::Error;
use tokio::task::JoinError;
use tonic::Status;
use crate::{
path::{PathComponent, PathComponentError},
SymlinkTargetError,
};
/// Errors related to communication with the store.
#[derive(Debug, Error, PartialEq)]
pub enum Error {
#[error("invalid request: {0}")]
InvalidRequest(String),
#[error("internal storage error: {0}")]
StorageError(String),
}
/// Errors that occur during construction of [crate::Node]
#[derive(Debug, thiserror::Error, PartialEq)]
pub enum ValidateNodeError {
/// Invalid digest length encountered
#[error("invalid digest length: {0}")]
InvalidDigestLen(usize),
/// Invalid symlink target
#[error("Invalid symlink target: {0}")]
InvalidSymlinkTarget(SymlinkTargetError),
}
impl From<crate::digests::Error> for ValidateNodeError {
fn from(e: crate::digests::Error) -> Self {
match e {
crate::digests::Error::InvalidDigestLen(n) => ValidateNodeError::InvalidDigestLen(n),
}
}
}
/// Errors that can occur when populating [crate::Directory] messages,
/// or parsing [crate::proto::Directory]
#[derive(Debug, thiserror::Error, PartialEq)]
pub enum DirectoryError {
/// Multiple elements with the same name encountered
#[error("{:?} is a duplicate name", .0)]
DuplicateName(PathComponent),
/// Node failed validation
#[error("invalid node with name {}: {:?}", .0.as_bstr(), .1.to_string())]
InvalidNode(bytes::Bytes, ValidateNodeError),
#[error("Total size exceeds u64::MAX")]
SizeOverflow,
/// Invalid name encountered
#[error("Invalid name: {0}")]
InvalidName(PathComponentError),
/// This can occur if a protobuf node with a name is passed where we expect
/// it to be anonymous.
#[error("Name is set when it shouldn't")]
NameInAnonymousNode,
/// Elements are not in sorted order. Can only happen on protos
#[error("{:?} is not sorted", .0.as_bstr())]
WrongSorting(bytes::Bytes),
/// This can only happen if there's an unknown node type (on protos)
#[error("No node set")]
NoNodeSet,
}
impl From<JoinError> for Error {
fn from(value: JoinError) -> Self {
Error::StorageError(value.to_string())
}
}
impl From<Error> for Status {
fn from(value: Error) -> Self {
match value {
Error::InvalidRequest(msg) => Status::invalid_argument(msg),
Error::StorageError(msg) => Status::data_loss(format!("storage error: {}", msg)),
}
}
}
impl From<crate::tonic::Error> for Error {
fn from(value: crate::tonic::Error) -> Self {
Self::StorageError(value.to_string())
}
}
impl From<redb::Error> for Error {
fn from(value: redb::Error) -> Self {
Error::StorageError(value.to_string())
}
}
impl From<redb::DatabaseError> for Error {
fn from(value: redb::DatabaseError) -> Self {
Error::StorageError(value.to_string())
}
}
impl From<redb::TableError> for Error {
fn from(value: redb::TableError) -> Self {
Error::StorageError(value.to_string())
}
}
impl From<redb::TransactionError> for Error {
fn from(value: redb::TransactionError) -> Self {
Error::StorageError(value.to_string())
}
}
impl From<redb::StorageError> for Error {
fn from(value: redb::StorageError) -> Self {
Error::StorageError(value.to_string())
}
}
impl From<redb::CommitError> for Error {
fn from(value: redb::CommitError) -> Self {
Error::StorageError(value.to_string())
}
}
impl From<std::io::Error> for Error {
fn from(value: std::io::Error) -> Self {
if value.kind() == std::io::ErrorKind::InvalidInput {
Error::InvalidRequest(value.to_string())
} else {
Error::StorageError(value.to_string())
}
}
}
// TODO: this should probably go somewhere else?
impl From<Error> for std::io::Error {
fn from(value: Error) -> Self {
match value {
Error::InvalidRequest(msg) => Self::new(std::io::ErrorKind::InvalidInput, msg),
Error::StorageError(msg) => Self::new(std::io::ErrorKind::Other, msg),
}
}
}

View file

@ -0,0 +1,120 @@
use bytes::Bytes;
use std::sync::LazyLock;
use crate::{B3Digest, Directory, Node};
pub const HELLOWORLD_BLOB_CONTENTS: &[u8] = b"Hello World!";
pub const EMPTY_BLOB_CONTENTS: &[u8] = b"";
pub static DUMMY_DIGEST: LazyLock<B3Digest> = LazyLock::new(|| (&[0u8; 32]).into());
pub static DUMMY_DIGEST_2: LazyLock<B3Digest> = LazyLock::new(|| {
let mut u = [0u8; 32];
u[0] = 0x10;
(&u).into()
});
pub static DUMMY_DATA_1: LazyLock<Bytes> = LazyLock::new(|| vec![0x01, 0x02, 0x03].into());
pub static DUMMY_DATA_2: LazyLock<Bytes> = LazyLock::new(|| vec![0x04, 0x05].into());
pub static HELLOWORLD_BLOB_DIGEST: LazyLock<B3Digest> =
LazyLock::new(|| blake3::hash(HELLOWORLD_BLOB_CONTENTS).as_bytes().into());
pub static EMPTY_BLOB_DIGEST: LazyLock<B3Digest> =
LazyLock::new(|| blake3::hash(EMPTY_BLOB_CONTENTS).as_bytes().into());
// 2 bytes
pub static BLOB_A: LazyLock<Bytes> = LazyLock::new(|| vec![0x00, 0x01].into());
pub static BLOB_A_DIGEST: LazyLock<B3Digest> =
LazyLock::new(|| blake3::hash(&BLOB_A).as_bytes().into());
// 1MB
pub static BLOB_B: LazyLock<Bytes> =
LazyLock::new(|| (0..255).collect::<Vec<u8>>().repeat(4 * 1024).into());
pub static BLOB_B_DIGEST: LazyLock<B3Digest> =
LazyLock::new(|| blake3::hash(&BLOB_B).as_bytes().into());
// Directories
pub static DIRECTORY_WITH_KEEP: LazyLock<Directory> = LazyLock::new(|| {
Directory::try_from_iter([(
".keep".try_into().unwrap(),
Node::File {
digest: EMPTY_BLOB_DIGEST.clone(),
size: 0,
executable: false,
},
)])
.unwrap()
});
pub static DIRECTORY_COMPLICATED: LazyLock<Directory> = LazyLock::new(|| {
Directory::try_from_iter([
(
"keep".try_into().unwrap(),
Node::Directory {
digest: DIRECTORY_WITH_KEEP.digest(),
size: DIRECTORY_WITH_KEEP.size(),
},
),
(
".keep".try_into().unwrap(),
Node::File {
digest: EMPTY_BLOB_DIGEST.clone(),
size: 0,
executable: false,
},
),
(
"aa".try_into().unwrap(),
Node::Symlink {
target: "/nix/store/somewhereelse".try_into().unwrap(),
},
),
])
.unwrap()
});
pub static DIRECTORY_A: LazyLock<Directory> = LazyLock::new(Directory::new);
pub static DIRECTORY_B: LazyLock<Directory> = LazyLock::new(|| {
Directory::try_from_iter([(
"a".try_into().unwrap(),
Node::Directory {
digest: DIRECTORY_A.digest(),
size: DIRECTORY_A.size(),
},
)])
.unwrap()
});
pub static DIRECTORY_C: LazyLock<Directory> = LazyLock::new(|| {
Directory::try_from_iter([
(
"a".try_into().unwrap(),
Node::Directory {
digest: DIRECTORY_A.digest(),
size: DIRECTORY_A.size(),
},
),
(
"a'".try_into().unwrap(),
Node::Directory {
digest: DIRECTORY_A.digest(),
size: DIRECTORY_A.size(),
},
),
])
.unwrap()
});
pub static DIRECTORY_D: LazyLock<Directory> = LazyLock::new(|| {
Directory::try_from_iter([
(
"a".try_into().unwrap(),
Node::Directory {
digest: DIRECTORY_A.digest(),
size: DIRECTORY_A.size(),
},
),
(
"b".try_into().unwrap(),
Node::Directory {
digest: DIRECTORY_B.digest(),
size: DIRECTORY_B.size(),
},
),
])
.unwrap()
});

View file

@ -0,0 +1,29 @@
#![allow(clippy::unnecessary_cast)] // libc::S_IFDIR is u32 on Linux and u16 on MacOS
use fuse_backend_rs::abi::fuse_abi::Attr;
/// The [Attr] describing the root
pub const ROOT_FILE_ATTR: Attr = Attr {
ino: fuse_backend_rs::api::filesystem::ROOT_ID,
size: 0,
blksize: 1024,
blocks: 0,
mode: libc::S_IFDIR as u32 | 0o555,
atime: 0,
mtime: 0,
ctime: 0,
atimensec: 0,
mtimensec: 0,
ctimensec: 0,
nlink: 0,
uid: 0,
gid: 0,
rdev: 0,
flags: 0,
#[cfg(target_os = "macos")]
crtime: 0,
#[cfg(target_os = "macos")]
crtimensec: 0,
#[cfg(target_os = "macos")]
padding: 0,
};

View file

@ -0,0 +1,137 @@
use std::{io, path::Path, sync::Arc};
use fuse_backend_rs::{api::filesystem::FileSystem, transport::FuseSession};
use parking_lot::Mutex;
use threadpool::ThreadPool;
use tracing::{error, instrument};
#[cfg(test)]
mod tests;
struct FuseServer<FS>
where
FS: FileSystem + Sync + Send,
{
server: Arc<fuse_backend_rs::api::server::Server<Arc<FS>>>,
channel: fuse_backend_rs::transport::FuseChannel,
}
#[cfg(target_os = "macos")]
const BADFD: libc::c_int = libc::EBADF;
#[cfg(target_os = "linux")]
const BADFD: libc::c_int = libc::EBADFD;
impl<FS> FuseServer<FS>
where
FS: FileSystem + Sync + Send,
{
fn start(&mut self) -> io::Result<()> {
while let Some((reader, writer)) = self
.channel
.get_request()
.map_err(|_| io::Error::from_raw_os_error(libc::EINVAL))?
{
if let Err(e) = self
.server
.handle_message(reader, writer.into(), None, None)
{
match e {
// This indicates the session has been shut down.
fuse_backend_rs::Error::EncodeMessage(e) if e.raw_os_error() == Some(BADFD) => {
break;
}
error => {
error!(?error, "failed to handle fuse request");
continue;
}
}
}
}
Ok(())
}
}
/// Starts a [FileSystem] with the specified number of threads, and provides
/// functions to unmount, and wait for it to have completed.
#[derive(Clone)]
pub struct FuseDaemon {
session: Arc<Mutex<FuseSession>>,
threads: Arc<ThreadPool>,
}
impl FuseDaemon {
#[instrument(skip(fs, mountpoint), fields(mountpoint=?mountpoint), err)]
pub fn new<FS, P>(
fs: FS,
mountpoint: P,
num_threads: usize,
allow_other: bool,
) -> Result<Self, io::Error>
where
FS: FileSystem + Sync + Send + 'static,
P: AsRef<Path> + std::fmt::Debug,
{
let server = Arc::new(fuse_backend_rs::api::server::Server::new(Arc::new(fs)));
let mut session = FuseSession::new(mountpoint.as_ref(), "snix-store", "", true)
.map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
#[cfg(target_os = "linux")]
session.set_allow_other(allow_other);
session
.mount()
.map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
// construct a thread pool
let threads = threadpool::Builder::new()
.num_threads(num_threads)
.thread_name("fuse_server".to_string())
.build();
for _ in 0..num_threads {
// for each thread requested, create and start a FuseServer accepting requests.
let mut server = FuseServer {
server: server.clone(),
channel: session
.new_channel()
.map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?,
};
threads.execute(move || {
let _ = server.start();
});
}
Ok(FuseDaemon {
session: Arc::new(Mutex::new(session)),
threads: Arc::new(threads),
})
}
/// Waits for all threads to finish.
#[instrument(skip_all)]
pub fn wait(&self) {
self.threads.join()
}
/// Send the unmount command, and waits for all threads to finish.
#[instrument(skip_all, err)]
pub fn unmount(&self) -> Result<(), io::Error> {
// Send the unmount command.
self.session
.lock()
.umount()
.map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
self.wait();
Ok(())
}
}
impl Drop for FuseDaemon {
fn drop(&mut self) {
if let Err(error) = self.unmount() {
error!(?error, "failed to unmont fuse filesystem")
}
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,207 @@
use std::{collections::HashMap, sync::Arc};
use super::inodes::{DirectoryInodeData, InodeData};
use crate::B3Digest;
/// InodeTracker keeps track of inodes, stores data being these inodes and deals
/// with inode allocation.
pub struct InodeTracker {
data: HashMap<u64, Arc<InodeData>>,
// lookup table for blobs by their B3Digest
blob_digest_to_inode: HashMap<B3Digest, u64>,
// lookup table for symlinks by their target
symlink_target_to_inode: HashMap<bytes::Bytes, u64>,
// lookup table for directories by their B3Digest.
// Note the corresponding directory may not be present in data yet.
directory_digest_to_inode: HashMap<B3Digest, u64>,
// the next inode to allocate
next_inode: u64,
}
impl Default for InodeTracker {
fn default() -> Self {
Self {
data: Default::default(),
blob_digest_to_inode: Default::default(),
symlink_target_to_inode: Default::default(),
directory_digest_to_inode: Default::default(),
next_inode: 2,
}
}
}
impl InodeTracker {
// Retrieves data for a given inode, if it exists.
pub fn get(&self, ino: u64) -> Option<Arc<InodeData>> {
self.data.get(&ino).cloned()
}
// Replaces data for a given inode.
// Panics if the inode doesn't already exist.
pub fn replace(&mut self, ino: u64, data: Arc<InodeData>) {
if self.data.insert(ino, data).is_none() {
panic!("replace called on unknown inode");
}
}
// Stores data and returns the inode for it.
// In case an inode has already been allocated for the same data, that inode
// is returned, otherwise a new one is allocated.
// In case data is a [InodeData::Directory], inodes for all items are looked
// up
pub fn put(&mut self, data: InodeData) -> u64 {
match data {
InodeData::Regular(ref digest, _, _) => {
match self.blob_digest_to_inode.get(digest) {
Some(found_ino) => {
// We already have it, return the inode.
*found_ino
}
None => self.insert_and_increment(data),
}
}
InodeData::Symlink(ref target) => {
match self.symlink_target_to_inode.get(target) {
Some(found_ino) => {
// We already have it, return the inode.
*found_ino
}
None => self.insert_and_increment(data),
}
}
InodeData::Directory(DirectoryInodeData::Sparse(ref digest, _size)) => {
// check the lookup table if the B3Digest is known.
match self.directory_digest_to_inode.get(digest) {
Some(found_ino) => {
// We already have it, return the inode.
*found_ino
}
None => {
// insert and return the inode
self.insert_and_increment(data)
}
}
}
// Inserting [DirectoryInodeData::Populated] doesn't normally happen,
// only via [replace].
InodeData::Directory(DirectoryInodeData::Populated(..)) => {
unreachable!("should never be called with DirectoryInodeData::Populated")
}
}
}
// Inserts the data and returns the inode it was stored at, while
// incrementing next_inode.
fn insert_and_increment(&mut self, data: InodeData) -> u64 {
let ino = self.next_inode;
// insert into lookup tables
match data {
InodeData::Regular(ref digest, _, _) => {
self.blob_digest_to_inode.insert(digest.clone(), ino);
}
InodeData::Symlink(ref target) => {
self.symlink_target_to_inode.insert(target.clone(), ino);
}
InodeData::Directory(DirectoryInodeData::Sparse(ref digest, _size)) => {
self.directory_digest_to_inode.insert(digest.clone(), ino);
}
// This is currently not used outside test fixtures.
// Usually a [DirectoryInodeData::Sparse] is inserted and later
// "upgraded" with more data.
// However, as a future optimization, a lookup for a PathInfo could trigger a
// [DirectoryService::get_recursive()] request that "forks into
// background" and prepopulates all Directories in a closure.
InodeData::Directory(DirectoryInodeData::Populated(ref digest, _)) => {
self.directory_digest_to_inode.insert(digest.clone(), ino);
}
}
// Insert data
self.data.insert(ino, Arc::new(data));
// increment inode counter and return old inode.
self.next_inode += 1;
ino
}
}
#[cfg(test)]
mod tests {
use crate::fixtures;
use super::InodeData;
use super::InodeTracker;
/// Getting something non-existent should be none
#[test]
fn get_nonexistent() {
let inode_tracker = InodeTracker::default();
assert!(inode_tracker.get(1).is_none());
}
/// Put of a regular file should allocate a uid, which should be the same when inserting again.
#[test]
fn put_regular() {
let mut inode_tracker = InodeTracker::default();
let f = InodeData::Regular(
fixtures::BLOB_A_DIGEST.clone(),
fixtures::BLOB_A.len() as u64,
false,
);
// put it in
let ino = inode_tracker.put(f.clone());
// a get should return the right data
let data = inode_tracker.get(ino).expect("must be some");
match *data {
InodeData::Regular(ref digest, _, _) => {
assert_eq!(&fixtures::BLOB_A_DIGEST.clone(), digest);
}
InodeData::Symlink(_) | InodeData::Directory(..) => panic!("wrong type"),
}
// another put should return the same ino
assert_eq!(ino, inode_tracker.put(f));
// inserting another file should return a different ino
assert_ne!(
ino,
inode_tracker.put(InodeData::Regular(
fixtures::BLOB_B_DIGEST.clone(),
fixtures::BLOB_B.len() as u64,
false,
))
);
}
// Put of a symlink should allocate a uid, which should be the same when inserting again
#[test]
fn put_symlink() {
let mut inode_tracker = InodeTracker::default();
let f = InodeData::Symlink("target".into());
// put it in
let ino = inode_tracker.put(f.clone());
// a get should return the right data
let data = inode_tracker.get(ino).expect("must be some");
match *data {
InodeData::Symlink(ref target) => {
assert_eq!(b"target".to_vec(), *target);
}
InodeData::Regular(..) | InodeData::Directory(..) => panic!("wrong type"),
}
// another put should return the same ino
assert_eq!(ino, inode_tracker.put(f));
// inserting another file should return a different ino
assert_ne!(ino, inode_tracker.put(InodeData::Symlink("target2".into())));
}
}

View file

@ -0,0 +1,89 @@
//! This module contains all the data structures used to track information
//! about inodes, which present snix-castore nodes in a filesystem.
use std::time::Duration;
use crate::{path::PathComponent, B3Digest, Node};
#[derive(Clone, Debug)]
pub enum InodeData {
Regular(B3Digest, u64, bool), // digest, size, executable
Symlink(bytes::Bytes), // target
Directory(DirectoryInodeData), // either [DirectoryInodeData:Sparse] or [DirectoryInodeData:Populated]
}
/// This encodes the two different states of [InodeData::Directory].
/// Either the data still is sparse (we only saw a [castorepb::DirectoryNode],
/// but didn't fetch the [castorepb::Directory] struct yet, or we processed a
/// lookup and did fetch the data.
#[derive(Clone, Debug)]
pub enum DirectoryInodeData {
Sparse(B3Digest, u64), // digest, size
Populated(B3Digest, Vec<(u64, PathComponent, Node)>), // [(child_inode, name, node)]
}
impl InodeData {
/// Constructs a new InodeData by consuming a [Node].
pub fn from_node(node: &Node) -> Self {
match node {
Node::Directory { digest, size } => {
Self::Directory(DirectoryInodeData::Sparse(digest.clone(), *size))
}
Node::File {
digest,
size,
executable,
} => Self::Regular(digest.clone(), *size, *executable),
Node::Symlink { target } => Self::Symlink(target.clone().into()),
}
}
pub fn as_fuse_file_attr(&self, inode: u64) -> fuse_backend_rs::abi::fuse_abi::Attr {
fuse_backend_rs::abi::fuse_abi::Attr {
ino: inode,
// FUTUREWORK: play with this numbers, as it affects read sizes for client applications.
blocks: 1024,
size: match self {
InodeData::Regular(_, size, _) => *size,
InodeData::Symlink(target) => target.len() as u64,
InodeData::Directory(DirectoryInodeData::Sparse(_, size)) => *size,
InodeData::Directory(DirectoryInodeData::Populated(_, ref children)) => {
children.len() as u64
}
},
mode: self.as_fuse_type() | self.mode(),
..Default::default()
}
}
fn mode(&self) -> u32 {
match self {
InodeData::Regular(_, _, false) | InodeData::Symlink(_) => 0o444,
InodeData::Regular(_, _, true) | InodeData::Directory(_) => 0o555,
}
}
pub fn as_fuse_entry(&self, inode: u64) -> fuse_backend_rs::api::filesystem::Entry {
fuse_backend_rs::api::filesystem::Entry {
inode,
attr: self.as_fuse_file_attr(inode).into(),
attr_timeout: Duration::MAX,
entry_timeout: Duration::MAX,
..Default::default()
}
}
/// Returns the u32 fuse type
pub fn as_fuse_type(&self) -> u32 {
#[allow(clippy::let_and_return)]
let ty = match self {
InodeData::Regular(_, _, _) => libc::S_IFREG,
InodeData::Symlink(_) => libc::S_IFLNK,
InodeData::Directory(_) => libc::S_IFDIR,
};
// libc::S_IFDIR is u32 on Linux and u16 on MacOS
#[cfg(target_os = "macos")]
let ty = ty as u32;
ty
}
}

915
snix/castore/src/fs/mod.rs Normal file
View file

@ -0,0 +1,915 @@
mod file_attr;
mod inode_tracker;
mod inodes;
mod root_nodes;
#[cfg(feature = "fuse")]
pub mod fuse;
#[cfg(feature = "virtiofs")]
pub mod virtiofs;
pub use self::root_nodes::RootNodes;
use self::{
file_attr::ROOT_FILE_ATTR,
inode_tracker::InodeTracker,
inodes::{DirectoryInodeData, InodeData},
};
use crate::{
blobservice::{BlobReader, BlobService},
directoryservice::DirectoryService,
path::PathComponent,
B3Digest, Node,
};
use bstr::ByteVec;
use fuse_backend_rs::abi::fuse_abi::{stat64, OpenOptions};
use fuse_backend_rs::api::filesystem::{
Context, FileSystem, FsOptions, GetxattrReply, ListxattrReply, ROOT_ID,
};
use futures::StreamExt;
use parking_lot::RwLock;
use std::sync::Mutex;
use std::{
collections::HashMap,
io,
sync::atomic::AtomicU64,
sync::{atomic::Ordering, Arc},
time::Duration,
};
use std::{ffi::CStr, io::Cursor};
use tokio::{
io::{AsyncReadExt, AsyncSeekExt},
sync::mpsc,
};
use tracing::{debug, error, instrument, warn, Instrument as _, Span};
/// This implements a read-only FUSE filesystem for a snix-store
/// with the passed [BlobService], [DirectoryService] and [RootNodes].
///
/// Linux uses inodes in filesystems. When implementing FUSE, most calls are
/// *for* a given inode.
///
/// This means, we need to have a stable mapping of inode numbers to the
/// corresponding store nodes.
///
/// We internally delegate all inode allocation and state keeping to the
/// inode tracker.
/// We store a mapping from currently "explored" names in the root to their
/// inode.
///
/// There's some places where inodes are allocated / data inserted into
/// the inode tracker, if not allocated before already:
/// - Processing a `lookup` request, either in the mount root, or somewhere
/// deeper.
/// - Processing a `readdir` request
///
/// Things pointing to the same contents get the same inodes, irrespective of
/// their own location.
/// This means:
/// - Symlinks with the same target will get the same inode.
/// - Regular/executable files with the same contents will get the same inode
/// - Directories with the same contents will get the same inode.
///
/// Due to the above being valid across the whole store, and considering the
/// merkle structure is a DAG, not a tree, this also means we can't do "bucketed
/// allocation", aka reserve Directory.size inodes for each directory node we
/// explore.
/// Tests for this live in the snix-store crate.
pub struct SnixStoreFs<BS, DS, RN> {
blob_service: BS,
directory_service: DS,
root_nodes_provider: RN,
/// Whether to (try) listing elements in the root.
list_root: bool,
/// Whether to expose blob and directory digests as extended attributes.
show_xattr: bool,
/// This maps a given basename in the root to the inode we allocated for the node.
root_nodes: RwLock<HashMap<PathComponent, u64>>,
/// This keeps track of inodes and data alongside them.
inode_tracker: RwLock<InodeTracker>,
// FUTUREWORK: have a generic container type for dir/file handles and handle
// allocation.
/// Maps from the handle returned from an opendir to
/// This holds all opendir handles (for the root inode)
/// They point to the rx part of the channel producing the listing.
#[allow(clippy::type_complexity)]
dir_handles: RwLock<
HashMap<
u64,
(
Span,
Arc<Mutex<mpsc::Receiver<(usize, Result<(PathComponent, Node), crate::Error>)>>>,
),
>,
>,
next_dir_handle: AtomicU64,
/// This holds all open file handles
#[allow(clippy::type_complexity)]
file_handles: RwLock<HashMap<u64, (Span, Arc<Mutex<Box<dyn BlobReader>>>)>>,
next_file_handle: AtomicU64,
tokio_handle: tokio::runtime::Handle,
}
impl<BS, DS, RN> SnixStoreFs<BS, DS, RN>
where
BS: BlobService + Clone + Send,
DS: DirectoryService + Clone + Send + 'static,
RN: RootNodes + Clone + 'static,
{
pub fn new(
blob_service: BS,
directory_service: DS,
root_nodes_provider: RN,
list_root: bool,
show_xattr: bool,
) -> Self {
Self {
blob_service,
directory_service,
root_nodes_provider,
list_root,
show_xattr,
root_nodes: RwLock::new(HashMap::default()),
inode_tracker: RwLock::new(Default::default()),
dir_handles: RwLock::new(Default::default()),
next_dir_handle: AtomicU64::new(1),
file_handles: RwLock::new(Default::default()),
next_file_handle: AtomicU64::new(1),
tokio_handle: tokio::runtime::Handle::current(),
}
}
/// Retrieves the inode for a given root node basename, if present.
/// This obtains a read lock on self.root_nodes.
fn get_inode_for_root_name(&self, name: &PathComponent) -> Option<u64> {
self.root_nodes.read().get(name).cloned()
}
/// For a given inode, look up the given directory behind it (from
/// self.inode_tracker), and return its children.
/// The inode_tracker MUST know about this inode already, and it MUST point
/// to a [InodeData::Directory].
/// It is ok if it's a [DirectoryInodeData::Sparse] - in that case, a lookup
/// in self.directory_service is performed, and self.inode_tracker is updated with the
/// [DirectoryInodeData::Populated].
#[allow(clippy::type_complexity)]
#[instrument(skip(self), err)]
fn get_directory_children(
&self,
ino: u64,
) -> io::Result<(B3Digest, Vec<(u64, PathComponent, Node)>)> {
let data = self.inode_tracker.read().get(ino).unwrap();
match *data {
// if it's populated already, return children.
InodeData::Directory(DirectoryInodeData::Populated(
ref parent_digest,
ref children,
)) => Ok((parent_digest.clone(), children.clone())),
// if it's sparse, fetch data using directory_service, populate child nodes
// and update it in [self.inode_tracker].
InodeData::Directory(DirectoryInodeData::Sparse(ref parent_digest, _)) => {
let directory = self
.tokio_handle
.block_on({
let directory_service = self.directory_service.clone();
let parent_digest = parent_digest.to_owned();
async move { directory_service.get(&parent_digest).await }
})?
.ok_or_else(|| {
warn!(directory.digest=%parent_digest, "directory not found");
// If the Directory can't be found, this is a hole, bail out.
io::Error::from_raw_os_error(libc::EIO)
})?;
// Turn the retrieved directory into a InodeData::Directory(DirectoryInodeData::Populated(..)),
// allocating inodes for the children on the way.
// FUTUREWORK: there's a bunch of cloning going on here, which we can probably avoid.
let children = {
let mut inode_tracker = self.inode_tracker.write();
let children: Vec<(u64, PathComponent, Node)> = directory
.into_nodes()
.map(|(child_name, child_node)| {
let inode_data = InodeData::from_node(&child_node);
let child_ino = inode_tracker.put(inode_data);
(child_ino, child_name, child_node)
})
.collect();
// replace.
inode_tracker.replace(
ino,
Arc::new(InodeData::Directory(DirectoryInodeData::Populated(
parent_digest.clone(),
children.clone(),
))),
);
children
};
Ok((parent_digest.clone(), children))
}
// if the parent inode was not a directory, this doesn't make sense
InodeData::Regular(..) | InodeData::Symlink(_) => {
Err(io::Error::from_raw_os_error(libc::ENOTDIR))
}
}
}
/// This will turn a lookup request for a name in the root to a ino and
/// [InodeData].
/// It will peek in [self.root_nodes], and then either look it up from
/// [self.inode_tracker],
/// or otherwise fetch from [self.root_nodes], and then insert into
/// [self.inode_tracker].
/// In the case the name can't be found, a libc::ENOENT is returned.
fn name_in_root_to_ino_and_data(
&self,
name: &PathComponent,
) -> io::Result<(u64, Arc<InodeData>)> {
// Look up the inode for that root node.
// If there's one, [self.inode_tracker] MUST also contain the data,
// which we can then return.
if let Some(inode) = self.get_inode_for_root_name(name) {
return Ok((
inode,
self.inode_tracker
.read()
.get(inode)
.expect("must exist")
.to_owned(),
));
}
// We don't have it yet, look it up in [self.root_nodes].
match self.tokio_handle.block_on({
let root_nodes_provider = self.root_nodes_provider.clone();
let name = name.clone();
async move { root_nodes_provider.get_by_basename(&name).await }
}) {
// if there was an error looking up the root node, propagate up an IO error.
Err(_e) => Err(io::Error::from_raw_os_error(libc::EIO)),
// the root node doesn't exist, so the file doesn't exist.
Ok(None) => Err(io::Error::from_raw_os_error(libc::ENOENT)),
// The root node does exist
Ok(Some(root_node)) => {
// Let's check if someone else beat us to updating the inode tracker and
// root_nodes map. This avoids locking inode_tracker for writing.
if let Some(ino) = self.root_nodes.read().get(name) {
return Ok((
*ino,
self.inode_tracker.read().get(*ino).expect("must exist"),
));
}
// Only in case it doesn't, lock [self.root_nodes] and
// [self.inode_tracker] for writing.
let mut root_nodes = self.root_nodes.write();
let mut inode_tracker = self.inode_tracker.write();
// insert the (sparse) inode data and register in
// self.root_nodes.
let inode_data = InodeData::from_node(&root_node);
let ino = inode_tracker.put(inode_data.clone());
root_nodes.insert(name.to_owned(), ino);
Ok((ino, Arc::new(inode_data)))
}
}
}
}
/// Buffer size of the channel providing nodes in the mount root
const ROOT_NODES_BUFFER_SIZE: usize = 16;
const XATTR_NAME_DIRECTORY_DIGEST: &[u8] = b"user.snix.castore.directory.digest";
const XATTR_NAME_BLOB_DIGEST: &[u8] = b"user.snix.castore.blob.digest";
#[cfg(all(feature = "virtiofs", target_os = "linux"))]
impl<BS, DS, RN> fuse_backend_rs::api::filesystem::Layer for SnixStoreFs<BS, DS, RN>
where
BS: BlobService + Clone + Send + 'static,
DS: DirectoryService + Send + Clone + 'static,
RN: RootNodes + Clone + 'static,
{
fn root_inode(&self) -> Self::Inode {
ROOT_ID
}
}
impl<BS, DS, RN> FileSystem for SnixStoreFs<BS, DS, RN>
where
BS: BlobService + Clone + Send + 'static,
DS: DirectoryService + Send + Clone + 'static,
RN: RootNodes + Clone + 'static,
{
type Handle = u64;
type Inode = u64;
fn init(&self, _capable: FsOptions) -> io::Result<FsOptions> {
let mut opts = FsOptions::empty();
// allow more than one pending read request per file-handle at any time
opts |= FsOptions::ASYNC_READ;
#[cfg(target_os = "linux")]
{
// the filesystem supports readdirplus
opts |= FsOptions::DO_READDIRPLUS;
// issue both readdir and readdirplus depending on the information expected to be required
opts |= FsOptions::READDIRPLUS_AUTO;
// allow concurrent lookup() and readdir() requests for the same directory
opts |= FsOptions::PARALLEL_DIROPS;
// have the kernel cache symlink contents
opts |= FsOptions::CACHE_SYMLINKS;
}
// TODO: figure out what dawrin options make sense.
Ok(opts)
}
#[tracing::instrument(skip_all, fields(rq.inode = inode))]
fn getattr(
&self,
_ctx: &Context,
inode: Self::Inode,
_handle: Option<Self::Handle>,
) -> io::Result<(stat64, Duration)> {
if inode == ROOT_ID {
return Ok((ROOT_FILE_ATTR.into(), Duration::MAX));
}
match self.inode_tracker.read().get(inode) {
None => Err(io::Error::from_raw_os_error(libc::ENOENT)),
Some(inode_data) => {
debug!(inode_data = ?inode_data, "found node");
Ok((inode_data.as_fuse_file_attr(inode).into(), Duration::MAX))
}
}
}
#[tracing::instrument(skip_all, fields(rq.parent_inode = parent, rq.name = ?name))]
fn lookup(
&self,
_ctx: &Context,
parent: Self::Inode,
name: &std::ffi::CStr,
) -> io::Result<fuse_backend_rs::api::filesystem::Entry> {
debug!("lookup");
// convert the CStr to a PathComponent
// If it can't be converted, we definitely don't have anything here.
let name: PathComponent = name.try_into().map_err(|_| std::io::ErrorKind::NotFound)?;
// This goes from a parent inode to a node.
// - If the parent is [ROOT_ID], we need to check
// [self.root_nodes] (fetching from a [RootNode] provider if needed)
// - Otherwise, lookup the parent in [self.inode_tracker] (which must be
// a [InodeData::Directory]), and find the child with that name.
if parent == ROOT_ID {
let (ino, inode_data) = self.name_in_root_to_ino_and_data(&name)?;
debug!(inode_data=?&inode_data, ino=ino, "Some");
return Ok(inode_data.as_fuse_entry(ino));
}
// This is the "lookup for "a" inside inode 42.
// We already know that inode 42 must be a directory.
let (parent_digest, children) = self.get_directory_children(parent)?;
Span::current().record("directory.digest", parent_digest.to_string());
// Search for that name in the list of children and return the FileAttrs.
// in the children, find the one with the desired name.
if let Some((child_ino, _, _)) = children.iter().find(|(_, n, _)| n == &name) {
// lookup the child [InodeData] in [self.inode_tracker].
// We know the inodes for children have already been allocated.
let child_inode_data = self.inode_tracker.read().get(*child_ino).unwrap();
// Reply with the file attributes for the child.
// For child directories, we still have all data we need to reply.
Ok(child_inode_data.as_fuse_entry(*child_ino))
} else {
// Child not found, return ENOENT.
Err(io::Error::from_raw_os_error(libc::ENOENT))
}
}
#[tracing::instrument(skip_all, fields(rq.inode = inode))]
fn opendir(
&self,
_ctx: &Context,
inode: Self::Inode,
_flags: u32,
) -> io::Result<(Option<Self::Handle>, OpenOptions)> {
// In case opendir on the root is called, we provide the handle, as re-entering that listing is expensive.
// For all other directory inodes we just let readdir take care of it.
if inode == ROOT_ID {
if !self.list_root {
return Err(io::Error::from_raw_os_error(libc::EPERM)); // same error code as ipfs/kubo
}
let root_nodes_provider = self.root_nodes_provider.clone();
let (tx, rx) = mpsc::channel(ROOT_NODES_BUFFER_SIZE);
// This task will run in the background immediately and will exit
// after the stream ends or if we no longer want any more entries.
self.tokio_handle.spawn(
async move {
let mut stream = root_nodes_provider.list().enumerate();
while let Some(e) = stream.next().await {
if tx.send(e).await.is_err() {
// If we get a send error, it means the sync code
// doesn't want any more entries.
break;
}
}
}
// instrument the task with the current span, this is not done by default
.in_current_span(),
);
// Put the rx part into [self.dir_handles].
// TODO: this will overflow after 2**64 operations,
// which is fine for now.
// See https://cl.tvl.fyi/c/depot/+/8834/comment/a6684ce0_d72469d1
// for the discussion on alternatives.
let dh = self.next_dir_handle.fetch_add(1, Ordering::SeqCst);
self.dir_handles
.write()
.insert(dh, (Span::current(), Arc::new(Mutex::new(rx))));
return Ok((Some(dh), OpenOptions::NONSEEKABLE));
}
let mut opts = OpenOptions::empty();
opts |= OpenOptions::KEEP_CACHE;
#[cfg(target_os = "linux")]
{
opts |= OpenOptions::CACHE_DIR;
}
// allow caching this directory contents, don't invalidate on open
Ok((None, opts))
}
#[tracing::instrument(skip_all, fields(rq.inode = inode, rq.handle = handle, rq.offset = offset), parent = self.dir_handles.read().get(&handle).and_then(|x| x.0.id()))]
fn readdir(
&self,
_ctx: &Context,
inode: Self::Inode,
handle: Self::Handle,
_size: u32,
offset: u64,
add_entry: &mut dyn FnMut(fuse_backend_rs::api::filesystem::DirEntry) -> io::Result<usize>,
) -> io::Result<()> {
debug!("readdir");
if inode == ROOT_ID {
if !self.list_root {
return Err(io::Error::from_raw_os_error(libc::EPERM)); // same error code as ipfs/kubo
}
// get the handle from [self.dir_handles]
let (_span, rx) = match self.dir_handles.read().get(&handle) {
Some(rx) => rx.clone(),
None => {
warn!("dir handle {} unknown", handle);
return Err(io::Error::from_raw_os_error(libc::EIO));
}
};
let mut rx = rx
.lock()
.map_err(|_| crate::Error::StorageError("mutex poisoned".into()))?;
while let Some((i, n)) = rx.blocking_recv() {
let (name, node) = n.map_err(|e| {
warn!("failed to retrieve root node: {}", e);
io::Error::from_raw_os_error(libc::EIO)
})?;
let inode_data = InodeData::from_node(&node);
// obtain the inode, or allocate a new one.
let ino = self.get_inode_for_root_name(&name).unwrap_or_else(|| {
// insert the (sparse) inode data and register in
// self.root_nodes.
let ino = self.inode_tracker.write().put(inode_data.clone());
self.root_nodes.write().insert(name.clone(), ino);
ino
});
let written = add_entry(fuse_backend_rs::api::filesystem::DirEntry {
ino,
offset: offset + (i as u64) + 1,
type_: inode_data.as_fuse_type(),
name: name.as_ref(),
})?;
// If the buffer is full, add_entry will return `Ok(0)`.
if written == 0 {
break;
}
}
return Ok(());
}
// Non root-node case: lookup the children, or return an error if it's not a directory.
let (parent_digest, children) = self.get_directory_children(inode)?;
Span::current().record("directory.digest", parent_digest.to_string());
for (i, (ino, child_name, child_node)) in
children.into_iter().skip(offset as usize).enumerate()
{
let inode_data = InodeData::from_node(&child_node);
// the second parameter will become the "offset" parameter on the next call.
let written = add_entry(fuse_backend_rs::api::filesystem::DirEntry {
ino,
offset: offset + (i as u64) + 1,
type_: inode_data.as_fuse_type(),
name: child_name.as_ref(),
})?;
// If the buffer is full, add_entry will return `Ok(0)`.
if written == 0 {
break;
}
}
Ok(())
}
#[tracing::instrument(skip_all, fields(rq.inode = inode, rq.handle = handle), parent = self.dir_handles.read().get(&handle).and_then(|x| x.0.id()))]
fn readdirplus(
&self,
_ctx: &Context,
inode: Self::Inode,
handle: Self::Handle,
_size: u32,
offset: u64,
add_entry: &mut dyn FnMut(
fuse_backend_rs::api::filesystem::DirEntry,
fuse_backend_rs::api::filesystem::Entry,
) -> io::Result<usize>,
) -> io::Result<()> {
debug!("readdirplus");
if inode == ROOT_ID {
if !self.list_root {
return Err(io::Error::from_raw_os_error(libc::EPERM)); // same error code as ipfs/kubo
}
// get the handle from [self.dir_handles]
let (_span, rx) = match self.dir_handles.read().get(&handle) {
Some(rx) => rx.clone(),
None => {
warn!("dir handle {} unknown", handle);
return Err(io::Error::from_raw_os_error(libc::EIO));
}
};
let mut rx = rx
.lock()
.map_err(|_| crate::Error::StorageError("mutex poisoned".into()))?;
while let Some((i, n)) = rx.blocking_recv() {
let (name, node) = n.map_err(|e| {
warn!("failed to retrieve root node: {}", e);
io::Error::from_raw_os_error(libc::EPERM)
})?;
let inode_data = InodeData::from_node(&node);
// obtain the inode, or allocate a new one.
let ino = self.get_inode_for_root_name(&name).unwrap_or_else(|| {
// insert the (sparse) inode data and register in
// self.root_nodes.
let ino = self.inode_tracker.write().put(inode_data.clone());
self.root_nodes.write().insert(name.clone(), ino);
ino
});
let written = add_entry(
fuse_backend_rs::api::filesystem::DirEntry {
ino,
offset: offset + (i as u64) + 1,
type_: inode_data.as_fuse_type(),
name: name.as_ref(),
},
inode_data.as_fuse_entry(ino),
)?;
// If the buffer is full, add_entry will return `Ok(0)`.
if written == 0 {
break;
}
}
return Ok(());
}
// Non root-node case: lookup the children, or return an error if it's not a directory.
let (parent_digest, children) = self.get_directory_children(inode)?;
Span::current().record("directory.digest", parent_digest.to_string());
for (i, (ino, name, child_node)) in children.into_iter().skip(offset as usize).enumerate() {
let inode_data = InodeData::from_node(&child_node);
// the second parameter will become the "offset" parameter on the next call.
let written = add_entry(
fuse_backend_rs::api::filesystem::DirEntry {
ino,
offset: offset + (i as u64) + 1,
type_: inode_data.as_fuse_type(),
name: name.as_ref(),
},
inode_data.as_fuse_entry(ino),
)?;
// If the buffer is full, add_entry will return `Ok(0)`.
if written == 0 {
break;
}
}
Ok(())
}
#[tracing::instrument(skip_all, fields(rq.inode = inode, rq.handle = handle), parent = self.dir_handles.read().get(&handle).and_then(|x| x.0.id()))]
fn releasedir(
&self,
_ctx: &Context,
inode: Self::Inode,
_flags: u32,
handle: Self::Handle,
) -> io::Result<()> {
if inode == ROOT_ID {
// drop the rx part of the channel.
match self.dir_handles.write().remove(&handle) {
// drop it, which will close it.
Some(rx) => drop(rx),
None => {
warn!("dir handle not found");
}
}
}
Ok(())
}
#[tracing::instrument(skip_all, fields(rq.inode = inode))]
fn open(
&self,
_ctx: &Context,
inode: Self::Inode,
_flags: u32,
_fuse_flags: u32,
) -> io::Result<(Option<Self::Handle>, OpenOptions, Option<u32>)> {
if inode == ROOT_ID {
return Err(io::Error::from_raw_os_error(libc::ENOSYS));
}
// lookup the inode
match *self.inode_tracker.read().get(inode).unwrap() {
// read is invalid on non-files.
InodeData::Directory(..) | InodeData::Symlink(_) => {
warn!("is directory");
Err(io::Error::from_raw_os_error(libc::EISDIR))
}
InodeData::Regular(ref blob_digest, _blob_size, _) => {
Span::current().record("blob.digest", blob_digest.to_string());
match self.tokio_handle.block_on({
let blob_service = self.blob_service.clone();
let blob_digest = blob_digest.clone();
async move { blob_service.open_read(&blob_digest).await }
}) {
Ok(None) => {
warn!("blob not found");
Err(io::Error::from_raw_os_error(libc::EIO))
}
Err(e) => {
warn!(e=?e, "error opening blob");
Err(io::Error::from_raw_os_error(libc::EIO))
}
Ok(Some(blob_reader)) => {
// get a new file handle
// TODO: this will overflow after 2**64 operations,
// which is fine for now.
// See https://cl.tvl.fyi/c/depot/+/8834/comment/a6684ce0_d72469d1
// for the discussion on alternatives.
let fh = self.next_file_handle.fetch_add(1, Ordering::SeqCst);
self.file_handles
.write()
.insert(fh, (Span::current(), Arc::new(Mutex::new(blob_reader))));
Ok((
Some(fh),
// Don't invalidate the data cache on open.
OpenOptions::KEEP_CACHE,
None,
))
}
}
}
}
}
#[tracing::instrument(skip_all, fields(rq.inode = inode, rq.handle = handle), parent = self.file_handles.read().get(&handle).and_then(|x| x.0.id()))]
fn release(
&self,
_ctx: &Context,
inode: Self::Inode,
_flags: u32,
handle: Self::Handle,
_flush: bool,
_flock_release: bool,
_lock_owner: Option<u64>,
) -> io::Result<()> {
match self.file_handles.write().remove(&handle) {
// drop the blob reader, which will close it.
Some(blob_reader) => drop(blob_reader),
None => {
// These might already be dropped if a read error occured.
warn!("file handle not found");
}
}
Ok(())
}
#[tracing::instrument(skip_all, fields(rq.inode = inode, rq.handle = handle, rq.offset = offset, rq.size = size), parent = self.file_handles.read().get(&handle).and_then(|x| x.0.id()))]
fn read(
&self,
_ctx: &Context,
inode: Self::Inode,
handle: Self::Handle,
w: &mut dyn fuse_backend_rs::api::filesystem::ZeroCopyWriter,
size: u32,
offset: u64,
_lock_owner: Option<u64>,
_flags: u32,
) -> io::Result<usize> {
debug!("read");
// We need to take out the blob reader from self.file_handles, so we can
// interact with it in the separate task.
// On success, we pass it back out of the task, so we can put it back in self.file_handles.
let (_span, blob_reader) = self
.file_handles
.read()
.get(&handle)
.ok_or_else(|| {
warn!("file handle {} unknown", handle);
io::Error::from_raw_os_error(libc::EIO)
})
.cloned()?;
let mut blob_reader = blob_reader
.lock()
.map_err(|_| crate::Error::StorageError("mutex poisoned".into()))?;
let buf = self.tokio_handle.block_on(async move {
// seek to the offset specified, which is relative to the start of the file.
let pos = blob_reader
.seek(io::SeekFrom::Start(offset))
.await
.map_err(|e| {
warn!("failed to seek to offset {}: {}", offset, e);
io::Error::from_raw_os_error(libc::EIO)
})?;
debug_assert_eq!(offset, pos);
// As written in the fuse docs, read should send exactly the number
// of bytes requested except on EOF or error.
let mut buf: Vec<u8> = Vec::with_capacity(size as usize);
// copy things from the internal buffer into buf to fill it till up until size
tokio::io::copy(&mut blob_reader.as_mut().take(size as u64), &mut buf).await?;
Ok::<_, std::io::Error>(buf)
})?;
// We cannot use w.write() here, we're required to call write multiple
// times until we wrote the entirety of the buffer (which is `size`, except on EOF).
let buf_len = buf.len();
let bytes_written = io::copy(&mut Cursor::new(buf), w)?;
if bytes_written != buf_len as u64 {
error!(bytes_written=%bytes_written, "unable to write all of buf to kernel");
return Err(io::Error::from_raw_os_error(libc::EIO));
}
Ok(bytes_written as usize)
}
#[tracing::instrument(skip_all, fields(rq.inode = inode))]
fn readlink(&self, _ctx: &Context, inode: Self::Inode) -> io::Result<Vec<u8>> {
if inode == ROOT_ID {
return Err(io::Error::from_raw_os_error(libc::ENOSYS));
}
// lookup the inode
match *self.inode_tracker.read().get(inode).unwrap() {
InodeData::Directory(..) | InodeData::Regular(..) => {
Err(io::Error::from_raw_os_error(libc::EINVAL))
}
InodeData::Symlink(ref target) => Ok(target.to_vec()),
}
}
#[tracing::instrument(skip_all, fields(rq.inode = inode, name=?name))]
fn getxattr(
&self,
_ctx: &Context,
inode: Self::Inode,
name: &CStr,
size: u32,
) -> io::Result<GetxattrReply> {
if !self.show_xattr {
return Err(io::Error::from_raw_os_error(libc::ENOSYS));
}
// Peek at the inode requested, and construct the response.
let digest_str = match *self
.inode_tracker
.read()
.get(inode)
.ok_or_else(|| io::Error::from_raw_os_error(libc::ENODATA))?
{
InodeData::Directory(DirectoryInodeData::Sparse(ref digest, _))
| InodeData::Directory(DirectoryInodeData::Populated(ref digest, _))
if name.to_bytes() == XATTR_NAME_DIRECTORY_DIGEST =>
{
digest.to_string()
}
InodeData::Regular(ref digest, _, _) if name.to_bytes() == XATTR_NAME_BLOB_DIGEST => {
digest.to_string()
}
_ => {
return Err(io::Error::from_raw_os_error(libc::ENODATA));
}
};
if size == 0 {
Ok(GetxattrReply::Count(digest_str.len() as u32))
} else if size < digest_str.len() as u32 {
Err(io::Error::from_raw_os_error(libc::ERANGE))
} else {
Ok(GetxattrReply::Value(digest_str.into_bytes()))
}
}
#[tracing::instrument(skip_all, fields(rq.inode = inode))]
fn listxattr(
&self,
_ctx: &Context,
inode: Self::Inode,
size: u32,
) -> io::Result<ListxattrReply> {
if !self.show_xattr {
return Err(io::Error::from_raw_os_error(libc::ENOSYS));
}
// determine the (\0-terminated list) to of xattr keys present, depending on the type of the inode.
let xattrs_names = {
let mut out = Vec::new();
if let Some(inode_data) = self.inode_tracker.read().get(inode) {
match *inode_data {
InodeData::Directory(_) => {
out.extend_from_slice(XATTR_NAME_DIRECTORY_DIGEST);
out.push_byte(b'\x00');
}
InodeData::Regular(..) => {
out.extend_from_slice(XATTR_NAME_BLOB_DIGEST);
out.push_byte(b'\x00');
}
_ => {}
}
}
out
};
if size == 0 {
Ok(ListxattrReply::Count(xattrs_names.len() as u32))
} else if size < xattrs_names.len() as u32 {
Err(io::Error::from_raw_os_error(libc::ERANGE))
} else {
Ok(ListxattrReply::Names(xattrs_names.to_vec()))
}
}
}

View file

@ -0,0 +1,39 @@
use std::collections::BTreeMap;
use crate::{path::PathComponent, Error, Node};
use futures::stream::BoxStream;
use tonic::async_trait;
/// Provides an interface for looking up root nodes in snix-castore by given
/// a lookup key (usually the basename), and optionally allow a listing.
#[async_trait]
pub trait RootNodes: Send + Sync {
/// Looks up a root CA node based on the basename of the node in the root
/// directory of the filesystem.
async fn get_by_basename(&self, name: &PathComponent) -> Result<Option<Node>, Error>;
/// Lists all root CA nodes in the filesystem, as a tuple of (base)name
/// and Node.
/// An error can be returned in case listing is not allowed.
fn list(&self) -> BoxStream<Result<(PathComponent, Node), Error>>;
}
#[async_trait]
/// Implements RootNodes for something deref'ing to a BTreeMap of Nodes, where
/// the key is the node name.
impl<T> RootNodes for T
where
T: AsRef<BTreeMap<PathComponent, Node>> + Send + Sync,
{
async fn get_by_basename(&self, name: &PathComponent) -> Result<Option<Node>, Error> {
Ok(self.as_ref().get(name).cloned())
}
fn list(&self) -> BoxStream<Result<(PathComponent, Node), Error>> {
Box::pin(tokio_stream::iter(
self.as_ref()
.iter()
.map(|(name, node)| Ok((name.to_owned(), node.to_owned()))),
))
}
}

View file

@ -0,0 +1,238 @@
use std::{
convert, error, fmt, io,
ops::Deref,
path::Path,
sync::{Arc, MutexGuard, RwLock},
};
use fuse_backend_rs::{
api::{filesystem::FileSystem, server::Server},
transport::{FsCacheReqHandler, Reader, VirtioFsWriter},
};
use tracing::error;
use vhost::vhost_user::{
Listener, SlaveFsCacheReq, VhostUserProtocolFeatures, VhostUserVirtioFeatures,
};
use vhost_user_backend::{VhostUserBackendMut, VhostUserDaemon, VringMutex, VringState, VringT};
use virtio_bindings::bindings::virtio_ring::{
VIRTIO_RING_F_EVENT_IDX, VIRTIO_RING_F_INDIRECT_DESC,
};
use virtio_queue::QueueT;
use vm_memory::{GuestAddressSpace, GuestMemoryAtomic, GuestMemoryMmap};
use vmm_sys_util::epoll::EventSet;
const VIRTIO_F_VERSION_1: u32 = 32;
const NUM_QUEUES: usize = 2;
const QUEUE_SIZE: usize = 1024;
#[derive(Debug)]
enum Error {
/// Failed to handle non-input event.
HandleEventNotEpollIn,
/// Failed to handle unknown event.
HandleEventUnknownEvent,
/// Invalid descriptor chain.
InvalidDescriptorChain,
/// Failed to handle filesystem requests.
#[allow(dead_code)]
HandleRequests(fuse_backend_rs::Error),
/// Failed to construct new vhost user daemon.
NewDaemon,
/// Failed to start the vhost user daemon.
StartDaemon,
/// Failed to wait for the vhost user daemon.
WaitDaemon,
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "vhost_user_fs_error: {self:?}")
}
}
impl error::Error for Error {}
impl convert::From<Error> for io::Error {
fn from(e: Error) -> Self {
io::Error::new(io::ErrorKind::Other, e)
}
}
struct VhostUserFsBackend<FS>
where
FS: FileSystem + Send + Sync,
{
server: Arc<Server<Arc<FS>>>,
event_idx: bool,
guest_mem: GuestMemoryAtomic<GuestMemoryMmap>,
cache_req: Option<SlaveFsCacheReq>,
}
impl<FS> VhostUserFsBackend<FS>
where
FS: FileSystem + Send + Sync,
{
fn process_queue(&mut self, vring: &mut MutexGuard<VringState>) -> std::io::Result<bool> {
let mut used_descs = false;
while let Some(desc_chain) = vring
.get_queue_mut()
.pop_descriptor_chain(self.guest_mem.memory())
{
let memory = desc_chain.memory();
let reader = Reader::from_descriptor_chain(memory, desc_chain.clone())
.map_err(|_| Error::InvalidDescriptorChain)?;
let writer = VirtioFsWriter::new(memory, desc_chain.clone())
.map_err(|_| Error::InvalidDescriptorChain)?;
self.server
.handle_message(
reader,
writer.into(),
self.cache_req
.as_mut()
.map(|req| req as &mut dyn FsCacheReqHandler),
None,
)
.map_err(Error::HandleRequests)?;
// TODO: Is len 0 correct?
if let Err(error) = vring
.get_queue_mut()
.add_used(memory, desc_chain.head_index(), 0)
{
error!(?error, "failed to add desc back to ring");
}
// TODO: What happens if we error out before here?
used_descs = true;
}
let needs_notification = if self.event_idx {
match vring
.get_queue_mut()
.needs_notification(self.guest_mem.memory().deref())
{
Ok(needs_notification) => needs_notification,
Err(error) => {
error!(?error, "failed to check if queue needs notification");
true
}
}
} else {
true
};
if needs_notification {
if let Err(error) = vring.signal_used_queue() {
error!(?error, "failed to signal used queue");
}
}
Ok(used_descs)
}
}
impl<FS> VhostUserBackendMut<VringMutex> for VhostUserFsBackend<FS>
where
FS: FileSystem + Send + Sync,
{
fn num_queues(&self) -> usize {
NUM_QUEUES
}
fn max_queue_size(&self) -> usize {
QUEUE_SIZE
}
fn features(&self) -> u64 {
1 << VIRTIO_F_VERSION_1
| 1 << VIRTIO_RING_F_INDIRECT_DESC
| 1 << VIRTIO_RING_F_EVENT_IDX
| VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits()
}
fn protocol_features(&self) -> VhostUserProtocolFeatures {
VhostUserProtocolFeatures::MQ | VhostUserProtocolFeatures::SLAVE_REQ
}
fn set_event_idx(&mut self, enabled: bool) {
self.event_idx = enabled;
}
fn update_memory(&mut self, _mem: GuestMemoryAtomic<GuestMemoryMmap>) -> std::io::Result<()> {
// This is what most the vhost user implementations do...
Ok(())
}
fn set_slave_req_fd(&mut self, cache_req: SlaveFsCacheReq) {
self.cache_req = Some(cache_req);
}
fn handle_event(
&mut self,
device_event: u16,
evset: vmm_sys_util::epoll::EventSet,
vrings: &[VringMutex],
_thread_id: usize,
) -> std::io::Result<bool> {
if evset != EventSet::IN {
return Err(Error::HandleEventNotEpollIn.into());
}
let mut queue = match device_event {
// High priority queue
0 => vrings[0].get_mut(),
// Regurlar priority queue
1 => vrings[1].get_mut(),
_ => {
return Err(Error::HandleEventUnknownEvent.into());
}
};
if self.event_idx {
loop {
queue
.get_queue_mut()
.enable_notification(self.guest_mem.memory().deref())
.map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
if !self.process_queue(&mut queue)? {
break;
}
}
} else {
self.process_queue(&mut queue)?;
}
Ok(false)
}
}
pub fn start_virtiofs_daemon<FS, P>(fs: FS, socket: P) -> io::Result<()>
where
FS: FileSystem + Send + Sync + 'static,
P: AsRef<Path>,
{
let guest_mem = GuestMemoryAtomic::new(GuestMemoryMmap::new());
let server = Arc::new(fuse_backend_rs::api::server::Server::new(Arc::new(fs)));
let backend = Arc::new(RwLock::new(VhostUserFsBackend {
server,
guest_mem: guest_mem.clone(),
event_idx: false,
cache_req: None,
}));
let listener = Listener::new(socket, true).unwrap();
let mut fs_daemon =
VhostUserDaemon::new(String::from("vhost-user-fs-snix-store"), backend, guest_mem)
.map_err(|_| Error::NewDaemon)?;
fs_daemon.start(listener).map_err(|_| Error::StartDaemon)?;
fs_daemon.wait().map_err(|_| Error::WaitDaemon)?;
Ok(())
}

View file

@ -0,0 +1,89 @@
use pin_project_lite::pin_project;
use tokio::io::AsyncRead;
pin_project! {
/// Wraps an existing AsyncRead, and allows querying for the digest of all
/// data read "through" it.
/// The hash function is configurable by type parameter.
pub struct HashingReader<R, H>
where
R: AsyncRead,
H: digest::Digest,
{
#[pin]
inner: R,
hasher: H,
}
}
pub type B3HashingReader<R> = HashingReader<R, blake3::Hasher>;
impl<R, H> HashingReader<R, H>
where
R: AsyncRead,
H: digest::Digest,
{
pub fn from(r: R) -> Self {
Self {
inner: r,
hasher: H::new(),
}
}
/// Return the digest.
pub fn digest(self) -> digest::Output<H> {
self.hasher.finalize()
}
}
impl<R, H> tokio::io::AsyncRead for HashingReader<R, H>
where
R: AsyncRead,
H: digest::Digest,
{
fn poll_read(
self: std::pin::Pin<&mut Self>,
cx: &mut std::task::Context<'_>,
buf: &mut tokio::io::ReadBuf<'_>,
) -> std::task::Poll<std::io::Result<()>> {
let buf_filled_len_before = buf.filled().len();
let this = self.project();
let ret = this.inner.poll_read(cx, buf);
// write everything new filled into the hasher.
this.hasher.update(&buf.filled()[buf_filled_len_before..]);
ret
}
}
#[cfg(test)]
mod tests {
use std::io::Cursor;
use rstest::rstest;
use crate::fixtures::BLOB_A;
use crate::fixtures::BLOB_A_DIGEST;
use crate::fixtures::BLOB_B;
use crate::fixtures::BLOB_B_DIGEST;
use crate::fixtures::EMPTY_BLOB_DIGEST;
use crate::{B3Digest, B3HashingReader};
#[rstest]
#[case::blob_a(&BLOB_A, &BLOB_A_DIGEST)]
#[case::blob_b(&BLOB_B, &BLOB_B_DIGEST)]
#[case::empty_blob(&[], &EMPTY_BLOB_DIGEST)]
#[tokio::test]
async fn test_b3_hashing_reader(#[case] data: &[u8], #[case] b3_digest: &B3Digest) {
let r = Cursor::new(data);
let mut hr = B3HashingReader::from(r);
tokio::io::copy(&mut hr, &mut tokio::io::sink())
.await
.expect("read must succeed");
assert_eq!(*b3_digest, hr.digest().into());
}
}

View file

@ -0,0 +1,372 @@
//! Imports from an archive (tarballs)
use std::collections::HashMap;
use petgraph::graph::{DiGraph, NodeIndex};
use petgraph::visit::{DfsPostOrder, EdgeRef};
use petgraph::Direction;
use tokio::io::AsyncRead;
use tokio_stream::StreamExt;
use tokio_tar::Archive;
use tracing::{instrument, warn, Level};
use crate::blobservice::BlobService;
use crate::directoryservice::DirectoryService;
use crate::import::{ingest_entries, IngestionEntry, IngestionError};
use crate::Node;
use super::blobs::{self, ConcurrentBlobUploader};
type TarPathBuf = std::path::PathBuf;
#[derive(Debug, thiserror::Error)]
pub enum Error {
#[error("unable to construct stream of entries: {0}")]
Entries(std::io::Error),
#[error("unable to read next entry: {0}")]
NextEntry(std::io::Error),
#[error("unable to read path for entry: {0}")]
PathRead(std::io::Error),
#[error("unable to convert path {0} for entry: {1}")]
PathConvert(TarPathBuf, std::io::Error),
#[error("unable to read size field for {0}: {1}")]
Size(TarPathBuf, std::io::Error),
#[error("unable to read mode field for {0}: {1}")]
Mode(TarPathBuf, std::io::Error),
#[error("unable to read link name field for {0}: {1}")]
LinkName(TarPathBuf, std::io::Error),
#[error("unsupported tar entry {0} type: {1:?}")]
EntryType(TarPathBuf, tokio_tar::EntryType),
#[error("symlink missing target {0}")]
MissingSymlinkTarget(TarPathBuf),
#[error("unexpected number of top level directory entries")]
UnexpectedNumberOfTopLevelEntries,
#[error(transparent)]
BlobUploadError(#[from] blobs::Error),
}
/// Ingests elements from the given tar [`Archive`] into a the passed [`BlobService`] and
/// [`DirectoryService`].
#[instrument(skip_all, ret(level = Level::TRACE), err)]
pub async fn ingest_archive<BS, DS, R>(
blob_service: BS,
directory_service: DS,
mut archive: Archive<R>,
) -> Result<Node, IngestionError<Error>>
where
BS: BlobService + Clone + 'static,
DS: DirectoryService,
R: AsyncRead + Unpin,
{
// Since tarballs can have entries in any arbitrary order, we need to
// buffer all of the directory metadata so we can reorder directory
// contents and entries to meet the requires of the castore.
// In the first phase, collect up all the regular files and symlinks.
let mut nodes = IngestionEntryGraph::new();
let mut blob_uploader = ConcurrentBlobUploader::new(blob_service);
let mut entries_iter = archive.entries().map_err(Error::Entries)?;
while let Some(mut entry) = entries_iter.try_next().await.map_err(Error::NextEntry)? {
let tar_path: TarPathBuf = entry.path().map_err(Error::PathRead)?.into();
// construct a castore PathBuf, which we use in the produced IngestionEntry.
let path = crate::path::PathBuf::from_host_path(tar_path.as_path(), true)
.map_err(|e| Error::PathConvert(tar_path.clone(), e))?;
let header = entry.header();
let entry = match header.entry_type() {
tokio_tar::EntryType::Regular
| tokio_tar::EntryType::GNUSparse
| tokio_tar::EntryType::Continuous => {
let size = header
.size()
.map_err(|e| Error::Size(tar_path.clone(), e))?;
let digest = blob_uploader
.upload(&path, size, &mut entry)
.await
.map_err(Error::BlobUploadError)?;
let executable = entry
.header()
.mode()
.map_err(|e| Error::Mode(tar_path, e))?
& 64
!= 0;
IngestionEntry::Regular {
path,
size,
executable,
digest,
}
}
tokio_tar::EntryType::Symlink => IngestionEntry::Symlink {
target: entry
.link_name()
.map_err(|e| Error::LinkName(tar_path.clone(), e))?
.ok_or_else(|| Error::MissingSymlinkTarget(tar_path.clone()))?
.into_owned()
.into_os_string()
.into_encoded_bytes(),
path,
},
// Push a bogus directory marker so we can make sure this directoy gets
// created. We don't know the digest and size until after reading the full
// tarball.
tokio_tar::EntryType::Directory => IngestionEntry::Dir { path },
tokio_tar::EntryType::XGlobalHeader | tokio_tar::EntryType::XHeader => continue,
entry_type => return Err(Error::EntryType(tar_path, entry_type).into()),
};
nodes.add(entry)?;
}
blob_uploader.join().await.map_err(Error::BlobUploadError)?;
let root_node = ingest_entries(
directory_service,
futures::stream::iter(nodes.finalize()?.into_iter().map(Ok)),
)
.await?;
Ok(root_node)
}
/// Keep track of the directory structure of a file tree being ingested. This is used
/// for ingestion sources which do not provide any ordering or uniqueness guarantees
/// like tarballs.
///
/// If we ingest multiple entries with the same paths and both entries are not directories,
/// the newer entry will replace the latter entry, disconnecting the old node's children
/// from the graph.
///
/// Once all nodes are ingested a call to [IngestionEntryGraph::finalize] will return
/// a list of entries compute by performaing a DFS post order traversal of the graph
/// from the top-level directory entry.
///
/// This expects the directory structure to contain a single top-level directory entry.
/// An error is returned if this is not the case and ingestion will fail.
struct IngestionEntryGraph {
graph: DiGraph<IngestionEntry, ()>,
path_to_index: HashMap<crate::path::PathBuf, NodeIndex>,
root_node: Option<NodeIndex>,
}
impl Default for IngestionEntryGraph {
fn default() -> Self {
Self::new()
}
}
impl IngestionEntryGraph {
/// Creates a new ingestion entry graph.
pub fn new() -> Self {
IngestionEntryGraph {
graph: DiGraph::new(),
path_to_index: HashMap::new(),
root_node: None,
}
}
/// Adds a new entry to the graph. Parent directories are automatically inserted.
/// If a node exists in the graph with the same name as the new entry and both the old
/// and new nodes are not directories, the node is replaced and is disconnected from its
/// children.
pub fn add(&mut self, entry: IngestionEntry) -> Result<NodeIndex, Error> {
let path = entry.path().to_owned();
let index = match self.path_to_index.get(entry.path()) {
Some(&index) => {
// If either the old entry or new entry are not directories, we'll replace the old
// entry.
if !entry.is_dir() || !self.get_node(index).is_dir() {
self.replace_node(index, entry);
}
index
}
None => self.graph.add_node(entry),
};
// for archives, a path with 1 component is the root node
if path.components().count() == 1 {
// We expect archives to contain a single root node, if there is another root node
// entry with a different path name, this is unsupported.
if let Some(root_node) = self.root_node {
if self.get_node(root_node).path() != path.as_ref() {
return Err(Error::UnexpectedNumberOfTopLevelEntries);
}
}
self.root_node = Some(index)
} else if let Some(parent_path) = path.parent() {
// Recursively add the parent node until it hits the root node.
let parent_index = self.add(IngestionEntry::Dir {
path: parent_path.to_owned(),
})?;
// Insert an edge from the parent directory to the child entry.
self.graph.add_edge(parent_index, index, ());
}
self.path_to_index.insert(path, index);
Ok(index)
}
/// Traverses the graph in DFS post order and collects the entries into a [Vec<IngestionEntry>].
///
/// Unreachable parts of the graph are not included in the result.
pub fn finalize(self) -> Result<Vec<IngestionEntry>, Error> {
// There must be a root node.
let Some(root_node_index) = self.root_node else {
return Err(Error::UnexpectedNumberOfTopLevelEntries);
};
// The root node must be a directory.
if !self.get_node(root_node_index).is_dir() {
return Err(Error::UnexpectedNumberOfTopLevelEntries);
}
let mut traversal = DfsPostOrder::new(&self.graph, root_node_index);
let mut nodes = Vec::with_capacity(self.graph.node_count());
while let Some(node_index) = traversal.next(&self.graph) {
nodes.push(self.get_node(node_index).clone());
}
Ok(nodes)
}
/// Replaces the node with the specified entry. The node's children are disconnected.
///
/// This should never be called if both the old and new nodes are directories.
fn replace_node(&mut self, index: NodeIndex, new_entry: IngestionEntry) {
let entry = self
.graph
.node_weight_mut(index)
.expect("Snix bug: missing node entry");
debug_assert!(!(entry.is_dir() && new_entry.is_dir()));
// Replace the node itself.
warn!(
"saw duplicate entry in archive at path {:?}. old: {:?} new: {:?}",
entry.path(),
&entry,
&new_entry
);
*entry = new_entry;
// Remove any outgoing edges to disconnect the old node's children.
let edges = self
.graph
.edges_directed(index, Direction::Outgoing)
.map(|edge| edge.id())
.collect::<Vec<_>>();
for edge in edges {
self.graph.remove_edge(edge);
}
}
fn get_node(&self, index: NodeIndex) -> &IngestionEntry {
self.graph
.node_weight(index)
.expect("Snix bug: missing node entry")
}
}
#[cfg(test)]
mod test {
use std::sync::LazyLock;
use super::{Error, IngestionEntryGraph};
use crate::import::IngestionEntry;
use crate::B3Digest;
use rstest::rstest;
pub static EMPTY_DIGEST: LazyLock<B3Digest> =
LazyLock::new(|| blake3::hash(&[]).as_bytes().into());
pub static DIR_A: LazyLock<IngestionEntry> = LazyLock::new(|| IngestionEntry::Dir {
path: "a".parse().unwrap(),
});
pub static DIR_B: LazyLock<IngestionEntry> = LazyLock::new(|| IngestionEntry::Dir {
path: "b".parse().unwrap(),
});
pub static DIR_A_B: LazyLock<IngestionEntry> = LazyLock::new(|| IngestionEntry::Dir {
path: "a/b".parse().unwrap(),
});
pub static FILE_A: LazyLock<IngestionEntry> = LazyLock::new(|| IngestionEntry::Regular {
path: "a".parse().unwrap(),
size: 0,
executable: false,
digest: EMPTY_DIGEST.clone(),
});
pub static FILE_A_B: LazyLock<IngestionEntry> = LazyLock::new(|| IngestionEntry::Regular {
path: "a/b".parse().unwrap(),
size: 0,
executable: false,
digest: EMPTY_DIGEST.clone(),
});
pub static FILE_A_B_C: LazyLock<IngestionEntry> = LazyLock::new(|| IngestionEntry::Regular {
path: "a/b/c".parse().unwrap(),
size: 0,
executable: false,
digest: EMPTY_DIGEST.clone(),
});
#[rstest]
#[case::implicit_directories(&[&*FILE_A_B_C], &[&*FILE_A_B_C, &*DIR_A_B, &*DIR_A])]
#[case::explicit_directories(&[&*DIR_A, &*DIR_A_B, &*FILE_A_B_C], &[&*FILE_A_B_C, &*DIR_A_B, &*DIR_A])]
#[case::inaccesible_tree(&[&*DIR_A, &*DIR_A_B, &*FILE_A_B], &[&*FILE_A_B, &*DIR_A])]
fn node_ingestion_success(
#[case] in_entries: &[&IngestionEntry],
#[case] exp_entries: &[&IngestionEntry],
) {
let mut nodes = IngestionEntryGraph::new();
for entry in in_entries {
nodes.add((*entry).clone()).expect("failed to add entry");
}
let entries = nodes.finalize().expect("invalid entries");
let exp_entries: Vec<IngestionEntry> =
exp_entries.iter().map(|entry| (*entry).clone()).collect();
assert_eq!(entries, exp_entries);
}
#[rstest]
#[case::no_top_level_entries(&[], Error::UnexpectedNumberOfTopLevelEntries)]
#[case::multiple_top_level_dirs(&[&*DIR_A, &*DIR_B], Error::UnexpectedNumberOfTopLevelEntries)]
#[case::top_level_file_entry(&[&*FILE_A], Error::UnexpectedNumberOfTopLevelEntries)]
fn node_ingestion_error(#[case] in_entries: &[&IngestionEntry], #[case] exp_error: Error) {
let mut nodes = IngestionEntryGraph::new();
let result = (|| {
for entry in in_entries {
nodes.add((*entry).clone())?;
}
nodes.finalize()
})();
let error = result.expect_err("expected error");
assert_eq!(error.to_string(), exp_error.to_string());
}
}

View file

@ -0,0 +1,192 @@
use std::{
io::{Cursor, Write},
sync::Arc,
};
use tokio::{
io::AsyncRead,
sync::Semaphore,
task::{JoinError, JoinSet},
};
use tokio_util::io::InspectReader;
use tracing::{info_span, Instrument};
use crate::{blobservice::BlobService, B3Digest, Path, PathBuf};
/// Files smaller than this threshold, in bytes, are uploaded to the [BlobService] in the
/// background.
///
/// This is a u32 since we acquire a weighted semaphore using the size of the blob.
/// [Semaphore::acquire_many_owned] takes a u32, so we need to ensure the size of
/// the blob can be represented using a u32 and will not cause an overflow.
const CONCURRENT_BLOB_UPLOAD_THRESHOLD: u32 = 1024 * 1024;
/// The maximum amount of bytes allowed to be buffered in memory to perform async blob uploads.
const MAX_BUFFER_SIZE: usize = 128 * 1024 * 1024;
#[derive(Debug, thiserror::Error)]
pub enum Error {
#[error("unable to read blob contents for {0}: {1}")]
BlobRead(PathBuf, std::io::Error),
#[error("unable to check whether blob at {0} already exists: {1}")]
BlobCheck(PathBuf, std::io::Error),
// FUTUREWORK: proper error for blob finalize
#[error("unable to finalize blob {0}: {1}")]
BlobFinalize(PathBuf, std::io::Error),
#[error("unexpected size for {path} wanted: {wanted} got: {got}")]
UnexpectedSize {
path: PathBuf,
wanted: u64,
got: u64,
},
#[error("blob upload join error: {0}")]
JoinError(#[from] JoinError),
}
/// The concurrent blob uploader provides a mechanism for concurrently uploading small blobs.
/// This is useful when ingesting from sources like tarballs and archives which each blob entry
/// must be read sequentially. Ingesting many small blobs sequentially becomes slow due to
/// round trip time with the blob service. The concurrent blob uploader will buffer small
/// blobs in memory and upload them to the blob service in the background.
///
/// Once all blobs have been uploaded, make sure to call [ConcurrentBlobUploader::join] to wait
/// for all background jobs to complete and check for any errors.
pub struct ConcurrentBlobUploader<BS> {
blob_service: BS,
upload_tasks: JoinSet<Result<(), Error>>,
upload_semaphore: Arc<Semaphore>,
}
impl<BS> ConcurrentBlobUploader<BS>
where
BS: BlobService + Clone + 'static,
{
/// Creates a new concurrent blob uploader which uploads blobs to the provided
/// blob service.
pub fn new(blob_service: BS) -> Self {
Self {
blob_service,
upload_tasks: JoinSet::new(),
upload_semaphore: Arc::new(Semaphore::new(MAX_BUFFER_SIZE)),
}
}
/// Uploads a blob to the blob service. If the blob is small enough it will be read to a buffer
/// and uploaded in the background.
/// This will read the entirety of the provided reader unless an error occurs, even if blobs
/// are uploaded in the background..
pub async fn upload<R>(
&mut self,
path: &Path,
expected_size: u64,
mut r: R,
) -> Result<B3Digest, Error>
where
R: AsyncRead + Unpin,
{
if expected_size < CONCURRENT_BLOB_UPLOAD_THRESHOLD as u64 {
let mut buffer = Vec::with_capacity(expected_size as usize);
let mut hasher = blake3::Hasher::new();
let mut reader = InspectReader::new(&mut r, |bytes| {
hasher.write_all(bytes).unwrap();
});
let permit = self
.upload_semaphore
.clone()
// This cast is safe because ensure the header_size is less than
// CONCURRENT_BLOB_UPLOAD_THRESHOLD which is a u32.
.acquire_many_owned(expected_size as u32)
.await
.unwrap();
let size = tokio::io::copy(&mut reader, &mut buffer)
.await
.map_err(|e| Error::BlobRead(path.into(), e))?;
let digest: B3Digest = hasher.finalize().as_bytes().into();
if size != expected_size {
return Err(Error::UnexpectedSize {
path: path.into(),
wanted: expected_size,
got: size,
});
}
self.upload_tasks.spawn({
let blob_service = self.blob_service.clone();
let expected_digest = digest.clone();
let path = path.to_owned();
let r = Cursor::new(buffer);
async move {
// We know the blob digest already, check it exists before sending it.
if blob_service
.has(&expected_digest)
.await
.map_err(|e| Error::BlobCheck(path.clone(), e))?
{
drop(permit);
return Ok(());
}
let digest = upload_blob(&blob_service, &path, expected_size, r).await?;
assert_eq!(digest, expected_digest, "Snix bug: blob digest mismatch");
// Make sure we hold the permit until we finish writing the blob
// to the [BlobService].
drop(permit);
Ok(())
}
.instrument(info_span!("upload_task"))
});
return Ok(digest);
}
upload_blob(&self.blob_service, path, expected_size, r).await
}
/// Waits for all background upload jobs to complete, returning any upload errors.
pub async fn join(mut self) -> Result<(), Error> {
while let Some(result) = self.upload_tasks.join_next().await {
result??;
}
Ok(())
}
}
async fn upload_blob<BS, R>(
blob_service: &BS,
path: &Path,
expected_size: u64,
mut r: R,
) -> Result<B3Digest, Error>
where
BS: BlobService,
R: AsyncRead + Unpin,
{
let mut writer = blob_service.open_write().await;
let size = tokio::io::copy(&mut r, &mut writer)
.await
.map_err(|e| Error::BlobRead(path.into(), e))?;
let digest = writer
.close()
.await
.map_err(|e| Error::BlobFinalize(path.into(), e))?;
if size != expected_size {
return Err(Error::UnexpectedSize {
path: path.into(),
wanted: expected_size,
got: size,
});
}
Ok(digest)
}

View file

@ -0,0 +1,23 @@
use super::PathBuf;
use crate::Error as CastoreError;
/// Represents all error types that emitted by ingest_entries.
/// It can represent errors uploading individual Directories and finalizing
/// the upload.
/// It also contains a generic error kind that'll carry ingestion-method
/// specific errors.
#[derive(Debug, thiserror::Error)]
pub enum IngestionError<E: std::fmt::Display> {
#[error("error from producer: {0}")]
Producer(#[from] E),
#[error("failed to upload directory at {0}: {1}")]
UploadDirectoryError(PathBuf, CastoreError),
#[error("failed to finalize directory upload: {0}")]
FinalizeDirectoryUpload(CastoreError),
#[error("unexpected end of stream")]
UnexpectedEndOfStream,
}

Some files were not shown because too many files have changed in this diff Show more