refactor(server): Use wrapper script to avoid path dependency
Instead of requiring the server component to be made aware of the location of the Nix builder via environment variables, this commit introduces a wrapper script for the builder that can simply exist on the server's $PATH. This is one step towards a slightly nicer out-of-the-box experience when using `nix-build -A nixery-bin`.
parent 819b460278
commit 6d718bf271

7 changed files with 73 additions and 9 deletions
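A sketch of the intended effect (the `callPackage` invocation is illustrative; wiring the wrapper into `nixery-bin` itself is not part of this diff):

    $ nix-build -E '(import <nixpkgs> {}).callPackage ./tools/nixery/build-image {}'
    $ export PATH="$PWD/result/bin:$PATH"
    $ type nixery-build-image   # the server can now resolve the builder by name,
                                # with no environment variable pointing at its store path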
tools/nixery/build-image/build-image.nix (new file, 292 lines)
@@ -0,0 +1,292 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This file contains a modified version of dockerTools.buildImage that, instead
# of outputting a single tarball which can be imported into a running Docker
# daemon, builds a manifest file that can be used for serving the image over a
# registry API.

{
  # Image Name
  name,
  # Image tag, the Nix output's hash will be used if null
  tag ? null,
  # Files to put on the image (a nix store path or list of paths).
  contents ? [],
  # Packages to install by name (which must refer to top-level attributes of
  # nixpkgs). This is passed in as a JSON-array in string form.
  packages ? "[]",
  # Optional bash script to run on the files prior to fixturizing the layer.
  extraCommands ? "", uid ? 0, gid ? 0,
  # Docker's modern image storage mechanisms have a maximum of 125
  # layers. To allow for some extensibility (via additional layers),
  # the default here is set to something a little less than that.
  maxLayers ? 96,

  # Configuration for which package set to use when building.
  #
  # Both channels of the public nixpkgs repository as well as imports
  # from private repositories are supported.
  #
  # This setting can be invoked with three different formats:
  #
  # 1. nixpkgs!$channel (e.g. nixpkgs!nixos-19.03)
  # 2. git!$repo!$rev (e.g. git!git@github.com:NixOS/nixpkgs.git!master)
  # 3. path!$path (e.g. path!/var/local/nixpkgs)
  #
  # '!' was chosen as the separator because `builtins.split` does not
  # support regex escapes and there are few other candidates. It
  # doesn't matter much because this is invoked by the server.
  pkgSource ? "nixpkgs!nixos-19.03"
}:

let
  # If a nixpkgs channel is requested, it is retrieved from Github (as
  # a tarball) and imported.
  fetchImportChannel = channel:
    let url = "https://github.com/NixOS/nixpkgs-channels/archive/${channel}.tar.gz";
    in import (builtins.fetchTarball url) {};

  # If a git repository is requested, it is retrieved via
  # builtins.fetchGit which defaults to the git configuration of the
  # outside environment. This means that user-configured SSH
  # credentials etc. are going to work as expected.
  fetchImportGit = url: rev:
    let
      # builtins.fetchGit needs to know whether 'rev' is a reference
      # (e.g. a branch/tag) or a revision (i.e. a commit hash)
      #
      # Since this data is being extrapolated from the supplied image
      # tag, we have to guess if we want to avoid specifying a format.
      #
      # There are some additional caveats around whether the default
      # branch contains the specified revision, which need to be
      # explained to users.
      spec = if (builtins.stringLength rev) == 40 then {
        inherit url rev;
      } else {
        inherit url;
        ref = rev;
      };
    in import (builtins.fetchGit spec) {};

  importPath = path: import (builtins.toPath path) {};

  source = builtins.split "!" pkgSource;
  sourceType = builtins.elemAt source 0;
  pkgs = with builtins;
    if sourceType == "nixpkgs"
    then fetchImportChannel (elemAt source 2)
    else if sourceType == "git"
    then fetchImportGit (elemAt source 2) (elemAt source 4)
    else if sourceType == "path"
    then importPath (elemAt source 2)
    else builtins.throw("Invalid package set source specification: ${pkgSource}");
in

# Since this is essentially a re-wrapping of some of the functionality that is
# implemented in the dockerTools, we need all of its components in our top-level
# namespace.
with builtins;
with pkgs;
with dockerTools;

let
  tarLayer = "application/vnd.docker.image.rootfs.diff.tar";
  baseName = baseNameOf name;

  # deepFetch traverses the top-level Nix package set to retrieve an item via a
  # path specified in string form.
  #
  # For top-level items, the name of the key yields the result directly. Nested
  # items are fetched by using dot-syntax, as in Nix itself.
  #
  # Due to a restriction of the registry API specification it is not possible to
  # pass uppercase characters in an image name, however the Nix package set
  # makes use of camelCasing repeatedly (for example for `haskellPackages`).
  #
  # To work around this, if no value is found on the top-level a second lookup
  # is done on the package set using lowercase-names. This is not done for
  # nested sets, as they often have keys that only differ in case.
  #
  # For example, `deepFetch pkgs "xorg.xev"` retrieves `pkgs.xorg.xev` and
  # `deepFetch pkgs "haskellpackages.stylish-haskell"` retrieves
  # `pkgs.haskellPackages.stylish-haskell`.
  deepFetch = with lib; s: n:
    let path = splitString "." n;
        err = { error = "not_found"; pkg = n; };
        # The most efficient way I've found to do a lookup against
        # case-differing versions of an attribute is to first construct a
        # mapping of all lowercased attribute names to their differently cased
        # equivalents.
        #
        # This map is then used for a second lookup if the top-level
        # (case-sensitive) one does not yield a result.
        hasUpper = str: (match ".*[A-Z].*" str) != null;
        allUpperKeys = filter hasUpper (attrNames s);
        lowercased = listToAttrs (map (k: {
          name = toLower k;
          value = k;
        }) allUpperKeys);
        caseAmendedPath = map (v: if hasAttr v lowercased then lowercased."${v}" else v) path;
        fetchLower = attrByPath caseAmendedPath err s;
    in attrByPath path fetchLower s;

  # allContents is the combination of all derivations and store paths passed in
  # directly, as well as packages referred to by name.
  #
  # It accumulates potential errors about packages that could not be found to
  # return this information back to the server.
  allContents =
    # Folds over the results of 'deepFetch' on all requested packages to
    # separate them into errors and content. This allows the program to
    # terminate early and return only the errors if any are encountered.
    let splitter = attrs: res:
          if hasAttr "error" res
          then attrs // { errors = attrs.errors ++ [ res ]; }
          else attrs // { contents = attrs.contents ++ [ res ]; };
        init = { inherit contents; errors = []; };
        fetched = (map (deepFetch pkgs) (fromJSON packages));
    in foldl' splitter init fetched;

  contentsEnv = symlinkJoin {
    name = "bulk-layers";
    paths = allContents.contents;
  };

  # The image build infrastructure expects to be outputting a slightly different
  # format than the one we serve over the registry protocol. To work around its
  # expectations we need to provide an empty JSON file that it can write some
  # fun data into.
  emptyJson = writeText "empty.json" "{}";

  bulkLayers = mkManyPureLayers {
    name = baseName;
    configJson = emptyJson;
    closure = writeText "closure" "${contentsEnv} ${emptyJson}";
    # One layer will be taken up by the customisationLayer, so
    # take up one less.
    maxLayers = maxLayers - 1;
  };

  customisationLayer = mkCustomisationLayer {
    name = baseName;
    contents = contentsEnv;
    baseJson = emptyJson;
    inherit uid gid extraCommands;
  };

  # Inspect the returned bulk layers to determine which layers belong to the
  # image and how to serve them.
  #
  # This computes both an MD5 and a SHA256 hash of each layer, which are used
  # for different purposes. See the registry server implementation for details.
  #
  # Some of this logic is copied straight from `buildLayeredImage`.
  allLayersJson = runCommand "fs-layer-list.json" {
    buildInputs = [ coreutils findutils jq openssl ];
  } ''
    find ${bulkLayers} -mindepth 1 -maxdepth 1 | sort -t/ -k5 -n > layer-list
    echo ${customisationLayer} >> layer-list

    for layer in $(cat layer-list); do
      layerPath="$layer/layer.tar"
      layerSha256=$(sha256sum $layerPath | cut -d ' ' -f1)
      # The server application compares binary MD5 hashes and expects base64
      # encoding instead of hex.
      layerMd5=$(openssl dgst -md5 -binary $layerPath | openssl enc -base64)
      layerSize=$(wc -c $layerPath | cut -d ' ' -f1)

      jq -n -c --arg sha256 $layerSha256 --arg md5 $layerMd5 --arg size $layerSize --arg path $layerPath \
        '{ size: ($size | tonumber), sha256: $sha256, md5: $md5, path: $path }' >> fs-layers
    done

    cat fs-layers | jq -s -c '.' > $out
  '';
  allLayers = fromJSON (readFile allLayersJson);

  # Image configuration corresponding to the OCI specification for the file type
  # 'application/vnd.oci.image.config.v1+json'
  config = {
    architecture = "amd64";
    os = "linux";
    rootfs.type = "layers";
    rootfs.diff_ids = map (layer: "sha256:${layer.sha256}") allLayers;
    # Required to let Kubernetes import Nixery images
    config = {};
  };
  configJson = writeText "${baseName}-config.json" (toJSON config);
  configMetadata = fromJSON (readFile (runCommand "config-meta" {
    buildInputs = [ jq openssl ];
  } ''
    size=$(wc -c ${configJson} | cut -d ' ' -f1)
    sha256=$(sha256sum ${configJson} | cut -d ' ' -f1)
    md5=$(openssl dgst -md5 -binary ${configJson} | openssl enc -base64)
    jq -n -c --arg size $size --arg sha256 $sha256 --arg md5 $md5 \
      '{ size: ($size | tonumber), sha256: $sha256, md5: $md5 }' \
      >> $out
  ''));

  # Corresponds to the manifest JSON expected by the Registry API.
  #
  # This is Docker's "Image Manifest V2, Schema 2":
  # https://docs.docker.com/registry/spec/manifest-v2-2/
  manifest = {
    schemaVersion = 2;
    mediaType = "application/vnd.docker.distribution.manifest.v2+json";

    config = {
      mediaType = "application/vnd.docker.container.image.v1+json";
      size = configMetadata.size;
      digest = "sha256:${configMetadata.sha256}";
    };

    layers = map (layer: {
      mediaType = tarLayer;
      digest = "sha256:${layer.sha256}";
      size = layer.size;
    }) allLayers;
  };

  # This structure maps each layer digest to the actual tarball that will need
  # to be served. It is used by the controller to cache the paths during a pull.
  layerLocations = {
    "${configMetadata.sha256}" = {
      path = configJson;
      md5 = configMetadata.md5;
    };
  } // (listToAttrs (map (layer: {
    name = "${layer.sha256}";
    value = {
      path = layer.path;
      md5 = layer.md5;
    };
  }) allLayers));

  # Final output structure returned to the controller in the case of a
  # successful build.
  manifestOutput = {
    inherit manifest layerLocations;
  };

  # Output structure returned if errors occurred during the build. Currently the
  # only error type that is returned in a structured way is 'not_found'.
  errorOutput = {
    error = "not_found";
    pkgs = map (err: err.pkg) allContents.errors;
  };
in writeText "manifest-output.json" (if (length allContents.errors) == 0
  then toJSON manifestOutput
  else toJSON errorOutput
)
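The derivation above evaluates to a single JSON file whose top-level shape follows manifestOutput or errorOutput. A sketch of the two possibilities (field contents elided or invented for illustration):

    { "manifest": { ... }, "layerLocations": { ... } }     # successful build
    { "error": "not_found", "pkgs": [ "nosuchpackage" ] }  # one entry per failed deepFetch lookup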
tools/nixery/build-image/default.nix (new file, 40 lines)
@@ -0,0 +1,40 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This file builds the tool used to calculate layer distribution and
# moves the files needed to call the Nix builds at runtime into the
# correct locations.

{ buildGoPackage, lib, nix, writeShellScriptBin }:

let
  group-layers = buildGoPackage {
    name = "group-layers";
    goDeps = ./go-deps.nix;
    src = ./.;

    goPackagePath = "github.com/google/nixery/group-layers";

    meta = {
      description = "Tool to group a set of packages into container image layers";
      license = lib.licenses.asl20;
      maintainers = [ lib.maintainers.tazjin ];
    };
  };

# Wrapper script which is called by the Nixery server to trigger an
# actual image build.
in writeShellScriptBin "nixery-build-image" ''
  exec ${nix}/bin/nix-build --show-trace --no-out-link "$@" ${./build-image.nix}
''
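Because the wrapper interpolates ${./build-image.nix}, the expression file is copied into the Nix store alongside the wrapper, and any server-supplied arguments are forwarded via "$@". An illustrative invocation (argument values are examples, not defaults):

    $ nixery-build-image --argstr name shell --argstr packages '["bash"]'

is therefore equivalent to running nix-build --show-trace --no-out-link with those --argstr flags against the store copy of build-image.nix, printing the store path of the resulting manifest-output.json.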
tools/nixery/build-image/go-deps.nix (new file, 12 lines)
@@ -0,0 +1,12 @@
# This file was generated by https://github.com/kamilchm/go2nix v1.3.0
[
  {
    goPackagePath = "gonum.org/v1/gonum";
    fetch = {
      type = "git";
      url = "https://github.com/gonum/gonum";
      rev = "ced62fe5104b907b6c16cb7e575c17b2e62ceddd";
      sha256 = "1b7q6haabnp53igpmvr6a2414yralhbrldixx4kbxxg1apy8jdjg";
    };
  }
]
tools/nixery/build-image/group-layers.go (new file, 352 lines)
@@ -0,0 +1,352 @@
// This program reads an export reference graph (i.e. a graph representing the
// runtime dependencies of a set of derivations) created by Nix and groups them
// in a way that is likely to match the grouping for other derivation sets with
// overlapping dependencies.
//
// This is used to determine which derivations to include in which layers of a
// container image.
//
// # Inputs
//
// * a graph of Nix runtime dependencies, generated via exportReferencesGraph
// * a file containing absolute popularity values of packages in the
//   Nix package set (in the form of a direct reference count)
// * a maximum number of layers to allocate for the image (the "layer budget")
//
// # Algorithm
//
// It works by first creating a (directed) dependency tree:
//
// img (root node)
// │
// ├───> A ─────┐
// │            v
// ├───> B ───> E
// │            ^
// ├───> C ─────┘
// │     │
// │     v
// └───> D ───> F
//       │
//       └────> G
//
// Each node (i.e. package) is then visited to determine how important
// it is to separate this node into its own layer, specifically:
//
// 1. Is the node within a certain threshold percentile of absolute
//    popularity within all of nixpkgs? (e.g. `glibc`, `openssl`)
//
// 2. Is the node's runtime closure above a threshold size? (e.g. 100MB)
//
// In either case, a bit is flipped for this node representing each
// condition and an edge to it is inserted directly from the image
// root, if it does not already exist.
//
// For the rest of the example we assume 'G' is above the threshold
// size and 'E' is popular.
//
// This tree is then transformed into a dominator tree:
//
// img
// │
// ├───> A
// ├───> B
// ├───> C
// ├───> E
// ├───> D ───> F
// └───> G
//
// Specifically this means that the paths to A, B, C, E, G, and D
// always pass through the root (i.e. are dominated by it), whilst F
// is dominated by D (all paths go through it).
//
// The top-level subtrees are considered as the initially selected
// layers.
//
// If the list of layers fits within the layer budget, it is returned.
//
// Otherwise, a merge rating is calculated for each layer. This is the
// product of the layer's total size and its root node's popularity.
//
// Layers are then merged in ascending order of merge ratings until
// they fit into the layer budget.
//
// # Threshold values
//
// Threshold values for the partitioning conditions mentioned above
// have not yet been determined, but we will make a good first guess
// based on gut feeling and proceed to measure their impact on cache
// hits/misses.
//
// # Example
//
// Using the logic described above as well as the example presented in
// the introduction, this program would create the following layer
// groupings (assuming no additional partitioning):
//
// Layer budget: 1
// Layers: { A, B, C, D, E, F, G }
//
// Layer budget: 2
// Layers: { G }, { A, B, C, D, E, F }
//
// Layer budget: 3
// Layers: { G }, { E }, { A, B, C, D, F }
//
// Layer budget: 4
// Layers: { G }, { E }, { D, F }, { A, B, C }
//
// ...
//
// Layer budget: 10
// Layers: { E }, { D, F }, { A }, { B }, { C }
package main

import (
	"encoding/json"
	"flag"
	"io/ioutil"
	"log"
	"regexp"
	"sort"

	"gonum.org/v1/gonum/graph/flow"
	"gonum.org/v1/gonum/graph/simple"
)

// exportReferences represents the structured attributes Nix outputs when asking it
// for the exportReferencesGraph of a list of derivations.
type exportReferences struct {
	References struct {
		Graph []string `json:"graph"`
	} `json:"exportReferencesGraph"`

	Graph []struct {
		Size uint64   `json:"closureSize"`
		Path string   `json:"path"`
		Refs []string `json:"references"`
	} `json:"graph"`
}

// Popularity data for each Nix package that was calculated in advance.
//
// Popularity is a number from 1-100 that represents the
// popularity percentile in which this package resides inside
// of the nixpkgs tree.
type pkgsMetadata = map[string]int

// layer represents the data returned for each layer that Nix should
// build for the container image.
type layer struct {
	Contents    []string `json:"contents"`
	mergeRating uint64
}

func (a layer) merge(b layer) layer {
	a.Contents = append(a.Contents, b.Contents...)
	a.mergeRating += b.mergeRating
	return a
}

// closure as pointed to by the graph nodes.
type closure struct {
	GraphID    int64
	Path       string
	Size       uint64
	Refs       []string
	Popularity int
}

func (c *closure) ID() int64 {
	return c.GraphID
}

var nixRegexp = regexp.MustCompile(`^/nix/store/[a-z0-9]+-`)

func (c *closure) DOTID() string {
	return nixRegexp.ReplaceAllString(c.Path, "")
}

// bigOrPopular checks whether this closure should be considered for
// separation into its own layer, even if it would otherwise only
// appear in a subtree of the dominator tree.
func (c *closure) bigOrPopular() bool {
	const sizeThreshold = 100 * 1000000 // 100MB

	if c.Size > sizeThreshold {
		return true
	}

	// The threshold value used here is currently roughly the
	// minimum number of references that only 1% of packages in
	// the entire package set have.
	//
	// TODO(tazjin): Do this more elegantly by calculating
	// percentiles for each package and using those instead.
	if c.Popularity >= 1000 {
		return true
	}

	return false
}

func insertEdges(graph *simple.DirectedGraph, cmap *map[string]*closure, node *closure) {
	// Big or popular nodes get a separate edge from the top to
	// flag them for their own layer.
	if node.bigOrPopular() && !graph.HasEdgeFromTo(0, node.ID()) {
		edge := graph.NewEdge(graph.Node(0), node)
		graph.SetEdge(edge)
	}

	for _, c := range node.Refs {
		// Nix adds a self reference to each node, which
		// should not be inserted.
		if c != node.Path {
			edge := graph.NewEdge(node, (*cmap)[c])
			graph.SetEdge(edge)
		}
	}
}

// Create a graph structure from the references supplied by Nix.
func buildGraph(refs *exportReferences, pop *pkgsMetadata) *simple.DirectedGraph {
	cmap := make(map[string]*closure)
	graph := simple.NewDirectedGraph()

	// Insert all closures into the graph, as well as a fake root
	// closure which serves as the top of the tree.
	//
	// A map from store paths to IDs is kept to actually insert
	// edges below.
	root := &closure{
		GraphID: 0,
		Path:    "image_root",
	}
	graph.AddNode(root)

	for idx, c := range refs.Graph {
		node := &closure{
			GraphID: int64(idx + 1), // inc because of root node
			Path:    c.Path,
			Size:    c.Size,
			Refs:    c.Refs,
		}

		if p, ok := (*pop)[node.DOTID()]; ok {
			node.Popularity = p
		} else {
			node.Popularity = 1
		}

		graph.AddNode(node)
		cmap[c.Path] = node
	}

	// Insert the top-level closures with edges from the root
	// node, then insert all edges for each closure.
	for _, p := range refs.References.Graph {
		edge := graph.NewEdge(root, cmap[p])
		graph.SetEdge(edge)
	}

	for _, c := range cmap {
		insertEdges(graph, &cmap, c)
	}

	return graph
}

// Extracts a subgraph starting at the specified root from the
// dominator tree. The subgraph is converted into a flat list of
// layers, each containing the store paths and merge rating.
func groupLayer(dt *flow.DominatorTree, root *closure) layer {
	size := root.Size
	contents := []string{root.Path}
	children := dt.DominatedBy(root.ID())

	// This iteration does not use 'range' because the list being
	// iterated is modified during the iteration (yes, I'm sorry).
	for i := 0; i < len(children); i++ {
		child := children[i].(*closure)
		size += child.Size
		contents = append(contents, child.Path)
		children = append(children, dt.DominatedBy(child.ID())...)
	}

	return layer{
		Contents: contents,
		// TODO(tazjin): The point of this is to factor in
		// both the size and the popularity when making merge
		// decisions, but there might be a smarter way to do
		// it than a plain multiplication.
		mergeRating: uint64(root.Popularity) * size,
	}
}

// Calculate the dominator tree of the entire package set and group
// each top-level subtree into a layer.
//
// Layers are merged together until they fit into the layer budget,
// based on their merge rating.
func dominate(budget int, graph *simple.DirectedGraph) []layer {
	dt := flow.Dominators(graph.Node(0), graph)

	var layers []layer
	for _, n := range dt.DominatedBy(dt.Root().ID()) {
		layers = append(layers, groupLayer(&dt, n.(*closure)))
	}

	sort.Slice(layers, func(i, j int) bool {
		return layers[i].mergeRating < layers[j].mergeRating
	})

	if len(layers) > budget {
		log.Printf("Ideal image has %v layers, but budget is %v\n", len(layers), budget)
	}

	for len(layers) > budget {
		merged := layers[0].merge(layers[1])
		layers[1] = merged
		layers = layers[1:]
	}

	return layers
}

func main() {
	graphFile := flag.String("graph", ".attrs.json", "Input file containing graph")
	popFile := flag.String("pop", "popularity.json", "Package popularity data")
	outFile := flag.String("out", "layers.json", "File to write layers to")
	layerBudget := flag.Int("budget", 94, "Total layer budget available")
	flag.Parse()

	// Parse graph data
	file, err := ioutil.ReadFile(*graphFile)
	if err != nil {
		log.Fatalf("Failed to load input: %s\n", err)
	}

	var refs exportReferences
	err = json.Unmarshal(file, &refs)
	if err != nil {
		log.Fatalf("Failed to deserialise input: %s\n", err)
	}

	// Parse popularity data
	popBytes, err := ioutil.ReadFile(*popFile)
	if err != nil {
		log.Fatalf("Failed to load input: %s\n", err)
	}

	var pop pkgsMetadata
	err = json.Unmarshal(popBytes, &pop)
	if err != nil {
		log.Fatalf("Failed to deserialise input: %s\n", err)
	}

	graph := buildGraph(&refs, &pop)
	layers := dominate(*layerBudget, graph)

	j, _ := json.Marshal(layers)
	ioutil.WriteFile(*outFile, j, 0644)
}
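Given the flag defaults in main(), a standalone run of the tool looks like this (the .attrs.json default presumably corresponds to the file Nix emits for builds using structured attributes, though that wiring is not shown in this commit):

    $ group-layers --graph .attrs.json --pop popularity.json --out layers.json --budget 94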