Cleans up a whole bunch of things I wanted to get out of the door right away: * depot internal references to //third_party/nixery have been replaced with //tools/nixery * cleaned up files from Github * fixed SPDX & Copyright headers * code formatting and inclusion in //tools/depotfmt checks Change-Id: Iea79f0fdf3aa04f71741d4f4032f88605ae415bb Reviewed-on: https://cl.tvl.fyi/c/depot/+/5486 Tested-by: BuildkiteCI Reviewed-by: tazjin <tazjin@tvl.su> Autosubmit: tazjin <tazjin@tvl.su>
		
			
				
	
	
		
			280 lines
		
	
	
	
		
			6.9 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			280 lines
		
	
	
	
		
			6.9 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| // Copyright 2022 The TVL Contributors
 | |
| // SPDX-License-Identifier: Apache-2.0
 | |
| 
 | |
| // Popcount fetches popularity information for each store path in a
 | |
| // given Nix channel from the upstream binary cache.
 | |
| //
 | |
| // It does this simply by inspecting the narinfo files, rather than
 | |
| // attempting to deal with instantiation of the binary cache.
 | |
| //
 | |
| // This is *significantly* faster than attempting to realise the whole
 | |
| // channel and then calling `nix path-info` on it.
 | |
| //
 | |
| // TODO(tazjin): Persist intermediate results (references for each
 | |
| // store path) to speed up subsequent runs.
 | |
| package main
 | |
| 
 | |
| import (
 | |
| 	"encoding/json"
 | |
| 	"fmt"
 | |
| 	"io"
 | |
| 	"io/ioutil"
 | |
| 	"log"
 | |
| 	"net/http"
 | |
| 	"os"
 | |
| 	"os/exec"
 | |
| 	"regexp"
 | |
| 	"strings"
 | |
| )
 | |
| 
 | |
// Shared HTTP client used for all requests that may follow redirects
// (narinfo and store-path downloads).
var client http.Client

// pathexp matches a full store path and captures its 32-character
// hash and its name, e.g. "/nix/store/<hash>-<name>".
var pathexp = regexp.MustCompile("/nix/store/([a-z0-9]{32})-(.*)$")

// refsexp extracts the single "References: ..." line from a narinfo
// file (multi-line mode).
var refsexp = regexp.MustCompile("(?m:^References: (.*)$)")

// refexp splits one reference entry into its hash and name parts.
var refexp = regexp.MustCompile("^([a-z0-9]{32})-(.*)$")
 | |
| 
 | |
// meta describes a pinned Nix channel: the name it was requested
// under, the URL the channel redirect resolved to, and the git
// commit the channel is pinned to.
type meta struct {
	name   string // channel name as given on the command line
	url    string // resolved channel URL (redirect target)
	commit string // git revision fetched from <url>/git-revision
}
 | |
| 
 | |
// item is a single store path split into its two components, as
// captured by pathexp.
type item struct {
	name string // human-readable package name part of the store path
	hash string // 32-character store path hash
}
 | |
| 
 | |
// failOn terminates the program with the supplied message if the
// given error is non-nil; otherwise it does nothing.
func failOn(err error, msg string) {
	if err == nil {
		return
	}

	log.Fatalf("%s: %s", msg, err)
}
 | |
| 
 | |
| func channelMetadata(channel string) meta {
 | |
| 	// This needs an HTTP client that does not follow redirects
 | |
| 	// because the channel URL is used explicitly for other
 | |
| 	// downloads.
 | |
| 	c := http.Client{
 | |
| 		CheckRedirect: func(req *http.Request, via []*http.Request) error {
 | |
| 			return http.ErrUseLastResponse
 | |
| 		},
 | |
| 	}
 | |
| 
 | |
| 	resp, err := c.Get(fmt.Sprintf("https://channels.nixos.org/%s", channel))
 | |
| 	failOn(err, "failed to retrieve channel metadata")
 | |
| 
 | |
| 	loc, err := resp.Location()
 | |
| 	failOn(err, "no redirect location given for channel")
 | |
| 
 | |
| 	// TODO(tazjin): These redirects are currently served as 301s, but
 | |
| 	// should (and used to) be 302s. Check if/when this is fixed and
 | |
| 	// update accordingly.
 | |
| 	if !(resp.StatusCode == 301 || resp.StatusCode == 302) {
 | |
| 		log.Fatalf("Expected redirect for channel, but received '%s'\n", resp.Status)
 | |
| 	}
 | |
| 
 | |
| 	commitResp, err := c.Get(fmt.Sprintf("%s/git-revision", loc.String()))
 | |
| 	failOn(err, "failed to retrieve commit for channel")
 | |
| 
 | |
| 	defer commitResp.Body.Close()
 | |
| 	commit, err := ioutil.ReadAll(commitResp.Body)
 | |
| 	failOn(err, "failed to read commit from response")
 | |
| 	if commitResp.StatusCode != 200 {
 | |
| 		log.Fatalf("non-success status code when fetching commit: %s (%v)", string(commit), commitResp.StatusCode)
 | |
| 	}
 | |
| 
 | |
| 	return meta{
 | |
| 		name:   channel,
 | |
| 		url:    loc.String(),
 | |
| 		commit: string(commit),
 | |
| 	}
 | |
| }
 | |
| 
 | |
// downloadStorePaths fetches the xz-compressed list of all store
// paths in the channel and decompresses it by piping it through an
// external `xzcat` process, returning one store path per element.
func downloadStorePaths(c *meta) []string {
	resp, err := client.Get(fmt.Sprintf("%s/store-paths.xz", c.url))
	failOn(err, "failed to download store-paths.xz")
	defer resp.Body.Close()

	cmd := exec.Command("xzcat")
	stdin, err := cmd.StdinPipe()
	failOn(err, "failed to open xzcat stdin")
	stdout, err := cmd.StdoutPipe()
	failOn(err, "failed to open xzcat stdout")
	defer stdout.Close()

	// Stream the compressed body into xzcat concurrently; closing
	// stdin signals EOF to the subprocess once the copy finishes.
	// NOTE(review): the io.Copy error is discarded here — a failed
	// download surfaces only indirectly via cmd.Wait below; confirm
	// this is intentional best-effort behaviour.
	go func() {
		defer stdin.Close()
		io.Copy(stdin, resp.Body)
	}()

	err = cmd.Start()
	failOn(err, "failed to start xzcat")

	// Read the entire decompressed path list before waiting on the
	// process, as Wait closes the stdout pipe.
	paths, err := ioutil.ReadAll(stdout)
	failOn(err, "failed to read uncompressed store paths")

	err = cmd.Wait()
	failOn(err, "xzcat failed to decompress")

	// The trailing newline yields a final empty element; callers
	// filter non-matching entries through storePathToItem.
	return strings.Split(string(paths), "\n")
}
 | |
| 
 | |
| func storePathToItem(path string) *item {
 | |
| 	res := pathexp.FindStringSubmatch(path)
 | |
| 	if len(res) != 3 {
 | |
| 		return nil
 | |
| 	}
 | |
| 
 | |
| 	return &item{
 | |
| 		hash: res[1],
 | |
| 		name: res[2],
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func narInfoToRefs(narinfo string) []string {
 | |
| 	all := refsexp.FindAllStringSubmatch(narinfo, 1)
 | |
| 
 | |
| 	if len(all) != 1 {
 | |
| 		log.Fatalf("failed to parse narinfo:\n%s\nfound: %v\n", narinfo, all[0])
 | |
| 	}
 | |
| 
 | |
| 	if len(all[0]) != 2 {
 | |
| 		// no references found
 | |
| 		return []string{}
 | |
| 	}
 | |
| 
 | |
| 	refs := strings.Split(all[0][1], " ")
 | |
| 	for i, s := range refs {
 | |
| 		if s == "" {
 | |
| 			continue
 | |
| 		}
 | |
| 
 | |
| 		res := refexp.FindStringSubmatch(s)
 | |
| 		refs[i] = res[2]
 | |
| 	}
 | |
| 
 | |
| 	return refs
 | |
| }
 | |
| 
 | |
| func fetchNarInfo(i *item) (string, error) {
 | |
| 	file, err := ioutil.ReadFile("popcache/" + i.hash)
 | |
| 	if err == nil {
 | |
| 		return string(file), nil
 | |
| 	}
 | |
| 
 | |
| 	resp, err := client.Get(fmt.Sprintf("https://cache.nixos.org/%s.narinfo", i.hash))
 | |
| 	if err != nil {
 | |
| 		return "", err
 | |
| 	}
 | |
| 
 | |
| 	defer resp.Body.Close()
 | |
| 
 | |
| 	narinfo, err := ioutil.ReadAll(resp.Body)
 | |
| 
 | |
| 	// best-effort write the file to the cache
 | |
| 	ioutil.WriteFile("popcache/"+i.hash, narinfo, 0644)
 | |
| 
 | |
| 	return string(narinfo), err
 | |
| }
 | |
| 
 | |
| // downloader starts a worker that takes care of downloading narinfos
 | |
| // for all paths received from the queue.
 | |
| //
 | |
| // If there is no data remaining in the queue, the downloader exits
 | |
| // and informs the finaliser queue about having exited.
 | |
| func downloader(queue chan *item, narinfos chan string, downloaders chan struct{}) {
 | |
| 	for i := range queue {
 | |
| 		ni, err := fetchNarInfo(i)
 | |
| 		if err != nil {
 | |
| 			log.Printf("couldn't fetch narinfo for %s: %s\n", i.name, err)
 | |
| 			continue
 | |
| 
 | |
| 		}
 | |
| 		narinfos <- ni
 | |
| 	}
 | |
| 	downloaders <- struct{}{}
 | |
| }
 | |
| 
 | |
// finaliser counts the number of downloaders that have exited and
// closes the narinfos queue to signal to the counters that no more
// elements will arrive.
func finaliser(count int, downloaders chan struct{}, narinfos chan string) {
	remaining := count
	for range downloaders {
		remaining--
		if remaining > 0 {
			continue
		}

		close(downloaders)
		close(narinfos)
		return
	}
}
 | |
| 
 | |
| func main() {
 | |
| 	if len(os.Args) == 1 {
 | |
| 		log.Fatalf("Nix channel must be specified as first argument")
 | |
| 	}
 | |
| 
 | |
| 	err := os.MkdirAll("popcache", 0755)
 | |
| 	if err != nil {
 | |
| 		log.Fatalf("Failed to create 'popcache' directory in current folder: %s\n", err)
 | |
| 	}
 | |
| 
 | |
| 	count := 42 // concurrent downloader count
 | |
| 	channel := os.Args[1]
 | |
| 	log.Printf("Fetching metadata for channel '%s'\n", channel)
 | |
| 
 | |
| 	meta := channelMetadata(channel)
 | |
| 	log.Printf("Pinned channel '%s' to commit '%s'\n", meta.name, meta.commit)
 | |
| 
 | |
| 	paths := downloadStorePaths(&meta)
 | |
| 	log.Printf("Fetching references for %d store paths\n", len(paths))
 | |
| 
 | |
| 	// Download paths concurrently and receive their narinfos into
 | |
| 	// a channel. Data is collated centrally into a map and
 | |
| 	// serialised at the /very/ end.
 | |
| 	downloadQueue := make(chan *item, len(paths))
 | |
| 	for _, p := range paths {
 | |
| 		if i := storePathToItem(p); i != nil {
 | |
| 			downloadQueue <- i
 | |
| 		}
 | |
| 	}
 | |
| 	close(downloadQueue)
 | |
| 
 | |
| 	// Set up a task tracking channel for parsing & counting
 | |
| 	// narinfos, as well as a coordination channel for signaling
 | |
| 	// that all downloads have finished
 | |
| 	narinfos := make(chan string, 50)
 | |
| 	downloaders := make(chan struct{}, count)
 | |
| 	for i := 0; i < count; i++ {
 | |
| 		go downloader(downloadQueue, narinfos, downloaders)
 | |
| 	}
 | |
| 
 | |
| 	go finaliser(count, downloaders, narinfos)
 | |
| 
 | |
| 	counts := make(map[string]int)
 | |
| 	for ni := range narinfos {
 | |
| 		refs := narInfoToRefs(ni)
 | |
| 		for _, ref := range refs {
 | |
| 			if ref == "" {
 | |
| 				continue
 | |
| 			}
 | |
| 
 | |
| 			counts[ref] += 1
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// Remove all self-references (i.e. packages not referenced by anyone else)
 | |
| 	for k, v := range counts {
 | |
| 		if v == 1 {
 | |
| 			delete(counts, k)
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	bytes, _ := json.Marshal(counts)
 | |
| 	outfile := fmt.Sprintf("popularity-%s-%s.json", meta.name, meta.commit)
 | |
| 	err = ioutil.WriteFile(outfile, bytes, 0644)
 | |
| 	if err != nil {
 | |
| 		log.Fatalf("Failed to write output to '%s': %s\n", outfile, err)
 | |
| 	}
 | |
| 
 | |
| 	log.Printf("Wrote output to '%s'\n", outfile)
 | |
| }
 |