If you use munki to serve software to your Macs, and autopkg to automatically fetch new updates, you might end up with a bit of an oversized munki-repo after a while.

This could be due to some automation gone wrong, downloading duplicate pkgs without adding pkginfo-files. Or it could be a colleague who hasn’t had their coffee yet, running munkiimport on something without committing the pkginfo-file. It could be a lot of things!

For these cases you could use a simple Python script that traverses the pkgs-directory and checks the pkginfo-files to see whether each pkg is referenced anywhere. Such a script could look something like this.

#!/usr/bin/env python
"""
Usage:
  ./find_orphaned_pkgs.py
  ./find_orphaned_pkgs.py | grep -ie libreoffice | xargs rm # Remove all orphaned libreoffice pkgs
  ./find_orphaned_pkgs.py --munki-repo path/to/munki/repo
  ./find_orphaned_pkgs.py --count
"""
import argparse
import os.path as op
import os
import re
import plistlib

def munki_path(full_path):
    """Return the part of ``full_path`` that lies below the ``pkgs`` directory.

    The result has the same form as the ``installer_item_location`` value in
    a pkginfo file, e.g. ``./pkgs/apps/Foo-1.0.dmg`` -> ``apps/Foo-1.0.dmg``.

    Only a path component named exactly ``pkgs`` is treated as the anchor, so
    a sub-directory such as ``oldpkgs/`` is kept as part of the result instead
    of being mistaken for the repo's ``pkgs`` directory. When several ``pkgs``
    components are present, the last one is used.

    Args:
        full_path: Path (with ``/`` separators) to an item inside ``pkgs``.

    Returns:
        The path relative to the ``pkgs`` directory.

    Raises:
        ValueError: If ``full_path`` has no ``pkgs/`` component followed by
            at least one character.
    """
    # The optional greedy ``.*/`` prefix selects the *last* "pkgs/" component
    # and also allows paths that start directly with "pkgs/".
    match = re.match(r'^(?:.*/)?pkgs/(.+)$', full_path)
    if match is None:
        raise ValueError('Not a path inside a pkgs directory: %r' % (full_path,))
    return match.group(1)

# pkginfo keys whose value is a path (relative to pkgs/) of a file in use.
_ITEM_LOCATION_KEYS = ('installer_item_location', 'uninstaller_item_location')

# Finder/AppleDouble droppings that are neither pkgs nor pkginfo files.
_IGNORED_PREFIXES = ('._', '.DS_Store')


def _read_plist(path):
    """Parse the plist at ``path`` on both old and new Python versions."""
    if hasattr(plistlib, 'load'):
        with open(path, 'rb') as handle:
            return plistlib.load(handle)
    # Python 2 only has the path-based reader.
    return plistlib.readPlist(path)


def _list_pkgs(pkgs_root):
    """Return every installer item below ``pkgs_root``, relative to it."""
    found = []
    for root, dirs, files in os.walk(pkgs_root):
        # Bundle-style packages are directories; record the bundle itself.
        for name in dirs:
            if name.endswith('.pkg'):
                found.append(op.relpath(op.join(root, name), pkgs_root))

        for name in files:
            if name.startswith(_IGNORED_PREFIXES):
                continue

            rel_path = op.relpath(op.join(root, name), pkgs_root)

            # Files living inside a .pkg bundle are part of that bundle and
            # must not be reported on their own.
            if '.pkg' in rel_path and (not rel_path.endswith('.pkg')
                                       or rel_path.count('.pkg') != 1):
                continue

            found.append(rel_path)
    return found


def _referenced_items(pkgsinfo_root):
    """Return the set of item locations referenced by any pkginfo file."""
    referenced = set()
    for root, _dirs, files in os.walk(pkgsinfo_root):
        for name in files:
            # Hidden metadata files are not plists and would crash the parser.
            if name.startswith(_IGNORED_PREFIXES):
                continue

            # A pkginfo that cannot be parsed is deliberately fatal: skipping
            # it would make its pkg look orphaned.
            pkginfo = _read_plist(op.join(root, name))
            for key in _ITEM_LOCATION_KEYS:
                if key in pkginfo:
                    referenced.add(pkginfo[key])
    return referenced


def find_orphaned_pkgs(munki_repo):
    """Return paths of items in ``pkgs`` that no pkginfo file refers to.

    Args:
        munki_repo: Path to the root of the munki repo.

    Returns:
        A list of paths of the form ``<munki_repo>/pkgs/<item>``, in the
        order the items were found while walking ``pkgs``.

    Raises:
        Exception: If ``munki_repo`` does not exist, or lacks either the
            ``pkgs`` or the ``pkgsinfo`` directory.
    """
    if not op.exists(munki_repo):
        raise Exception('No such path')

    pkgs_root = op.join(munki_repo, 'pkgs')
    pkgsinfo_root = op.join(munki_repo, 'pkgsinfo')

    # Both directories are required. Without pkgsinfo nothing is referenced
    # and every single pkg would be reported as an orphan.
    if not op.isdir(pkgs_root) or not op.isdir(pkgsinfo_root):
        raise Exception('This does not look like a munki repo')

    referenced = _referenced_items(pkgsinfo_root)
    return [op.join(munki_repo, 'pkgs', pkg)
            for pkg in _list_pkgs(pkgs_root)
            if pkg not in referenced]


def main(argv=None):
    """Parse the command line and print one orphaned item per line.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('-m', '--munki-repo', type=str, default='./',
                        help='Path to munki repo. Defaults to current working dir.')
    parser.add_argument('-c', '--count', action='store_true',
                        help='Print total number of orphans.')

    args = parser.parse_args(argv)

    orphans = find_orphaned_pkgs(args.munki_repo)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    for orphan in orphans:
        print(orphan)

    if args.count:
        print('Found %d orphaned files.' % len(orphans))


if __name__ == '__main__':
    main()

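This can easily be used together with other Unix tools like grep and rm to filter and delete the pkgs you want. Review the list before deleting anything, and note that plain xargs splits its input on whitespace, so paths containing spaces need extra care.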

$ ./find_orphaned_pkgs.py | grep -ie spotify
./pkgs/free/Spotify-1.0.7.157.g2a6526f9.dmg
./pkgs/free/Spotify-0.9.14.11.g7e298e37.dmg
./pkgs/free/Spotify-0.9.14.13.gba5645ad.dmg
./pkgs/free/Spotify-0.9.10.21.g22fbdb39.dmg
./pkgs/free/Spotify-1.0.4.89.g22b04e33.dmg
./pkgs/free/Spotify-1.0.47.13.gd8e05b1f.dmg
./pkgs/free/Spotify-1.0.43.123.g80176796__1.dmg
./pkgs/free/Spotify-1.0.9.133.gcedaee38.dmg
./pkgs/free/Spotify-1.0.27.75.gdc223232__2.dmg
./pkgs/free/Spotify-0.9.11.27.g2b1a638c.dmg
./pkgs/free/Spotify-1.0.43.123.g80176796.dmg
./pkgs/free/Spotify-1.0.20.94.g8f8543b3__1.dmg
./pkgs/free/Spotify-1.0.27.75.gdc223232__1.dmg

$ ./find_orphaned_pkgs.py | grep -ie spotify | xargs rm

$ ./find_orphaned_pkgs.py | grep -ie spotify