#!/usr/bin/python
#
# Copyright (c) 2002 Michal Moskal .
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. All advertising materials mentioning features or use of this software
#    must display the following acknowledgement:
#      This product includes software developed by Michal Moskal.
# 4. Neither the name of the author nor the names of any co-contributors
#    may be used to endorse or promote products derived from this software
#    without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY MICHAL MOSKAL AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#

"""Find superseded RPM packages in a directory and emit a removal script.

See ``help_str`` below for the user-facing description.  The code sticks to
syntax that both Python 2 and Python 3 interpreters accept.
"""

import getopt
import os
import re
import stat
import sys

# The rpm bindings are only needed once packages are actually read.  Importing
# them defensively lets the usage text be shown without them and lets main()
# report a missing module with a plain message instead of a traceback.
try:
    import rpm
except ImportError:
    rpm = None
    rpm_import_error = sys.exc_info()[1]
else:
    rpm_import_error = None

default_out_name = "clean-dir"

help_str = """
This script when run on directory containing RPM packages (either binary
or source) will find all packages that are there in two or more versions
and mark all but the most recent one for removal. Additionally it prints
warnings when there are binary packages built from different versions
of the same source RPM (this is for example the case when some subpackage
is removed).

List of files to be removed is printed in form of shell script to a file.
You can edit and run it. If there are no entries to be removed, file is
not written.

In case rpm cannot compare two versions of the same package (for example
they are equal, or rpm thinks v1 < v2 and v2 < v1) script gives up for
this package. Good packaging policy suggests setting Epoch then.

Version: $Id: find-duplicates.py,v 1.2 2002/09/05 14:29:35 wrobell Exp $

USAGE: %s [-s skip_regexp] [-o out_file] directory

out_file defaults to '%s'.
If file name matches skip_regexp it is skipped.
""" % (sys.argv[0], default_out_name)

# Any character outside this set makes a file name unsafe to paste unquoted
# into a /bin/sh command line.
_shell_unsafe_rx = re.compile(r"[^A-Za-z0-9_@%+=:,./-]")


class package:
    """One RPM file: the header fields the script needs plus its path."""

    def __init__(self, header, filename, is_source):
        """Copy name/epoch/version/release (and SOURCERPM) out of `header`.

        header    -- rpm header object, indexed by rpm.RPMTAG_* constants
        filename  -- path of the package file the header was read from
        is_source -- true when the file is a source RPM
        """
        self.name = header[rpm.RPMTAG_NAME]
        self.epoch = header[rpm.RPMTAG_EPOCH]
        # rpmmodule stinks and crashes when we send it int instead of string
        # as epoch so fix it up here
        if self.epoch is not None:
            self.epoch = "%d" % self.epoch
        self.version = header[rpm.RPMTAG_VERSION]
        self.release = header[rpm.RPMTAG_RELEASE]
        self.filename = filename
        self.is_source = is_source
        # Only binary packages get a `srpm` attribute.
        if not is_source:
            self.srpm = header[rpm.RPMTAG_SOURCERPM]

    def evr(self):
        """Return the (epoch, version, release) tuple."""
        return (self.epoch, self.version, self.release)

    def desc(self):
        """Return "name-epoch:version-release" for messages."""
        return "%s-%s" % (self.name, ("%s:%s-%s" % self.evr()))


def msg(s):
    """Write `s` to standard output and flush immediately."""
    sys.stdout.write(s)
    sys.stdout.flush()


def shell_quote(s):
    """Return `s` in a form that /bin/sh reads back as one literal word.

    Names made only of letters, digits and ``_@%+=:,./-`` are returned
    unchanged, so ordinary package paths look exactly as before.  Anything
    else (spaces, quotes, ``;``, ``$`` ...) is wrapped in single quotes,
    with embedded single quotes spliced in as ``'"'"'``.
    """
    if s and not _shell_unsafe_rx.search(s):
        return s
    return "'" + s.replace("'", "'\"'\"'") + "'"


def write_removal_script(out_name, old_list):
    """Write a shell script to `out_name` that removes every file in `old_list`.

    The script is made readable, writable and executable by the owner only.
    File names are shell-quoted so that unusual names can neither break the
    script nor smuggle extra commands into it.
    """
    f = open(out_name, "w")
    try:
        f.write("#!/bin/sh\n# autogenerated\n")
        for name in old_list:
            f.write("rm -f %s\n" % shell_quote(name))
        f.write("# EOF\n")
    finally:
        f.close()
    os.chmod(out_name, stat.S_IRWXU)  # same bits as the octal mode 0700


def find_dup_srpms(packages):
    """Warn on stderr about binary packages built from different SRPM versions.

    packages -- dict mapping package name to a `package` object

    Binary packages are grouped by the name part of their source RPM file
    name (the text before "-version-release.src.rpm").  A group that ends up
    holding more than one distinct source RPM file name is reported.  Source
    packages are ignored.  Returns None.
    """
    name_rx = re.compile(r"^(.*)-[^-]+-[^-]+\.src\.rpm$")
    srpms = {}
    for pkg in packages.values():
        if pkg.is_source:
            continue
        # A header without a SOURCERPM tag yields None; treat it like any
        # other unparsable name instead of crashing inside the regex.
        match = None
        if pkg.srpm is not None:
            match = name_rx.search(pkg.srpm)
        if match is None:
            sys.stderr.write("problems with SRPM name: %s\n" % (pkg.srpm,))
            continue
        pkg_list = srpms.setdefault(match.group(1), [])
        # Keep one representative package per distinct SRPM file name.
        found = 0
        for entry in pkg_list:
            if entry.srpm == pkg.srpm:
                found = 1
                break
        if not found:
            pkg_list.append(pkg)

    for pkg_list in srpms.values():
        if len(pkg_list) > 1:
            sys.stderr.write("Packages come from different versions of the same SRPM:\n")
            for pkg in pkg_list:
                sys.stderr.write(" - %s from %s\n" % (pkg.filename, pkg.srpm))


def process(dir, out_name, skip_rx):
    """Scan `dir` for RPMs and write the removal script for superseded ones.

    dir      -- directory to scan (not recursive)
    out_name -- path of the shell script to write; any existing file with
                that name is removed first, and nothing is written when
                there is nothing to remove
    skip_rx  -- compiled regular expression or None; directory entries whose
                name matches are skipped

    Progress and diagnostics go to stderr.  Returns None.
    """
    packages = {}   # package name -> newest `package` seen so far
    old_list = []   # file names of superseded packages
    cnt = 0

    sys.stderr.write("reading packages... ")
    for name in os.listdir(dir):
        cnt += 1
        if cnt % 100 == 0:
            sys.stderr.write("%d... " % cnt)
        if skip_rx is not None and skip_rx.search(name):
            continue
        name = dir + "/" + name
        try:
            fd = os.open(name, os.O_RDONLY)
        except OSError:
            continue

        # NOTE(review): rpm.headerFromPackage is the call this script was
        # written against; newer rpm bindings may no longer provide it --
        # confirm against the installed rpm module.
        header = None
        is_source = None
        try:
            try:
                (header, is_source) = rpm.headerFromPackage(fd)
            except rpm.error:
                sys.stderr.write("%s is not rpm package... skipping...\n" % (name,))
        finally:
            # Close on every path; the descriptor used to leak whenever the
            # file turned out not to be an RPM.
            os.close(fd)
        if header is None:
            continue

        pkg = package(header, name, is_source)
        if pkg.name not in packages:
            packages[pkg.name] = pkg
            continue

        other_pkg = packages[pkg.name]
        # Compare both ways round: the result is trusted only when the two
        # answers agree with each other.
        res1 = rpm.labelCompare(pkg.evr(), other_pkg.evr())
        res2 = rpm.labelCompare(other_pkg.evr(), pkg.evr())
        if res1 == 1 and res2 == -1:
            old_list.append(other_pkg.filename)
            packages[pkg.name] = pkg
        elif res1 == -1 and res2 == 1:
            old_list.append(pkg.filename)
        else:
            sys.stderr.write("\npackages cannot be compared: %s <-> %s\n"
                             % (pkg.filename, other_pkg.filename))
    sys.stderr.write("%d\n" % cnt)

    old_list.sort()

    # Drop a stale script from an earlier run so that "no file" reliably
    # means "nothing to remove".
    try:
        os.unlink(out_name)
    except OSError:
        pass

    if not old_list:
        sys.stderr.write("no entries to unlink\n")
    else:
        write_removal_script(out_name, old_list)
        sys.stderr.write("saved removal script to %s, %d entries\n"
                         % (out_name, len(old_list)))

    find_dup_srpms(packages)


def usage():
    """Print the help text to stderr and exit with status 1."""
    sys.stderr.write(help_str)
    sys.exit(1)


def main():
    """Parse the command line (-s skip_regexp, -o out_file, directory) and run."""
    skip_rx = None
    out_name = default_out_name

    try:
        opts, args = getopt.getopt(sys.argv[1:], "s:o:")
    except getopt.GetoptError:
        usage()

    for o, v in opts:
        if o == "-s":
            skip_rx = v
        elif o == "-o":
            out_name = v

    if len(args) != 1:
        usage()

    if skip_rx is not None:
        try:
            skip_rx = re.compile(skip_rx)
        except re.error:
            # A malformed pattern is a user error, not a crash.
            sys.stderr.write("%s: invalid skip_regexp %r: %s\n"
                             % (sys.argv[0], skip_rx, sys.exc_info()[1]))
            sys.exit(1)

    if rpm is None:
        sys.stderr.write("%s: cannot import the 'rpm' Python module: %s\n"
                         % (sys.argv[0], rpm_import_error))
        sys.exit(1)

    process(args[0], out_name, skip_rx)


if __name__ == "__main__":
    main()