#!/usr/bin/env python
"""Patch a package's metadata file with data from Debian's watch file.

For each package named on the command line, the matching Debian source
package page is fetched, its ``debian/watch`` file is read, and the
package's metadata file gets one watch tag per watch line.  The result is
written to stdout, either as the whole patched file or as a unified diff.
"""

import os
import sys
import re
import urllib
from tempfile import mkstemp
import tarfile
import gzip
import logging
import shutil
from difflib import unified_diff

from portage.exception import AmbiguousPackageName
from gentoolkit.query import Query
from BeautifulSoup import BeautifulSoup, SoupStrainer

logger = logging.getLogger(__name__)

# Watch-file options that are carried over into the generated tags.
_VALID_WATCH_OPTS = ("uversionmangle", "versionmangle", "downloadurlmangle")


# From portage-janitor
def guess_indent_values(before):
    """Guess the indentation strings used by an existing metadata file.

    :param before: current content of the metadata file.
    :returns: ``(rindent_str, indent_str)`` -- the whitespace found in front
        of first-level tags (herd, maintainer, ...) and in front of nested
        tags (watch, name, email).  Falls back to two spaces for the first
        level and twice that for the nested level when nothing matches.
    """
    def guess_for_tags(tags):
        # Return (width, uses_tabs) for the first tag found at the start of
        # a line with one of the probed indentations, or (-1, False).
        for tag in tags:
            for i in [0, 2, 4, 6, 8, 12, 16]:
                if '\n%s<%s' % (' ' * i, tag) in before:
                    return i, False
            for i in [0, 1, 2]:
                if '\n%s<%s' % ('\t' * i, tag) in before:
                    return i, True
        return -1, False

    rindent, tab = guess_for_tags(
        ['herd', 'maintainer', 'longdescription', 'use', 'upstream']
    )
    if rindent == -1:
        rindent = 2
    rindent_str = ('\t' if tab else ' ') * rindent

    indent, tab = guess_for_tags(['watch', 'name', 'email'])
    if indent == -1:
        # No nested tag to copy from: derive it from the first level.
        indent = rindent * 2 if rindent else 4
        if rindent and rindent_str == '\t':
            tab = True
    indent_str = ('\t' if tab else ' ') * indent

    return rindent_str, indent_str


def get_watch_data(package):
    """Return the Debian watch data for *package*, or None if unavailable.

    :param package: object with a ``name`` attribute used for the lookup.
    """
    deb_url, deb_type = get_deb_url(package.name)
    if deb_type == "source":
        return handle_source(deb_url)
    if deb_type == "diff":
        return handle_diff(deb_url)
    return None


def handle_diff(deb_url):
    """Download a gzipped Debian diff and return the watch file it adds.

    :param deb_url: URL of the ``.diff.gz`` file.
    :returns: the added lines of ``debian/watch`` (without the leading
        ``+``), or None when the diff contains no such lines, so callers can
        treat it like a source archive without a watch file.
    """
    fd, temp_deb = mkstemp()
    os.close(fd)  # only the path is needed; do not leak the descriptor

    logger.info(" Downloading debian diff %s...", deb_url)
    watch_lines = []
    try:
        urllib.urlretrieve(deb_url, temp_deb)
        fp = gzip.open(temp_deb, 'rb')
        try:
            for line in fp:
                if re.match(r"\+\+\+ .+?/debian/watch", line):
                    fp.readline()  # diff lines, don't care
                    cur_line = fp.readline()
                    while cur_line.startswith("+"):
                        watch_lines.append(cur_line[1:])
                        cur_line = fp.readline()
        finally:
            fp.close()
    finally:
        os.unlink(temp_deb)

    return "".join(watch_lines) or None


def handle_source(deb_url):
    """Download a Debian source tarball and return its ``debian/watch``.

    The member is read straight from the archive, so nothing is extracted
    to disk and no directory has to be removed afterwards.

    :param deb_url: URL of the ``.debian.tar.*`` archive.
    :returns: content of ``debian/watch``, or None if the archive has no
        such regular file.
    """
    fd, temp_deb = mkstemp()
    os.close(fd)  # only the path is needed; do not leak the descriptor

    logger.info(" Downloading debian source %s...", deb_url)
    try:
        urllib.urlretrieve(deb_url, temp_deb)
        tar = tarfile.open(temp_deb)
        try:
            try:
                member = tar.extractfile("debian/watch")
            except KeyError:
                # No such member in the archive.
                return None
            if member is None:
                # Member exists but is not a regular file or link.
                return None
            try:
                return member.read()
            finally:
                member.close()
        finally:
            tar.close()
    finally:
        os.unlink(temp_deb)


def get_deb_url(name):
    """Find the download URL of the Debian packaging for *name*.

    Scans the links of the package's page on packages.debian.org.  If no
    suitable link is found, the user is prompted for another package name
    and the lookup is retried until one succeeds.

    :param name: Debian source package name to try first.
    :returns: ``(deb_url, deb_type)`` where *deb_type* is ``"source"`` for a
        ``.debian.tar.gz``/``.bz2`` link or ``"diff"`` for a ``.diff.gz``.
    """
    deb_url = None
    deb_type = None

    while not deb_url:
        url = "http://packages.debian.org/source/unstable/%s" % name
        opened = urllib.urlopen(url)
        try:
            content = opened.read()
        finally:
            opened.close()

        for link in BeautifulSoup(content, parseOnlyThese=SoupStrainer("a")):
            if re.match(r"[^\s]+\.debian\.tar\.(?:gz|bz2)", link.text):
                deb_url = link["href"]
                deb_type = "source"
                break
            if re.match(r"[^\s]+\.diff\.gz", link.text):
                deb_url = link["href"]
                deb_type = "diff"
                break

        if not deb_url:
            logger.error(" Cannot get package from %s", url)
            name = raw_input(" Package name in Debian: ")

    return deb_url, deb_type


def _clean_watch_opts(opts):
    """Reduce a comma-separated watch ``opts`` string to the kept options.

    Only options listed in ``_VALID_WATCH_OPTS`` survive; ``uversionmangle``
    is renamed to ``versionmangle``.  Flag-style options without ``=`` are
    skipped instead of raising.

    :returns: space-separated ``name="value"`` pairs (possibly empty).
    """
    cleaned_opts = []
    for opt in opts.split(","):
        opt_name, sep, opt_value = opt.partition("=")
        if not sep or opt_name not in _VALID_WATCH_OPTS:
            continue
        if opt_name == "uversionmangle":
            opt_name = "versionmangle"
        cleaned_opts.append('%s="%s"' % (opt_name, opt_value))
    return " ".join(cleaned_opts)


def patch_metadata(package, watch_data, diff=False):
    """Insert watch tags built from *watch_data* into the metadata file.

    The file on disk is only read, never written.

    :param package: object providing ``metadata.metadata_path`` and
        ``category``.
    :param watch_data: raw content of a Debian watch file.
    :param diff: when true, return a unified diff instead of the full text.
    :returns: the patched metadata content, or a unified diff against the
        original content.
    """
    logger.info(" Patching metadata file")

    metadata_path = package.metadata.metadata_path

    with open(metadata_path) as fp:
        original = fp.read()
    rindent, indent = guess_indent_values(original)
    data = original

    # clean watch_data
    watch_data = "\n".join(
        line for line in watch_data.split("\n")
        if not line.startswith("#")  # comments
    )
    watch_data = watch_data.replace("\\\n", "")  # join continued lines

    # NOTE(review): used when the file has no "version=" line; uscan is
    # understood to treat such files as format 1 -- confirm.
    version = "1"

    watch_tags = []
    for watch_line in watch_data.split("\n"):  # there can be multiple lines
        watch_line = " ".join(watch_line.split())  # remove extra whitespace
        if not watch_line:  # skip empty lines
            continue

        version_parse = re.match(r"version=(\d+?)", watch_line)
        if version_parse:
            version = version_parse.group(1)
            continue

        # parse watch_line: optional opts=... prefix, then the rest
        result = re.match(
            r'(?:opts=(?:"([^"]+?)"|([^\s]+?)) )?(.*)',
            watch_line
        )
        opts_quote, opts, url = result.groups()
        opts = opts_quote or opts

        if opts:
            # clean opts, skip useless ones
            opts = _clean_watch_opts(opts)

        # clean url from useless stuff. Just keep <base> [<filepattern>]
        url_search = re.search(
            r"^([^\s]+)(?: ([^\s]*\([^\s]+\)[^\s]*))?", url
        )
        if url_search is None:
            continue
        url = " ".join([x for x in url_search.groups() if x is not None])

        # NOTE(review): the markup literals below and in the insertion step
        # are reconstructed (the previous format strings had fewer
        # placeholders than arguments and the search strings were empty);
        # the tag names are the ones guess_indent_values() looks for.
        # Confirm against the intended metadata schema.
        if opts:
            watch_tag = '%s<watch version="%s" %s>%s</watch>' % \
                (indent, version, opts, url)
        else:
            watch_tag = '%s<watch version="%s">%s</watch>' % \
                (indent, version, url)
        watch_tags.append(watch_tag)

    watch_tags = "\n".join(watch_tags)

    if '<upstream>' in data:
        # Reuse the existing upstream section.
        data = data.replace('<upstream>', '<upstream>\n%s' % watch_tags, 1)
    else:
        # Create an upstream section just before the closing root tag.
        rep = '%s<upstream>\n%s\n%s</upstream>\n</pkgmetadata>' % \
            (rindent, watch_tags, rindent)
        data = data.replace('</pkgmetadata>', rep, 1)

    if not diff:
        return data

    # Generate clean a/category/package/metadata.xml path
    n = metadata_path.find(package.category)
    if n != -1:
        metadata_path = metadata_path[n:]
    res = unified_diff(
        original.splitlines(True),
        data.splitlines(True),
        fromfile=os.path.join('a/', metadata_path),
        tofile=os.path.join('b/', metadata_path),
    )
    return "".join(res)


def process_package(query, diff=False):
    """Resolve *query* to a package and return its patched metadata.

    The highest-sorting match is used, except that a version containing
    ``9999`` is skipped when another match is available.

    :param query: package query string passed to ``Query``.
    :param diff: forwarded to :func:`patch_metadata`.
    :returns: patched metadata (or diff), or None when the package cannot
        be resolved or has no watch file.
    """
    try:
        matches = Query(query).smart_find(
            in_installed=True,
            in_porttree=True,
            in_overlay=True,
            include_masked=True,
            show_progress=False,
            no_matches_fatal=False,
        )
    except AmbiguousPackageName:
        logger.error(" Ambiguous package name")
        return None

    if not matches:
        logger.error(" Package not found")
        return None

    matches = sorted(matches)
    package = matches.pop()
    if '9999' in package.version and matches:
        package = matches.pop()

    watch_data = get_watch_data(package)
    if watch_data is None:
        logger.error(" No watch file found")
        return None
    return patch_metadata(package, watch_data, diff=diff)


def main():
    """Command-line entry point: process every package given as argument."""
    import optparse
    p = optparse.OptionParser(
        usage="usage: %prog <package> [<package> [...]]",
    )
    p.add_option('-d', '--diff', action="store_true", dest="diff",
                 default=False,
                 help="Outputs a diff")
    opts, packages = p.parse_args()

    # Log to stderr so stdout carries only the generated metadata/diff.
    logging.basicConfig(stream=sys.stderr, level=logging.INFO,
                        format='%(message)s')

    for package in packages:
        logger.info("Processing %s...", package)
        result = process_package(package, opts.diff)
        if result:
            sys.stdout.write(result)


if __name__ == "__main__":
    main()