diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..1ce22c2 --- /dev/null +++ b/AUTHORS @@ -0,0 +1,8 @@ +* euscan + Original author: Corentin Chary + Current maintainer: Corentin Chary + +* euscanwww + Original author: Corentin Chary + Current maintainer: Corentin Chary + diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..60549be --- /dev/null +++ b/COPYING @@ -0,0 +1,340 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. 
+ + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) 
Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. 
+ + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) 19yy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) 19yy name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. 
diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..cf3139e --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,9 @@ +include AUTHORS +include COPYING +include NEWS +include README +include TODO +include setup.py +recursive-include bin * +recursive-include man * +recursive-include pym *.py diff --git a/README b/README new file mode 100644 index 0000000..5c785d3 --- /dev/null +++ b/README @@ -0,0 +1,46 @@ +Package: gentoolkit/gentoolkit-dev +Authors: Aron Griffis + Brandon Low + Ian Leitch + Karl Trygve Kalleberg + Marius Mauch + Paul Varner + See src//AUTHORS for tool-specific authors + +MOTIVATION + +The gentoolkit and gentoolkit-dev packages contain a collection of useful +administration scripts particular to the Gentoo Linux distribution. It contains +rough drafts and implementations of features that may in time make it into +Portage, or into full-fledged tools in their own right. + +The gentoolkit-dev package is intended primarily for Gentoo developers. + +CONTENTS + +gentoolkit +========== +eclean - tool to clean up outdated distfiles and packages +equery - replacement for etcat and qpkg +etcat - extracts auxiliary information from portage (deprecated) +euse - tool to manage USE flags +glsa-check - tool to manage GLSA's (Gentoo Linux Security Advisory) +qpkg - convenient package query tool (deprecated) +revdep-rebuild - scans/fixes broken shared libs and binaries + +gentoolkit-dev +============== +ebump - Ebuild revision bumper +echangelog - update portage ChangeLogs +ego - +ekeyword - modify package KEYWORDS +epkgmove - tool for moving and renaming packages in CVS +eviewcvs - generate viewcvs URLs +gensync - Overlay Sync Tool + +IMPROVEMENTS + +Any suggestions for improvements should be sent to tools-portage@gentoo.org, or +added as a bug assigned to us. + +We only accept new contributions if they are written in bash or python. 
diff --git a/THANKS b/THANKS new file mode 100644 index 0000000..e69de29 diff --git a/TODO b/TODO index b678a01..f7d4993 100644 --- a/TODO +++ b/TODO @@ -14,8 +14,6 @@ euscan Site Handlers ------------- -- python: PyPi -- PHP: PECL / PEAR - ftp.kde.org: doesn't scan the "unstable" tree - mysql: should use http://downloads.mysql.com/archives/ - mariadb: should use http://downloads.askmonty.org/MariaDB/+releases/ diff --git a/bin/euscan b/bin/euscan new file mode 100755 index 0000000..86c31f0 --- /dev/null +++ b/bin/euscan @@ -0,0 +1,220 @@ +#!/usr/bin/python + +"""Copyright 2011 Gentoo Foundation +Distributed under the terms of the GNU General Public License v2 +""" + +from __future__ import print_function + +""" Meta """ +__author__ = "Corentin Chary (iksaif)" +__email__ = "corentin.chary@gmail.com" +__version__ = "git" +__productname__ = "euscan" +__description__ = "A tool to detect new upstream releases." + +""" Imports """ + +import os +import sys +import getopt +import errno + +from portage.output import white, yellow, turquoise, green, EOutput + +import gentoolkit.pprinter as pp +from gentoolkit.eclean.search import (port_settings) + +from euscan import CONFIG, output +from euscan.scan import scan_upstream + +""" Globals """ + +def setupSignals(): + """ This block ensures that ^C interrupts are handled quietly. 
""" + import signal + + def exithandler(signum,frame): + signal.signal(signal.SIGINT, signal.SIG_IGN) + signal.signal(signal.SIGTERM, signal.SIG_IGN) + print () + sys.exit(errno.EINTR) + + signal.signal(signal.SIGINT, exithandler) + signal.signal(signal.SIGTERM, exithandler) + signal.signal(signal.SIGPIPE, signal.SIG_DFL) + + +def printVersion(): + """Output the version info.""" + print( "%s (%s) - %s" \ + % (__productname__, __version__, __description__)) + print() + print("Author: %s <%s>" % (__author__,__email__)) + print("Copyright 2011 Gentoo Foundation") + print("Distributed under the terms of the GNU General Public License v2") + + +def printUsage(_error=None, help=None): + """Print help message. May also print partial help to stderr if an + error from {'options'} is specified.""" + + out = sys.stdout + if _error: + out = sys.stderr + if not _error in ('global-options', 'packages',): + _error = None + if not _error and not help: help = 'all' + if _error in ('global-options',): + print( pp.error("Wrong option on command line."), file=out) + print( file=out) + if _error in ('packages',): + print( pp.error("You need to specify exactly one package."), file=out) + print( file=out) + print( white("Usage:"), file=out) + if _error in ('global-options', 'packages',) or help == 'all': + print( " "+turquoise(__productname__), + yellow("[options]"), + green(""), file=out) + if _error in ('global-options',) or help == 'all': + print( " "+turquoise(__productname__), + yellow("[--help, --version]"), file=out) + + print(file=out) + if _error in ('global-options',) or help: + print( "Available ", yellow("options")+":", file=out) + print( yellow(" -C, --nocolor")+ + " - turn off colors on output", file=out) + print( yellow(" -q, --quiet")+ + " - be as quiet as possible", file=out) + print( yellow(" -h, --help")+ \ + " - display the help screen", file=out) + print( yellow(" -V, --version")+ + " - display version info", file=out) + print( file=out) + print( yellow(" -1, 
--oneshot")+ + " - stop as soon as a new version is found", file=out) + print( yellow(" -b, --brute-force=")+ + " - define the brute force "+yellow("")+" (default: 2)\n" + + " " * 29 + "bigger levels will generate more versions numbers\n" + + " " * 29 + "0 means disabled", file=out) + print( file=out) + if _error in ('packages',) or help: + print( green(" package")+ + " - the package (or ebuild) you want to scan", file=out) + print( file=out) + '''print( "More detailed instruction can be found in", + turquoise("`man %s`" % __productname__), file=out)''' + + +class ParseArgsException(Exception): + """For parseArgs() -> main() communications.""" + def __init__(self, value): + self.value = value + def __str__(self): + return repr(self.value) + + +def parseArgs(): + """Parse the command line arguments. Raise exceptions on + errors. Returns package and affect the CONFIG dict. + """ + + def optionSwitch(opts): + """local function for interpreting command line options + and setting options accordingly""" + return_code = True + for o, a in opts: + if o in ("-h", "--help"): + raise ParseArgsException('help') + elif o in ("-V", "--version"): + raise ParseArgsException('version') + elif o in ("-C", "--nocolor"): + CONFIG['nocolor'] = True + pp.output.nocolor() + elif o in ("-q", "--quiet"): + CONFIG['quiet'] = True + CONFIG['verbose'] = False + elif o in ("-1", "--oneshot"): + CONFIG['oneshot'] = True + elif o in ("-b", "--brute-force"): + CONFIG['brute-force'] = int(a) + elif o in ("-v", "--verbose") and not CONFIG['quiet']: + CONFIG['verbose'] = True + else: + return_code = False + + return return_code + + ' here are the different allowed command line options (getopt args) ' + getopt_options = {'short':{}, 'long':{}} + getopt_options['short']['global'] = "hVCqv1b:" + getopt_options['long']['global'] = ["help", "version", "nocolor", "quiet", + "verbose", "oneshot", "brute-force="] + + short_opts = getopt_options['short']['global'] + long_opts = 
getopt_options['long']['global'] + opts_mode = 'global' + + ' apply getopts to command line, show partial help on failure ' + try: + opts, args = getopt.getopt(sys.argv[1:], short_opts, long_opts) + except: + raise ParseArgsException(opts_mode+'-options') + + ' set options accordingly ' + optionSwitch(opts) + + if len(args) != 1: + raise ParseArgsException('packages') + + return args[0] + +def main(): + """Parse command line and execute all actions.""" + CONFIG['nocolor'] = (port_settings["NOCOLOR"] in ('yes','true') + or not sys.stdout.isatty()) + if CONFIG['nocolor']: + pp.output.nocolor() + ' parse command line options and actions ' + try: + package = parseArgs() + except ParseArgsException as e: + if e.value == 'help': + printUsage(help='all') + sys.exit(0) + elif e.value[:5] == 'help-': + printUsage(help=e.value[5:]) + sys.exit(0) + elif e.value == 'version': + printVersion() + sys.exit(0) + else: + printUsage(e.value) + sys.exit(errno.EINVAL) + + """ Change euscan's output """ + output = EOutput(CONFIG['quiet']) + ret = scan_upstream(package) + + print () + + for url, version in ret: + print ("Upstream Version: " + + pp.number("%s" % version) + + pp.path(" %s" % url)) + + if not len(ret): + print (pp.warn("Didn't find any new version, " + + "check package's homepage for " + + "more informations")); + + +if __name__ == "__main__": + try: + setupSignals() + main() + except KeyboardInterrupt: + print( "Aborted.") + sys.exit(errno.EINTR) + sys.exit(0) diff --git a/euscan b/euscan deleted file mode 100755 index e3ef659..0000000 --- a/euscan +++ /dev/null @@ -1,825 +0,0 @@ -#!/usr/bin/python - -"""Copyright 2011 Gentoo Foundation -Distributed under the terms of the GNU General Public License v2 -""" - -from __future__ import print_function - -# Meta: -__author__ = "Corentin Chary (iksaif)" -__email__ = "corentin.chary@gmail.com" -__version__ = "git" -__productname__ = "euscan" -__description__ = "A tool to detect new upstream releases." 
- -# ======= -# Imports -# ======= - -import os -import sys -import re -import time -import getopt -import errno -import random -import urllib2 -import StringIO - -import pkg_resources - -import portage -import portage.versions -from portage import dep -from portage.dbapi import porttree -from portage.output import white, yellow, turquoise, green, teal, red, EOutput - -import gentoolkit.pprinter as pp -from gentoolkit import errors -from gentoolkit.query import Query -from gentoolkit.eclean.search import (port_settings) - -# ======= -# Globals -# ======= - -QUERY_OPTS = {"include_masked": True} - -BLACKLIST_VERSIONS = [ - # Compatibility package for running binaries linked against a pre gcc 3.4 libstdc++, won't be updated - '>=sys-libs/libstdc++-v3-3.4', -] - -BLACKLIST_PACKAGES = [ - # These kernels are almost dead - 'sys-kernel/usermode-sources', - 'sys-kernel/xbox-sources', - 'sys-kernel/cell-sources', -] - -SCANDIR_BLACKLIST_URLS = [ - 'mirror://rubygems/(.*)', # Not browsable - 'mirror://gentoo/(.*)' # Directory too big -] - -BRUTEFORCE_BLACKLIST_PACKAGES = [ - 'net-zope/plonepopoll' # infinite loop any http://plone.org/products/plonepopoll/releases/*/plonepopoll-2-6-1.tgz link will work - ] - -BRUTEFORCE_BLACKLIST_URLS = [ - 'http://(.*)dockapps.org/download.php/id/(.*)', # infinite loop - 'http://hydra.nixos.org/build/(.*)', # infinite loop - 'http://www.rennings.net/gentoo/distfiles/(.*)' # Doesn't respect 404, infinite loop -] - -def htop_vercmp(a, b): - def fixver(v): - if v in ['0.11', '0.12', '0.13']: - v = '0.1.' 
+ v[3:] - return v - - return simple_vercmp(fixver(a), fixver(b)) - -VERSION_CMP_PACKAGE_QUIRKS = { - 'sys-process/htop' : htop_vercmp -} - -_v = r'((\d+)((\.\d+)*)([a-zA-Z]*?)(((-|_)(pre|p|beta|b|alpha|a|rc|r)\d*)*))' - -# ========= -# Functions -# ========= - -def cast_int_components(version): - for i, obj in enumerate(version): - try: - version[i] = int(obj) - except ValueError: - pass - return version - -def simple_vercmp(a, b): - if a == b: - return 0 - - # For sane versions - r = portage.versions.vercmp(a, b) - - if r is not None: - return r - - # Fallback - a = pkg_resources.parse_version(a) - b = pkg_resources.parse_version(b) - - if a < b: - return -1 - else: - return 1 - -def vercmp(package, a, b): - if package in VERSION_CMP_PACKAGE_QUIRKS: - return VERSION_CMP_PACKAGE_QUIRKS[package](a, b) - return simple_vercmp(a, b) - -def skipnightly(a, b): - a = pkg_resources.parse_version(a) - b = pkg_resources.parse_version(b) - - # Try to skip nightly builds when not wanted (www-apps/moodle) - if len(a) != len(b) and len(b) == 2 and len(b[0]) == len('yyyymmdd'): - return True - return False - -def generate_templates_vars(version): - ret = [] - - part = split_version(version) - for i in range(2, len(part)): - ver = [] - var = [] - for j in range(i): - ver.append(str(part[j])) - var.append('${%d}' % j) - - ret.append((".".join(ver), ".".join(var))) - ret.append((version, '${PV}')) - ret.reverse() - return ret - -def template_from_url(url, version): - prefix, chunks = url.split('://') - chunks = chunks.split('/') - - for i in range(len(chunks)): - chunk = chunks[i] - - subs = generate_templates_vars(version) - for sub in subs: - chunk = chunk.replace(sub[0], sub[1]) - - chunks[i] = chunk - - - return prefix + "://" + "/".join(chunks) - -def url_from_template(url, version): - components = split_version(version) - - url = url.replace('${PV}', version) - for i in range(len(components)): - url = url.replace('${%d}' % i, str(components[i])) - - return url - -# Stolen 
from distutils.LooseVersion -# Used for brute force to increment the version -def split_version(version): - component_re = re.compile(r'(\d+ | [a-z]+ | \.)', re.VERBOSE) - components = filter(lambda x: x and x != '.', component_re.split(version)) - for i in range(len(components)): - try: - components[i] = int(components[i]) - except ValueError: - pass - return components - -def join_version(components): - version = "" - for i in range(len(components)): - version += str(components[i]) - if i >= len(components) - 1: - break - if type(components[i]) != str and type(components[i + 1]) != str: - version += "." - return version - -def increment_version(components, level): - n = len(components) - - if level > n - 1 or level < 0: - raise Exception - - for i in range(n, level + 1, -1): - if type(components[i - 1]) == int: - components[i - 1] = 0 - - if type(components[level]) == int: - components[level] += 1 - - return components - -def gen_versions(components, level): - n = len(components) - depth = level - level = min(level, n) - - if not n: - return [] - - versions = [] - - for i in range(n, n - level, -1): - increment_version(components, i - 1) - for j in range(depth): - versions.append(list(components)) - increment_version(components, i - 1) - - return versions - -def tryurl(fileurl, output, template): - result = True - - output.ebegin("Trying: " + fileurl) - - try: - basename = os.path.basename(fileurl) - - fp = urllib2.urlopen(fileurl, None, 5) - headers = fp.info() - - if 'Content-disposition' in headers and basename not in headers['Content-disposition']: - result = None - elif 'Content-Length' in headers and headers['Content-Length'] == '0': - result = None - elif 'text/html' in headers['Content-Type']: - result = None - elif fp.geturl() != fileurl: - regex = regex_from_template(template) - baseregex = regex_from_template(os.path.basename(template)) - basename2 = os.path.basename(fp.geturl()) - - # Redirect to another (earlier?) 
version - if basename != basename2 and (re.match(regex, fp.geturl()) or re.match(baseregex, basename2)): - result = None - - - if result: - result = (fp.geturl(), fp.info()) - - except urllib2.URLError: - result = None - except IOError: - result = None - - output.eend(errno.ENOENT if not result else 0) - - return result - -def regex_from_template(template): - template = re.escape(template) - template = template.replace('\$\{', '${') - template = template.replace('\}', '}') - template = template.replace('}\.$', '}.$') - template = template.replace('${1}', r'([\d]+?)') - template = re.sub(r'(\$\{\d+\}\.?)+', r'([\w]+?)', template) - #template = re.sub(r'(\$\{\d+\}\.?)+', r'([\w]+?)', template) - #template = re.sub(r'(\$\{\d+\}\.+)+', '(.+?)\.', template) - #template = re.sub(r'(\$\{\d+\})+', '(.+?)', template) - template = template.replace('${PV}', _v) - template = template + r'/?$' - return template - -def basedir_from_template(template): - idx = template.find('${') - if idx == -1: - return template - - idx = template[0:idx].rfind('/') - if idx == -1: - return "" - - return template[0:idx] - -def generate_scan_paths(url): - prefix, chunks = url.split('://') - chunks = chunks.split('/') - - steps = [] - - path = prefix + ":/" - for chunk in chunks: - if '${' in chunk: - steps.append((path, regex_from_template(chunk))) - path = "" - else: - path += "/" - path += chunk - return steps - -def versionBlacklisted(cp, version, output=None): - rule = None - cpv = '%s-%s' % (cp, version) - - for bv in BLACKLIST_VERSIONS: - if dep.match_from_list(bv, [cpv]): - rule = bv - None - - if rule and output: - output.einfo("%s is blacklisted by rule %s" % (cpv, bv)) - return rule is not None - -def scan_directory_recursive(cpv, url, steps, vmin, vmax, output): - if not steps: - return [] - - cp, ver, rev = portage.pkgsplit(cpv) - url += steps[0][0] - pattern = steps[0][1] - - steps = steps[1:] - - output.einfo("Scanning: %s" % url) - - try: - fp = urllib2.urlopen(url, None, 5) - 
except urllib2.URLError: - return [] - except IOError: - return [] - - data = fp.read() - - results = [] - - if re.search("<\s*a\s+[^>]*href", data): - from BeautifulSoup import BeautifulSoup - - soup = BeautifulSoup(data) - - for link in soup.findAll('a'): - href = link.get("href") - if not href: - continue - if href.startswith(url): - href = href.replace(url, "", 1) - - match = re.match(pattern, href, re.I) - if match: - results.append((match.group(1), match.group(0))) - - elif url.startswith('ftp://'): # Probably a FTP Server - buf = StringIO.StringIO(data) - for line in buf.readlines(): - line = line.replace("\n", "").replace("\r", "") - match = re.search(pattern, line, re.I) - if match: - results.append((match.group(1), match.group(0))) - # add url - - versions = [] - - for version, path in results: - if vmin and vercmp(cp, version, vmin) <= 0: - continue - if vmax and vercmp(cp, version, vmax) >= 0: - continue - - if versionBlacklisted(cp, version, output): - continue - - if skipnightly(vmin, version): - continue - - if not url.endswith('/') and not path.startswith('/'): - path = url + '/' + path - else: - path = url + path - - versions.append((path, version)) - if steps: - ret = scan_directory_recursive(cpv, path, steps, vmin, vmax, output) - versions.extend(ret) - return versions - -''' -- python: PyPi -- PHP: PECL / PEAR -- ftp.kde.org: doesn't scan the "unstable" tree -- mysql: should use http://downloads.mysql.com/archives/ -- mariadb: should use http://downloads.askmonty.org/MariaDB/+releases/ -''' - -def scan_directory(cpv, url, options, output, limit=None): - # Ftp: list dir - # Handle mirrors - if not options["scan-dir"]: - return [] - - for bu in SCANDIR_BLACKLIST_URLS: - if re.match(bu, url): - output.einfo("%s is blacklisted by rule %s" % (url, bu)) - return [] - - resolved_url = parseMirror(url, output) - - catpkg, ver, rev = portage.pkgsplit(cpv) - - template = template_from_url(resolved_url, ver) - if '${' not in template: - output.einfo("Url 
doesn't seems to depend on version: %s not found in %s" - % (ver, fileurl)) - return [] - else: - output.einfo("Scanning: %s" % template) - - steps = generate_scan_paths(template) - return scan_directory_recursive(cpv, "", steps, ver, limit, output) - -def brute_force(cpv, fileurl, options, output, limit=None): - if options["brute-force"] <= 0: - return [] - - catpkg, ver, rev = portage.pkgsplit(cpv) - - for bp in BRUTEFORCE_BLACKLIST_PACKAGES: - if re.match(bp, catpkg): - output.einfo("%s is blacklisted by rule %s" % (catpkg, bp)) - return [] - - for bp in BRUTEFORCE_BLACKLIST_URLS: - if re.match(bp, fileurl): - output.einfo("%s is blacklisted by rule %s" % (catpkg, bp)) - return [] - - output.einfo("Generating version from " + ver) - - components = split_version(ver) - versions = gen_versions(components, options["brute-force"]) - - - """ Remove unwanted versions """ - for v in versions: - if vercmp(catpkg, ver, join_version(v)) >= 0: - versions.remove(v) - - if not versions: - output.einfo("Can't generate new versions from " + ver) - return [] - - template = template_from_url(fileurl, ver) - - if '${PV}' not in template: - output.einfo("Url doesn't seems to depend on full version: %s not found in %s" - % (ver, fileurl)) - return [] - else: - output.einfo("Brute forcing: %s" % template) - - result = [] - - i = 0 - done = [] - - while i < len(versions): - components = versions[i] - i += 1 - if components in done: - continue - done.append(tuple(components)) - - vstring = join_version(components) - - if versionBlacklisted(catpkg, vstring, output): - continue - - if limit and vercmp(catpkg, vstring, limit) >= 0: - continue - - url = url_from_template(template, vstring) - - infos = tryurl(url, output, template) - - if not infos: - continue - - result.append([url, vstring]) - - if options["brute-force-recursive"]: - for v in gen_versions(components, options["brute-force"]): - if v not in versions and tuple(v) not in done: - versions.append(v) - - if options["oneshot"]: 
- break - - return result - - -def parseMirror(uri, output): - from random import shuffle - - mirrors = portage.settings.thirdpartymirrors() - - if not uri.startswith("mirror://"): - return uri - - eidx = uri.find("/", 9) - if eidx == -1: - output.einfo("Invalid mirror definition in SRC_URI:\n") - output.einfo(" %s\n" % (uri)) - return None - - mirrorname = uri[9:eidx] - path = uri[eidx+1:] - - if mirrorname in mirrors: - mirrors = mirrors[mirrorname] - shuffle(mirrors) - uri = mirrors[0].strip("/") + "/" + path - else: - output.einfo("No known mirror by the name: %s\n" % (mirrorname)) - return None - - return uri - -def setupSignals(): - """ This block ensures that ^C interrupts are handled quietly. """ - import signal - - def exithandler(signum,frame): - signal.signal(signal.SIGINT, signal.SIG_IGN) - signal.signal(signal.SIGTERM, signal.SIG_IGN) - print () - sys.exit(errno.EINTR) - - signal.signal(signal.SIGINT, exithandler) - signal.signal(signal.SIGTERM, exithandler) - signal.signal(signal.SIGPIPE, signal.SIG_DFL) - - -def printVersion(): - """Output the version info.""" - print( "%s (%s) - %s" \ - % (__productname__, __version__, __description__)) - print() - print("Author: %s <%s>" % (__author__,__email__)) - print("Copyright 2011 Gentoo Foundation") - print("Distributed under the terms of the GNU General Public License v2") - - -def printUsage(_error=None, help=None): - """Print help message. 
May also print partial help to stderr if an - error from {'options'} is specified.""" - - out = sys.stdout - if _error: - out = sys.stderr - if not _error in ('global-options', 'packages',): - _error = None - if not _error and not help: help = 'all' - if _error in ('global-options',): - print( pp.error("Wrong option on command line."), file=out) - print( file=out) - if _error in ('packages',): - print( pp.error("You need to specify exactly one package."), file=out) - print( file=out) - print( white("Usage:"), file=out) - if _error in ('global-options', 'packages',) or help == 'all': - print( " "+turquoise(__productname__), - yellow("[options]"), - green(""), file=out) - if _error in ('global-options',) or help == 'all': - print( " "+turquoise(__productname__), - yellow("[--help, --version]"), file=out) - - print(file=out) - if _error in ('global-options',) or help: - print( "Available ", yellow("options")+":", file=out) - print( yellow(" -C, --nocolor")+ - " - turn off colors on output", file=out) - print( yellow(" -q, --quiet")+ - " - be as quiet as possible", file=out) - print( yellow(" -h, --help")+ \ - " - display the help screen", file=out) - print( yellow(" -V, --version")+ - " - display version info", file=out) - print( file=out) - print( yellow(" -1, --oneshot")+ - " - stop as soon as a new version is found", file=out) - print( yellow(" -b, --brute-force=")+ - " - define the brute force "+yellow("")+" (default: 2)\n" + - " " * 29 + "bigger levels will generate more versions numbers\n" + - " " * 29 + "0 means disabled", file=out) - print( file=out) - if _error in ('packages',) or help: - print( green(" package")+ - " - the package (or ebuild) you want to scan", file=out) - print( file=out) - #print( "More detailed instruction can be found in", - # turquoise("`man %s`" % __productname__), file=out) - - -class ParseArgsException(Exception): - """For parseArgs() -> main() communications.""" - def __init__(self, value): - self.value = value # sdfgsdfsdfsd - def 
__str__(self): - return repr(self.value) - - -def parseArgs(options={}): - """Parse the command line arguments. Raise exceptions on - errors. Returns package and affect the options dict. - """ - - def optionSwitch(option,opts): - """local function for interpreting command line options - and setting options accordingly""" - return_code = True - for o, a in opts: - if o in ("-h", "--help"): - raise ParseArgsException('help') - elif o in ("-V", "--version"): - raise ParseArgsException('version') - elif o in ("-C", "--nocolor"): - options['nocolor'] = True - pp.output.nocolor() - elif o in ("-q", "--quiet"): - options['quiet'] = True - options['verbose'] = False - elif o in ("-1", "--oneshot"): - options['oneshot'] = True - elif o in ("-b", "--brute-force"): - options['brute-force'] = int(a) - elif o in ("-v", "--verbose") and not options['quiet']: - options['verbose'] = True - else: - return_code = False - - return return_code - - # here are the different allowed command line options (getopt args) - getopt_options = {'short':{}, 'long':{}} - getopt_options['short']['global'] = "hVCqv1b:" - getopt_options['long']['global'] = ["help", "version", "nocolor", "quiet", - "verbose", "oneshot", "brute-force="] - # set default options, except 'nocolor', which is set in main() - options['quiet'] = False - options['verbose'] = False - options['brute-force'] = 2 - options['oneshot'] = False - options['brute-force-recursive'] = True # FIXME add an option - options['scan-dir'] = True # FIXME add an option - - short_opts = getopt_options['short']['global'] - long_opts = getopt_options['long']['global'] - opts_mode = 'global' - - # apply getopts to command line, show partial help on failure - try: - opts, args = getopt.getopt(sys.argv[1:], short_opts, long_opts) - except: - raise ParseArgsException(opts_mode+'-options') - - # set options accordingly - optionSwitch(options,opts) - - if len(args) != 1: - raise ParseArgsException('packages') - - return args[0] - -def 
scanUpstream(options, package, output): - matches = Query(package).find( - include_masked=QUERY_OPTS['include_masked'], - in_installed=False - ) - - if not matches: - sys.stderr.write(pp.warn("No package matching '%s'" % pp.pkgquery(package))) - sys.exit(errno.ENOENT) - - matches = sorted(matches) - pkg = matches.pop() - - if '9999' in pkg.version: - if len(matches) == 0: - sys.stderr.write(pp.warn("Package '%s' only have a dev version (9999)" % pp.pkgquery(package))) - sys.exit(errno.ENOENT) - else: - pkg = matches.pop() - - if pkg.cp in BLACKLIST_PACKAGES: - sys.stderr.write(pp.warn("Package '%s' is blacklisted" % pp.pkgquery(package))) - sys.exit(errno.ENOENT) - - pp.uprint(" * %s [%s]" % (pp.cpv(pkg.cpv), pp.section(pkg.repo_name()))) - pp.uprint() - - ebuild_path = pkg.ebuild_path() - if ebuild_path: - pp.uprint('Ebuild: ' + pp.path(os.path.normpath(ebuild_path))) - - pp.uprint('Repository: ' + pkg.repo_name()) - pp.uprint('Homepage: ' + pkg.environment("HOMEPAGE")) - pp.uprint('Description: ' + pkg.environment("DESCRIPTION")) - - cpv = pkg.cpv - metadata = { - "EAPI" : port_settings["EAPI"], - "SRC_URI" : pkg.environment("SRC_URI", False), - } - use = frozenset(port_settings["PORTAGE_USE"].split()) - try: - alist = porttree._parse_uri_map(cpv, metadata, use=use) - aalist = porttree._parse_uri_map(cpv, metadata) - except InvalidDependString as e: - sys.stderr.write(pp.warn("%s\n" % str(e))) - sys.stderr.write(pp.warn("Invalid SRC_URI for '%s'" % pp.pkgquery(cpv))) - sys.exit(errno.ENOENT) - - if "mirror" in portage.settings.features: - fetchme = aalist - else: - fetchme = alist - - versions = [] - - for filename in fetchme: - for url in fetchme[filename]: - print () - output.einfo("SRC_URI is '%s'" % url) - - if '://' not in url: - output.einfo("Invalid url '%s'" % url) - continue - - ''' Try normal scan ''' - versions.extend(scan_directory(cpv, url, options, output)) - - if versions and options['oneshot']: - break - - ''' Brute Force ''' - 
versions.extend(brute_force(cpv, url, options, output)) - - if versions and options['oneshot']: - break - - newversions = {} - - for url, version in versions: - ''' Try to keep the most specific urls (determinted by the length) ''' - if version in newversions and len(url) < len(newversions[version]): - continue - ''' Remove blacklisted versions ''' - if versionBlacklisted(pkg.cp, version, output): - continue - - newversions[version] = url - - print () - - for version in newversions: - print ("Upstream Version:" - + pp.number("%s" % version) - + pp.path(" %s" % newversions[version])) - - if not len(newversions): - print (pp.warn("Didn't find any new version," - + "check package's homepage for " - + "more informations")); - return versions - - -def main(): - """Parse command line and execute all actions.""" - # set default options - options = {} - options['nocolor'] = (port_settings["NOCOLOR"] in ('yes','true') - or not sys.stdout.isatty()) - if options['nocolor']: - pp.output.nocolor() - # parse command line options and actions - try: - package = parseArgs(options) - # filter exception to know what message to display - except ParseArgsException as e: - if e.value == 'help': - printUsage(help='all') - sys.exit(0) - elif e.value[:5] == 'help-': - printUsage(help=e.value[5:]) - sys.exit(0) - elif e.value == 'version': - printVersion() - sys.exit(0) - else: - printUsage(e.value) - sys.exit(errno.EINVAL) - - output = EOutput(options['quiet']) - scanUpstream(options, package, output) - - -if __name__ == "__main__": - try: - setupSignals() - main() - except KeyboardInterrupt: - print( "Aborted.") - sys.exit(errno.EINTR) - sys.exit(0) diff --git a/euscanwww/euscan/management/commands/scan-metadata.py b/euscanwww/euscan/management/commands/scan-metadata.py index ed09326..a7d207c 100644 --- a/euscanwww/euscan/management/commands/scan-metadata.py +++ b/euscanwww/euscan/management/commands/scan-metadata.py @@ -120,12 +120,11 @@ class Command(BaseCommand): herd, created = 
Herd.objects.get_or_create(herd=name) - if created or herd.email != email: - if not options['quiet']: - sys.stdout.write('+ [h] %s <%s>\n' % (name, email)) + if created and not options['quiet']: + sys.stdout.write('+ [h] %s <%s>\n' % (name, email)) - herd.email = email - herd.save() + herd.email = email + herd.save() return herd diff --git a/euscanwww/euscan/management/commands/scan-portage.py b/euscanwww/euscan/management/commands/scan-portage.py index da7e5a7..3ec4fc3 100644 --- a/euscanwww/euscan/management/commands/scan-portage.py +++ b/euscanwww/euscan/management/commands/scan-portage.py @@ -154,11 +154,6 @@ class Command(BaseCommand): ' Set all versions dead, then set found versions alive and delete old versions ' Version.objects.filter(package=obj, packaged=True).update(alive=False) - obj.n_packaged = 0 - obj.n_overlay = 0 - obj.n_versions = Version.objects.filter(package=obj).count() - obj.save() - return obj def store_version(self, options, package, cpv, slot, overlay): diff --git a/euscanwww/euscan/management/commands/scan-upstream.py b/euscanwww/euscan/management/commands/scan-upstream.py index 60570ec..241fc66 100644 --- a/euscanwww/euscan/management/commands/scan-upstream.py +++ b/euscanwww/euscan/management/commands/scan-upstream.py @@ -136,9 +136,6 @@ class Command(BaseCommand): ' Set all versions dead, then set found versions alive and delete old versions ' Version.objects.filter(package=obj, packaged=False).update(alive=False) - obj.n_versions = Version.objects.filter(package=obj).count() - obj.save() - return obj def store_version(self, options, package, ver, url): diff --git a/euscanwww/scripts/euscan-update.sh b/euscanwww/scripts/euscan-update.sh index c2c7e00..3f5760f 100644 --- a/euscanwww/scripts/euscan-update.sh +++ b/euscanwww/scripts/euscan-update.sh @@ -17,14 +17,14 @@ # eix-update ## Scan portage (packages, versions) -# python manage.py scan-portage --all --purge +# python manage.py scan-portage --all --purge-versions --purge-packages ## 
Scan metadata (herds, maintainers, homepages, ...) # python manage.py scan-metadata --all ## Scan uptsream packages # python manage.py scan-upstream --all -# eix --only-names -x | gparallel --jobs 400% euscan | python manage.py scan-upstream --feed +# eix --only-names -x | gparallel --jobs 400% euscan | python manage.py scan-upstream --feed --purge-versions ## Update counters # python manage.py update-counters \ No newline at end of file diff --git a/doc/euscan.1 b/man/euscan.1 similarity index 100% rename from doc/euscan.1 rename to man/euscan.1 diff --git a/pym/euscan/__init__.py b/pym/euscan/__init__.py new file mode 100644 index 0000000..6e5cc39 --- /dev/null +++ b/pym/euscan/__init__.py @@ -0,0 +1,49 @@ +#!/usr/bin/python +# +# Copyright 2011 Corentin Chary +# Distributed under the terms of the GNU General Public License v2 + +import sys + +from portage.output import EOutput + +CONFIG = { + 'nocolor': False, + 'quiet': False, + 'verbose': True, + 'debug': False, + 'brute-force': 3, + 'brute-force-recursive': True, + 'scan-dir': True, + 'oneshot': False, + 'user-agent' : 'Mozilla/5.0 (compatible; euscan; +http://euscan.iksaif.net)' +} + +output = EOutput(CONFIG['quiet']) + +BLACKLIST_VERSIONS = [ + # Compatibility package for running binaries linked against a pre gcc 3.4 libstdc++, won't be updated + '>=sys-libs/libstdc++-v3-3.4', +] + +BLACKLIST_PACKAGES = [ + # These kernels are almost dead + 'sys-kernel/usermode-sources', + 'sys-kernel/xbox-sources', + 'sys-kernel/cell-sources', +] + +SCANDIR_BLACKLIST_URLS = [ + 'mirror://rubygems/(.*)', # Not browsable + 'mirror://gentoo/(.*)' # Directory too big +] + +BRUTEFORCE_BLACKLIST_PACKAGES = [ + 'net-zope/plonepopoll' # infinite loop any http://plone.org/products/plonepopoll/releases/*/plonepopoll-2-6-1.tgz link will work + ] + +BRUTEFORCE_BLACKLIST_URLS = [ + 'http://(.*)dockapps.org/download.php/id/(.*)', # infinite loop + 'http://hydra.nixos.org/build/(.*)', # infinite loop + 
'http://www.rennings.net/gentoo/distfiles/(.*)' # Doesn't respect 404, infinite loop +] diff --git a/pym/euscan/handlers/__init__.py b/pym/euscan/handlers/__init__.py new file mode 100644 index 0000000..f729fa2 --- /dev/null +++ b/pym/euscan/handlers/__init__.py @@ -0,0 +1,24 @@ +from euscan.handlers import generic +from euscan.handlers import php +from euscan.handlers import pypi +from euscan.handlers import rubygem + +handlers = [ php, pypi, rubygem, generic ] + +def find_best_handler(cpv, url): + for handler in handlers: + if handler.can_handle(cpv, url): + return handler + return None + +def scan(cpv, url): + handler = find_best_handler(cpv, url) + if handler: + return handler.scan(cpv, url) + return [] + +def brute_force(cpv, url): + handler = find_best_handler(cpv, url) + if handler: + return handler.brute_force(cpv, url) + return [] diff --git a/pym/euscan/handlers/generic.py b/pym/euscan/handlers/generic.py new file mode 100644 index 0000000..bfbd2de --- /dev/null +++ b/pym/euscan/handlers/generic.py @@ -0,0 +1,183 @@ +import urllib2 +import re +import StringIO + +from BeautifulSoup import BeautifulSoup + +import portage + +from euscan import CONFIG, SCANDIR_BLACKLIST_URLS, BRUTEFORCE_BLACKLIST_PACKAGES, BRUTEFORCE_BLACKLIST_URLS, output +from euscan import helpers + +def scan_html(data, url, pattern): + soup = BeautifulSoup(data) + results = [] + + for link in soup.findAll('a'): + href = link.get("href") + if not href: + continue + if href.startswith(url): + href = href.replace(url, "", 1) + + match = re.match(pattern, href, re.I) + if match: + results.append((match.group(1), match.group(0))) + + return results + +def scan_ftp(data, url, pattern): + buf = StringIO.StringIO(data) + results = [] + + for line in buf.readlines(): + line = line.replace("\n", "").replace("\r", "") + match = re.search(pattern, line, re.I) + if match: + results.append((match.group(1), match.group(0))) + + return results + +def scan_directory_recursive(cpv, url, steps): + if not 
steps: + return [] + + cp, ver, rev = portage.pkgsplit(cpv) + url += steps[0][0] + pattern = steps[0][1] + + steps = steps[1:] + + output.einfo("Scanning: %s" % url) + + try: + fp = helpers.urlopen(url) + except urllib2.URLError: + return [] + except IOError: + return [] + + data = fp.read() + + results = [] + + if re.search("<\s*a\s+[^>]*href", data): + results.extend(scan_html(data, url, pattern)) + elif url.startswith('ftp://'): + results.extend(scan_ftp(data, url, pattern)) + + versions = [] + + for version, path in results: + if helpers.version_filtered(cp, ver, version): + continue + + if not url.endswith('/') and not path.startswith('/'): + path = url + '/' + path + else: + path = url + path + + versions.append((path, version)) + + if steps: + ret = scan_directory_recursive(cpv, path, steps) + versions.extend(ret) + + return versions + +def scan(cpv, url): + for bu in SCANDIR_BLACKLIST_URLS: + if re.match(bu, url): + output.einfo("%s is blacklisted by rule %s" % (url, bu)) + return [] + + resolved_url = helpers.parse_mirror(url) + + cp, ver, rev = portage.pkgsplit(cpv) + + template = helpers.template_from_url(resolved_url, ver) + if '${' not in template: + output.einfo("Url doesn't seems to depend on version: %s not found in %s" + % (ver, resolved_url)) + return [] + else: + output.einfo("Scanning: %s" % template) + + steps = helpers.generate_scan_paths(template) + return scan_directory_recursive(cpv, "", steps) + +def brute_force(cpv, url): + cp, ver, rev = portage.pkgsplit(cpv) + + url = helpers.parse_mirror(url) + + for bp in BRUTEFORCE_BLACKLIST_PACKAGES: + if re.match(bp, cp): + output.einfo("%s is blacklisted by rule %s" % (cp, bp)) + return [] + + for bp in BRUTEFORCE_BLACKLIST_URLS: + if re.match(bp, url): + output.einfo("%s is blacklisted by rule %s" % (cp, bp)) + return [] + + output.einfo("Generating version from " + ver) + + components = helpers.split_version(ver) + versions = helpers.gen_versions(components, CONFIG["brute-force"]) + + """ Remove 
unwanted versions """ + for v in versions: + if helpers.vercmp(cp, ver, helpers.join_version(v)) >= 0: + versions.remove(v) + + if not versions: + output.einfo("Can't generate new versions from " + ver) + return [] + + template = helpers.template_from_url(url, ver) + + if '${PV}' not in template: + output.einfo("Url doesn't seems to depend on full version: %s not found in %s" + % (ver, url)) + return [] + else: + output.einfo("Brute forcing: %s" % template) + + result = [] + + i = 0 + done = [] + + while i < len(versions): + components = versions[i] + i += 1 + if components in done: + continue + done.append(tuple(components)) + + version = helpers.join_version(components) + + if helpers.version_filtered(cp, ver, version): + continue + + url = helpers.url_from_template(template, version) + infos = helpers.tryurl(url, template) + + if not infos: + continue + + result.append([url, version]) + + if CONFIG["brute-force-recursive"]: + for v in helpers.gen_versions(components, CONFIG["brute-force"]): + if v not in versions and tuple(v) not in done: + versions.append(v) + + if CONFIG["oneshot"]: + break + + return result + +def can_handle(cpv, url): + return True diff --git a/pym/euscan/handlers/php.py b/pym/euscan/handlers/php.py new file mode 100644 index 0000000..e70c877 --- /dev/null +++ b/pym/euscan/handlers/php.py @@ -0,0 +1,65 @@ +import re +import portage +import urllib2 +import xml.dom.minidom + +from euscan import helpers, output + +def can_handle(cpv, url): + if url.startswith('http://pear.php.net/get/'): + return True + if url.startswith('http://pecl.php.net/get/'): + return True + return False + +def guess_package_and_channel(cp, url): + match = re.search('http://(.*)/get/(.*)-(.*).tgz', url) + + if match: + host = match.group(1) + pkg = match.group(2) + else: + cat, pkg = cp.split("/") + + return pkg, host + +def scan(cpv, url): + pkg, channel = guess_package_and_channel(cpv, url) + + orig_url = url + url = 'http://%s/rest/r/%s/allreleases.xml' % (channel, 
pkg.lower()) + + output.einfo("Using: " + url) + + try: + fp = helpers.urlopen(url) + except urllib2.URLError: + return [] + except IOError: + return [] + + data = fp.read() + + dom = xml.dom.minidom.parseString(data) + + nodes = dom.getElementsByTagName("v") + ret = [] + + cp, ver, rev = portage.pkgsplit(cpv) + + for node in nodes: + version = node.childNodes[0].data + if helpers.version_filtered(cp, ver, version): + continue + + url = 'http://%s/get/%s-%s.tgz' % (channel, pkg, version) + + if url == orig_url: + continue + + ret.append(( url, version )) + + return ret + +def brute_force(cpv, url): + return [] diff --git a/pym/euscan/handlers/pypi.py b/pym/euscan/handlers/pypi.py new file mode 100644 index 0000000..59f7046 --- /dev/null +++ b/pym/euscan/handlers/pypi.py @@ -0,0 +1,51 @@ +import xmlrpclib +import pprint +import re + +import portage + +from euscan import helpers, output + +def can_handle(cpv, url): + return url.startswith('mirror://pypi/') + +def guess_package(cp, url): + match = re.search('mirror://pypi/\w+/(.*)/.*', url) + if match: + return match.group(1) + + cat, pkg = cp.split("/") + + return pkg + +def scan(cpv, url): + 'http://wiki.python.org/moin/PyPiXmlRpc' + + + package = guess_package(cpv, url) + + output.einfo("Using PyPi XMLRPC: " + package) + + client = xmlrpclib.ServerProxy('http://pypi.python.org/pypi') + versions = client.package_releases(package) + + if not versions: + return versions + + versions.reverse() + + cp, ver, rev = portage.pkgsplit(cpv) + + ret = [] + + for version in versions: + if helpers.version_filtered(cp, ver, version): + continue + urls = client.release_urls(package, version) + urls = " ".join([ infos['url'] for infos in urls ]) + ret.append(( urls, version )) + + return ret + +def brute_force(cpv, url): + return [] diff --git a/pym/euscan/handlers/rubygem.py b/pym/euscan/handlers/rubygem.py new file mode 100644 index 0000000..a00f5ac --- /dev/null +++ b/pym/euscan/handlers/rubygem.py @@ -0,0 +1,56 @@ +import re 
+import portage +import json +import urllib2 + +from euscan import helpers, output + +def can_handle(cpv, url): + return url.startswith('mirror://rubygems/') + +def guess_gem(cpv, url): + match = re.search('mirror://rubygems/(.*).gem', url) + if match: + cpv = 'fake/%s' % match.group(1) + + cp, ver, rev = portage.pkgsplit(cpv) + cat, pkg = cp.split("/") + + return pkg + +def scan(cpv, url): + 'http://guides.rubygems.org/rubygems-org-api/#gemversion' + + gem = guess_gem(cpv, url) + url = 'http://rubygems.org/api/v1/versions/%s.json' % gem + + output.einfo("Using: " + url) + + try: + fp = helpers.urlopen(url, None, 5) + except urllib2.URLError: + return [] + except IOError: + return [] + + data = fp.read() + versions = json.loads(data) + + if not versions: + return [] + + cp, ver, rev = portage.pkgsplit(cpv) + + ret = [] + + for version in versions: + version = version['number'] + if helpers.version_filtered(cp, ver, version): + continue + url = 'http://rubygems.org/gems/%s-%s.gem' % (gem, version) + ret.append(( url, version )) + + return ret + +def brute_force(cpv, url): + return [] diff --git a/pym/euscan/helpers.py b/pym/euscan/helpers.py new file mode 100644 index 0000000..3838f8e --- /dev/null +++ b/pym/euscan/helpers.py @@ -0,0 +1,309 @@ +import urllib2 +import os +import re +import pkg_resources +import errno + +import portage +from portage import dep + +from euscan import CONFIG, BLACKLIST_VERSIONS, output + +def htop_vercmp(a, b): + def fixver(v): + if v in ['0.11', '0.12', '0.13']: + v = '0.1.' 
+ v[3:] + return v + + return simple_vercmp(fixver(a), fixver(b))
+
+# Per-package comparison functions; vercmp() uses them instead of simple_vercmp()
+VERSION_CMP_PACKAGE_QUIRKS = {
+    'sys-process/htop' : htop_vercmp
+}
+
+# Version pattern that regex_from_template() substitutes for ${PV}
+_v = r'((\d+)((\.\d+)*)([a-zA-Z]*?)(((-|_)(pre|p|beta|b|alpha|a|rc|r)\d*)*))'
+
+def cast_int_components(version):
+    """Convert, in place, every component of `version` that int() accepts.
+
+    Components for which int() raises ValueError are left untouched.
+    Returns the same list.
+    """
+    for i, obj in enumerate(version):
+        try:
+            version[i] = int(obj)
+        except ValueError:
+            pass
+    return version
+
+def simple_vercmp(a, b):
+    """Compare two version strings.
+
+    Returns 0 when the strings are equal, otherwise the result of
+    portage.versions.vercmp() unless that result is None, in which case the
+    versions are ordered with pkg_resources.parse_version() and -1, 0 or 1
+    is returned.
+    """
+    if a == b:
+        return 0
+
+    # For sane versions
+    r = portage.versions.vercmp(a, b)
+
+    if r is not None:
+        return r
+
+    # Fallback
+    a = pkg_resources.parse_version(a)
+    b = pkg_resources.parse_version(b)
+
+    # Different strings can parse to equal versions; report 0 for them
+    # instead of claiming that `a` is newer.
+    return (a > b) - (a < b)
+
+def vercmp(package, a, b):
+    """Compare versions `a` and `b` of `package`.
+
+    Uses the function registered in VERSION_CMP_PACKAGE_QUIRKS for
+    `package` when there is one, simple_vercmp() otherwise.
+    """
+    if package in VERSION_CMP_PACKAGE_QUIRKS:
+        return VERSION_CMP_PACKAGE_QUIRKS[package](a, b)
+    return simple_vercmp(a, b)
+
+def version_is_nightly(a, b):
+    """Try to skip nightly builds when not wanted (www-apps/moodle).
+
+    Returns True when the parsed `b` has exactly two components, the first
+    one being as long as 'yyyymmdd', and the parsed `a` has a different
+    number of components.
+    """
+    # NOTE(review): len() and indexing are applied to the value returned by
+    # pkg_resources.parse_version(), so it has to be a sequence - confirm
+    # against the pkg_resources version in use.
+    a = pkg_resources.parse_version(a)
+    b = pkg_resources.parse_version(b)
+
+    return len(a) != len(b) and len(b) == 2 and len(b[0]) == len('yyyymmdd')
+
+def version_blacklisted(cp, version):
+    """Tell whether `version` of package `cp` matches a BLACKLIST_VERSIONS rule.
+
+    Returns False when '<cp>-<version>' is rejected by
+    portage.versions.catpkgsplit().  When several rules match, the last one
+    is the one that gets logged.
+    """
+    rule = None
+    cpv = '%s-%s' % (cp, version)
+
+    # Check that the generated cpv can be used by portage
+    if not portage.versions.catpkgsplit(cpv):
+        return False
+
+    for bv in BLACKLIST_VERSIONS:
+        if dep.match_from_list(bv, [cpv]):
+            rule = bv
+
+    if rule is not None:
+        # Log the rule that matched; the loop variable holds the last rule
+        # that was examined, which may be a different one.
+        output.einfo("%s is blacklisted by rule %s" % (cpv, rule))
+    return rule is not None
+
+def version_filtered(cp, base, version):
+    """Tell whether `version` should be discarded as a candidate for `cp`.
+
+    A version is discarded when vercmp() does not rank it above `base`,
+    when it is blacklisted, or when version_is_nightly() flags it.
+    """
+    if vercmp(cp, base, version) >= 0:
+        return True
+
+    if version_blacklisted(cp, version):
+        return True
+
+    if version_is_nightly(base, version):
+        return True
+
+    return False
+
+def generate_templates_vars(version):
+    """Build the (literal, placeholder) substitutions for `version`.
+
+    The first pair maps the whole version to '${PV}'.  It is followed by the
+    dotted prefixes of the split version, longest first and down to two
+    components, each mapped to the matching '${0}.${1}...' string.
+    """
+    ret = []
+
+    part = split_version(version)
+    for i in range(2, len(part)):
+        ver = [str(part[j]) for j in range(i)]
+        var = ['${%d}' % j for j in range(i)]
+        ret.append((".".join(ver), ".".join(var)))
+    ret.append((version, '${PV}'))
+    ret.reverse()
+    return ret
+
+def template_from_url(url, version):
+    """Replace the occurrences of `version` in `url` by placeholders.
+
+    The part after '://' is processed one '/'-separated chunk at a time
+    with the substitutions of generate_templates_vars().
+    """
+    # Split on the first '://' only, so that a url embedding another url
+    # does not break the unpacking.
+    prefix, chunks = url.split('://', 1)
+    chunks = chunks.split('/')
+
+    # The substitutions only depend on the version: compute them once
+    subs = generate_templates_vars(version)
+
+    for i, chunk in enumerate(chunks):
+        for literal, placeholder in subs:
+            chunk = chunk.replace(literal, placeholder)
+        chunks[i] = chunk
+
+    return prefix + "://" + "/".join(chunks)
+
+def url_from_template(url, version):
+    """Expand the '${PV}' and '${N}' placeholders of `url` for `version`."""
+    components = split_version(version)
+
+    url = url.replace('${PV}', version)
+    for i, component in enumerate(components):
+        url = url.replace('${%d}' % i, str(component))
+
+    return url
+
+# Stolen from distutils.LooseVersion
+# Used for brute force to increment the version
+def split_version(version):
+    """Split `version` into a list of components.
+
+    Runs of digits are returned as ints, runs of lowercase letters and any
+    other text between them as strings; dots and empty pieces are dropped.
+    """
+    component_re = re.compile(r'(\d+ | [a-z]+ | \.)', re.VERBOSE)
+    # Build a real list: the components are indexed and modified below
+    components = [x for x in component_re.split(version) if x and x != '.']
+    for i, component in enumerate(components):
+        try:
+            components[i] = int(component)
+        except ValueError:
+            pass
+    return components
+
+def join_version(components):
+    """Build a version string from a list of components.
+
+    A '.' is inserted only between two adjacent components that are both
+    non-strings; a string component is glued to its neighbours as is.
+    """
+    version = ""
+    last = len(components) - 1
+    for i, component in enumerate(components):
+        version += str(component)
+        if i >= last:
+            break
+        if not isinstance(component, str) and not isinstance(components[i + 1], str):
+            version += "."
+    return version
+
+def increment_version(components, level):
+    """Increment, in place, the component at index `level`.
+
+    Integer components located after `level` are reset to 0 and non-integer
+    components are never modified.  Returns the same list.  Raises
+    Exception when `level` is not a valid index of `components`.
+    """
+    n = len(components)
+
+    if level > n - 1 or level < 0:
+        raise Exception("invalid level %s for %s components" % (level, n))
+
+    for i in range(level + 1, n):
+        if isinstance(components[i], int):
+            components[i] = 0
+
+    if isinstance(components[level], int):
+        components[level] += 1
+
+    return components
+
+def gen_versions(components, level):
+    """Generate candidate versions by incrementing the last components.
+
+    For each of the last min(level, len(components)) positions, starting
+    with the last one, a copy of `components` is recorded after each of
+    `level` successive increments of that position.  `components` itself is
+    modified in the process.  Returns the list of recorded copies, which is
+    empty when `components` is empty.
+    """
+    n = len(components)
+    depth = level
+    level = min(level, n)
+
+    if not n:
+        return []
+
+    versions = []
+
+    for i in range(n, n - level, -1):
+        increment_version(components, i - 1)
+        for _ in range(depth):
+            versions.append(list(components))
+            increment_version(components, i - 1)
+
+    return versions
+
+def urlopen(url, timeout=None):
+    """Open `url` with the configured User-Agent header.
+
+    When `timeout` is not given (or is 0) it defaults to 15 seconds for
+    urls containing 'sourceforge' and to 5 seconds otherwise.
+    """
+    if not timeout:
+        if 'sourceforge' in url:
+            timeout = 15
+        else:
+            timeout = 5
+
+    request = urllib2.Request(url)
+    request.add_header('User-Agent', CONFIG['user-agent'])
+    return urllib2.urlopen(request, None, timeout)
+
+def tryurl(fileurl, template):
+    """Check whether `fileurl` points to a downloadable file.
+
+    Returns a (final url, headers) tuple, or None when the url cannot be
+    opened or when the response is rejected: Content-disposition without
+    the expected file name, empty body, html page, or redirection to a
+    differently named file that still matches `template`.
+    """
+    result = True
+    fp = None
+
+    output.ebegin("Trying: " + fileurl)
+
+    try:
+        basename = os.path.basename(fileurl)
+
+        fp = urlopen(fileurl)
+        headers = fp.info()
+        finalurl = fp.geturl()
+
+        if 'Content-disposition' in headers and basename not in headers['Content-disposition']:
+            result = None
+        elif 'Content-Length' in headers and headers['Content-Length'] == '0':
+            result = None
+        # Content-Type may be missing: look it up with a default instead of
+        # indexing, which would raise an exception not handled below.
+        elif 'text/html' in (headers.get('Content-Type') or ''):
+            result = None
+        elif finalurl != fileurl:
+            regex = regex_from_template(template)
+            baseregex = regex_from_template(os.path.basename(template))
+            basename2 = os.path.basename(finalurl)
+
+            # Redirect to another (earlier?) version
+            if basename != basename2 and (re.match(regex, finalurl) or re.match(baseregex, basename2)):
+                result = None
+
+        if result:
+            result = (finalurl, headers)
+
+    except (urllib2.URLError, IOError):
+        result = None
+    finally:
+        # Only the url and the headers are returned: release the connection
+        if fp is not None:
+            fp.close()
+
+    output.eend(errno.ENOENT if not result else 0)
+
+    return result
+
+def regex_from_template(template):
+    """Turn a url template into a regular expression.
+
+    '${1}' becomes a group of digits, any other run of '${N}' placeholders
+    (optionally separated by dots) a group of word characters and '${PV}'
+    the `_v` version pattern; the rest of the template is matched
+    literally.  The expression ends with an optional '/'.
+    """
+    template = re.escape(template)
+    # re.escape() also escaped the placeholders: restore them
+    template = template.replace(r'\$\{', '${')
+    template = template.replace(r'\}', '}')
+    # Un-escape the dot separating two placeholders so that it is consumed
+    # by the run of placeholders matched below
+    template = template.replace(r'}\.$', '}.$')
+    template = template.replace('${1}', r'([\d]+?)')
+    # A function is used as replacement so that the backslash of the group
+    # is inserted as is instead of being parsed as a template escape
+    template = re.sub(r'(\$\{\d+\}\.?)+', lambda match: r'([\w]+?)', template)
+    template = template.replace('${PV}', _v)
+    template = template + r'/?$'
+    return template
+
+def basedir_from_template(template):
+    """Return the part of `template` preceding the first path component
+    that contains a placeholder.
+
+    The whole template is returned when it has no placeholder, and an empty
+    string when there is no '/' before the first one.
+    """
+    idx = template.find('${')
+    if idx == -1:
+        return template
+
+    idx = template[0:idx].rfind('/')
+    if idx == -1:
+        return ""
+
+    return template[0:idx]
+
+def generate_scan_paths(url):
+    """Split a url template into (path, regex) scan steps.
+
+    Every '/'-separated chunk containing a placeholder closes a step made
+    of the literal path accumulated since the previous step and of the
+    regular expression built from that chunk.  Literal chunks located after
+    the last placeholder are not part of any step.
+    """
+    # Split on the first '://' only, so that a url embedding another url
+    # does not break the unpacking.
+    prefix, chunks = url.split('://', 1)
+    chunks = chunks.split('/')
+
+    steps = []
+
+    path = prefix + ":/"
+    for chunk in chunks:
+        if '${' in chunk:
+            steps.append((path, regex_from_template(chunk)))
+            path = ""
+        else:
+            path += "/"
+            path += chunk
+    return steps
+
+def parse_mirror(uri):
+    """Resolve a 'mirror://name/path' uri to a url on a random mirror.
+
+    Uris that do not start with 'mirror://' are returned unchanged.
+    Returns None, after logging the reason, when the uri has no path or
+    names a mirror unknown to portage.
+    """
+    from random import choice
+
+    scheme = "mirror://"
+
+    if not uri.startswith(scheme):
+        return uri
+
+    eidx = uri.find("/", len(scheme))
+    if eidx == -1:
+        output.einfo("Invalid mirror definition in SRC_URI:\n")
+        output.einfo(" %s\n" % (uri))
+        return None
+
+    mirrorname = uri[len(scheme):eidx]
+    path = uri[eidx+1:]
+
+    mirrors = portage.settings.thirdpartymirrors()
+    if mirrorname not in mirrors:
+        output.einfo("No known mirror by the name: %s\n" % (mirrorname))
+        return None
+
+    # choice() picks a mirror without reordering the list owned by portage
+    return choice(mirrors[mirrorname]).strip("/") + "/" + path
diff
--git a/pym/euscan/scan.py b/pym/euscan/scan.py
new file mode 100644
index 0000000..cf52801
--- /dev/null
+++ b/pym/euscan/scan.py
@@ -0,0 +1,161 @@
+from __future__ import print_function
+
+import os
+import sys
+import re
+import time
+import getopt
+import random
+import urllib2
+import StringIO
+
+import pkg_resources
+
+import portage
+import portage.versions
+from portage import dep
+from portage.dbapi import porttree
+from portage.exception import InvalidDependString
+from portage.output import white, yellow, turquoise, green, teal, red, EOutput
+
+import gentoolkit.pprinter as pp
+from gentoolkit import errors
+from gentoolkit.query import Query
+from gentoolkit.eclean.search import (port_settings)
+
+from euscan import CONFIG, BLACKLIST_PACKAGES, output
+from euscan import handlers
+from euscan import helpers
+
+def filter_versions(cp, versions):
+    """Deduplicate (url, version) pairs and drop blacklisted versions.
+
+    `versions` is an iterable of (url, version) pairs.  Returns a list of
+    (url, version) pairs holding one url per remaining version.
+    """
+    filtered = {}
+
+    for url, version in versions:
+        # Keep the most specific url (determined by its length): a shorter
+        # url never replaces the one already recorded for this version.
+        if version in filtered and len(url) < len(filtered[version]):
+            continue
+
+        # Remove blacklisted versions
+        if helpers.version_blacklisted(cp, version):
+            continue
+
+        filtered[version] = url
+
+    return [(url, version) for version, url in filtered.items()]
+
+def _scan_urls(cpv, urls):
+    """Collect the versions found for every url of the `urls` map.
+
+    In 'oneshot' mode scanning stops completely at the first url that
+    yields at least one version.
+    """
+    versions = []
+
+    for filename in urls:
+        for url in urls[filename]:
+            print()
+            output.einfo("SRC_URI is '%s'" % url)
+
+            if '://' not in url:
+                output.einfo("Invalid url '%s'" % url)
+                continue
+
+            # Try normal scan
+            if CONFIG["scan-dir"]:
+                versions.extend(handlers.scan(cpv, url))
+
+            # Return instead of breaking out of the url loop only, so
+            # that the remaining files are not scanned either.
+            if versions and CONFIG['oneshot']:
+                return versions
+
+            # Brute Force
+            if CONFIG["brute-force"] > 0:
+                versions.extend(handlers.brute_force(cpv, url))
+
+            if versions and CONFIG['oneshot']:
+                return versions
+
+    return versions
+
+def scan_upstream_urls(cpv, urls):
+    """Scan the urls of a SRC_URI map and return the filtered versions.
+
+    `urls` maps each file name to an iterable of urls.  Returns the list
+    of (url, version) pairs built by filter_versions().
+    """
+    versions = _scan_urls(cpv, urls)
+    cp, _ver, _rev = portage.pkgsplit(cpv)
+    return filter_versions(cp, versions)
+
+
+def scan_upstream(query):
+    """Find the package matching `query` and scan its upstream urls.
+
+    Returns the result of scan_upstream_urls(), or an empty list when no
+    package matches, when only a 9999 version exists, when the package is
+    blacklisted or when its SRC_URI cannot be parsed.
+    """
+    matches = Query(query).find(
+        include_masked=True,
+        in_installed=False
+    )
+
+    if not matches:
+        sys.stderr.write(pp.warn("No package matching '%s'" % pp.pkgquery(query)))
+        return []
+
+    # Use the last match in sort order
+    matches = sorted(matches)
+    pkg = matches.pop()
+
+    # A 9999 (dev) version is skipped in favour of the next match
+    if '9999' in pkg.version:
+        if not matches:
+            sys.stderr.write(pp.warn("Package '%s' only have a dev version (9999)" % pp.pkgquery(pkg.cp)))
+            return []
+        pkg = matches.pop()
+
+    if pkg.cp in BLACKLIST_PACKAGES:
+        sys.stderr.write(pp.warn("Package '%s' is blacklisted" % pp.pkgquery(pkg.cp)))
+        return []
+
+    pp.uprint(" * %s [%s]" % (pp.cpv(pkg.cpv), pp.section(pkg.repo_name())))
+    pp.uprint()
+
+    ebuild_path = pkg.ebuild_path()
+    if ebuild_path:
+        pp.uprint('Ebuild: ' + pp.path(os.path.normpath(ebuild_path)))
+
+    pp.uprint('Repository: ' + pkg.repo_name())
+    pp.uprint('Homepage: ' + pkg.environment("HOMEPAGE"))
+    pp.uprint('Description: ' + pkg.environment("DESCRIPTION"))
+
+    cpv = pkg.cpv
+    metadata = {
+        "EAPI" : port_settings["EAPI"],
+        "SRC_URI" : pkg.environment("SRC_URI", False),
+    }
+    use = frozenset(port_settings["PORTAGE_USE"].split())
+    try:
+        alist = porttree._parse_uri_map(cpv, metadata, use=use)
+        aalist = porttree._parse_uri_map(cpv, metadata)
+    except InvalidDependString as e:
+        sys.stderr.write(pp.warn("%s\n" % str(e)))
+        sys.stderr.write(pp.warn("Invalid SRC_URI for '%s'" % pp.pkgquery(cpv)))
+        return []
+
+    # With 'mirror' in FEATURES use the map parsed without USE flags,
+    # otherwise the one restricted to PORTAGE_USE.
+    if "mirror" in portage.settings.features:
+        urls = aalist
+    else:
+        urls = alist
+
+    return scan_upstream_urls(cpv, urls)
diff --git a/setup.py b/setup.py
new file mode 100755
index 0000000..5a7899f
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+
+import re
+import sys
+import distutils
+from distutils import core, log
+from glob import glob
+
+import os
+import io
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'pym'))
+
+__version__ = os.getenv('VERSION', default='9999')
+
+cwd = os.getcwd()
+
+# Load EPREFIX from Portage, fall back to '/' if the import fails
+try:
+    from portage.const import EPREFIX
+except ImportError:
+    EPREFIX='/'
+
+# Python files that need `__version__ = ""` subbed, relative to this dir:
+python_scripts = [os.path.join(cwd, path) for path in (
+    'bin/euscan',
+)]
+
+packages = [
+    str('.'.join(root.split(os.sep)[1:]))
+    for root, dirs, files in os.walk('pym/euscan')
+    if '__init__.py' in files
+]
+
+core.setup(
+    name='euscan',
+    version=__version__,
+    description='Ebuild Upstream Scan tools.',
+    author='Corentin Chary',
+    author_email='corentin.chary@gmail.com',
+    maintainer='Corentin Chary',
+    maintainer_email='corentin.chary@gmail.com',
+    url='http://euscan.iksaif.net',
+    download_url='http://git.iksaif.net/?p=euscan.git;a=snapshot;h=HEAD;sf=tgz',
+    package_dir={'': 'pym'},
+    packages=packages,
+    package_data = {},
+    scripts=python_scripts,
+    data_files=(
+        (os.path.join(EPREFIX, 'usr/share/man/man1'), glob('man/*')),
+    ),
+)