Change source layout

* In preparation for PEP517 transition.

Signed-off-by: Alfred Wingate <parona@protonmail.com>
Author: Alfred Wingate
Date: 2023-11-14 19:58:44 +02:00
parent ec7399752c
commit c873e1520d
25 changed files with 3 additions and 3 deletions
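For context, a PEP 517 build replaces direct setup.py invocation with a declarative build backend that discovers packages under the new src/ layout. A minimal illustrative sketch of that kind of configuration (not part of this commit; backend choice and version bounds are assumptions):

# pyproject.toml (illustrative sketch only)
[build-system]
requires = ["setuptools>=61"]
build-backend = "setuptools.build_meta"

[tool.setuptools.packages.find]
where = ["src"]  # the src/ layout this commit prepares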

src/euscan/handlers/__init__.py Normal file

@@ -0,0 +1,216 @@
import os
import sys
import pkgutil

from euscan import CONFIG, output

from gentoolkit.metadata import MetaData

handlers = {'package': [], 'url': [], 'all': {}}

# autoimport all modules in this directory and append them to handlers list
for loader, module_name, is_pkg in pkgutil.walk_packages(__path__):
    module = loader.find_module(module_name).load_module(module_name)
    if not hasattr(module, 'HANDLER_NAME'):
        continue
    if hasattr(module, 'scan_url'):
        handlers['url'].append(module)
    if hasattr(module, 'scan_pkg'):
        handlers['package'].append(module)
    handlers['all'][module.HANDLER_NAME] = module


# sort handlers by priority
def sort_handlers(handlers):
    return sorted(
        handlers,
        key=lambda handler: handler.PRIORITY,
        reverse=True
    )

handlers['package'] = sort_handlers(handlers['package'])
handlers['url'] = sort_handlers(handlers['url'])


def find_best_handler(kind, pkg, *args):
    """
    Find the best handler for the given package.
    """
    for handler in handlers[kind]:
        if (handler.HANDLER_NAME not in CONFIG["handlers-exclude"] and
                handler.can_handle(pkg, *args)):
            return handler
    return None


def find_handlers(kind, names):
    ret = []

    for name in names:
        # Does this handler exist, and does it handle this kind of thing?
        # (pkg / url)
        if name in handlers['all'] and handlers['all'][name] in handlers[kind]:
            ret.append(handlers['all'][name])

    return ret


def get_metadata(pkg):
    metadata = {}

    pkg_metadata = None

    meta_override = os.path.join('metadata', pkg.category, pkg.name,
                                 'metadata.xml')

    try:
        if os.path.exists(meta_override):
            pkg_metadata = MetaData(meta_override)
            output.einfo('Using custom metadata: %s' % meta_override)
        if not pkg_metadata:
            pkg_metadata = pkg.metadata
    except Exception as e:
        output.ewarn('Error when fetching metadata: %s' % str(e))

    if not pkg_metadata:
        return {}

    # Support multiple remote-id and multiple watch
    for upstream in pkg_metadata._xml_tree.findall("upstream"):
        for node in upstream.findall("watch"):
            options = dict(node.attrib)
            options['data'] = node.text

            if "type" in options:
                handler = options['type']
            else:
                handler = "url"
                options['type'] = "url"

            for key in ["versionmangle", "downloadurlmangle"]:
                value = options.get(key, None)
                if value:
                    options[key] = value.split(";")

            if handler not in metadata:
                metadata[handler] = []
            metadata[handler].append(options)

    for upstream in pkg_metadata._xml_tree.findall("upstream"):
        for node in upstream.findall("remote-id"):
            handler = node.attrib.get("type")
            if not handler:
                continue
            if handler in metadata:
                for i in range(len(metadata[handler])):
                    if not metadata[handler][i]['data']:
                        metadata[handler][i]['data'] = node.text
            else:
                metadata[handler] = [{'type': handler, 'data': node.text}]

    return metadata


def scan_pkg(pkg_handler, pkg, options, on_progress=None):
    versions = []

    if on_progress:
        on_progress(increment=35)

    for o in options:
        versions += pkg_handler.scan_pkg(pkg, o)

    if on_progress:
        on_progress(increment=35)

    return versions


def scan_url(pkg, urls, options, on_progress=None):
    versions = []

    if on_progress:
        progress_available = 70
        num_urls = sum([len(urls[fn]) for fn in urls])
        if num_urls > 0:
            progress_increment = progress_available / num_urls
        else:
            progress_increment = 0

    for filename in urls:
        for url in urls[filename]:
            if on_progress and progress_available > 0:
                on_progress(increment=progress_increment)
                progress_available -= progress_increment

            output.einfo("SRC_URI is '%s'" % url)

            if '://' not in url:
                output.einfo("Invalid url '%s'" % url)
                continue

            try:
                url_handler = find_best_handler('url', pkg, url)
                if url_handler:
                    for o in options:
                        versions += url_handler.scan_url(pkg, url, o)
                else:
                    output.eerror("Can't find a suitable handler!")
            except Exception as e:
                output.ewarn(
                    "Handler failed: [%s] %s" %
                    (e.__class__.__name__, str(e))
                )

            if versions and CONFIG['oneshot']:
                break

    if on_progress and progress_available > 0:
        on_progress(increment=progress_available)

    return versions


def scan(pkg, urls, on_progress=None):
    """
    Scan upstream for the given package.
    Try package-wide handlers first, then fall back to URL handling.
    """
    if not CONFIG['quiet'] and not CONFIG['format']:
        sys.stdout.write('\n')

    metadata = get_metadata(pkg)
    versions = []

    pkg_handlers = find_handlers('package', list(metadata.keys()))

    if not pkg_handlers:
        pkg_handler = find_best_handler('package', pkg)
        if pkg_handler:
            pkg_handlers = [pkg_handler]

    for pkg_handler in pkg_handlers:
        options = metadata.get(pkg_handler.HANDLER_NAME, [{}])
        versions += scan_pkg(pkg_handler, pkg, options, on_progress)

    if not pkg_handlers:
        versions += scan_url(pkg, urls, [{}], on_progress)

    return versions


def mangle(kind, name, string):
    if name not in handlers['all']:
        return None
    handler = handlers['all'][name]
    if not hasattr(handler, 'mangle_%s' % kind):
        return None
    return getattr(handler, 'mangle_%s' % kind)(string)


def mangle_url(name, string):
    return mangle('url', name, string)


def mangle_version(name, string):
    return mangle('version', name, string)
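
For orientation, a minimal sketch of how this registry is meant to be driven (illustrative only, not part of this commit; assumes a configured portage tree and that gentoolkit's Query is available):

# hypothetical driver code
from gentoolkit.query import Query
from euscan import handlers

pkg = Query('app-editors/vim').find_best()  # a gentoolkit Package
urls = {}  # filename -> list of SRC_URI urls, as euscan collects them
for found_url, version, handler_name, confidence in handlers.scan(pkg, urls):
    print(version, handler_name, confidence, found_url)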

src/euscan/handlers/berlios.py Normal file

@@ -0,0 +1,59 @@
import re
import urllib.request, urllib.parse, urllib.error

import portage

from euscan.helpers import regex_from_template
from euscan.handlers.url import process_scan as url_scan
from euscan import output

HANDLER_NAME = "berlios"
CONFIDENCE = 90
PRIORITY = 90

berlios_regex = r"mirror://berlios/([^/]+)/([^/]+)"


def can_handle(pkg, url=None):
    if not url:
        return False

    cp, ver, rev = portage.pkgsplit(pkg.cpv)
    if ver not in url:
        return False

    return re.search(berlios_regex, url)


def scan_url(pkg, url, options):
    output.einfo("Using BerliOS handler")

    cp, ver, rev = portage.pkgsplit(pkg.cpv)

    project, filename = re.search(berlios_regex, url).groups()

    project_page = "http://developer.berlios.de/projects/%s" % project
    # urlopen returns bytes; decode before regex matching
    content = urllib.request.urlopen(project_page).read().decode()

    project_id = re.search(
        r"/project/filelist.php\?group_id=(\d+)",
        content
    ).group(1)

    base_url = (
        "http://developer.berlios.de/project/filelist.php?group_id=%s" %
        project_id
    )

    file_pattern = regex_from_template(
        filename.replace(ver, "${PV}")
    )

    result = url_scan(pkg, base_url, file_pattern)

    ret = []
    for found_url, pv, _, _ in result:
        found_url = found_url.replace("prdownload", "download")
        ret.append((found_url, pv, HANDLER_NAME, CONFIDENCE))
    return ret

src/euscan/handlers/cpan.py Normal file

@@ -0,0 +1,161 @@
import re
import portage
import urllib.request, urllib.error, urllib.parse
import json

from euscan import helpers, output, mangling

HANDLER_NAME = "cpan"
CONFIDENCE = 100
PRIORITY = 90

_cpan_package_name_re = re.compile("mirror://cpan/authors/.*/([^/.]*).*")


def can_handle(pkg, url=None):
    return url and url.startswith('mirror://cpan/')


def guess_package(cp, url):
    match = _cpan_package_name_re.search(url)

    pkg = None
    if match:
        pkg = match.group(1)
        try:
            cp, ver, rev = portage.pkgsplit('fake/' + pkg)
        except:
            pass

    cat, pkg = cp.split("/")

    return pkg


def mangle_version(up_pv):
    if up_pv.startswith('v'):
        return up_pv[1:]

    # clean
    up_pv = up_pv.replace("._", "_")  # e.g.: 0.999._002 -> 0.999_002
    up_pv = up_pv.replace("_0.", "_")  # e.g.: 0.30_0.1 -> 0.30_1

    # Detect _rc versions
    rc_part = ""
    if up_pv.count("_") == 1:
        up_pv, rc_part = up_pv.split("_")

    # Gentoo creates groups of 3 digits, except for the first digit,
    # or when the last digit is 0.  e.g.: 4.11 -> 4.110.0
    splitted = up_pv.split(".")
    if len(splitted) == 2:  # split the second part into sub-groups
        part = splitted.pop()
        for i in range(0, len(part), 3):
            splitted.append(part[i:i + 3])

    if len(splitted) == 2:  # add the last group if it's missing
        splitted.append("0")

    groups = [splitted[0]]
    for part in splitted[1:-1]:
        groups.append(part.ljust(3, "0"))
    if splitted[-1] == "0":
        groups.append(splitted[-1])
    else:
        groups.append(splitted[-1].ljust(3, "0"))

    # if a group has leading zeros, strip them.  e.g.: 002 -> 2
    groups = [g.lstrip("0") if g != "0" else g for g in groups]

    pv = ".".join(groups)

    if rc_part:
        pv = "%s_rc%s" % (pv, rc_part)

    return pv


def cpan_mangle_version(pv):
    pos = pv.find('.')
    if pos <= 0:
        return pv
    up_pv = pv.replace('.', '')
    up_pv = up_pv[0:pos] + '.' + up_pv[pos:]
    return up_pv


def cpan_vercmp(cp, a, b):
    try:
        return float(a) - float(b)
    except:
        return helpers.simple_vercmp(a, b)


def scan_url(pkg, url, options):
    cp, ver, rev = portage.pkgsplit(pkg.cpv)
    remote_pkg = guess_package(cp, url)

    output.einfo("Using CPAN API: %s" % remote_pkg)

    return scan_pkg(pkg, {'data': remote_pkg})


def scan_pkg(pkg, options):
    remote_pkg = options['data']

    # Defaults to CPAN mangling rules
    if 'versionmangle' not in options:
        options['versionmangle'] = ['cpan', 'gentoo']

    url = 'http://search.cpan.org/api/dist/%s' % remote_pkg
    cp, ver, rev = pkg.cp, pkg.version, pkg.revision
    m_ver = cpan_mangle_version(ver)

    output.einfo("Using CPAN API: " + url)

    try:
        fp = helpers.urlopen(url)
    except urllib.error.URLError:
        return []
    except IOError:
        return []

    if not fp:
        return []

    data = fp.read()
    data = json.loads(data)

    if 'releases' not in data:
        return []

    ret = []
    for version in data['releases']:
        # if version['status'] == 'testing':
        #     continue

        up_pv = version['version']
        pv = mangling.mangle_version(up_pv, options)

        if up_pv.startswith('v'):
            if helpers.version_filtered(cp, ver, pv):
                continue
        else:
            m_pv = cpan_mangle_version(up_pv)
            if helpers.version_filtered(cp, m_ver, m_pv, cpan_vercmp):
                continue

        # CPAN author paths are <first letter>/<first two letters>/<id>
        url = 'mirror://cpan/authors/id/%s/%s/%s/%s' % (
            version['cpanid'][0],
            version['cpanid'][0:2],
            version['cpanid'],
            version['archive']
        )

        url = mangling.mangle_url(url, options)
        ret.append((url, pv, HANDLER_NAME, CONFIDENCE))
    return ret
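
To make the mangling rules above concrete, a few illustrative expected outputs (assuming this module is importable as euscan.handlers.cpan):

# illustrative only
from euscan.handlers.cpan import mangle_version, cpan_mangle_version

assert mangle_version("v1.2.3") == "1.2.3"      # leading 'v' is stripped
assert mangle_version("4.11") == "4.110.0"      # 3-digit groups, zero tail
assert cpan_mangle_version("1.23.45") == "1.2345"  # back to CPAN float form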

src/euscan/handlers/deb.py Normal file

@@ -0,0 +1,53 @@
import urllib.request, urllib.parse, urllib.error
import re
import bz2
import zlib

import portage

from euscan import mangling, helpers, output

HANDLER_NAME = "deb"
CONFIDENCE = 100
PRIORITY = 90


def can_handle(pkg, url=None):
    return False


def scan_pkg(pkg, options):
    cp, ver, rev = portage.pkgsplit(pkg.cpv)

    packages_url, package_name = options['data'].strip().split(" ", 1)

    output.einfo("Using Debian Packages: " + packages_url)

    fp = urllib.request.urlopen(packages_url)
    content = fp.read()

    # Support for .gz and .bz2 Packages file
    if packages_url.endswith(".bz2"):
        content = bz2.decompress(content)
    if packages_url.endswith(".gz"):
        content = zlib.decompress(content, 16 + zlib.MAX_WBITS)

    # urlopen/decompress yield bytes; decode before text processing
    content = content.decode("utf-8", errors="replace")

    content = content.split("\n\n")

    result = []
    for package_info in content:
        package_line = re.search(r"^Package: (.*)$", package_info, re.M)
        version_line = re.search(r"^Version: (.*)$", package_info, re.M)
        if package_line and package_line.group(1) == package_name:
            if version_line:
                result.append(version_line.group(1))

    ret = []
    for up_pv in result:
        url = ""  # TODO: How to find the url?
        pv = mangling.mangle_version(up_pv, options)
        if helpers.version_filtered(cp, ver, pv):
            continue
        ret.append((url, pv, HANDLER_NAME, CONFIDENCE))

    return ret

src/euscan/handlers/freecode.py Normal file

@@ -0,0 +1,48 @@
import urllib.request, urllib.parse, urllib.error
import re

import portage

from euscan import mangling, helpers, output

HANDLER_NAME = "freecode"
CONFIDENCE = 100
PRIORITY = 90


def can_handle(pkg, url=None):
    return False


def scan_pkg(pkg, options):
    cp, ver, rev = portage.pkgsplit(pkg.cpv)

    package = options['data'].strip()

    output.einfo("Using FreeCode handler: " + package)

    fp = urllib.request.urlopen("http://freecode.com/projects/%s/releases" %
                                package)
    content = fp.read().decode()  # urlopen returns bytes

    result = re.findall(
        r'<a href="/projects/%s/releases/(\d+)">([^<]+)</a>' % package,
        content
    )

    ret = []
    for release_id, up_pv in result:
        pv = mangling.mangle_version(up_pv, options)
        if helpers.version_filtered(cp, ver, pv):
            continue
        fp = urllib.request.urlopen(
            "http://freecode.com/projects/%s/releases/%s" %
            (package, release_id))
        content = fp.read().decode()
        download_page = re.findall(r'<a href="(/urls/[^"]+)"', content)[0]
        fp = urllib.request.urlopen("http://freecode.com%s" % download_page)
        content = fp.read().decode()
        url = re.findall(
            r'In case it doesn\'t, click here: <a href="([^"]+)"',
            content
        )[0]
        ret.append((url, pv, HANDLER_NAME, CONFIDENCE))

    return ret

src/euscan/handlers/generic.py Normal file

@@ -0,0 +1,276 @@
from urllib.parse import urljoin, urlparse
import urllib.request, urllib.error, urllib.parse
import re
import io
import difflib

try:
    from BeautifulSoup import BeautifulSoup
except ImportError:
    from bs4 import BeautifulSoup

import portage

from euscan import output, helpers, mangling, CONFIG, SCANDIR_BLACKLIST_URLS, \
    BRUTEFORCE_BLACKLIST_PACKAGES, BRUTEFORCE_BLACKLIST_URLS

HANDLER_NAME = "generic"
CONFIDENCE = 45
PRIORITY = 0

BRUTEFORCE_HANDLER_NAME = "brute_force"
BRUTEFORCE_CONFIDENCE = 30


def confidence_score(found, original, minimum=CONFIDENCE):
    found_p = urlparse(found)
    original_p = urlparse(original)

    # check if the base url is the same
    if found_p.netloc != original_p.netloc:
        return minimum

    # check if the directory depth is the same
    if len(found_p.path.split("/")) != len(original_p.path.split("/")):
        return minimum

    # strip numbers
    found_path = re.sub(r"[\d+\.]?", "", found_p.path)
    original_path = re.sub(r"[\d+\.]?", "", original_p.path)

    # strip the leading part of the path that both urls share
    i = 0
    max_i = len(found_path)
    while i < max_i and found_path[i] == original_path[i]:
        i += 1
    found_path = found_path[i:]
    original_path = original_path[i:]

    # calculate difference ratio
    diff = difflib.SequenceMatcher(None, found_path, original_path).ratio()
    return int(minimum + minimum * diff)  # maximum score is minimum * 2


def scan_html(data, url, pattern):
    soup = BeautifulSoup(data, features="lxml")
    results = []

    for link in soup.findAll('a'):
        href = link.get("href")
        if not href:
            continue

        if href.startswith(url):
            href = href.replace(url, "", 1)

        match = re.search(pattern, href, re.I)
        if match:
            results.append(
                (".".join([x for x in match.groups() if x is not None]),
                 match.group(0))
            )

    return results


def scan_ftp(data, url, pattern):
    # FTP listings arrive as bytes; decode before line matching
    if isinstance(data, bytes):
        data = data.decode("utf-8", errors="replace")

    buf = io.StringIO(data)
    results = []

    for line in buf.readlines():
        line = line.replace("\n", "").replace("\r", "")
        match = re.search(pattern, line, re.I)
        if match:
            results.append(
                (".".join([x for x in match.groups() if x is not None]),
                 match.group(0))
            )

    return results


def scan_directory_recursive(cp, ver, rev, url, steps, orig_url, options):
    if not steps:
        return []

    url += steps[0][0]
    pattern = steps[0][1]
    steps = steps[1:]

    output.einfo("Scanning: %s" % url)

    try:
        fp = helpers.urlopen(url)
    except urllib.error.URLError:
        return []
    except IOError:
        return []

    if not fp:
        return []

    data = fp.read()

    results = []
    if re.search(rb"<\s*a\s+[^>]*href", data, re.I):
        results.extend(scan_html(data, url, pattern))
    elif url.startswith('ftp://'):
        results.extend(scan_ftp(data, url, pattern))

    versions = []
    for up_pv, path in results:
        pv = mangling.mangle_version(up_pv, options)
        if helpers.version_filtered(cp, ver, pv):
            continue
        if not url.endswith("/"):
            url = url + "/"
        path = urljoin(url, path)

        if not steps and path not in orig_url:
            confidence = confidence_score(path, orig_url)
            path = mangling.mangle_url(path, options)
            versions.append((path, pv, HANDLER_NAME, confidence))

        if steps:
            ret = scan_directory_recursive(cp, ver, rev, path, steps,
                                           orig_url, options)
            versions.extend(ret)

    return versions


def scan_url(pkg, url, options):
    ret = []

    if CONFIG["scan-dir"]:
        for bu in SCANDIR_BLACKLIST_URLS:
            if re.match(bu, url):
                output.einfo("%s is blacklisted by rule %s" % (url, bu))
                return []

        resolved_url = helpers.parse_mirror(url)
        if not resolved_url:
            return []

        cp, ver, rev = portage.pkgsplit(pkg.cpv)

        # 'Hack' for _beta/_rc versions where _ is used instead of -
        if ver not in resolved_url:
            newver = helpers.version_change_end_sep(ver)
            if newver and newver in resolved_url:
                output.einfo(
                    "Version: using %s instead of %s" % (newver, ver)
                )
                ver = newver

        template = helpers.template_from_url(resolved_url, ver)
        if '${' not in template:
            output.einfo(
                "URL doesn't seem to depend on version: %s not found in %s" %
                (ver, resolved_url)
            )
            return []
        else:
            output.einfo("Scanning: %s" % template)

        steps = helpers.generate_scan_paths(template)
        ret = scan_directory_recursive(cp, ver, rev, "", steps, url, options)

    if not ret:
        ret = brute_force(pkg, url)

    return ret


def brute_force(pkg, url):
    if CONFIG["brute-force"] == 0:
        return []

    cp, ver, rev = portage.pkgsplit(pkg.cpv)

    url = helpers.parse_mirror(url)
    if not url:
        return []

    for bp in BRUTEFORCE_BLACKLIST_PACKAGES:
        if re.match(bp, cp):
            output.einfo("%s is blacklisted by rule %s" % (cp, bp))
            return []

    for bp in BRUTEFORCE_BLACKLIST_URLS:
        if re.match(bp, url):
            output.einfo("%s is blacklisted by rule %s" % (cp, bp))
            return []

    output.einfo("Generating version from " + ver)

    components = helpers.split_version(ver)
    versions = helpers.gen_versions(components, CONFIG["brute-force"])

    # Keep only candidates newer than the current version (filtering a
    # copy avoids mutating the list while iterating over it)
    versions = [v for v in versions
                if helpers.vercmp(cp, ver, helpers.join_version(v)) < 0]

    if not versions:
        output.einfo("Can't generate new versions from " + ver)
        return []

    template = helpers.template_from_url(url, ver)

    if '${PV}' not in template:
        output.einfo(
            "URL doesn't seem to depend on full version: %s not found in %s" %
            (ver, url))
        return []
    else:
        output.einfo("Brute forcing: %s" % template)

    result = []

    i = 0
    done = []

    while i < len(versions):
        components = versions[i]
        i += 1

        # `done` stores tuples, so compare tuples
        if tuple(components) in done:
            continue
        done.append(tuple(components))

        version = helpers.join_version(components)

        if helpers.version_filtered(cp, ver, version):
            continue

        try_url = helpers.url_from_template(template, version)
        infos = helpers.tryurl(try_url, template)

        if not infos:
            continue

        confidence = confidence_score(try_url, url,
                                      minimum=BRUTEFORCE_CONFIDENCE)
        result.append([try_url, version, BRUTEFORCE_HANDLER_NAME, confidence])

        if len(result) > CONFIG['brute-force-false-watermark']:
            output.einfo(
                "Broken server detected! Skipping brute force."
            )
            return []

        if CONFIG["brute-force-recursive"]:
            for v in helpers.gen_versions(list(components),
                                          CONFIG["brute-force"]):
                if v not in versions and tuple(v) not in done:
                    versions.append(v)

        if CONFIG["oneshot"]:
            break

    return result


def can_handle(pkg, url):
    return True
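
For intuition about confidence_score: with hypothetical URLs on the same host and at the same depth, stripping digits makes the two paths identical, the difflib ratio is 1.0, and the score tops out at minimum * 2 (45 -> 90). A different host falls straight back to the minimum. Illustrative only:

# illustrative only; URLs are made up
from euscan.handlers.generic import confidence_score

found = "http://example.org/pub/foo-1.3.tar.gz"
orig = "http://example.org/pub/foo-1.2.tar.gz"
assert confidence_score(found, orig) == 90

assert confidence_score("http://mirror.example.net/foo-1.3.tar.gz", orig) == 45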

src/euscan/handlers/github.py Normal file

@@ -0,0 +1,59 @@
import json
import urllib.request, urllib.error, urllib.parse
import re

import portage

from euscan import helpers, output, mangling

HANDLER_NAME = "github"
CONFIDENCE = 100
PRIORITY = 90


def can_handle(pkg, url=None):
    return url and url.startswith('mirror://github/')


def guess_package(cp, url):
    match = re.search(r'^mirror://github/(.*?)/(.*?)/(.*)$', url)
    assert match
    return (match.group(1), match.group(2), match.group(3))


def scan_url(pkg, url, options):
    'http://developer.github.com/v3/repos/downloads/'

    user, project, filename = guess_package(pkg.cpv, url)

    # find out where the version is expected to be found
    cp, ver, rev = portage.pkgsplit(pkg.cpv)
    if ver not in filename:
        return []  # callers expect a list of results

    # now create a filename-matching regexp
    # XXX: supposedly replace first with (?P<foo>...)
    #      and remaining ones with (?P=foo)
    fnre = re.compile('^%s$' %
                      re.escape(filename).replace(re.escape(ver), '(.*?)'))

    output.einfo("Using github API for: project=%s user=%s filename=%s" %
                 (project, user, filename))

    dlreq = urllib.request.urlopen(
        'https://api.github.com/repos/%s/%s/downloads' % (user, project))
    dls = json.load(dlreq)

    ret = []
    for dl in dls:
        m = fnre.match(dl['name'])
        if m:
            pv = mangling.mangle_version(m.group(1), options)
            if helpers.version_filtered(cp, ver, pv):
                continue

            url = mangling.mangle_url(dl['html_url'], options)
            ret.append((url, pv, HANDLER_NAME, CONFIDENCE))
    return ret

src/euscan/handlers/gnome.py Normal file

@@ -0,0 +1,84 @@
# -*- coding: utf-8 -*-

import re
import urllib.request, urllib.error, urllib.parse

try:
    import simplejson as json
except ImportError:
    import json

import portage

from euscan import mangling, helpers, output

HANDLER_NAME = "gnome"
CONFIDENCE = 100
PRIORITY = 90

GNOME_URL_SOURCE = 'http://ftp.gnome.org/pub/GNOME/sources'


def can_handle(_pkg, url=None):
    return url and url.startswith('mirror://gnome/')


def guess_package(cp, url):
    match = re.search('mirror://gnome/sources/([^/]+)/.*', url)
    if match:
        return match.group(1)

    _cat, pkg = cp.split("/")
    return pkg


def scan_url(pkg, url, options):
    'http://ftp.gnome.org/pub/GNOME/sources/'
    package = {
        'data': guess_package(pkg.cpv, url),
        'type': 'gnome',
    }
    return scan_pkg(pkg, package)


def scan_pkg(pkg, options):
    package = options['data']

    output.einfo("Using Gnome json cache: " + package)

    fp = urllib.request.urlopen('/'.join([GNOME_URL_SOURCE, package,
                                          'cache.json']))
    content = fp.read()
    fp.close()

    # json.loads() accepts bytes; the old encoding= keyword is gone in py3
    cache = json.loads(content)

    if cache[0] != 4:
        output.eerror('Unknown cache format detected')
        return []

    versions = cache[2][package]
    if not versions:
        return []

    versions.reverse()

    cp, ver, _rev = portage.pkgsplit(pkg.cpv)

    ret = []
    for up_pv in versions:
        pv = mangling.mangle_version(up_pv, options)
        if helpers.version_filtered(cp, ver, pv):
            continue
        up_files = cache[1][package][up_pv]
        for tarball_comp in ('tar.xz', 'tar.bz2', 'tar.gz'):
            if tarball_comp in up_files:
                url = '/'.join([GNOME_URL_SOURCE, package,
                                up_files[tarball_comp]])
                break
        else:
            output.ewarn('No tarball for release %s' % up_pv)
            continue  # don't re-append the previous release's url
        ret.append((url, pv, HANDLER_NAME, CONFIDENCE))

    return ret

src/euscan/handlers/google-code.py Normal file

@@ -0,0 +1,43 @@
import re

import portage

from euscan import output
from euscan.helpers import regex_from_template
from euscan.handlers.url import process_scan as url_scan

HANDLER_NAME = "google-code"
CONFIDENCE = 90
PRIORITY = 90

package_name_regex = r"http://(.+).googlecode.com/files/.+"


def can_handle(pkg, url=None):
    if not url:
        return False

    cp, ver, rev = portage.pkgsplit(pkg.cpv)
    if ver not in url:
        return False

    return re.match(package_name_regex, url)


def scan_url(pkg, url, options):
    output.einfo("Using Google Code handler")

    cp, ver, rev = portage.pkgsplit(pkg.cpv)

    package_name = re.match(package_name_regex, url).group(1)
    base_url = "http://code.google.com/p/%s/downloads/list" % package_name

    file_pattern = regex_from_template(
        url.split("/")[-1].replace(ver, "${PV}")
    )

    result = url_scan(pkg, base_url, file_pattern)

    ret = []
    for url, pv, _, _ in result:
        ret.append((url, pv, HANDLER_NAME, CONFIDENCE))
    return ret

src/euscan/handlers/kde.py Normal file

@@ -0,0 +1,38 @@
from euscan.handlers import generic

PRIORITY = 90

HANDLER_NAME = "kde"


def can_handle(pkg, url):
    return url and url.startswith('mirror://kde/')


def clean_results(results):
    ret = []

    for path, version, _, confidence in results:
        if version == '5SUMS':  # skip artifacts (e.g. from MD5SUMS files)
            continue
        ret.append((path, version, HANDLER_NAME, confidence))

    return ret


def scan_url(pkg, url, options):
    # delegate to the generic handler; also rescan the stable branch
    # when the SRC_URI points at unstable
    results = generic.scan_url(pkg, url, options)

    if url.startswith('mirror://kde/unstable/'):
        url = url.replace('mirror://kde/unstable/', 'mirror://kde/stable/')
        results += generic.scan_url(pkg, url, options)

    if not results:  # if nothing was found go brute forcing
        results = generic.brute_force(pkg, url)

        if url.startswith('mirror://kde/unstable/'):
            url = url.replace('mirror://kde/unstable/',
                              'mirror://kde/stable/')
            results += generic.brute_force(pkg, url)

    return clean_results(results)

src/euscan/handlers/pear.py Normal file

@@ -0,0 +1,12 @@
from euscan.handlers import php

HANDLER_NAME = "pear"
CONFIDENCE = 100
PRIORITY = 90


def can_handle(pkg, url=None):
    return url and url.startswith('http://%s.php.net/get/' % HANDLER_NAME)

scan_url = php.scan_url
scan_pkg = php.scan_pkg

src/euscan/handlers/pecl.py Normal file

@@ -0,0 +1,11 @@
from euscan.handlers import php

HANDLER_NAME = "pecl"
CONFIDENCE = 100
PRIORITY = 90


def can_handle(pkg, url=None):
    return url and url.startswith('http://%s.php.net/get/' % HANDLER_NAME)

scan_url = php.scan_url
scan_pkg = php.scan_pkg

src/euscan/handlers/php.py Normal file

@@ -0,0 +1,69 @@
import re
import portage
import urllib.request, urllib.error, urllib.parse
import xml.dom.minidom

from euscan import helpers, output, mangling

HANDLER_NAME = "php"
CONFIDENCE = 100
PRIORITY = 90


def can_handle(pkg, url=None):
    return False


def guess_package_and_channel(cp, url):
    match = re.search(r'http://(.*)\.php\.net/get/(.*)-(.*)\.tgz', url)

    if match:
        host = match.group(1)
        pkg = match.group(2)
    else:
        # pear/pecl urls always match above; keep a defined fallback anyway
        host = None
        cat, pkg = cp.split("/")

    return pkg, host


def scan_url(pkg, url, options):
    package, channel = guess_package_and_channel(pkg.cp, url)
    return scan_pkg(pkg, {'type': channel, 'data': package})


def scan_pkg(pkg, options):
    cp, ver, rev = pkg.cp, pkg.version, pkg.revision

    package = options['data']
    channel = options['type']

    url = 'http://%s.php.net/rest/r/%s/allreleases.xml' % (channel,
                                                           package.lower())

    output.einfo("Using: " + url)

    try:
        fp = helpers.urlopen(url)
    except urllib.error.URLError:
        return []
    except IOError:
        return []

    if not fp:
        return []

    data = fp.read()

    dom = xml.dom.minidom.parseString(data)
    nodes = dom.getElementsByTagName("v")

    ret = []
    for node in nodes:
        up_pv = node.childNodes[0].data
        pv = mangling.mangle_version(up_pv, options)
        if helpers.version_filtered(cp, ver, pv):
            continue

        url = 'http://%s.php.net/get/%s-%s.tgz' % (channel, package, up_pv)
        url = mangling.mangle_url(url, options)

        ret.append((url, pv, HANDLER_NAME, CONFIDENCE))

    return ret
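
A quick illustrative check of the URL decomposition above (hypothetical pear package; assumes the module imports as euscan.handlers.php):

# illustrative only
from euscan.handlers.php import guess_package_and_channel

pkg_name, channel = guess_package_and_channel(
    'dev-php/PEAR-Mail', 'http://pear.php.net/get/Mail-1.2.0.tgz')
assert (pkg_name, channel) == ('Mail', 'pear')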

src/euscan/handlers/pypi.py Normal file

@@ -0,0 +1,58 @@
import xmlrpc.client
import re

import portage

from euscan import mangling, helpers, output

HANDLER_NAME = "pypi"
CONFIDENCE = 100
PRIORITY = 90


def can_handle(pkg, url=None):
    return url and url.startswith('mirror://pypi/')


def guess_package(cp, url):
    match = re.search(r'mirror://pypi/\w+/(.*)/.*', url)
    if match:
        return match.group(1)

    cat, pkg = cp.split("/")
    return pkg


def scan_url(pkg, url, options):
    'http://wiki.python.org/moin/PyPiXmlRpc'

    package = guess_package(pkg.cpv, url)
    return scan_pkg(pkg, {'data': package})


def scan_pkg(pkg, options):
    package = options['data']

    output.einfo("Using PyPi XMLRPC: " + package)

    client = xmlrpc.client.ServerProxy('https://pypi.python.org/pypi')
    versions = client.package_releases(package)

    if not versions:
        return versions

    versions.reverse()

    cp, ver, rev = portage.pkgsplit(pkg.cpv)

    ret = []
    for up_pv in versions:
        pv = mangling.mangle_version(up_pv, options)
        if helpers.version_filtered(cp, ver, pv):
            continue
        urls = client.release_urls(package, up_pv)
        urls = " ".join([mangling.mangle_url(infos['url'], options)
                         for infos in urls])
        ret.append((urls, pv, HANDLER_NAME, CONFIDENCE))

    return ret

src/euscan/handlers/rubygems.py Normal file

@@ -0,0 +1,75 @@
import re
import portage
import json
import urllib.request, urllib.error, urllib.parse

from euscan import helpers, output, mangling

HANDLER_NAME = "rubygems"
CONFIDENCE = 100
PRIORITY = 90


def can_handle(pkg, url=None):
    return url and url.startswith('mirror://rubygems/')


def guess_gem(cpv, url):
    match = re.search(r'mirror://rubygems/(.*)\.gem', url)
    if match:
        cpv = 'fake/%s' % match.group(1)

    ret = portage.pkgsplit(cpv)
    if not ret:
        return None

    cp, ver, rev = ret
    cat, pkg = cp.split("/")

    return pkg


def scan_url(pkg, url, options):
    'http://guides.rubygems.org/rubygems-org-api/#gemversion'

    gem = guess_gem(pkg.cpv, url)
    if not gem:
        output.eerror("Can't guess gem name using %s and %s" %
                      (pkg.cpv, url))
        return []

    output.einfo("Using RubyGem API: %s" % gem)

    return scan_pkg(pkg, {'data': gem})


def scan_pkg(pkg, options):
    gem = options['data']
    url = 'http://rubygems.org/api/v1/versions/%s.json' % gem

    try:
        fp = helpers.urlopen(url)
    except urllib.error.URLError:
        return []
    except IOError:
        return []

    if not fp:
        return []

    data = fp.read()
    versions = json.loads(data)

    cp, ver, rev = portage.pkgsplit(pkg.cpv)

    ret = []
    for version in versions:
        up_pv = version['number']
        pv = mangling.mangle_version(up_pv, options)
        if helpers.version_filtered(cp, ver, pv):
            continue
        url = 'http://rubygems.org/gems/%s-%s.gem' % (gem, up_pv)
        url = mangling.mangle_url(url, options)
        ret.append((url, pv, HANDLER_NAME, CONFIDENCE))

    return ret

src/euscan/handlers/sourceforge.py Normal file

@@ -0,0 +1,45 @@
import re

import portage

from euscan.helpers import regex_from_template
from euscan.handlers.url import process_scan as url_scan
from euscan import output

HANDLER_NAME = "sourceforge"
CONFIDENCE = 90
PRIORITY = 90


def can_handle(pkg, url=None):
    if not url:
        return False

    cp, ver, rev = portage.pkgsplit(pkg.cpv)
    if ver not in url:
        return False

    return "mirror://sourceforge/" in url


def scan_url(pkg, url, options):
    output.einfo("Using SourceForge handler")

    cp, ver, rev = portage.pkgsplit(pkg.cpv)

    project, filename = re.search(
        r"mirror://sourceforge/([^/]+)/(?:.*/)?([^/]+)",
        url
    ).groups()

    base_url = "http://qa.debian.org/watch/sf.php/%s" % project
    file_pattern = regex_from_template(
        filename.replace(ver, "${PV}")
    )

    result = url_scan(pkg, base_url, file_pattern)

    ret = []
    for url, pv, _, _ in result:
        ret.append((url, pv, HANDLER_NAME, CONFIDENCE))
    return ret

src/euscan/handlers/url.py Normal file

@@ -0,0 +1,104 @@
import re
import urllib.request, urllib.error, urllib.parse

from euscan.handlers import generic
from euscan import output, helpers

PRIORITY = 100

HANDLER_NAME = "url"
CONFIDENCE = 100.0

is_pattern = r"\([^\/]+\)"


def can_handle(*args):
    return False


def handle_directory_patterns(base, file_pattern):
    r"""
    Directory pattern matching
    e.g.: base: ftp://ftp.nessus.org/pub/nessus/nessus-([\d\.]+)/src/
          file_pattern: nessus-core-([\d\.]+)\.tar\.gz
    """
    splitted = base.split("/")
    i = 0
    basedir = []
    for elem in splitted:
        if re.search(is_pattern, elem):
            break
        basedir.append(elem)
        i += 1
    basedir = "/".join(basedir)
    directory_pattern = splitted[i]
    final = "/".join(splitted[i + 1:])

    try:
        fp = helpers.urlopen(basedir)
    except urllib.error.URLError:
        return []
    except IOError:
        return []

    if not fp:
        return []

    data = fp.read()

    if basedir.startswith("ftp://"):
        scan_data = generic.scan_ftp(data, basedir, directory_pattern)
    else:
        scan_data = generic.scan_html(data, basedir, directory_pattern)

    return [("/".join((basedir, path, final)), file_pattern)
            for _, path in scan_data]


def read_options(options):
    try:
        base, file_pattern = options['data'].split(" ")[:2]
    except ValueError:
        base, file_pattern = options['data'], None

    # the file pattern can be in the base url
    pattern_regex = r"/([^/]*\([^/]*\)[^/]*)$"
    match = re.search(pattern_regex, base)
    if match:
        file_pattern = match.group(1)
        base = base.replace(file_pattern, "")

    # handle sf.net specially
    base = base.replace(
        "http://sf.net/", "http://qa.debian.org/watch/sf.php/"
    )

    return base, file_pattern


def process_scan(pkg, base, file_pattern, options=None):
    if options is None:
        options = {}

    cp, ver, rev = pkg.cp, pkg.version, pkg.revision

    results = []
    if not re.search(is_pattern, base):
        steps = [(base, file_pattern)]
        results = generic.scan_directory_recursive(
            cp, ver, rev, "", steps, base, options
        )
    else:
        for step in handle_directory_patterns(base, file_pattern):
            results += generic.scan_directory_recursive(
                cp, ver, rev, "", [step], base, options
            )

    return results


def scan_pkg(pkg, options):
    output.einfo("Using watch data")
    base, file_pattern = read_options(options)
    return process_scan(pkg, base, file_pattern, options)