Compare commits


No commits in common. "master" and "1.0.1_alpha1" have entirely different histories.

27 changed files with 418 additions and 298 deletions

View File

@ -1,15 +1,15 @@
repos:
- repo: https://github.com/psf/black
rev: 24.2.0
rev: 23.11.0
hooks:
- id: black
- repo: https://github.com/PyCQA/isort
rev: 5.13.2
rev: 5.12.0
hooks:
- id: isort
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.3.0
rev: v0.1.4
hooks:
- id: ruff

View File

@ -1,8 +1,8 @@
include AUTHORS
include CHANGELOG.rst
include LICENSE
include README.rst
include TODO
include pyproject.toml
include setup.py
recursive-include bin *
recursive-include src *.py
recursive-include man *
recursive-include pym *.py

TODO
View File

@ -42,7 +42,61 @@ euscan
- remote-id type deb repository:
-- find out how to get download url (not sure it's possible)
### bugs or unwanted behavior
### remote-id
- Propose new remote-id: deb
e.g.: <remote-id type="deb">
http://mysite.com/deb/dists/stable/main/binary-i386/Packages
</remote-id>
- Propose new remote-id: freecode
e.g.: <remote-id type="freecode">projectname</remote-id>
- Parsing docs and accepting 404s
-- net-analyzer/sensu
euscanwww
---------
### misc
- Really fix mails: better formatting
- Always keep in the db all found versions (when using an API only?). But don't display them if older than the current packaged version, except maybe in the "upstream_version" column.
### packages
- Ignore alpha/beta if current is not alpha/beta: per-package setting using metadata.xml?
- ~arch / stable support: see "models: keywords"
- stabilization candidates: check stabilization rules, and see how this can be automated
- set upstream version by hand: will be done after uscan compatibility
### logs
- Move log models into djeuscanhistory?
### models
- Repository (added or not, from layman + repositories.xml)
- Arches and Keyword
- Metadata, herds, maintainers and homepage are per-version, not per-package. Store them in Version instead.
### djportage (LOW-PRIORITY)
- Create standalone application to scan and represent portage trees in models using work done in:
-- euscan
-- p.g.o: https://github.com/bacher09/gentoo-packages
-- gentoostats: https://github.com/gg7/gentoostats_server/blob/master/gentoostats/stats/models.py
The application should be easy to use, and we should be able to launch the scan process in a celery worker using "logging" for logs.
The application should also be usable for p.g.o and gentoostats later...
The scan process should be faster than the one using euscan. gentoo-packages has some interesting ideas for that (keeping metadata and ebuild hashes, etc.); see the model sketch below.
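A minimal sketch of what such models could look like (hypothetical names, assuming Django; the ebuild-hash caching is the idea borrowed from gentoo-packages):

    from django.db import models

    class PortageTree(models.Model):
        # one row per scanned tree/overlay checkout
        name = models.CharField(max_length=128, unique=True)
        location = models.CharField(max_length=256)

    class Ebuild(models.Model):
        tree = models.ForeignKey(PortageTree, on_delete=models.CASCADE)
        cpv = models.CharField(max_length=256)
        # hash of the ebuild file, so unchanged ebuilds can be skipped on rescan
        sha1 = models.CharField(max_length=40)

        class Meta:
            unique_together = ("tree", "cpv")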
### API (LOW-PRIORITY)
- Move to tastypie
### Overlays
/!\ blocked by "djportage" application
Currently, overlay handling in euscan sucks (it's simply a column, nothing more, and overlays are mostly handled by hand via layman). I'd like to be able to add and remove overlays (overlay name + svn/git/cvs/rsync URL). Using a new model and the layman API should make this task easy.
/!\ could be done earlier using a simple "overlay" table... but how to pre-compute everything per-overlay?
Once done, a great feature would be the ability to select the displayed overlays on euscan (as a global setting: for all pages). This is actually a lot of work, so you should do it in a separate branch.
Note that this is more complicated than it seems, because a lot of things are precalculated (number of versions for a herd, number of outdated versions, etc.), and selecting overlays would break all this. So you'll really need to experiment with solutions for this one; a starting point is sketched below.
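As a starting point, the simple "overlay" table mentioned above could be (hypothetical sketch, assuming Django):

    from django.db import models

    class Overlay(models.Model):
        # name + vcs type + URL is what layman needs to add or remove an overlay
        name = models.CharField(max_length=128, unique=True)
        vcs = models.CharField(
            max_length=8,
            choices=[(v, v) for v in ("svn", "git", "cvs", "rsync")],
        )
        url = models.CharField(max_length=256)
        enabled = models.BooleanField(default=True)

This doesn't solve the hard part: the precalculated counters would still have to be recomputed (or stored per-overlay) whenever the set of displayed overlays changes.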

View File

@ -199,7 +199,7 @@ def print_usage(_error=None, help=None):
# turquoise("`man %s`" % __productname__), file=out)
class ParseArgsError(Exception):
class ParseArgsException(Exception):
"""For parseArgs() -> main() communications."""
def __init__(self, value):
@ -220,9 +220,9 @@ def parse_args():
return_code = True
for o, a in opts:
if o in ("-h", "--help"):
raise ParseArgsError("help")
raise ParseArgsException("help")
elif o in ("-V", "--version"):
raise ParseArgsError("version")
raise ParseArgsException("version")
elif o in ("-C", "--nocolor"):
CONFIG["nocolor"] = True
pp.output.nocolor()
@ -283,14 +283,14 @@ def parse_args():
# apply getopts to command line, show partial help on failure
try:
opts, args = getopt.getopt(sys.argv[1:], short_opts, long_opts)
except getopt.GetoptError as exc:
raise ParseArgsError(opts_mode + "-options") from exc
except getopt.GetoptError:
raise ParseArgsException(opts_mode + "-options")
# set options accordingly
option_switch(opts)
if len(args) < 1:
raise ParseArgsError("packages")
raise ParseArgsException("packages")
return args
@ -306,7 +306,7 @@ def main():
# parse command line options and actions
try:
queries = parse_args()
except ParseArgsError as e:
except ParseArgsException as e:
if e.value == "help":
print_usage(help="all")
exit_helper(0)
@ -362,7 +362,7 @@ def main():
exit_helper(1)
except GentoolkitException as err:
output.eerror(f"{query}: {str(err)}")
output.eerror("%s: %s" % (query, str(err)))
exit_helper(1)
except Exception as err:
@ -372,7 +372,7 @@ def main():
traceback.print_exc(file=sys.stderr)
print("-" * 60)
output.eerror(f"{query}: {str(err)}")
output.eerror("%s: %s" % (query, str(err)))
exit_helper(1)
if not ret and not CONFIG["quiet"]:

View File

@ -30,10 +30,10 @@ def guess_indent_values(before):
def guess_for_tags(tags):
for tag in tags:
for i in [0, 2, 4, 6, 8, 12, 16]:
if f"\n{' ' * i}<{tag}" in before:
if "\n%s<%s" % (" " * i, tag) in before:
return i, False
for i in [0, 1, 2]:
if f"\n{'\t' * i}<{tag}" in before:
if "\n%s<%s" % ("\t" * i, tag) in before:
return i, True
return -1, False
@ -119,11 +119,11 @@ def get_deb_url(name):
content = opened.read()
for link in BeautifulSoup(content, parseOnlyThese=SoupStrainer("a")):
if re.match(r"[^\s]+\.debian\.tar\.(?:gz|bz2)", link.text):
if re.match("[^\s]+\.debian\.tar\.(?:gz|bz2)", link.text):
deb_url = link["href"]
deb_type = "source"
break
if re.match(r"[^\s]+\.diff\.gz", link.text):
if re.match("[^\s]+\.diff\.gz", link.text):
deb_url = link["href"]
deb_type = "diff"
break
@ -157,7 +157,7 @@ def patch_metadata(package, watch_data, diff=False):
for watch_line in watch_data.split("\n"): # there can be multiple lines
watch_line = " ".join(watch_line.split()) # remove extra spaces and \n
version_parse = re.match(r"version=(\d+?)", watch_line)
version_parse = re.match("version=(\d+?)", watch_line)
if version_parse:
version = version_parse.group(1)
continue
@ -180,7 +180,7 @@ def patch_metadata(package, watch_data, diff=False):
if opt_name in valid:
if opt_name == "uversionmangle":
opt_name = "versionmangle"
cleaned_opts.append(f'{opt_name}="{opt_value}"')
cleaned_opts.append('%s="%s"' % (opt_name, opt_value))
opts = " ".join(cleaned_opts)
# clean url from useless stuff. Just keep <base> [<filepattern>]
@ -188,9 +188,14 @@ def patch_metadata(package, watch_data, diff=False):
url = " ".join([x for x in url_search.groups() if x is not None])
if opts:
watch_tag = f'{indent}<watch version="{version}" {opts}>{url}</watch>'
watch_tag = '%s<watch version="%s" %s>%s</watch>' % (
indent,
version,
opts,
url,
)
else:
watch_tag = f'{indent}<watch version="{version}">{url}</watch>'
watch_tag = '%s<watch version="%s">%s</watch>' % (indent, version, url)
watch_tags.append(watch_tag)
watch_tags = "\n".join(watch_tags)
@ -198,7 +203,11 @@ def patch_metadata(package, watch_data, diff=False):
if "<upstream>" in data:
data = data.replace("<upstream>", "<upstream>\n%s" % watch_tags, 1)
else:
rep = f"{rindent}<upstream>\n{watch_tags}\n{rindent}</upstream>\n</pkgmetadata>"
rep = "%s<upstream>\n%s\n%s</upstream>\n</pkgmetadata>" % (
rindent,
watch_tags,
rindent,
)
data = data.replace("</pkgmetadata>", rep, 1)
if not diff:

View File

@ -16,14 +16,12 @@ description = "Ebuild upstream scan utility."
license = {text = "GPL-2.0"}
dependencies = [
"portage",
"beautifulsoup4>=4.8.2",
"packaging"
"beautifulsoup4>=4.8.2"
]
dynamic = ["version"]
[project.urls]
homepage = "https://gitlab.com/src_prepare/euscan-ng"
changelog = "https://gitlab.com/src_prepare/euscan-ng/-/blob/master/CHANGELOG.rst"
[tool.setuptools]
script-files = ["bin/euscan"]
@ -41,6 +39,3 @@ src_paths = ["bin/euscan", "src/euscan/"]
[tool.ruff]
extend-include = ["bin/euscan", "bin/euscan_patch_metadata"]
[tool.ruff.lint]
extend-select = ["B", "E", "N", "UP", "W"]

View File

@ -51,13 +51,8 @@ BLACKLIST_PACKAGES = [
]
SCANDIR_BLACKLIST_URLS = [
"https://rubygems.org/(.*)", # Not browsable
"mirror://rubygems/(.*)", # Not browsable
"mirror://gentoo/(.*)", # Directory too big
"https://dev.gentoo.org/(.*)", # There shouldn't be releases here
# Waste of time to go through
"https://crates.io/(.*)",
"https://api.nuget.org/(.*)",
"https://myget.org/(.*)",
]
BRUTEFORCE_BLACKLIST_PACKAGES = [
@ -79,13 +74,13 @@ BRUTEFORCE_BLACKLIST_URLS = [
ROBOTS_TXT_BLACKLIST_DOMAINS = [
"(.*)sourceforge(.*)",
"(.*)github.com",
r"(.*)qt\.nokia\.com(.*)",
r"(.*)chromium\.org(.*)",
r"(.*)nodejs\.org(.*)",
r"(.*)download\.mono-project\.com(.*)",
r"(.*)fedorahosted\.org(.*)",
r"(.*)download\.tuxfamily\.org(.*)",
r"(.*)festvox\.org(.*)",
"(.*)qt\.nokia\.com(.*)",
"(.*)chromium\.org(.*)",
"(.*)nodejs\.org(.*)",
"(.*)download\.mono-project\.com(.*)",
"(.*)fedorahosted\.org(.*)",
"(.*)download\.tuxfamily\.org(.*)",
"(.*)festvox\.org(.*)",
]
from euscan.out import EuscanOutput # noqa: E402

View File

@ -71,7 +71,7 @@ def package_from_ebuild(ebuild):
return False
ebuild_split = ebuild.split("/")
cpv = f"{ebuild_split[-3]}/{pf}"
cpv = "%s/%s" % (ebuild_split[-3], pf)
if not portage.catpkgsplit(cpv):
return False

View File

@ -13,7 +13,7 @@ from euscan import CONFIG, output
handlers = {"package": [], "url": [], "all": {}}
# autoimport all modules in this directory and append them to handlers list
for loader, module_name, _is_pkg in pkgutil.walk_packages(__path__):
for loader, module_name, is_pkg in pkgutil.walk_packages(__path__):
module = loader.find_spec(module_name).loader.load_module(module_name)
if not hasattr(module, "HANDLER_NAME"):
continue
@ -157,7 +157,7 @@ def scan_url(pkg, urls, options, on_progress=None):
else:
output.eerror("Can't find a suitable handler!")
except Exception as e:
output.ewarn(f"Handler failed: [{e.__class__.__name__}] {str(e)}")
output.ewarn("Handler failed: [%s] %s" % (e.__class__.__name__, str(e)))
if versions and CONFIG["oneshot"]:
break
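For context (not part of the diff): thanks to the autoimport loop above, adding a handler means dropping a module into this directory that defines the expected module-level interface. A minimal skeleton, with hypothetical names, modeled on the handlers shown later in this diff:

    HANDLER_NAME = "example"  # required: modules without HANDLER_NAME are skipped
    CONFIDENCE = 100
    PRIORITY = 90

    def can_handle(pkg, url=None):
        # decide whether this handler applies to the given package/URL
        return url is not None and url.startswith("https://example.org/")

    def scan_url(pkg, url, options):
        # return a list of (url, version, HANDLER_NAME, CONFIDENCE) tuples
        return []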

View File

@ -0,0 +1,59 @@
# Copyright 2011 Corentin Chary <corentin.chary@gmail.com>
# Copyright 2020-2023 src_prepare group
# Distributed under the terms of the GNU General Public License v2
import re
import urllib.error
import urllib.parse
import urllib.request
import portage
from euscan import output
from euscan.handlers.url import process_scan as url_scan
from euscan.helpers import regex_from_template
HANDLER_NAME = "berlios"
CONFIDENCE = 90
PRIORITY = 90
berlios_regex = r"mirror://berlios/([^/]+)/([^/]+)"
def can_handle(pkg, url=None):
if not url:
return False
cp, ver, rev = portage.pkgsplit(pkg.cpv)
if ver not in url:
return False
return re.search(berlios_regex, url)
def scan_url(pkg, url, options):
output.einfo("Using BerliOS handler")
cp, ver, rev = portage.pkgsplit(pkg.cpv)
project, filename = re.search(berlios_regex, url).groups()
project_page = "http://developer.berlios.de/projects/%s" % project
content = urllib.request.urlopen(project_page).read()
project_id = re.search(r"/project/filelist.php\?group_id=(\d+)", content).group(1)
base_url = (
"http://developer.berlios.de/project/filelist.php?group_id=%s" % project_id
)
file_pattern = regex_from_template(filename.replace(ver, "${PV}"))
result = url_scan(pkg, base_url, file_pattern)
ret = []
for found_url, pv, _, _ in result:
found_url = found_url.replace("prdownload", "download")
ret.append((found_url, pv, HANDLER_NAME, CONFIDENCE))
return ret

View File

@ -81,7 +81,7 @@ def mangle_version(up_pv):
pv = ".".join(groups)
if rc_part:
pv = f"{pv}_rc{rc_part}"
pv = "%s_rc%s" % (pv, rc_part)
return pv
@ -128,7 +128,7 @@ def scan_pkg(pkg, options):
fp = helpers.urlopen(url)
except urllib.error.URLError:
return []
except OSError:
except IOError:
return []
if not fp:
@ -157,7 +157,13 @@ def scan_pkg(pkg, options):
if helpers.version_filtered(cp, m_ver, m_pv, cpan_vercmp):
continue
url = f"mirror://cpan/authors/id/{version['cpanid'][0]}/{version['cpanid'][0:1]}/{version['cpanid']}/{version['archive']}"
url = "mirror://cpan/authors/id/%s/%s/%s/%s" % (
version["cpanid"][0],
version["cpanid"][0:1],
version["cpanid"],
version["archive"],
)
url = mangling.mangle_url(url, options)
ret.append((url, pv, HANDLER_NAME, CONFIDENCE))

View File

@ -0,0 +1,53 @@
# Copyright 2011 Corentin Chary <corentin.chary@gmail.com>
# Copyright 2020-2023 src_prepare group
# Distributed under the terms of the GNU General Public License v2
import re
import urllib.error
import urllib.parse
import urllib.request
import portage
from euscan import helpers, mangling, output
HANDLER_NAME = "freecode"
CONFIDENCE = 100
PRIORITY = 90
def can_handle(pkg, url=None):
return False
def scan_pkg(pkg, options):
cp, ver, rev = portage.pkgsplit(pkg.cpv)
package = options["data"].strip()
output.einfo("Using FreeCode handler: " + package)
fp = urllib.request.urlopen("http://freecode.com/projects/%s/releases" % package)
content = str(fp.read())
result = re.findall(
r'<a href="/projects/%s/releases/(\d+)">([^<]+)</a>' % package, content
)
ret = []
for release_id, up_pv in result:
pv = mangling.mangle_version(up_pv, options)
if helpers.version_filtered(cp, ver, pv):
continue
fp = urllib.request.urlopen(
"http://freecode.com/projects/%s/releases/%s" % (package, release_id)
)
content = str(fp.read())
download_page = re.findall(r'<a href="(/urls/[^"]+)"', content)[0]
fp = urllib.request.urlopen("http://freecode.com%s" % download_page)
content = str(fp.read())
url = re.findall(
r'In case it doesn\'t, click here: <a href="([^"]+)"', content
)[0]
ret.append((url, pv, HANDLER_NAME, CONFIDENCE))
return ret

View File

@ -8,11 +8,14 @@ import re
import urllib.error
import urllib.parse
import urllib.request
import warnings
from urllib.parse import urljoin, urlparse
try:
from BeautifulSoup import BeautifulSoup
except ImportError:
from bs4 import BeautifulSoup
import portage
from bs4 import BeautifulSoup, XMLParsedAsHTMLWarning
from euscan import (
BRUTEFORCE_BLACKLIST_PACKAGES,
@ -62,7 +65,6 @@ def confidence_score(found, original, minimum=CONFIDENCE):
def scan_html(data, url, pattern):
warnings.filterwarnings("ignore", category=XMLParsedAsHTMLWarning)
soup = BeautifulSoup(data, features="lxml")
results = []
@ -112,7 +114,7 @@ def scan_directory_recursive(cp, ver, rev, url, steps, orig_url, options):
fp = helpers.urlopen(url)
except urllib.error.URLError:
return []
except OSError:
except IOError:
return []
if not fp:
@ -122,7 +124,7 @@ def scan_directory_recursive(cp, ver, rev, url, steps, orig_url, options):
results = []
if re.search(rb"<\s*a\s+[^>]*href", data, re.I):
if re.search(b"<\s*a\s+[^>]*href", data, re.I):
results.extend(scan_html(data, url, pattern))
elif url.startswith("ftp://"):
results.extend(scan_ftp(data, url, pattern))
@ -154,7 +156,7 @@ def scan_url(pkg, url, options):
if CONFIG["scan-dir"]:
for bu in SCANDIR_BLACKLIST_URLS:
if re.match(bu, url):
output.einfo(f"{url} is blacklisted by rule {bu}")
output.einfo("%s is blacklisted by rule %s" % (url, bu))
return []
resolved_url = helpers.parse_mirror(url)
@ -167,15 +169,14 @@ def scan_url(pkg, url, options):
if ver not in resolved_url:
newver = helpers.version_change_end_sep(ver)
if newver and newver in resolved_url:
output.einfo(f"Version: using {newver} instead of {ver}")
output.einfo("Version: using %s instead of %s" % (newver, ver))
ver = newver
template = helpers.template_from_url(resolved_url, ver)
if "${" not in template:
output.einfo(
"Url doesn't seems to depend on version: {} not found in {}".format(
ver, resolved_url
)
"Url doesn't seems to depend on version: %s not found in %s"
% (ver, resolved_url)
)
return []
else:
@ -202,12 +203,12 @@ def brute_force(pkg, url):
for bp in BRUTEFORCE_BLACKLIST_PACKAGES:
if re.match(bp, cp):
output.einfo(f"{cp} is blacklisted by rule {bp}")
output.einfo("%s is blacklisted by rule %s" % (cp, bp))
return []
for bp in BRUTEFORCE_BLACKLIST_URLS:
if re.match(bp, url):
output.einfo(f"{cp} is blacklisted by rule {bp}")
output.einfo("%s is blacklisted by rule %s" % (cp, bp))
return []
output.einfo("Generating version from " + ver)
@ -228,7 +229,8 @@ def brute_force(pkg, url):
if "${PV}" not in template:
output.einfo(
f"Url doesn't seems to depend on full version: {ver} not found in {url}"
"Url doesn't seems to depend on full version: %s not found in %s"
% (ver, url)
)
return []
else:

View File

@ -1,70 +0,0 @@
# Copyright 2020-2024 src_prepare group
# Distributed under the terms of the GNU General Public License v2
import json
import re
import portage
from euscan import helpers, mangling, output
HANDLER_NAME = "gitea"
CONFIDENCE = 100
PRIORITY = 90
# Forgejo strives to be compatible with Gitea API
# https://forgejo.org/2024-02-forking-forward/
_gitea_instances = [
"codeberg.org",
"git.osgeo.org",
"gitea.com",
"gitea.ladish.org",
"gitea.osmocom.org",
"gitea.treehouse.systems",
]
gitea_patterns = [
re.compile(rf"https://(?P<domain>{domain})/(?P<repository>[^/]+/[^/]+)")
for domain in _gitea_instances
]
def can_handle(pkg, url=None):
return url and any([re.search(pattern, url) for pattern in gitea_patterns])
def scan_url(pkg, url, options):
"https://docs.gitea.com/api/1.20/#tag/repository/operation/repoListReleases"
match = [
re.search(pattern, url)
for pattern in gitea_patterns
if re.search(pattern, url) is not None
][0]
domain = match.group("domain")
repository = match.group("repository")
output.einfo(f"Using Gitea API in {domain}: {repository}")
request = helpers.urlopen(f"https://{domain}/api/v1/repos/{repository}/releases")
data = json.load(request)
versions = [release["tag_name"] for release in data]
cp, ver, rev = portage.pkgsplit(pkg.cpv)
ret = []
for up_pv in versions:
pv = mangling.mangle_version(up_pv, options)
if helpers.version_filtered(cp, ver, pv):
continue
urls = " ".join(
mangling.mangle_url(release["tarball_url"], options)
for release in data
if release["tag_name"] == up_pv
)
ret.append((urls, pv, HANDLER_NAME, CONFIDENCE))
return ret

View File

@ -0,0 +1,67 @@
# Copyright 2011 Corentin Chary <corentin.chary@gmail.com>
# Copyright 2020-2023 src_prepare group
# Distributed under the terms of the GNU General Public License v2
import json
import re
import urllib.error
import urllib.parse
import urllib.request
import portage
from euscan import helpers, mangling, output
HANDLER_NAME = "github"
CONFIDENCE = 100
PRIORITY = 90
def can_handle(pkg, url=None):
return url and url.startswith("mirror://github/")
def guess_package(cp, url):
match = re.search("^mirror://github/(.*?)/(.*?)/(.*)$", url)
assert match
return (match.group(1), match.group(2), match.group(3))
def scan_url(pkg, url, options):
"http://developer.github.com/v3/repos/downloads/"
user, project, filename = guess_package(pkg.cpv, url)
# find out where version is expected to be found
cp, ver, rev = portage.pkgsplit(pkg.cpv)
if ver not in filename:
return
# now create a filename-matching regexp
# XXX: supposedly replace first with (?P<foo>...)
# and remaining ones with (?P=foo)
fnre = re.compile("^%s$" % re.escape(filename).replace(re.escape(ver), "(.*?)"))
output.einfo(
"Using github API for: project=%s user=%s filename=%s"
% (project, user, filename)
)
dlreq = urllib.request.urlopen(
"https://api.github.com/repos/%s/%s/downloads" % (user, project)
)
dls = json.load(dlreq)
ret = []
for dl in dls:
m = fnre.match(dl["name"])
if m:
pv = mangling.mangle_version(m.group(1), options)
if helpers.version_filtered(cp, ver, pv):
continue
url = mangling.mangle_url(dl["html_url"], options)
ret.append((url, pv, HANDLER_NAME, CONFIDENCE))
return ret

View File

@ -1,82 +0,0 @@
# Copyright 2020-2024 src_prepare group
# Distributed under the terms of the GNU General Public License v2
import json
import re
import portage
from euscan import helpers, mangling, output
HANDLER_NAME = "gitlab"
CONFIDENCE = 100
PRIORITY = 90
_gitlab_instances = [
"gitlab.com",
"gitlab.freedesktop.org",
"invent.kde.org/",
"gitlab.gnome.org",
"gitlab.kitware.com",
"gitlab.xfce.org",
"code.videolan.org",
"gitlab.xiph.org",
]
gitlab_patterns = [
# Regular expression adapted from pkgcheck
# https://docs.gitlab.com/ee/user/reserved_names.html
re.compile(
rf"https://(?P<domain>{domain})/(?P<repository>((?!api/)\w[^/]*/)+(?!raw/)\w[^/]*)"
)
for domain in _gitlab_instances
]
def can_handle(pkg, url=None):
return url and any([re.search(pattern, url) for pattern in gitlab_patterns])
def scan_url(pkg, url, options):
"https://docs.gitlab.com/ee/api/releases/index.html"
match = [
re.search(pattern, url)
for pattern in gitlab_patterns
if re.search(pattern, url) is not None
][0]
domain = match.group("domain")
repository = match.group("repository")
output.einfo(f"Using GitLab REST API in {domain}: {repository}")
request = helpers.urlopen(
f"https://{domain}/api/v4/projects/{repository.replace('/', '%2F')}/releases"
)
data = json.load(request)
versions = [release["tag_name"] for release in data]
cp, ver, rev = portage.pkgsplit(pkg.cpv)
ret = []
for up_pv in versions:
pv = mangling.mangle_version(up_pv, options)
if helpers.version_filtered(cp, ver, pv):
continue
urls = " ".join(
[
mangling.mangle_url(source["url"], options)
for source in [
release["assets"]["sources"]
for release in data
if release["tag_name"] == up_pv
][0]
# prefer tar.bz2
if source["format"] == "tar.bz2"
]
)
ret.append((urls, pv, HANDLER_NAME, CONFIDENCE))
return ret

View File

@ -20,7 +20,7 @@ HANDLER_NAME = "gnome"
CONFIDENCE = 100
PRIORITY = 90
GNOME_URL_SOURCE = "https://download.gnome.org/sources"
GNOME_URL_SOURCE = "http://ftp.gnome.org/pub/GNOME/sources"
def can_handle(_pkg, url=None):
@ -38,7 +38,7 @@ def guess_package(cp, url):
def scan_url(pkg, url, options):
"https://download.gnome.org/sources/"
"http://ftp.gnome.org/pub/GNOME/sources/"
package = {
"data": guess_package(pkg.cpv, url),
"type": "gnome",
@ -55,7 +55,7 @@ def scan_pkg(pkg, options):
content = fp.read()
fp.close()
cache = json.loads(content)
cache = json.loads(content, encoding="ascii")
if cache[0] != 4:
output.eerror("Unknow cache format detected")

View File

@ -0,0 +1,47 @@
# Copyright 2011 Corentin Chary <corentin.chary@gmail.com>
# Copyright 2020-2023 src_prepare group
# Distributed under the terms of the GNU General Public License v2
import re
import portage
from euscan import output
from euscan.handlers.url import process_scan as url_scan
from euscan.helpers import regex_from_template
HANDLER_NAME = "google-code"
CONFIDENCE = 90
PRIORITY = 90
package_name_regex = r"http://(.+).googlecode.com/files/.+"
def can_handle(pkg, url=None):
if not url:
return False
cp, ver, rev = portage.pkgsplit(pkg.cpv)
if ver not in url:
return False
return re.match(package_name_regex, url)
def scan_url(pkg, url, options):
output.einfo("Using Google Code handler")
cp, ver, rev = portage.pkgsplit(pkg.cpv)
package_name = re.match(package_name_regex, url).group(1)
base_url = "http://code.google.com/p/%s/downloads/list" % package_name
file_pattern = regex_from_template(url.split("/")[-1].replace(ver, "${PV}"))
result = url_scan(pkg, base_url, file_pattern)
ret = []
for url, pv, _, _ in result:
ret.append((url, pv, HANDLER_NAME, CONFIDENCE))
return ret

View File

@ -25,17 +25,17 @@ def clean_results(results):
def scan_url(pkg, url, options):
results = generic.scan_url(pkg, url, options)
results = generic.scan(pkg.cpv, url)
if url.startswith("mirror://kde/unstable/"):
url = url.replace("mirror://kde/unstable/", "mirror://kde/stable/")
results += generic.scan_url(pkg, url, options)
if generic.startswith("mirror://kde/unstable/"):
url = generic.replace("mirror://kde/unstable/", "mirror://kde/stable/")
results += generic.scan(pkg.cpv, url)
if not results: # if nothing was found go brute forcing
results = generic.brute_force(pkg.cpv, url)
if url.startswith("mirror://kde/unstable/"):
url = url.replace("mirror://kde/unstable/", "mirror://kde/stable/")
if generic.startswith("mirror://kde/unstable/"):
url = generic.replace("mirror://kde/unstable/", "mirror://kde/stable/")
results += generic.brute_force(pkg.cpv, url)
return clean_results(results)

View File

@ -20,7 +20,7 @@ def can_handle(pkg, url=None):
def guess_package_and_channel(cp, url):
match = re.search(r"http://(.*)\.php\.net/get/(.*)-(.*).tgz", url)
match = re.search("http://(.*)\.php\.net/get/(.*)-(.*).tgz", url)
if match:
host = match.group(1)
@ -42,7 +42,7 @@ def scan_pkg(pkg, options):
package = options["data"]
channel = options["type"]
url = f"http://{channel}.php.net/rest/r/{package.lower()}/allreleases.xml"
url = "http://%s.php.net/rest/r/%s/allreleases.xml" % (channel, package.lower())
output.einfo("Using: " + url)
@ -50,7 +50,7 @@ def scan_pkg(pkg, options):
fp = helpers.urlopen(url)
except urllib.error.URLError:
return []
except OSError:
except IOError:
return []
if not fp:
@ -69,7 +69,7 @@ def scan_pkg(pkg, options):
if helpers.version_filtered(cp, ver, pv):
continue
url = f"http://{channel}.php.net/get/{package}-{up_pv}.tgz"
url = "http://%s.php.net/get/%s-%s.tgz" % (channel, package, up_pv)
url = mangling.mangle_url(url, options)
ret.append((url, pv, HANDLER_NAME, CONFIDENCE))

View File

@ -1,13 +1,11 @@
# Copyright 2011 Corentin Chary <corentin.chary@gmail.com>
# Copyright 2020-2024 src_prepare group
# Copyright 2020-2023 src_prepare group
# Distributed under the terms of the GNU General Public License v2
import json
import re
import urllib.error
import xmlrpc.client
import portage
from packaging.version import parse
from euscan import helpers, mangling, output
@ -17,11 +15,11 @@ PRIORITY = 90
def can_handle(pkg, url=None):
return url and url.startswith("https://files.pythonhosted.org/packages/source/p/")
return url and url.startswith("mirror://pypi/")
def guess_package(cp, url):
match = re.search(r"https://files.pythonhosted.org/packages/source/p/(.*)/.*", url)
match = re.search("mirror://pypi/\w+/(.*)/.*", url)
if match:
return match.group(1)
@ -31,7 +29,7 @@ def guess_package(cp, url):
def scan_url(pkg, url, options):
"https://peps.python.org/pep-0691/"
"http://wiki.python.org/moin/PyPiXmlRpc"
package = guess_package(pkg.cpv, url)
return scan_pkg(pkg, {"data": package})
@ -40,23 +38,15 @@ def scan_url(pkg, url, options):
def scan_pkg(pkg, options):
package = options["data"]
output.einfo("Using PyPi JSON API: " + package)
output.einfo("Using PyPi XMLRPC: " + package)
try:
fp = helpers.urlopen(f"https://pypi.org/pypi/{package}/json/")
except urllib.error.URLError:
return []
except OSError:
return []
client = xmlrpc.client.ServerProxy("https://pypi.python.org/pypi")
versions = client.package_releases(package)
if not fp:
return []
if not versions:
return versions
data = json.loads(fp.read())
versions = list(data["releases"].keys())
versions.sort(key=parse, reverse=True)
versions.reverse()
cp, ver, rev = portage.pkgsplit(pkg.cpv)
@ -65,12 +55,7 @@ def scan_pkg(pkg, options):
pv = mangling.mangle_version(up_pv, options)
if helpers.version_filtered(cp, ver, pv):
continue
urls = " ".join(
[
mangling.mangle_url(file["url"], options)
for file in data["releases"][up_pv]
if file["packagetype"] == "sdist"
]
)
urls = client.release_urls(package, up_pv)
urls = " ".join([mangling.mangle_url(infos["url"], options) for infos in urls])
ret.append((urls, pv, HANDLER_NAME, CONFIDENCE))
return ret

View File

@ -1,5 +1,5 @@
# Copyright 2011 Corentin Chary <corentin.chary@gmail.com>
# Copyright 2020-2024 src_prepare group
# Copyright 2020-2023 src_prepare group
# Distributed under the terms of the GNU General Public License v2
import json
@ -18,11 +18,11 @@ PRIORITY = 90
def can_handle(pkg, url=None):
return url and url.startswith("https://rubygems.org/")
return url and url.startswith("mirror://rubygems/")
def guess_gem(cpv, url):
match = re.search("https://rubygems.org/gems/(.*).gem", url)
match = re.search("mirror://rubygems/(.*).gem", url)
if match:
cpv = "fake/%s" % match.group(1)
@ -42,7 +42,7 @@ def scan_url(pkg, url, options):
gem = guess_gem(pkg.cpv, url)
if not gem:
output.eerror(f"Can't guess gem name using {pkg.cpv} and {url}")
output.eerror("Can't guess gem name using %s and %s" % (pkg.cpv, url))
return []
output.einfo("Using RubyGem API: %s" % gem)
@ -58,7 +58,7 @@ def scan_pkg(pkg, options):
fp = helpers.urlopen(url)
except urllib.error.URLError:
return []
except OSError:
except IOError:
return []
if not fp:
@ -75,7 +75,7 @@ def scan_pkg(pkg, options):
pv = mangling.mangle_version(up_pv, options)
if helpers.version_filtered(cp, ver, pv):
continue
url = f"http://rubygems.org/gems/{gem}-{up_pv}.gem"
url = "http://rubygems.org/gems/%s-%s.gem" % (gem, up_pv)
url = mangling.mangle_url(url, options)
ret.append((url, pv, HANDLER_NAME, CONFIDENCE))
return ret

View File

@ -24,7 +24,7 @@ def can_handle(*args):
def handle_directory_patterns(base, file_pattern):
r"""
"""
Directory pattern matching
e.g.: base: ftp://ftp.nessus.org/pub/nessus/nessus-([\d\.]+)/src/
file_pattern: nessus-core-([\d\.]+)\.tar\.gz
@ -45,7 +45,7 @@ def handle_directory_patterns(base, file_pattern):
fp = helpers.urlopen(basedir)
except urllib.error.URLError:
return []
except OSError:
except IOError:
return []
if not fp:

View File

@ -83,7 +83,7 @@ def version_is_nightly(a, b):
def version_blacklisted(cp, version):
rule = None
cpv = f"{cp}-{version}"
cpv = "%s-%s" % (cp, version)
# Check that the generated cpv can be used by portage
if not portage.versions.catpkgsplit(cpv):
@ -92,9 +92,10 @@ def version_blacklisted(cp, version):
for bv in BLACKLIST_VERSIONS:
if dep.match_from_list(bv, [cpv]):
rule = bv
None
if rule:
euscan.output.einfo(f"{cpv} is blacklisted by rule {rule}")
euscan.output.einfo("%s is blacklisted by rule %s" % (cpv, rule))
return rule is not None
@ -222,7 +223,7 @@ def gen_versions(components, level):
for i in range(n, n - level, -1):
increment_version(components, i - 1)
for _j in range(depth):
for j in range(depth):
versions.append(list(components))
increment_version(components, i - 1)
@ -263,7 +264,7 @@ def urlallowed(url):
if protocol == "ftp":
return True
baseurl = f"{protocol}://{domain}"
baseurl = "%s://%s" % (protocol, domain)
robotsurl = urllib.parse.urljoin(baseurl, "robots.txt")
if baseurl in rpcache:
@ -279,7 +280,7 @@ def urlallowed(url):
try:
rp.read()
rpcache[baseurl] = rp
except OSError:
except IOError:
rp = None
setdefaulttimeout(timeout)
@ -289,7 +290,7 @@ def urlallowed(url):
def urlopen(url, timeout=None, verb="GET"):
if not urlallowed(url):
euscan.output.einfo(f"Url '{url}' blocked by robots.txt")
euscan.output.einfo("Url '%s' blocked by robots.txt" % url)
return None
if not timeout:
@ -369,7 +370,7 @@ def tryurl(fileurl, template):
except urllib.error.URLError:
result = None
except OSError:
except IOError:
result = None
euscan.output.eend(errno.ENOENT if not result else 0)
@ -382,9 +383,9 @@ def regex_from_template(template):
regexp = re.escape(template)
# Unescape specific stuff
regexp = regexp.replace(r"\$\{", "${")
regexp = regexp.replace(r"\}", "}")
regexp = regexp.replace(r"}\.$", "}.$")
regexp = regexp.replace("\$\{", "${")
regexp = regexp.replace("\}", "}")
regexp = regexp.replace("}\.$", "}.$")
# Replace ${\d+}
# regexp = regexp.replace('${0}', r'([\d]+?)')
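For context (not part of the diff), the idea behind regex_from_template: escape the template literally, re-open the ${...} placeholder syntax (as the replace() calls above do), then turn each placeholder into a capture group. A simplified, hypothetical re-implementation:

    import re

    def regex_from_template_sketch(template):
        # escape everything, then un-escape the ${...} placeholder syntax
        regexp = re.escape(template)
        regexp = regexp.replace(r"\$\{", "${").replace(r"\}", "}")
        # each ${NAME} becomes a non-greedy capture group
        regexp = re.sub(r"\$\{\w+\}", lambda m: r"([\w\.]+?)", regexp)
        return regexp + "$"

    pattern = regex_from_template_sketch("foo-${PV}.tar.gz")
    print(re.match(pattern, "foo-1.2.3.tar.gz").group(1))  # prints 1.2.3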

View File

@ -19,7 +19,7 @@ from euscan.helpers import dict_to_xml
mirrors_ = None
class ProgressHandler:
class ProgressHandler(object):
def __init__(self, progress_bar):
self.curval = 0
self.maxval = 0
@ -74,7 +74,7 @@ def progress_bar():
def clean_colors(string):
if isinstance(string, str):
string = re.sub(r"\033\[[0-9;]+m", "", string)
string = re.sub("\033\[[0-9;]+m", "", string)
string = re.sub(r"\\u001b\[[0-9;]+m", "", string)
string = re.sub(r"\x1b\[[0-9;]+m", "", string)
return string
@ -90,9 +90,9 @@ def transform_url(config, cpv, url):
def to_ebuild_uri(cpv, url):
cat, pkg, ver, rev = portage.catpkgsplit(cpv)
p = f"{pkg}-{ver}"
pvr = f"{ver}{f'-{rev}' if rev != 'r0' else ''}"
pf = f"{pkg}-{pvr}"
p = "%s-%s" % (pkg, ver)
pvr = "%s%s" % (ver, "-%s" % rev if rev != "r0" else "")
pf = "%s-%s" % (pkg, pvr)
evars = (
(p, "P"),
(pkg, "PN"),
@ -140,8 +140,10 @@ def to_mirror(url):
for mirror_url in mirrors_[mirror_name]:
if url.startswith(mirror_url):
url_part = url.split(mirror_url)[1]
return "mirror://{}{}{}".format(
mirror_name, "" if url_part.startswith("/") else "/", url_part
return "mirror://%s%s%s" % (
mirror_name,
"" if url_part.startswith("/") else "/",
url_part,
)
return url
@ -152,17 +154,17 @@ class EOutputMem(EOutput):
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
super(EOutputMem, self).__init__(*args, **kwargs)
self.out = StringIO()
def getvalue(self):
return self.out.getvalue()
def _write(self, f, msg):
super()._write(self.out, msg)
super(EOutputMem, self)._write(self.out, msg)
class EuscanOutput:
class EuscanOutput(object):
"""
Class that handles output for euscan
"""
@ -218,7 +220,7 @@ class EuscanOutput:
def result(self, cp, version, urls, handler, confidence):
from euscan.version import get_version_type
cpv = f"{cp}-{version}"
cpv = "%s-%s" % (cp, version)
urls = " ".join(transform_url(self.config, cpv, url) for url in urls.split())
if self.config["format"] in ["json", "dict"]:
@ -237,13 +239,13 @@ class EuscanOutput:
print("Upstream Version:", pp.number("%s" % version), end=" ")
print(pp.path(" %s" % urls))
else:
print(pp.cpv(f"{cp}-{version}") + ":", pp.path(urls))
print(pp.cpv("%s-%s" % (cp, version)) + ":", pp.path(urls))
def metadata(self, key, value, show=True):
if self.config["format"]:
self.queries[self.current_query]["metadata"][key] = value
elif show:
print(f"{key.capitalize()}: {value}")
print("%s: %s" % (key.capitalize(), value))
def __getattr__(self, key):
if not self.config["quiet"] and self.current_query is not None:

View File

@ -76,14 +76,14 @@ def reload_gentoolkit():
if not hasattr(gentoolkit.package, "PORTDB"):
return
portdb = portage.db[portage.root]["porttree"].dbapi
PORTDB = portage.db[portage.root]["porttree"].dbapi
if hasattr(gentoolkit.dbapi, "PORTDB"):
gentoolkit.dbapi.PORTDB = portdb
gentoolkit.dbapi.PORTDB = PORTDB
if hasattr(gentoolkit.package, "PORTDB"):
gentoolkit.package.PORTDB = portdb
gentoolkit.package.PORTDB = PORTDB
if hasattr(gentoolkit.query, "PORTDB"):
gentoolkit.query.PORTDB = portdb
gentoolkit.query.PORTDB = PORTDB
def scan_upstream(query, on_progress=None):
@ -134,7 +134,7 @@ def scan_upstream(query, on_progress=None):
if not CONFIG["quiet"]:
if not CONFIG["format"]:
pp.uprint(f" * {pp.cpv(pkg.cpv)} [{pp.section(pkg.repo_name())}]")
pp.uprint(" * %s [%s]" % (pp.cpv(pkg.cpv), pp.section(pkg.repo_name())))
pp.uprint()
else:
output.metadata("overlay", pp.section(pkg.repo_name()))
@ -153,9 +153,6 @@ def scan_upstream(query, on_progress=None):
else:
uris = pkg.environment("SRC_URI")
# Roundabout way to handle $'' strings
uris = uris.encode("raw_unicode_escape").decode("unicode_escape")
cpv = pkg.cpv
uris = parse_src_uri(uris)

View File

@ -22,7 +22,7 @@ def get_version_type(version):
if "9999" in version or "99999999" in version:
return "live"
for token in re.findall(r"[\._-]([a-zA-Z]+)", version):
for token in re.findall("[\._-]([a-zA-Z]+)", version):
if token in gentoo_types:
types.append(token)
if types: