Remove all protocols and tlds on URL-cleaning
Also make removal of the protocol, TLD and 'www' more robust against false positives by considering their position in the URL.
This commit is contained in:
parent
4d4e88bfa2
commit
50c95f1bef
@ -1858,20 +1858,17 @@ endfunction
|
|||||||
|
|
||||||
|
|
||||||
function! s:clean_url(url)
|
function! s:clean_url(url)
|
||||||
let url = split(a:url, '/\|=\|-\|&\|?\|\.')
|
" remove protocol and tld
|
||||||
|
let url = substitute(a:url, '^\a\+://', '', '')
|
||||||
|
let url = substitute(url, '^\([^/]\+\).\a\{2,4}/', '\1/', '')
|
||||||
|
let url = split(url, '/\|=\|-\|&\|?\|\.')
|
||||||
let url = filter(url, 'v:val !=# ""')
|
let url = filter(url, 'v:val !=# ""')
|
||||||
let url = filter(url, 'v:val !=# "www"')
|
if url[0] == "www"
|
||||||
let url = filter(url, 'v:val !=# "com"')
|
let url = url[1:]
|
||||||
let url = filter(url, 'v:val !=# "org"')
|
endif
|
||||||
let url = filter(url, 'v:val !=# "net"')
|
if url[-1] =~ '^\(htm\|html\|php\)$'
|
||||||
let url = filter(url, 'v:val !=# "edu"')
|
let url = url[0:-2]
|
||||||
let url = filter(url, 'v:val !=# "http\:"')
|
endif
|
||||||
let url = filter(url, 'v:val !=# "https\:"')
|
|
||||||
let url = filter(url, 'v:val !=# "file\:"')
|
|
||||||
let url = filter(url, 'v:val !=# "xml\:"')
|
|
||||||
let url = filter(url, 'v:val !=# "html"')
|
|
||||||
let url = filter(url, 'v:val !=# "htm"')
|
|
||||||
let url = filter(url, 'v:val !=# "php"')
|
|
||||||
" remove words consisting of only hexadecimal digits or non-word characters
|
" remove words consisting of only hexadecimal digits or non-word characters
|
||||||
let url = filter(url, 'v:val !~ "^\\A\\{4,}$"')
|
let url = filter(url, 'v:val !~ "^\\A\\{4,}$"')
|
||||||
let url = filter(url, 'v:val !~ "^\\x\\{4,}$" || v:val !~ "\\d"')
|
let url = filter(url, 'v:val !~ "^\\x\\{4,}$" || v:val !~ "\\d"')
|
||||||
|
Loading…
Reference in New Issue
Block a user