[AlphaPorno] ERROR: Unable to extract video url #24843

Open
opened 2026-02-21 12:13:53 -05:00 by deekerman · 1 comment
Owner

Originally created by @romfetchr on GitHub (Jan 3, 2022).

$ youtube-dl --verbose https://www.alphaporno.com/videos/amazing-inches-hammering-her-pussy-in-such-addictive-ways/
[debug] System config: []
[debug] User config: []
[debug] Custom config: []
[debug] Command-line args: ['--verbose', 'https://www.alphaporno.com/videos/amazing-inches-hammering-her-pussy-in-such-addictive-ways/']
[debug] Encodings: locale UTF-8, fs utf-8, out utf-8, pref UTF-8
[debug] youtube-dl version 2021.12.17
[debug] Python version 3.8.10 (CPython) - Linux-5.4.0-91-generic-x86_64-with-glibc2.29
[debug] exe versions: ffmpeg 4.2.4, ffprobe 4.2.4
[debug] Proxy map: {}
[AlphaPorno] amazing-inches-hammering-her-pussy-in-such-addictive-ways: Downloading webpage
ERROR: Unable to extract video url; please report this issue on https://yt-dl.org/bug . Make sure you are using the latest version; type youtube-dl -U to update. Be sure to call youtube-dl with the --verbose flag and include its complete output.
Traceback (most recent call last):
  File "/usr/local/bin/youtube-dl/youtube_dl/YoutubeDL.py", line 815, in wrapper
    return func(self, *args, **kwargs)
  File "/usr/local/bin/youtube-dl/youtube_dl/YoutubeDL.py", line 836, in __extract_info
    ie_result = ie.extract(url)
  File "/usr/local/bin/youtube-dl/youtube_dl/extractor/common.py", line 534, in extract
    ie_result = self._real_extract(url)
  File "/usr/local/bin/youtube-dl/youtube_dl/extractor/alphaporno.py", line 41, in _real_extract
    video_url = self._search_regex(
  File "/usr/local/bin/youtube-dl/youtube_dl/extractor/common.py", line 1012, in _search_regex
    raise RegexNotFoundError('Unable to extract %s' % _name)
youtube_dl.utils.RegexNotFoundError: Unable to extract video url; please report this issue on https://yt-dl.org/bug . Make sure you are using the latest version; type youtube-dl -U to update. Be sure to call youtube-dl with the --verbose flag and include its complete output.

Originally created by @romfetchr on GitHub (Jan 3, 2022). $ youtube-dl --verbose https://www.alphaporno.com/videos/amazing-inches-hammering-her-pussy-in-such-addictive-ways/ [debug] System config: [] [debug] User config: [] [debug] Custom config: [] [debug] Command-line args: ['--verbose', 'https://www.alphaporno.com/videos/amazing-inches-hammering-her-pussy-in-such-addictive-ways/'] [debug] Encodings: locale UTF-8, fs utf-8, out utf-8, pref UTF-8 [debug] youtube-dl version 2021.12.17 [debug] Python version 3.8.10 (CPython) - Linux-5.4.0-91-generic-x86_64-with-glibc2.29 [debug] exe versions: ffmpeg 4.2.4, ffprobe 4.2.4 [debug] Proxy map: {} [AlphaPorno] amazing-inches-hammering-her-pussy-in-such-addictive-ways: Downloading webpage ERROR: Unable to extract video url; please report this issue on https://yt-dl.org/bug . Make sure you are using the latest version; type youtube-dl -U to update. Be sure to call youtube-dl with the --verbose flag and include its complete output. Traceback (most recent call last): File "/usr/local/bin/youtube-dl/youtube_dl/YoutubeDL.py", line 815, in wrapper return func(self, *args, **kwargs) File "/usr/local/bin/youtube-dl/youtube_dl/YoutubeDL.py", line 836, in __extract_info ie_result = ie.extract(url) File "/usr/local/bin/youtube-dl/youtube_dl/extractor/common.py", line 534, in extract ie_result = self._real_extract(url) File "/usr/local/bin/youtube-dl/youtube_dl/extractor/alphaporno.py", line 41, in _real_extract video_url = self._search_regex( File "/usr/local/bin/youtube-dl/youtube_dl/extractor/common.py", line 1012, in _search_regex raise RegexNotFoundError('Unable to extract %s' % _name) youtube_dl.utils.RegexNotFoundError: Unable to extract video url; please report this issue on https://yt-dl.org/bug . Make sure you are using the latest version; type youtube-dl -U to update. Be sure to call youtube-dl with the --verbose flag and include its complete output.
Author
Owner

@dirkf commented on GitHub (Jan 3, 2022):

Fortunately no JS is needed to analyse the new page structure.

This patch restores functionality, although some fields (`tbr`, `filesize_approx`) may no longer be extracted:

--- old/youtube-dl/youtube_dl/extractor/alphaporno.py
+++ new/youtube-dl/youtube_dl/extractor/alphaporno.py
@@ -1,11 +1,15 @@
 from __future__ import unicode_literals
+
+import re
 
 from .common import InfoExtractor
 from ..utils import (
-    parse_iso8601,
+    int_or_none,
     parse_duration,
     parse_filesize,
-    int_or_none,
+    parse_resolution,
+    unified_timestamp,
+    urljoin,
 )
 
 
@@ -13,18 +17,17 @@
     _VALID_URL = r'https?://(?:www\.)?alphaporno\.com/videos/(?P<id>[^/]+)'
     _TEST = {
         'url': 'http://www.alphaporno.com/videos/sensual-striptease-porn-with-samantha-alexandra/',
-        'md5': 'feb6d3bba8848cd54467a87ad34bd38e',
+        'md5': '7e6a1cdd48fa67362a5a11d7039164e7',
         'info_dict': {
             'id': '258807',
             'display_id': 'sensual-striptease-porn-with-samantha-alexandra',
             'ext': 'mp4',
             'title': 'Sensual striptease porn with Samantha Alexandra',
+            'description': 'md5:3c6d31008980654acaeb11451454a62c',
             'thumbnail': r're:https?://.*\.jpg$',
-            'timestamp': 1418694611,
+            'timestamp': 1418701811,
             'upload_date': '20141216',
             'duration': 387,
-            'filesize_approx': 54120000,
-            'tbr': 1145,
             'categories': list,
             'age_limit': 18,
         }
@@ -33,40 +36,73 @@
     def _real_extract(self, url):
         display_id = self._match_id(url)
 
-        webpage = self._download_webpage(url, display_id)
+        webpage, urlh = self._download_webpage_handle(url, display_id)
 
+        info = {
+            'display_id': display_id,
+        }
         video_id = self._search_regex(
             r"video_id\s*:\s*'([^']+)'", webpage, 'video id', default=None)
+        if video_id:
+            info['url'] = self._search_regex(
+                r"video_url\s*:\s*'([^']+)'", webpage, 'video url')
+            info['ext'] = self._html_search_meta(
+                'encodingFormat', webpage, 'ext', default='.mp4')[1:]
+        else:
+            video_id = self._search_regex(
+                r"video_id=(\d+)\b", webpage, 'video id')
+            formats = []
+            for video_url, res in re.findall(
+                    (r'''(%s)(?:'|"|\b)\s''' % 
+                     urljoin(urlh.geturl(), r'/get_file/\d.+?/%(video_id)s/%(video_id)s_(\w+)\..+?' % locals())),
+                    webpage):
+                fmt = {
+                    'format_id': 'f%s' %res,
+                    'url': video_url,
+                }
+                fmt.update(parse_resolution(res) or {})
+                formats.append(fmt)
+            self._sort_formats(formats)
+            info['formats'] = formats
+    
 
-        video_url = self._search_regex(
-            r"video_url\s*:\s*'([^']+)'", webpage, 'video url')
-        ext = self._html_search_meta(
-            'encodingFormat', webpage, 'ext', default='.mp4')[1:]
-
-        title = self._search_regex(
-            [r'<meta content="([^"]+)" itemprop="description">',
-             r'class="title" itemprop="name">([^<]+)<'],
-            webpage, 'title')
-        thumbnail = self._html_search_meta('thumbnail', webpage, 'thumbnail')
-        timestamp = parse_iso8601(self._html_search_meta(
-            'uploadDate', webpage, 'upload date'))
-        duration = parse_duration(self._html_search_meta(
-            'duration', webpage, 'duration'))
+        title = (
+            self._html_search_regex(r'<title[^>]*>([^<]+?)(?:\s*-\s*Alpha\s*Porno\s*)?<', webpage, 'title', default=None)
+            or self._og_search_title(webpage, default=None)
+            or self._search_regex(
+                (r'<meta content="([^"]+)" itemprop="description">',
+                 r'class="title" itemprop="name">([^<]+)<'),
+                webpage, 'title')
+        )
+        description = (
+            self._og_search_description(webpage) or
+            self._search_regex(
+                r'<meta content="([^"]+)" itemprop="description">',
+                webpage, 'description')
+        )
+        thumbnail = (
+            self._og_search_thumbnail(webpage)
+            or self._html_search_meta('thumbnail', webpage, 'thumbnail')
+        )
+        timestamp = unified_timestamp(self._og_search_property('video:release_date', webpage)
+            or self._html_search_meta('uploadDate', webpage, 'upload date'))
+        duration = parse_duration(self._og_search_property('video:duration', webpage)
+            or self._html_search_meta('duration', webpage, 'duration'))
         filesize_approx = parse_filesize(self._html_search_meta(
-            'contentSize', webpage, 'file size'))
+            'contentSize', webpage, 'file size', default=None))
         bitrate = int_or_none(self._html_search_meta(
-            'bitrate', webpage, 'bitrate'))
-        categories = self._html_search_meta(
-            'keywords', webpage, 'categories', default='').split(',')
+            'bitrate', webpage, 'bitrate', default=None))
+        categories = re.split(
+            r'\s*,\s*', 
+            self._html_search_meta(
+                'keywords', webpage, 'categories', default=''))
 
         age_limit = self._rta_search(webpage)
 
-        return {
+        info.update({
             'id': video_id,
-            'display_id': display_id,
-            'url': video_url,
-            'ext': ext,
             'title': title,
+            'description': description,
             'thumbnail': thumbnail,
             'timestamp': timestamp,
             'duration': duration,
@@ -74,4 +110,5 @@
             'tbr': bitrate,
             'categories': categories,
             'age_limit': age_limit,
-        }
+        })
+        return info
@dirkf commented on GitHub (Jan 3, 2022): Fortunately no JS is needed to analyse the new page structure. This patch restores functionality, although some fields (`tbr`, `filesize_approx`) may no longer be extracted: ``` --- old/youtube-dl/youtube_dl/extractor/alphaporno.py +++ new/youtube-dl/youtube_dl/extractor/alphaporno.py @@ -1,11 +1,15 @@ from __future__ import unicode_literals + +import re from .common import InfoExtractor from ..utils import ( - parse_iso8601, + int_or_none, parse_duration, parse_filesize, - int_or_none, + parse_resolution, + unified_timestamp, + urljoin, ) @@ -13,18 +17,17 @@ _VALID_URL = r'https?://(?:www\.)?alphaporno\.com/videos/(?P<id>[^/]+)' _TEST = { 'url': 'http://www.alphaporno.com/videos/sensual-striptease-porn-with-samantha-alexandra/', - 'md5': 'feb6d3bba8848cd54467a87ad34bd38e', + 'md5': '7e6a1cdd48fa67362a5a11d7039164e7', 'info_dict': { 'id': '258807', 'display_id': 'sensual-striptease-porn-with-samantha-alexandra', 'ext': 'mp4', 'title': 'Sensual striptease porn with Samantha Alexandra', + 'description': 'md5:3c6d31008980654acaeb11451454a62c', 'thumbnail': r're:https?://.*\.jpg$', - 'timestamp': 1418694611, + 'timestamp': 1418701811, 'upload_date': '20141216', 'duration': 387, - 'filesize_approx': 54120000, - 'tbr': 1145, 'categories': list, 'age_limit': 18, } @@ -33,40 +36,73 @@ def _real_extract(self, url): display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) + webpage, urlh = self._download_webpage_handle(url, display_id) + info = { + 'display_id': display_id, + } video_id = self._search_regex( r"video_id\s*:\s*'([^']+)'", webpage, 'video id', default=None) + if video_id: + info['url'] = self._search_regex( + r"video_url\s*:\s*'([^']+)'", webpage, 'video url') + info['ext'] = self._html_search_meta( + 'encodingFormat', webpage, 'ext', default='.mp4')[1:] + else: + video_id = self._search_regex( + r"video_id=(\d+)\b", webpage, 'video id') + formats = [] + for video_url, res in re.findall( + 
(r'''(%s)(?:'|"|\b)\s''' % + urljoin(urlh.geturl(), r'/get_file/\d.+?/%(video_id)s/%(video_id)s_(\w+)\..+?' % locals())), + webpage): + fmt = { + 'format_id': 'f%s' %res, + 'url': video_url, + } + fmt.update(parse_resolution(res) or {}) + formats.append(fmt) + self._sort_formats(formats) + info['formats'] = formats + - video_url = self._search_regex( - r"video_url\s*:\s*'([^']+)'", webpage, 'video url') - ext = self._html_search_meta( - 'encodingFormat', webpage, 'ext', default='.mp4')[1:] - - title = self._search_regex( - [r'<meta content="([^"]+)" itemprop="description">', - r'class="title" itemprop="name">([^<]+)<'], - webpage, 'title') - thumbnail = self._html_search_meta('thumbnail', webpage, 'thumbnail') - timestamp = parse_iso8601(self._html_search_meta( - 'uploadDate', webpage, 'upload date')) - duration = parse_duration(self._html_search_meta( - 'duration', webpage, 'duration')) + title = ( + self._html_search_regex(r'<title[^>]*>([^<]+?)(?:\s*-\s*Alpha\s*Porno\s*)?<', webpage, 'title', default=None) + or self._og_search_title(webpage, default=None) + or self._search_regex( + (r'<meta content="([^"]+)" itemprop="description">', + r'class="title" itemprop="name">([^<]+)<'), + webpage, 'title') + ) + description = ( + self._og_search_description(webpage) or + self._search_regex( + r'<meta content="([^"]+)" itemprop="description">', + webpage, 'description') + ) + thumbnail = ( + self._og_search_thumbnail(webpage) + or self._html_search_meta('thumbnail', webpage, 'thumbnail') + ) + timestamp = unified_timestamp(self._og_search_property('video:release_date', webpage) + or self._html_search_meta('uploadDate', webpage, 'upload date')) + duration = parse_duration(self._og_search_property('video:duration', webpage) + or self._html_search_meta('duration', webpage, 'duration')) filesize_approx = parse_filesize(self._html_search_meta( - 'contentSize', webpage, 'file size')) + 'contentSize', webpage, 'file size', default=None)) bitrate = 
int_or_none(self._html_search_meta( - 'bitrate', webpage, 'bitrate')) - categories = self._html_search_meta( - 'keywords', webpage, 'categories', default='').split(',') + 'bitrate', webpage, 'bitrate', default=None)) + categories = re.split( + r'\s*,\s*', + self._html_search_meta( + 'keywords', webpage, 'categories', default='')) age_limit = self._rta_search(webpage) - return { + info.update({ 'id': video_id, - 'display_id': display_id, - 'url': video_url, - 'ext': ext, 'title': title, + 'description': description, 'thumbnail': thumbnail, 'timestamp': timestamp, 'duration': duration, @@ -74,4 +110,5 @@ 'tbr': bitrate, 'categories': categories, 'age_limit': age_limit, - } + }) + return info ```
Sign in to join this conversation.
No milestone
No project
No assignees
1 participant
Notifications
Due date
The due date is invalid or out of range. Please use the format "yyyy-mm-dd".

No due date set.

Dependencies

No dependencies set.

Reference
starred/youtube-dl#24843
No description provided.