mirror of
				https://github.com/ytdl-org/youtube-dl.git
				synced 2025-10-29 09:26:20 -07:00 
			
		
		
		
	[Motherless] Pull from yt-dlp, etc
* use username field * loosen regexes * warn on page count 0 in group * avoid reloading group page 1 Closes #29626
This commit is contained in:
		| @@ -126,9 +126,10 @@ class MotherlessIE(InfoExtractor): | ||||
|                 kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta} | ||||
|                 upload_date = (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d') | ||||
|  | ||||
|         comment_count = webpage.count('class="media-comment-contents"') | ||||
|         comment_count = len(re.findall(r'''class\s*=\s*['"]media-comment-contents\b''', webpage)) | ||||
|         uploader_id = self._html_search_regex( | ||||
|             r'''(?s)['"](?:media-meta-member|thumb-member-username)\b[^>]+>\s*<a\b[^>]+\bhref\s*=\s*['"]/m/([^"']+)''', | ||||
|             (r'''<span\b[^>]+\bclass\s*=\s*["']username\b[^>]*>([^<]+)</span>''', | ||||
|              r'''(?s)['"](?:media-meta-member|thumb-member-username)\b[^>]+>\s*<a\b[^>]+\bhref\s*=\s*['"]/m/([^"']+)'''), | ||||
|             webpage, 'uploader_id') | ||||
|  | ||||
|         categories = self._html_search_meta('keywords', webpage, default=None) | ||||
| @@ -171,6 +172,17 @@ class MotherlessGroupIE(InfoExtractor): | ||||
|                            'any kind!' | ||||
|         }, | ||||
|         'playlist_mincount': 0, | ||||
|         'expected_warnings': [ | ||||
|             'This group has no videos.', | ||||
|         ] | ||||
|     }, { | ||||
|         'url': 'https://motherless.com/g/beautiful_cock', | ||||
|         'info_dict': { | ||||
|             'id': 'beautiful_cock', | ||||
|             'title': 'Beautiful Cock', | ||||
|             'description': 'Group for lovely cocks yours, mine, a friends anything human', | ||||
|         }, | ||||
|         'playlist_mincount': 2500, | ||||
|     }] | ||||
|  | ||||
|     @classmethod | ||||
| @@ -211,10 +223,17 @@ class MotherlessGroupIE(InfoExtractor): | ||||
|             'description', webpage, fatal=False) | ||||
|         page_count = str_to_int(self._search_regex( | ||||
|             r'(\d+)\s*</(?:a|span)>\s*<(?:a|span)[^>]+(?:>\s*NEXT|\brel\s*=\s*["\']?next)\b', | ||||
|             webpage, 'page_count', default='1')) | ||||
|             webpage, 'page_count', default=0)) | ||||
|         if not page_count: | ||||
|             message = self._search_regex( | ||||
|                 r'''class\s*=\s*['"]error-page\b[^>]*>\s*<p[^>]*>\s*(?P<error_msg>[^<]+)(?<=\S)\s*''', | ||||
|                 webpage, 'error_msg', default=None) or 'This group has no videos.' | ||||
|             self.report_warning(message, group_id) | ||||
|             page_count = 1 | ||||
|         PAGE_SIZE = 80 | ||||
|  | ||||
|         def _get_page(idx): | ||||
|             if idx > 0: | ||||
|                 webpage = self._download_webpage( | ||||
|                     page_url, group_id, query={'page': idx + 1}, | ||||
|                     note='Downloading page %d/%d' % (idx + 1, page_count) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user