mirror of
				https://github.com/ytdl-org/youtube-dl.git
				synced 2025-10-29 09:26:20 -07:00 
			
		
		
		
	[facebook] add support for group posts with multiple videos(closes #19131)
This commit is contained in:
		| @@ -72,6 +72,7 @@ class FacebookIE(InfoExtractor): | |||||||
|         }, |         }, | ||||||
|         'skip': 'Requires logging in', |         'skip': 'Requires logging in', | ||||||
|     }, { |     }, { | ||||||
|  |         # data.video | ||||||
|         'url': 'https://www.facebook.com/video.php?v=274175099429670', |         'url': 'https://www.facebook.com/video.php?v=274175099429670', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '274175099429670', |             'id': '274175099429670', | ||||||
| @@ -133,6 +134,7 @@ class FacebookIE(InfoExtractor): | |||||||
|         }, |         }, | ||||||
|     }, { |     }, { | ||||||
|         # have 1080P, but only up to 720p in swf params |         # have 1080P, but only up to 720p in swf params | ||||||
|  |         # data.video.story.attachments[].media | ||||||
|         'url': 'https://www.facebook.com/cnn/videos/10155529876156509/', |         'url': 'https://www.facebook.com/cnn/videos/10155529876156509/', | ||||||
|         'md5': '9571fae53d4165bbbadb17a94651dcdc', |         'md5': '9571fae53d4165bbbadb17a94651dcdc', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
| @@ -147,6 +149,7 @@ class FacebookIE(InfoExtractor): | |||||||
|         }, |         }, | ||||||
|     }, { |     }, { | ||||||
|         # bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall |         # bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall | ||||||
|  |         # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media | ||||||
|         'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/', |         'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '1417995061575415', |             'id': '1417995061575415', | ||||||
| @@ -174,6 +177,7 @@ class FacebookIE(InfoExtractor): | |||||||
|             'skip_download': True, |             'skip_download': True, | ||||||
|         }, |         }, | ||||||
|     }, { |     }, { | ||||||
|  |         # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media | ||||||
|         'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/', |         'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '1396382447100162', |             'id': '1396382447100162', | ||||||
| @@ -193,18 +197,23 @@ class FacebookIE(InfoExtractor): | |||||||
|         'url': 'https://www.facebook.com/amogood/videos/1618742068337349/?fref=nf', |         'url': 'https://www.facebook.com/amogood/videos/1618742068337349/?fref=nf', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|     }, { |     }, { | ||||||
|  |         # data.mediaset.currMedia.edges | ||||||
|         'url': 'https://www.facebook.com/ChristyClarkForBC/videos/vb.22819070941/10153870694020942/?type=2&theater', |         'url': 'https://www.facebook.com/ChristyClarkForBC/videos/vb.22819070941/10153870694020942/?type=2&theater', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|     }, { |     }, { | ||||||
|  |         # data.video.story.attachments[].media | ||||||
|         'url': 'facebook:544765982287235', |         'url': 'facebook:544765982287235', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|     }, { |     }, { | ||||||
|  |         # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media | ||||||
|         'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/', |         'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|     }, { |     }, { | ||||||
|  |         # data.video.creation_story.attachments[].media | ||||||
|         'url': 'https://zh-hk.facebook.com/peoplespower/videos/1135894589806027/', |         'url': 'https://zh-hk.facebook.com/peoplespower/videos/1135894589806027/', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|     }, { |     }, { | ||||||
|  |         # data.video | ||||||
|         'url': 'https://www.facebookcorewwwi.onion/video.php?v=274175099429670', |         'url': 'https://www.facebookcorewwwi.onion/video.php?v=274175099429670', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|     }, { |     }, { | ||||||
| @@ -212,6 +221,7 @@ class FacebookIE(InfoExtractor): | |||||||
|         'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/', |         'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|     }, { |     }, { | ||||||
|  |         # data.video | ||||||
|         'url': 'https://www.facebook.com/WatchESLOne/videos/359649331226507/', |         'url': 'https://www.facebook.com/WatchESLOne/videos/359649331226507/', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '359649331226507', |             'id': '359649331226507', | ||||||
| @@ -222,6 +232,13 @@ class FacebookIE(InfoExtractor): | |||||||
|         'params': { |         'params': { | ||||||
|             'skip_download': True, |             'skip_download': True, | ||||||
|         }, |         }, | ||||||
|  |     }, { | ||||||
|  |         # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.all_subattachments.nodes[].media | ||||||
|  |         'url': 'https://www.facebook.com/100033620354545/videos/106560053808006/', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '106560053808006', | ||||||
|  |         }, | ||||||
|  |         'playlist_count': 2, | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     @staticmethod |     @staticmethod | ||||||
| @@ -330,9 +347,7 @@ class FacebookIE(InfoExtractor): | |||||||
|                 return extract_video_data(try_get( |                 return extract_video_data(try_get( | ||||||
|                     js_data, lambda x: x['jsmods']['instances'], list) or []) |                     js_data, lambda x: x['jsmods']['instances'], list) or []) | ||||||
|  |  | ||||||
|         formats = [] |         def extract_dash_manifest(video, formats): | ||||||
|  |  | ||||||
|         def extract_dash_manifest(video): |  | ||||||
|             dash_manifest = video.get('dash_manifest') |             dash_manifest = video.get('dash_manifest') | ||||||
|             if dash_manifest: |             if dash_manifest: | ||||||
|                 formats.extend(self._parse_mpd_formats( |                 formats.extend(self._parse_mpd_formats( | ||||||
| @@ -351,7 +366,10 @@ class FacebookIE(InfoExtractor): | |||||||
|                 webpage, 'graphql data', default='{}'), video_id, fatal=False) or {} |                 webpage, 'graphql data', default='{}'), video_id, fatal=False) or {} | ||||||
|             for require in (graphql_data.get('require') or []): |             for require in (graphql_data.get('require') or []): | ||||||
|                 if require[0] == 'RelayPrefetchedStreamCache': |                 if require[0] == 'RelayPrefetchedStreamCache': | ||||||
|  |                     entries = [] | ||||||
|  |  | ||||||
|                     def parse_graphql_video(video): |                     def parse_graphql_video(video): | ||||||
|  |                         formats = [] | ||||||
|                         q = qualities(['sd', 'hd']) |                         q = qualities(['sd', 'hd']) | ||||||
|                         for (suffix, format_id) in [('', 'sd'), ('_quality_hd', 'hd')]: |                         for (suffix, format_id) in [('', 'sd'), ('_quality_hd', 'hd')]: | ||||||
|                             playable_url = video.get('playable_url' + suffix) |                             playable_url = video.get('playable_url' + suffix) | ||||||
| @@ -362,7 +380,7 @@ class FacebookIE(InfoExtractor): | |||||||
|                                 'quality': q(format_id), |                                 'quality': q(format_id), | ||||||
|                                 'url': playable_url, |                                 'url': playable_url, | ||||||
|                             }) |                             }) | ||||||
|                         extract_dash_manifest(video) |                         extract_dash_manifest(video, formats) | ||||||
|                         self._sort_formats(formats) |                         self._sort_formats(formats) | ||||||
|                         v_id = video.get('videoId') or video.get('id') or video_id |                         v_id = video.get('videoId') or video.get('id') or video_id | ||||||
|                         info = { |                         info = { | ||||||
| @@ -382,7 +400,12 @@ class FacebookIE(InfoExtractor): | |||||||
|                             }) |                             }) | ||||||
|                         else: |                         else: | ||||||
|                             info['title'] = description or 'Facebook video #%s' % v_id |                             info['title'] = description or 'Facebook video #%s' % v_id | ||||||
|                         return webpage, info |                         entries.append(info) | ||||||
|  |  | ||||||
|  |                     def parse_attachment(attachment, key='media'): | ||||||
|  |                         media = attachment.get(key) or {} | ||||||
|  |                         if media.get('__typename') == 'Video': | ||||||
|  |                             return parse_graphql_video(media) | ||||||
|  |  | ||||||
|                     data = try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {} |                     data = try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {} | ||||||
|  |  | ||||||
| @@ -392,13 +415,22 @@ class FacebookIE(InfoExtractor): | |||||||
|                         lambda x: x['node']['comet_sections']['content']['story']['attachments'] |                         lambda x: x['node']['comet_sections']['content']['story']['attachments'] | ||||||
|                     ], list) or [] |                     ], list) or [] | ||||||
|                     for attachment in attachments: |                     for attachment in attachments: | ||||||
|                         media = attachment.get('media') or try_get(attachment, lambda x: x['style_type_renderer']['attachment']['media'], dict) or {} |                         attachment = try_get(attachment, lambda x: x['style_type_renderer']['attachment'], dict) or attachment | ||||||
|                         if media.get('__typename') == 'Video': |                         nodes = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or [] | ||||||
|                             return parse_graphql_video(media) |                         for node in nodes: | ||||||
|  |                             parse_attachment(node) | ||||||
|  |                         parse_attachment(attachment) | ||||||
|  |  | ||||||
|                     video = data.get('video') or {} |                     edges = try_get(data, lambda x: x['mediaset']['currMedia']['edges'], list) or [] | ||||||
|                     if video: |                     for edge in edges: | ||||||
|                         return parse_graphql_video(video) |                         parse_attachment(edge, key='node') | ||||||
|  |  | ||||||
|  |                     if not entries: | ||||||
|  |                         video = data.get('video') or {} | ||||||
|  |                         if video: | ||||||
|  |                             parse_graphql_video(video) | ||||||
|  |  | ||||||
|  |                     return webpage, self.playlist_result(entries, video_id) | ||||||
|  |  | ||||||
|         if not video_data: |         if not video_data: | ||||||
|             if not fatal_if_no_video: |             if not fatal_if_no_video: | ||||||
| @@ -440,6 +472,7 @@ class FacebookIE(InfoExtractor): | |||||||
|         if not video_data: |         if not video_data: | ||||||
|             raise ExtractorError('Cannot parse data') |             raise ExtractorError('Cannot parse data') | ||||||
|  |  | ||||||
|  |         formats = [] | ||||||
|         subtitles = {} |         subtitles = {} | ||||||
|         for f in video_data: |         for f in video_data: | ||||||
|             format_id = f['stream_type'] |             format_id = f['stream_type'] | ||||||
| @@ -459,7 +492,7 @@ class FacebookIE(InfoExtractor): | |||||||
|                             'url': src, |                             'url': src, | ||||||
|                             'preference': preference, |                             'preference': preference, | ||||||
|                         }) |                         }) | ||||||
|             extract_dash_manifest(f[0]) |             extract_dash_manifest(f[0], formats) | ||||||
|             subtitles_src = f[0].get('subtitles_src') |             subtitles_src = f[0].get('subtitles_src') | ||||||
|             if subtitles_src: |             if subtitles_src: | ||||||
|                 subtitles.setdefault('en', []).append({'url': subtitles_src}) |                 subtitles.setdefault('en', []).append({'url': subtitles_src}) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user