mirror of
				https://github.com/ytdl-org/youtube-dl.git
				synced 2025-10-29 09:26:20 -07:00 
			
		
		
		
	[scte] Add extractor (closes #22975)
This commit is contained in:
		| @@ -980,6 +980,10 @@ from .sbs import SBSIE | ||||
| from .screencast import ScreencastIE | ||||
| from .screencastomatic import ScreencastOMaticIE | ||||
| from .scrippsnetworks import ScrippsNetworksWatchIE | ||||
| from .scte import ( | ||||
|     SCTEIE, | ||||
|     SCTECourseIE, | ||||
| ) | ||||
| from .seeker import SeekerIE | ||||
| from .senateisvp import SenateISVPIE | ||||
| from .sendtonews import SendtoNewsIE | ||||
|   | ||||
							
								
								
									
										144
									
								
								youtube_dl/extractor/scte.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										144
									
								
								youtube_dl/extractor/scte.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,144 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     decode_packed_codes, | ||||
|     ExtractorError, | ||||
|     urlencode_postdata, | ||||
| ) | ||||
|  | ||||
|  | ||||
class SCTEBaseIE(InfoExtractor):
    """Common base for the SCTE extractors.

    Attempts a sign-in against ``_LOGIN_URL`` during initialization when
    ``_get_login_info()`` yields a username.
    """

    _LOGIN_URL = 'https://www.scte.org/SCTE/Sign_In.aspx'
    _NETRC_MACHINE = 'scte'

    def _real_initialize(self):
        self._login()

    def _login(self):
        """Submit the sign-in form unless no username is set or a session exists.

        Raises:
            ExtractorError: when the response to the form submission carries
                neither the ``|pageRedirect|`` marker nor any signed-in
                marker. The error is flagged ``expected`` only when the page
                supplies an ``AsiError`` message.
        """
        username, password = self._get_login_info()
        if username is None:
            return

        signed_in_patterns = (r'class=["\']welcome\b', r'>Sign Out<')

        def is_logged(webpage):
            # A page counts as signed-in as soon as one marker is found.
            for pattern in signed_in_patterns:
                if re.search(pattern, webpage):
                    return True
            return False

        login_popup = self._download_webpage(
            self._LOGIN_URL, None, 'Downloading login popup')

        # already logged in
        if is_logged(login_popup):
            return

        # Start from the page's hidden inputs, then add the credential
        # fields on top of them.
        login_form = self._hidden_inputs(login_popup)
        login_form['ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInUserName'] = username
        login_form['ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInPassword'] = password
        login_form['ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$RememberMe'] = 'on'

        response = self._download_webpage(
            self._LOGIN_URL, None, 'Logging in',
            data=urlencode_postdata(login_form))

        # Either a redirect marker or a signed-in marker means success.
        if '|pageRedirect|' in response or is_logged(response):
            return

        error = self._html_search_regex(
            r'(?s)<[^>]+class=["\']AsiError["\'][^>]*>(.+?)</',
            response, 'error message', default=None)
        if not error:
            raise ExtractorError('Unable to log in')
        raise ExtractorError('Unable to login: %s' % error, expected=True)
|  | ||||
|  | ||||
class SCTEIE(SCTEBaseIE):
    """Extractor for a single SCORM module page on learning.scte.org.

    Yields a playlist with one entry per ``.mp4`` asset listed in the
    module's ``mobile/data.js`` manifest.
    """

    # The "?" opening the query string is escaped so that it is matched
    # literally instead of acting as a quantifier on the preceding "p".
    _VALID_URL = r'https?://learning\.scte\.org/mod/scorm/view\.php\?.*?\bid=(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://learning.scte.org/mod/scorm/view.php?id=31484',
        'info_dict': {
            'title': 'Introduction to DOCSIS Engineering Professional',
            'id': '31484',
        },
        'playlist_count': 5,
        'skip': 'Requires account credentials',
    }]

    def _real_extract(self, url):
        """Build the playlist of video assets for the module at ``url``.

        The page supplies the title (first ``<h1>``) and a numeric context
        id; the context id locates ``mobile/data.js``, whose decoded
        ``CreateData("...")`` argument is parsed as XML to enumerate assets.

        Returns:
            The value of ``self.playlist_result(entries, video_id, title)``.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        title = self._search_regex(r'<h1>(.+?)</h1>', webpage, 'title')

        # The third argument is the human-readable field name used when
        # reporting a failed match, so it must describe the field rather
        # than carry the video id.
        context_id = self._search_regex(
            r'context-(\d+)', webpage, 'context id')
        # NOTE(review): the "8" path segment is hard-coded; confirm it is
        # the same for every module.
        content_base = 'https://learning.scte.org/pluginfile.php/%s/mod_scorm/content/8/' % context_id
        context = decode_packed_codes(self._download_webpage(
            '%smobile/data.js' % content_base, video_id))

        # The embedded XML string has its single quotes backslash-escaped;
        # undo that before handing it to the XML parser.
        data = self._parse_xml(
            self._search_regex(
                r'CreateData\(\s*"(.+?)"', context, 'data').replace(r"\'", "'"),
            video_id)

        entries = []
        for asset in data.findall('.//asset'):
            asset_url = asset.get('url')
            # Only MP4 assets are treated as videos.
            if not asset_url or not asset_url.endswith('.mp4'):
                continue
            asset_id = self._search_regex(
                r'video_([^_]+)_', asset_url, 'asset id', default=None)
            # Assets whose file name lacks the "video_<id>_" part are skipped.
            if not asset_id:
                continue
            entries.append({
                'id': asset_id,
                'title': title,
                'url': content_base + asset_url,
            })

        return self.playlist_result(entries, video_id, title)
|  | ||||
|  | ||||
class SCTECourseIE(SCTEBaseIE):
    """Extractor for course and subcourse pages on learning.scte.org.

    Collects the SCORM-module and subcourse links found on the page and
    delegates each one to the matching extractor.
    """

    # The "?" opening the query string is escaped so that it is matched
    # literally instead of acting as a quantifier on the preceding "p".
    _VALID_URL = r'https?://learning\.scte\.org/(?:mod/sub)?course/view\.php\?.*?\bid=(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://learning.scte.org/mod/subcourse/view.php?id=31491',
        'only_matching': True,
    }, {
        'url': 'https://learning.scte.org/course/view.php?id=3639',
        'only_matching': True,
    }, {
        'url': 'https://learning.scte.org/course/view.php?id=3073',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build a playlist of the modules and subcourses linked from ``url``.

        Returns:
            The value of ``self.playlist_result(entries, course_id, title)``;
            ``title`` is ``None`` when the page has no ``<h1>``.
        """
        course_id = self._match_id(url)

        webpage = self._download_webpage(url, course_id)

        title = self._search_regex(
            r'<h1>(.+?)</h1>', webpage, 'title', default=None)

        entries = []
        # Group 1 captures the opening quote of the href; the negative
        # lookahead keeps the match from running past the closing quote.
        for mobj in re.finditer(
                r'''(?x)
                    <a[^>]+
                        href=(["\'])
                        (?P<url>
                            https?://learning\.scte\.org/mod/
                            (?P<kind>scorm|subcourse)/view\.php\?(?:(?!\1).)*?
                            \bid=\d+
                        )
                    ''',
                webpage):
            item_url = mobj.group('url')
            # Skip a link back to the page currently being processed.
            if item_url == url:
                continue
            # SCORM modules go to SCTEIE; subcourses come back to this class.
            ie = (SCTEIE.ie_key() if mobj.group('kind') == 'scorm'
                  else SCTECourseIE.ie_key())
            entries.append(self.url_result(item_url, ie=ie))

        return self.playlist_result(entries, course_id, title)
		Reference in New Issue
	
	Block a user