mirror of
				https://github.com/ytdl-org/youtube-dl.git
				synced 2025-10-29 09:26:20 -07:00 
			
		
		
		
	Compare commits
	
		
			521 Commits
		
	
	
		
			2016.07.26
			...
			2016.09.15
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | f5e008d134 | ||
|  | e6bf3621e7 | ||
|  | 490b755769 | ||
|  | 1dec2c8a0e | ||
|  | dcce092e0a | ||
|  | 32443dd346 | ||
|  | 2133565cec | ||
|  | 1da50aa34e | ||
|  | d2522b86ac | ||
|  | 537f753399 | ||
|  | c849836854 | ||
|  | eb5b1fc021 | ||
|  | 95be29e1c6 | ||
|  | c035dba19e | ||
|  | 87148bb711 | ||
|  | 797c636bcb | ||
|  | 0002962f3f | ||
|  | 3e4185c396 | ||
|  | f6717dec8a | ||
|  | a942d6cb48 | ||
|  | 961516bfd1 | ||
|  | 6db354a9f4 | ||
|  | 353f340e11 | ||
|  | 014b7e6b25 | ||
|  | 925194022c | ||
|  | b690ea15eb | ||
|  | 5712c0f426 | ||
|  | 86d68f906e | ||
|  | 4875ff6847 | ||
|  | 1b6712ab23 | ||
|  | 8414c2da31 | ||
|  | 45396dd2ed | ||
|  | 7a7309219c | ||
|  | fcba157e80 | ||
|  | a6ccc3e518 | ||
|  | 1d16035bb4 | ||
|  | e8bcd982cc | ||
|  | a5ff05df1a | ||
|  | d002e91986 | ||
|  | 546edb2efa | ||
|  | be45730226 | ||
|  | ee7e672eb0 | ||
|  | 0307d6fba6 | ||
|  | fc150cba1d | ||
|  | d667ab7fad | ||
|  | eb87d4545a | ||
|  | 1c81476cbb | ||
|  | bc9186c882 | ||
|  | 6599c72527 | ||
|  | 6bb05b32a9 | ||
|  | fea74acad8 | ||
|  | f01115c933 | ||
|  | 2cdbc06a1f | ||
|  | 2cb93afcd8 | ||
|  | bfcda07a27 | ||
|  | 001a5fd3d7 | ||
|  | 1e35999c1e | ||
|  | 2512b17493 | ||
|  | 56c0ead4d3 | ||
|  | 7324243750 | ||
|  | 84a18e9b90 | ||
|  | b29f842e0e | ||
|  | f009fcac0d | ||
|  | 6c3affcb18 | ||
|  | 1e19ff2984 | ||
|  | c6129feb7f | ||
|  | bb5ebd4453 | ||
|  | cb9cbd84ed | ||
|  | 4d5726b0d7 | ||
|  | 4614ad7b59 | ||
|  | b717837190 | ||
|  | 2abad67e52 | ||
|  | ad0e2b3359 | ||
|  | 37720844f6 | ||
|  | 6cfcb8ac36 | ||
|  | 7a979da8cb | ||
|  | 2fdc7b0e04 | ||
|  | 010d034fca | ||
|  | 02e552886f | ||
|  | 25042f7372 | ||
|  | 3f612f0767 | ||
|  | 17bf6e71cc | ||
|  | 881f35479d | ||
|  | 89f257d6e5 | ||
|  | e78a5428b6 | ||
|  | 6656a82481 | ||
|  | d7e794928d | ||
|  | 9c27188988 | ||
|  | b84d311d53 | ||
|  | f87feb4b68 | ||
|  | 2841bdcebb | ||
|  | 84b91dd4e3 | ||
|  | 92c9c2a88b | ||
|  | 9d54b02bae | ||
|  | 846d8b76a0 | ||
|  | aa3f9fe695 | ||
|  | 8258f4457c | ||
|  | 948cd5b72d | ||
|  | 8d3737cda7 | ||
|  | 155bc674c4 | ||
|  | c33c962adf | ||
|  | bdcc046d12 | ||
|  | a493f10208 | ||
|  | f3eeaacb4e | ||
|  | b4d6a85d60 | ||
|  | 0b36a96212 | ||
|  | bc22a79694 | ||
|  | 340e31ca74 | ||
|  | 973dee491f | ||
|  | 1f85029d82 | ||
|  | 95be19d436 | ||
|  | 95843da529 | ||
|  | abf2c79f95 | ||
|  | b49ad71ce1 | ||
|  | 9127e1533d | ||
|  | 78e762d23c | ||
|  | 4809490108 | ||
|  | 8112bfeaba | ||
|  | d9606d9b6c | ||
|  | 433af6ad30 | ||
|  | feaa5ad787 | ||
|  | 100bd86a68 | ||
|  | 0def758782 | ||
|  | 919cf1a62f | ||
|  | b29cd56591 | ||
|  | 622638512b | ||
|  | 37c7490ac6 | ||
|  | 091624f9da | ||
|  | 7e5dc339de | ||
|  | 4a69fa04e0 | ||
|  | 2e99cd30c3 | ||
|  | 25afc2a783 | ||
|  | 9603b66012 | ||
|  | 45aab4d30b | ||
|  | ed2bfe93aa | ||
|  | cdc783510b | ||
|  | cf0efe9636 | ||
|  | dedb177029 | ||
|  | 86c3bbbced | ||
|  | 4b3a607658 | ||
|  | 3a7d35b982 | ||
|  | 6496ccb413 | ||
|  | 3fcce30289 | ||
|  | c2b2c7e138 | ||
|  | dacb3a864a | ||
|  | 6066d03db0 | ||
|  | 6562d34a8c | ||
|  | 5e9e3d0f6b | ||
|  | 349fc5c705 | ||
|  | 2c3e0af93e | ||
|  | 6150502e47 | ||
|  | b207d5ebd4 | ||
|  | 4191779dcd | ||
|  | f97ec8bcb9 | ||
|  | 8276d3b87a | ||
|  | af95ee94b4 | ||
|  | 8fb6af6bba | ||
|  | f6af0f888b | ||
|  | e816c9d158 | ||
|  | 9250181f37 | ||
|  | f096ec2625 | ||
|  | 4c8ab6fd71 | ||
|  | 05d4612947 | ||
|  | 746a695b36 | ||
|  | 165c54e97d | ||
|  | 2896dd73bc | ||
|  | f8fd510eb4 | ||
|  | 7a3e849f6e | ||
|  | 196c6ba067 | ||
|  | 165620e320 | ||
|  | 4fd350611c | ||
|  | 263fef43de | ||
|  | a249ab83cb | ||
|  | f7043ef39c | ||
|  | 64fc49aba0 | ||
|  | 245023a861 | ||
|  | 3c77a54d5d | ||
|  | da30a20a4d | ||
|  | 1fe48afea5 | ||
|  | 42e05be867 | ||
|  | fe45b0e060 | ||
|  | a06e1498aa | ||
|  | 5a80e7b43a | ||
|  | 3fb2a23029 | ||
|  | 7be15d4097 | ||
|  | cd10b3ea63 | ||
|  | 547993dcd0 | ||
|  | 6c9b71bc08 | ||
|  | 93b8404599 | ||
|  | 9ba1e1dcc0 | ||
|  | b8079a40bc | ||
|  | 5bc8a73af6 | ||
|  | b3eaeded12 | ||
|  | ec65b391cb | ||
|  | 2982514072 | ||
|  | 98908bcf7c | ||
|  | 04b32c8f96 | ||
|  | 40eec6b15c | ||
|  | 39efc6e3e0 | ||
|  | 1198fe14a1 | ||
|  | 71e90766b5 | ||
|  | d7aae610f6 | ||
|  | 92c27a0dbf | ||
|  | d181cff685 | ||
|  | 3b4b82d4ce | ||
|  | 545ef4f531 | ||
|  | 906b87cf5f | ||
|  | b281aad2dc | ||
|  | 6b18a24e6e | ||
|  | c9de980106 | ||
|  | f9b373afda | ||
|  | 298a120ab7 | ||
|  | e3faecde30 | ||
|  | a0f071a50d | ||
|  | 20bad91d76 | ||
|  | b54a2da433 | ||
|  | dc2c37f316 | ||
|  | c1f62dd338 | ||
|  | 5a3efcd27c | ||
|  | 4c8f9c2577 | ||
|  | f26a298247 | ||
|  | ea01cdbf61 | ||
|  | 6a76b53355 | ||
|  | d37708fc86 | ||
|  | 5c13c28566 | ||
|  | f70e9229e6 | ||
|  | 30afe4aeb2 | ||
|  | 75fa990dc6 | ||
|  | f39ffc5877 | ||
|  | 07ea9c9b05 | ||
|  | 073ac1225f | ||
|  | 0c6422cdd6 | ||
|  | 08773689f3 | ||
|  | 0c75abbb7b | ||
|  | 97653f81b2 | ||
|  | d38b27dd9b | ||
|  | 6d94cbd2f4 | ||
|  | 30317f4887 | ||
|  | 8c3e35dd44 | ||
|  | c86f51ee38 | ||
|  | 6e52bbb413 | ||
|  | 05bddcc512 | ||
|  | 1212e9972f | ||
|  | ccb6570e9e | ||
|  | 18b6216150 | ||
|  | fb009b7f53 | ||
|  | 3083e4dc07 | ||
|  | 7367bdef23 | ||
|  | ad31642584 | ||
|  | c7c43a93ba | ||
|  | 96229e5f95 | ||
|  | 55d119e2a1 | ||
|  | 6d2679ee26 | ||
|  | afbab5688e | ||
|  | 3d897cc791 | ||
|  | cf143c4d97 | ||
|  | ad120ae1c5 | ||
|  | d0fa172e5f | ||
|  | f97f9f71e5 | ||
|  | 526656726b | ||
|  | 9b8c554ea7 | ||
|  | d13bfc07b7 | ||
|  | efe470e261 | ||
|  | e3f6b56909 | ||
|  | b1e676fde8 | ||
|  | 92d4cfa358 | ||
|  | 3d47ee0a9e | ||
|  | d164a0d41b | ||
|  | db29af6d36 | ||
|  | 2c6acdfd2d | ||
|  | fddaa76a59 | ||
|  | a809446750 | ||
|  | d8f30a7e66 | ||
|  | 5b1d85754e | ||
|  | e25586e471 | ||
|  | 292a2301bf | ||
|  | dabe15701b | ||
|  | 4245f55880 | ||
|  | 5b9d187cc6 | ||
|  | 39e1c4f08c | ||
|  | 19f35402c5 | ||
|  | 70852b47ca | ||
|  | a9a3b4a081 | ||
|  | ecc90093f9 | ||
|  | 520251c093 | ||
|  | 55af45fcab | ||
|  | b82232036a | ||
|  | e4659b4547 | ||
|  | 9e5751b9fe | ||
|  | bd1bcd3ea0 | ||
|  | 93a63b36f1 | ||
|  | 8b2dc4c328 | ||
|  | 850837b67a | ||
|  | 13585d7682 | ||
|  | fd3ec986a4 | ||
|  | b0d578ff7b | ||
|  | b0c8f2e9c8 | ||
|  | 51815886a9 | ||
|  | 08a42f9c74 | ||
|  | e15ad9ef09 | ||
|  | 4e9fee1015 | ||
|  | 7273e5849b | ||
|  | b505e98784 | ||
|  | 92cd9fd565 | ||
|  | b3d7dce429 | ||
|  | a44694ab4e | ||
|  | ab19b46b88 | ||
|  | 8804f10e6b | ||
|  | 6be17c0870 | ||
|  | 8652770bd2 | ||
|  | 2a1321a272 | ||
|  | 9c0fa60bf3 | ||
|  | 502d87c546 | ||
|  | b35b0d73d8 | ||
|  | 6e7e4a6edf | ||
|  | 53fef319f1 | ||
|  | 2cabee2a7d | ||
|  | 11f502fac1 | ||
|  | 98affc1a48 | ||
|  | 70a2829fee | ||
|  | 837e56c8ee | ||
|  | b5ddee8c77 | ||
|  | fb64adcbd3 | ||
|  | 4f640f2890 | ||
|  | 254e64a20a | ||
|  | 818ac213eb | ||
|  | cbef4d5c9f | ||
|  | bf90c46790 | ||
|  | 69eb4d699f | ||
|  | 6d8ec8c3b7 | ||
|  | 760845ce99 | ||
|  | 5c2d087221 | ||
|  | b6c4e36728 | ||
|  | 1a57b8c18c | ||
|  | 24eb13b1c6 | ||
|  | 525e0316c0 | ||
|  | 7e60ce9cf7 | ||
|  | e811bcf8f8 | ||
|  | 6103f59095 | ||
|  | 9fa5789279 | ||
|  | d2ac04674d | ||
|  | 1fd6e30988 | ||
|  | 884cdb6cd9 | ||
|  | 9771b1f901 | ||
|  | 2118fdd1a9 | ||
|  | 320d597c21 | ||
|  | aaf44a2f47 | ||
|  | fafabc0712 | ||
|  | 409760a932 | ||
|  | 097eba019d | ||
|  | 73a85620ee | ||
|  | a560f28c98 | ||
|  | 5ec5461e1a | ||
|  | 542130a5d9 | ||
|  | 82997dad57 | ||
|  | 647a7bf5e8 | ||
|  | 77afa008dd | ||
|  | db535435b3 | ||
|  | c2a453b461 | ||
|  | cd29eaab95 | ||
|  | 52aa7e7476 | ||
|  | e97c55ee6a | ||
|  | acfccacad5 | ||
|  | 5f2c2b7936 | ||
|  | cb55908e51 | ||
|  | e581224843 | ||
|  | f50365e91c | ||
|  | c366f8d30a | ||
|  | 6a26c5f9d5 | ||
|  | bd6fb007de | ||
|  | b69b2ff736 | ||
|  | 794e5dcd7e | ||
|  | f0d3669437 | ||
|  | 98e698f1ff | ||
|  | 3cddb8d6a7 | ||
|  | 990d533ee4 | ||
|  | b0081562d2 | ||
|  | fff37cfd4f | ||
|  | a3be69b7f0 | ||
|  | 0fd1b1624c | ||
|  | 367976d49f | ||
|  | 0aef0771f8 | ||
|  | 0c070681c5 | ||
|  | 30b25d382d | ||
|  | e5f878c205 | ||
|  | e2e84aed7e | ||
|  | b1927f4e8a | ||
|  | 3b9323d96e | ||
|  | 7f832413d6 | ||
|  | 7f2ed47595 | ||
|  | c3fa77bdef | ||
|  | 57ce8a6d08 | ||
|  | 69d8eeeec5 | ||
|  | 81c13222c6 | ||
|  | b1ce2ba197 | ||
|  | 5c8411e968 | ||
|  | cc9c8ce5df | ||
|  | 20ef4123b9 | ||
|  | 4e62d26aa2 | ||
|  | b657816684 | ||
|  | 9778b3e7ee | ||
|  | 25dd58ca6a | ||
|  | 5e42f8a0ad | ||
|  | 1ad6b891b2 | ||
|  | 7aa589a5e1 | ||
|  | 065bc35489 | ||
|  | 3a380766d1 | ||
|  | affaea0688 | ||
|  | 77426a087b | ||
|  | 8991844ea2 | ||
|  | 082395d0a0 | ||
|  | e8ed7354e6 | ||
|  | 1e7f602e2a | ||
|  | 522f6c066d | ||
|  | 321b5e082a | ||
|  | 3711fa1eb2 | ||
|  | 395c74615c | ||
|  | 3dc240e8c6 | ||
|  | a41a6c5094 | ||
|  | d71207121d | ||
|  | b1c6f21c74 | ||
|  | 412abb8760 | ||
|  | f17d5f6d14 | ||
|  | 6bb801cfaf | ||
|  | de02d1f4e9 | ||
|  | e1f93a0a76 | ||
|  | d21a661bb4 | ||
|  | b2bd968f4b | ||
|  | 4a01befb34 | ||
|  | 845dfcdc40 | ||
|  | d92cb46305 | ||
|  | a8795327ca | ||
|  | d34995a9e3 | ||
|  | 958849275f | ||
|  | 998f094452 | ||
|  | aaa42cf0cf | ||
|  | 9fb64c04cd | ||
|  | f9622868e7 | ||
|  | 37768f9242 | ||
|  | a1aadd09a4 | ||
|  | b47a75017b | ||
|  | e37b54b140 | ||
|  | c1decda58c | ||
|  | d3f8e038fe | ||
|  | ad152e2d95 | ||
|  | b0af12154e | ||
|  | d16b3c6677 | ||
|  | c57244cdb1 | ||
|  | a7e5f27412 | ||
|  | 089a40955c | ||
|  | d73ebac100 | ||
|  | e563c0d73b | ||
|  | 491c42e690 | ||
|  | 7f2339c617 | ||
|  | 8122e79fef | ||
|  | fe3ad1d456 | ||
|  | 038a5e1a65 | ||
|  | 84bc23b41b | ||
|  | 46933a15d6 | ||
|  | 3859ebeee6 | ||
|  | d50aca41f8 | ||
|  | 0ca057b965 | ||
|  | 5ca968d0a6 | ||
|  | f0d31c624e | ||
|  | 08c655906c | ||
|  | 5a993e1692 | ||
|  | a7d2953073 | ||
|  | fdd0b8f8e0 | ||
|  | f65dc41b72 | ||
|  | 962250f7ea | ||
|  | 7dc2a74e0a | ||
|  | b02b960c6b | ||
|  | 4f427c4be8 | ||
|  | 8a00ea567b | ||
|  | 8895be01fc | ||
|  | 52e7fcfeb7 | ||
|  | 2396062c74 | ||
|  | 14704aeff6 | ||
|  | 3c2c3af059 | ||
|  | 1891ea2d76 | ||
|  | 1094074c04 | ||
|  | 217d5ae013 | ||
|  | 8b40854529 | ||
|  | 6bb0fbf9fb | ||
|  | 8d3b226b83 | ||
|  | 42b7a5afe0 | ||
|  | 899d2bea63 | ||
|  | 9cb0e65d7e | ||
|  | b070564efb | ||
|  | ce28252c48 | ||
|  | 3aa9a73554 | ||
|  | 6a9b3b61ea | ||
|  | 45408eb075 | ||
|  | eafc66855d | ||
|  | e03d3e6453 | ||
|  | a70e45f80a | ||
|  | 697655a7c0 | ||
|  | e382b953f0 | ||
|  | 116e7e0d04 | ||
|  | cf03e34ad3 | ||
|  | 2903137292 | ||
|  | 9361f2169c | ||
|  | 35aa6c538f | ||
|  | fa9f1d16b8 | ||
|  | 485fedf6fd | ||
|  | da0baba5c8 | ||
|  | bb9f3bfedf | ||
|  | dbc0b39b91 | ||
|  | 481c5c5137 | ||
|  | 0cacae2807 | ||
|  | d9d56deadf | ||
|  | 74ba450a81 | ||
|  | db19df6ca0 | ||
|  | fbdf8d15d1 | ||
|  | 94aae01548 | ||
|  | 39eef54cf0 | ||
|  | 05c8268c81 | ||
|  | 289a16b4f3 | ||
|  | 7935926baa | ||
|  | dcbb07c35a | ||
|  | 40090e8d51 | 
							
								
								
									
										8
									
								
								.github/ISSUE_TEMPLATE.md
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										8
									
								
								.github/ISSUE_TEMPLATE.md
									
									
									
									
										vendored
									
									
								
							| @@ -6,8 +6,8 @@ | ||||
|  | ||||
| --- | ||||
|  | ||||
| ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.24*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. | ||||
| - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.24** | ||||
| ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.15*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. | ||||
| - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.15** | ||||
|  | ||||
| ### Before submitting an *issue* make sure you have: | ||||
| - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections | ||||
| @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> | ||||
| [debug] User config: [] | ||||
| [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] | ||||
| [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 | ||||
| [debug] youtube-dl version 2016.07.24 | ||||
| [debug] youtube-dl version 2016.09.15 | ||||
| [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 | ||||
| [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 | ||||
| [debug] Proxy map: {} | ||||
| @@ -55,4 +55,4 @@ $ youtube-dl -v <your command line> | ||||
| ### Description of your *issue*, suggested solution and other information | ||||
|  | ||||
| Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible. | ||||
| If work on your *issue* required an account credentials please provide them or explain how one can obtain them. | ||||
| If work on your *issue* requires account credentials please provide them or explain how one can obtain them. | ||||
|   | ||||
							
								
								
									
										2
									
								
								.github/ISSUE_TEMPLATE_tmpl.md
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/ISSUE_TEMPLATE_tmpl.md
									
									
									
									
										vendored
									
									
								
							| @@ -55,4 +55,4 @@ $ youtube-dl -v <your command line> | ||||
| ### Description of your *issue*, suggested solution and other information | ||||
|  | ||||
| Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible. | ||||
| If work on your *issue* required an account credentials please provide them or explain how one can obtain them. | ||||
| If work on your *issue* requires account credentials please provide them or explain how one can obtain them. | ||||
|   | ||||
							
								
								
									
										6
									
								
								AUTHORS
									
									
									
									
									
								
							
							
						
						
									
										6
									
								
								AUTHORS
									
									
									
									
									
								
							| @@ -179,3 +179,9 @@ Jakub Adam Wieczorek | ||||
| Aleksandar Topuzović | ||||
| Nehal Patel | ||||
| Rob van Bekkum | ||||
| Petr Zvoníček | ||||
| Pratyush Singh | ||||
| Aleksander Nitecki | ||||
| Sebastian Blunt | ||||
| Matěj Cepl | ||||
| Xie Yanbo | ||||
|   | ||||
| @@ -46,7 +46,7 @@ Make sure that someone has not already opened the issue you're trying to open. S | ||||
|  | ||||
| ###  Why are existing options not enough? | ||||
|  | ||||
| Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#synopsis). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem. | ||||
| Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem. | ||||
|  | ||||
| ###  Is there enough context in your bug report? | ||||
|  | ||||
|   | ||||
							
								
								
									
										658
									
								
								ChangeLog
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										658
									
								
								ChangeLog
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,658 @@ | ||||
| version 2016.09.15 | ||||
|  | ||||
| Core | ||||
| * Improve _hidden_inputs | ||||
| + Introduce improved explicit Adobe Pass support | ||||
| + Add --ap-mso to provide multiple-system operator identifier | ||||
| + Add --ap-username to provide MSO account username | ||||
| + Add --ap-password to provide MSO account password | ||||
| + Add --ap-list-mso to list all supported MSOs | ||||
| + Add support for Rogers Cable multiple-system operator (#10606) | ||||
|  | ||||
| Extractors | ||||
| * [crunchyroll] Fix authentication (#10655) | ||||
| * [twitch] Fix API calls (#10654, #10660) | ||||
| + [bellmedia] Add support for more Bell Media Television sites | ||||
| * [franceinter] Fix extraction (#10538, #2105) | ||||
| * [kuwo] Improve error detection (#10650) | ||||
| + [go] Add support for free full episodes (#10439) | ||||
| * [bilibili] Fix extraction for specific videos (#10647) | ||||
| * [nhk] Fix extraction (#10633) | ||||
| * [kaltura] Improve audio detection | ||||
| * [kaltura] Skip chun format | ||||
| + [vimeo:ondemand] Pass Referer along with embed URL (#10624) | ||||
| + [nbc] Add support for NBC Olympics (#10361) | ||||
|  | ||||
|  | ||||
| version 2016.09.11.1 | ||||
|  | ||||
| Extractors | ||||
| + [tube8] Extract categories and tags (#10579) | ||||
| + [pornhub] Extract categories and tags (#10499) | ||||
| * [openload] Temporary fix (#10408) | ||||
| + [foxnews] Add support for Fox News articles (#10598) | ||||
| * [viafree] Improve video id extraction (#10615) | ||||
| * [iwara] Fix extraction after relaunch (#10462, #3215) | ||||
| + [tfo] Add extractor for tfo.org | ||||
| * [lrt] Fix audio extraction (#10566) | ||||
| * [9now] Fix extraction (#10561) | ||||
| + [canalplus] Add support for c8.fr (#10577) | ||||
| * [newgrounds] Fix uploader extraction (#10584) | ||||
| + [polskieradio:category] Add support for category lists (#10576) | ||||
| + [ketnet] Add extractor for ketnet.be (#10343) | ||||
| + [canvas] Add support for een.be (#10605) | ||||
| + [telequebec] Add extractor for telequebec.tv (#1999) | ||||
| * [parliamentliveuk] Fix extraction (#9137) | ||||
|  | ||||
|  | ||||
| version 2016.09.08 | ||||
|  | ||||
| Extractors | ||||
| + [jwplatform] Extract height from format label | ||||
| + [yahoo] Extract Brightcove Legacy Studio embeds (#9345) | ||||
| * [videomore] Fix extraction (#10592) | ||||
| * [foxgay] Fix extraction (#10480) | ||||
| + [rmcdecouverte] Add extractor for rmcdecouverte.bfmtv.com (#9709) | ||||
| * [gamestar] Fix metadata extraction (#10479) | ||||
| * [puls4] Fix extraction (#10583) | ||||
| + [cctv] Add extractor for CCTV and CNTV (#8153) | ||||
| + [lci] Add extractor for lci.fr (#10573) | ||||
| + [wat] Extract DASH formats | ||||
| + [viafree] Improve video id detection (#10569) | ||||
| + [trutv] Add extractor for trutv.com (#10519) | ||||
| + [nick] Add support for nickelodeon.nl (#10559) | ||||
| + [abcotvs:clips] Add support for clips.abcotvs.com | ||||
| + [abcotvs] Add support for ABC Owned Television Stations sites (#9551) | ||||
| + [miaopai] Add extractor for miaopai.com (#10556) | ||||
| * [gamestar] Fix metadata extraction (#10479) | ||||
| + [bilibili] Add support for episodes (#10190) | ||||
| + [tvnoe] Add extractor for tvnoe.cz (#10524) | ||||
|  | ||||
|  | ||||
| version 2016.09.04.1 | ||||
|  | ||||
| Core | ||||
| * In DASH downloader if the first segment fails, abort the whole download | ||||
|   process to prevent throttling (#10497) | ||||
| + Add support for --skip-unavailable-fragments and --fragment-retries in | ||||
|   hlsnative downloader (#10165, #10448). | ||||
| + Add support for --skip-unavailable-fragments in DASH downloader | ||||
| + Introduce --skip-unavailable-fragments option for fragment based downloaders | ||||
|   that allows skipping fragments unavailable due to an HTTP error | ||||
| * Fix extraction of video/audio entries with src attribute in | ||||
|   _parse_html5_media_entries (#10540) | ||||
|  | ||||
| Extractors | ||||
| * [theplatform] Relax URL regular expression (#10546) | ||||
| * [youtube:playlist] Extend URL regular expression | ||||
| * [rottentomatoes] Delegate extraction to internetvideoarchive extractor | ||||
| * [internetvideoarchive] Extract all formats | ||||
| * [pornvoisines] Fix extraction (#10469) | ||||
| * [rottentomatoes] Fix extraction (#10467) | ||||
| * [espn] Extend URL regular expression (#10549) | ||||
| * [vimple] Extend URL regular expression (#10547) | ||||
| * [youtube:watchlater] Fix extraction (#10544) | ||||
| * [youjizz] Fix extraction (#10437) | ||||
| + [foxnews] Add support for FoxNews Insider (#10445) | ||||
| + [fc2] Recognize Flash player URLs (#10512) | ||||
|  | ||||
|  | ||||
| version 2016.09.03 | ||||
|  | ||||
| Core | ||||
| * Restore usage of NAME attribute from EXT-X-MEDIA tag for format codes in | ||||
|   _extract_m3u8_formats (#10522) | ||||
| * Handle semicolon in mimetype2ext | ||||
|  | ||||
| Extractors | ||||
| + [youtube] Add support for rental videos' previews (#10532) | ||||
| * [youtube:playlist] Fallback to video extraction for video/playlist URLs when | ||||
|   no playlist is actually served (#10537) | ||||
| + [drtv] Add support for dr.dk/nyheder (#10536) | ||||
| + [facebook:plugins:video] Add extractor (#10530) | ||||
| + [go] Add extractor for *.go.com sites | ||||
| * [adobepass] Check for authz_token expiration (#10527) | ||||
| * [nytimes] Improve extraction | ||||
| * [thestar] Fix extraction (#10465) | ||||
| * [glide] Fix extraction (#10478) | ||||
| - [exfm] Remove extractor (#10482) | ||||
| * [youporn] Fix categories and tags extraction (#10521) | ||||
| + [curiositystream] Add extractor for app.curiositystream.com | ||||
| - [thvideo] Remove extractor (#10464) | ||||
| * [movingimage] Fix for the new site name (#10466) | ||||
| + [cbs] Add support for once formats (#10515) | ||||
| * [limelight] Skip ism and duplicate manifests | ||||
| + [porncom] Extract categories and tags (#10510) | ||||
| + [facebook] Extract timestamp (#10508) | ||||
| + [yahoo] Extract more formats | ||||
|  | ||||
|  | ||||
| version 2016.08.31 | ||||
|  | ||||
| Extractors | ||||
| * [soundcloud] Fix URL regular expression to avoid clashes with sets (#10505) | ||||
| * [bandcamp:album] Fix title extraction (#10455) | ||||
| * [pyvideo] Fix extraction (#10468) | ||||
| + [ctv] Add support for tsn.ca, bnn.ca and thecomedynetwork.ca (#10016) | ||||
| * [9c9media] Extract more metadata | ||||
| * [9c9media] Fix multiple stacks extraction (#10016) | ||||
| * [adultswim] Improve video info extraction (#10492) | ||||
| * [vodplatform] Improve embed regular expression | ||||
| - [played] Remove extractor (#10470) | ||||
| + [tbs] Add extractor for tbs.com and tntdrama.com (#10222) | ||||
| + [cartoonnetwork] Add extractor for cartoonnetwork.com (#10110) | ||||
| * [adultswim] Rework in terms of turner extractor | ||||
| * [cnn] Rework in terms of turner extractor | ||||
| * [nba] Rework in terms of turner extractor | ||||
| + [turner] Add base extractor for Turner Broadcasting System based sites | ||||
| * [bilibili] Fix extraction (#10375) | ||||
| * [openload] Fix extraction (#10408) | ||||
|  | ||||
|  | ||||
| version 2016.08.28 | ||||
|  | ||||
| Core | ||||
| + Add warning message that ffmpeg doesn't support SOCKS | ||||
| * Improve thumbnail sorting | ||||
| + Extract formats from #EXT-X-MEDIA tags in _extract_m3u8_formats | ||||
| * Fill IV with leading zeros for IVs shorter than 16 octets in hlsnative | ||||
| + Add ac-3 to the list of audio codecs in parse_codecs | ||||
|  | ||||
| Extractors | ||||
| * [periscope:user] Fix extraction (#10453) | ||||
| * [douyutv] Fix extraction (#10153, #10318, #10444) | ||||
| + [nhk:vod] Add extractor for www3.nhk.or.jp on demand (#4437, #10424) | ||||
| - [trutube] Remove extractor (#10438) | ||||
| + [usanetwork] Add extractor for usanetwork.com | ||||
| * [crackle] Fix extraction (#10333) | ||||
| * [spankbang] Fix description and uploader extraction (#10339) | ||||
| * [discoverygo] Detect cable provider restricted videos (#10425) | ||||
| + [cbc] Add support for watch.cbc.ca | ||||
| * [kickstarter] Silence the warning for og:description (#10415) | ||||
| * [mtvservices:embedded] Fix extraction for the new 'edge' player (#10363) | ||||
|  | ||||
|  | ||||
| version 2016.08.24.1 | ||||
|  | ||||
| Extractors | ||||
| + [pluralsight] Add support for subtitles (#9681) | ||||
|  | ||||
|  | ||||
| version 2016.08.24 | ||||
|  | ||||
| Extractors | ||||
| * [youtube] Fix authentication (#10392) | ||||
| * [openload] Fix extraction (#10408) | ||||
| + [bravotv] Add support for Adobe Pass (#10407) | ||||
| * [bravotv] Fix clip info extraction (#10407) | ||||
| * [eagleplatform] Improve embedded videos detection (#10409) | ||||
| * [awaan] Fix extraction | ||||
| * [mtvservices:embedded] Update config URL | ||||
| + [abc:iview] Add extractor (#6148) | ||||
|  | ||||
|  | ||||
| version 2016.08.22 | ||||
|  | ||||
| Core | ||||
| * Improve formats and subtitles extension auto calculation | ||||
| + Recognize full unit names in parse_filesize | ||||
| + Add support for m3u8 manifests in HTML5 multimedia tags | ||||
| * Fix octal/hexadecimal number detection in js_to_json | ||||
|  | ||||
| Extractors | ||||
| + [ivi] Add support for 720p and 1080p | ||||
| + [charlierose] Add new extractor (#10382) | ||||
| * [1tv] Fix extraction (#9249) | ||||
| * [twitch] Renew authentication | ||||
| * [kaltura] Improve subtitles extension calculation | ||||
| + [zingmp3] Add support for video clips | ||||
| * [zingmp3] Fix extraction (#10041) | ||||
| * [kaltura] Improve subtitles extraction (#10279) | ||||
| * [cultureunplugged] Fix extraction (#10330) | ||||
| + [cnn] Add support for money.cnn.com (#2797) | ||||
| * [cbsnews] Fix extraction (#10362) | ||||
| * [cbs] Fix extraction (#10393) | ||||
| + [litv] Support 'promo' URLs (#10385) | ||||
| * [snotr] Fix extraction (#10338) | ||||
| * [n-tv.de] Fix extraction (#10331) | ||||
| * [globo:article] Relax URL and video id regular expressions (#10379) | ||||
|  | ||||
|  | ||||
| version 2016.08.19 | ||||
|  | ||||
| Core | ||||
| - Remove output template description from --help | ||||
| * Recognize lowercase units in parse_filesize | ||||
|  | ||||
| Extractors | ||||
| + [porncom] Add extractor for porn.com (#2251, #10251) | ||||
| + [generic] Add support for DBTV embeds | ||||
| * [vk:wallpost] Fix audio extraction for new site layout | ||||
| * [vk] Fix authentication | ||||
| + [hgtvcom:show] Add extractor for hgtv.com shows (#10365) | ||||
| + [discoverygo] Add support for other GO network sites | ||||
|  | ||||
|  | ||||
| version 2016.08.17 | ||||
|  | ||||
| Core | ||||
| + Add _get_netrc_login_info | ||||
|  | ||||
| Extractors | ||||
| * [mofosex] Extract all formats (#10335) | ||||
| + [generic] Add support for vbox7 embeds | ||||
| + [vbox7] Add support for embed URLs | ||||
| + [viafree] Add extractor (#10358) | ||||
| + [mtg] Add support for viafree URLs (#10358) | ||||
| * [theplatform] Extract all subtitles per language | ||||
| + [xvideos] Fix HLS extraction (#10356) | ||||
| + [amcnetworks] Add extractor | ||||
| + [bbc:playlist] Add support for pagination (#10349) | ||||
| + [fxnetworks] Add extractor (#9462) | ||||
| * [cbslocal] Fix extraction for SendtoNews-based videos | ||||
| * [sendtonews] Fix extraction | ||||
| * [jwplatform] Extract video id from JWPlayer data | ||||
| - [zippcast] Remove extractor (#10332) | ||||
| + [viceland] Add extractor (#8799) | ||||
| + [adobepass] Add base extractor for Adobe Pass Authentication | ||||
| * [life:embed] Improve extraction | ||||
| * [vgtv] Detect geo restricted videos (#10348) | ||||
| + [uplynk] Add extractor | ||||
| * [xiami] Fix extraction (#10342) | ||||
|  | ||||
|  | ||||
| version 2016.08.13 | ||||
|  | ||||
| Core | ||||
| * Show progress for curl external downloader | ||||
| * Forward more options to curl external downloader | ||||
|  | ||||
| Extractors | ||||
| * [pbs] Fix description extraction | ||||
| * [franceculture] Fix extraction (#10324) | ||||
| * [pornotube] Fix extraction (#10322) | ||||
| * [4tube] Fix metadata extraction (#10321) | ||||
| * [imgur] Fix width and height extraction (#10325) | ||||
| * [expotv] Improve extraction | ||||
| + [vbox7] Fix extraction (#10309) | ||||
| - [tapely] Remove extractor (#10323) | ||||
| * [muenchentv] Fix extraction (#10313) | ||||
| + [24video] Add support for .me and .xxx TLDs | ||||
| * [24video] Fix comment count extraction | ||||
| * [sunporno] Add support for embed URLs | ||||
| * [sunporno] Fix metadata extraction (#10316) | ||||
| + [hgtv] Add extractor for hgtv.ca (#3999) | ||||
| - [pbs] Remove request to unavailable API | ||||
| + [pbs] Add support for high quality HTTP formats | ||||
| + [crunchyroll] Add support for HLS formats (#10301) | ||||
|  | ||||
|  | ||||
| version 2016.08.12 | ||||
|  | ||||
| Core | ||||
| * Subtitles are now written as is. Newline conversions are disabled. (#10268) | ||||
| + Recognize more formats in unified_timestamp | ||||
|  | ||||
| Extractors | ||||
| - [goldenmoustache] Remove extractor (#10298) | ||||
| * [drtuber] Improve title extraction | ||||
| * [drtuber] Make dislike count optional (#10297) | ||||
| * [chirbit] Fix extraction (#10296) | ||||
| * [francetvinfo] Relax URL regular expression | ||||
| * [rtlnl] Relax URL regular expression (#10282) | ||||
| * [formula1] Relax URL regular expression (#10283) | ||||
| * [wat] Improve extraction (#10281) | ||||
| * [ctsnews] Fix extraction | ||||
|  | ||||
|  | ||||
| version 2016.08.10 | ||||
|  | ||||
| Core | ||||
| * Make --metadata-from-title non fatal when title does not match the pattern | ||||
| * Introduce options for randomized sleep before each download | ||||
|   --min-sleep-interval and --max-sleep-interval (#9930) | ||||
| * Respect default in _search_json_ld | ||||
|  | ||||
| Extractors | ||||
| + [uol] Add extractor for uol.com.br (#4263) | ||||
| * [rbmaradio] Fix extraction and extract all formats (#10242) | ||||
| + [sonyliv] Add extractor for sonyliv.com (#10258) | ||||
| * [aparat] Fix extraction | ||||
| * [cwtv] Extract HTTP formats | ||||
| + [rozhlas] Add extractor for prehravac.rozhlas.cz (#10253) | ||||
| * [kuwo:singer] Fix extraction | ||||
|  | ||||
|  | ||||
| version 2016.08.07 | ||||
|  | ||||
| Core | ||||
| + Add support for TV Parental Guidelines ratings in parse_age_limit | ||||
| + Add decode_png (#9706) | ||||
| + Add support for partOfTVSeries in JSON-LD | ||||
| * Lower master M3U8 manifest preference for better format sorting | ||||
|  | ||||
| Extractors | ||||
| + [discoverygo] Add extractor (#10245) | ||||
| * [flipagram] Make JSON-LD extraction non fatal | ||||
| * [generic] Make JSON-LD extraction non fatal | ||||
| + [bbc] Add support for morph embeds (#10239) | ||||
| * [tnaflixnetworkbase] Improve title extraction | ||||
| * [tnaflix] Fix metadata extraction (#10249) | ||||
| * [fox] Fix theplatform release URL query | ||||
| * [openload] Fix extraction (#9706) | ||||
| * [bbc] Skip duplicate manifest URLs | ||||
| * [bbc] Improve format code | ||||
| + [bbc] Add support for DASH and F4M | ||||
| * [bbc] Improve format sorting and listing | ||||
| * [bbc] Improve playlist extraction | ||||
| + [pokemon] Add extractor (#10093) | ||||
| + [condenast] Add fallback scenario for video info extraction | ||||
|  | ||||
|  | ||||
| version 2016.08.06 | ||||
|  | ||||
| Core | ||||
| * Add support for JSON-LD root list entries (#10203) | ||||
| * Improve unified_timestamp | ||||
| * Lower preference of RTSP formats in generic sorting | ||||
| + Add support for multiple properties in _og_search_property | ||||
| * Improve password hiding from verbose output | ||||
|  | ||||
| Extractors | ||||
| + [adultswim] Add support for trailers (#10235) | ||||
| * [archiveorg] Improve extraction (#10219) | ||||
| + [jwplatform] Add support for playlists | ||||
| + [jwplatform] Add support for relative URLs | ||||
| * [jwplatform] Improve audio detection | ||||
| + [tvplay] Capture and output native error message | ||||
| + [tvplay] Extract series metadata | ||||
| + [tvplay] Add support for subtitles (#10194) | ||||
| * [tvp] Improve extraction (#7799) | ||||
| * [cbslocal] Fix timestamp parsing (#10213) | ||||
| + [naver] Add support for subtitles (#8096) | ||||
| * [naver] Improve extraction | ||||
| * [condenast] Improve extraction | ||||
| * [engadget] Relax URL regular expression | ||||
| * [5min] Fix extraction | ||||
| + [nationalgeographic] Add support for Episode Guide | ||||
| + [kaltura] Add support for subtitles | ||||
| * [kaltura] Optimize network requests | ||||
| + [vodplatform] Add extractor for vod-platform.net | ||||
| - [gamekings] Remove extractor | ||||
| * [limelight] Extract HTTP formats | ||||
| * [ntvru] Fix extraction | ||||
| + [comedycentral] Re-add :tds and :thedailyshow shortnames | ||||
|  | ||||
|  | ||||
| version 2016.08.01 | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - [yandexmusic:track] Adapt to changes in track location JSON (#10193) | ||||
| - [bloomberg] Support another form of player (#10187) | ||||
| - [limelight] Skip DRM protected videos | ||||
| - [safari] Relax regular expressions for URL matching (#10202) | ||||
| - [cwtv] Add support for cwtvpr.com (#10196) | ||||
|  | ||||
|  | ||||
| version 2016.07.30 | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - [twitch:clips] Sort formats | ||||
| - [tv2] Use m3u8_native | ||||
| - [tv2:article] Fix video detection (#10188) | ||||
| - rtve (#10076) | ||||
| - [dailymotion:playlist] Optimize download archive processing (#10180) | ||||
|  | ||||
|  | ||||
| version 2016.07.28 | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - shared (#10170) | ||||
| - soundcloud (#10179) | ||||
| - twitch (#9767) | ||||
|  | ||||
|  | ||||
| version 2016.07.26.2 | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - smotri | ||||
| - camdemy | ||||
| - mtv | ||||
| - comedycentral | ||||
| - cmt | ||||
| - cbc | ||||
| - mgtv | ||||
| - orf | ||||
|  | ||||
|  | ||||
| version 2016.07.24 | ||||
|  | ||||
| New extractors | ||||
| - arkena (#8682) | ||||
| - lcp (#8682) | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - facebook (#10151) | ||||
| - dailymail | ||||
| - telegraaf | ||||
| - dcn | ||||
| - onet | ||||
| - tvp | ||||
|  | ||||
| Miscellaneous | ||||
| - Support $Time$ in DASH manifests | ||||
|  | ||||
|  | ||||
| version 2016.07.22 | ||||
|  | ||||
| New extractors | ||||
| - odatv (#9285) | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - bbc | ||||
| - youjizz (#10131) | ||||
| - youtube (#10140) | ||||
| - pornhub (#10138) | ||||
| - eporner (#10139) | ||||
|  | ||||
|  | ||||
| version 2016.07.17 | ||||
|  | ||||
| New extractors | ||||
| - nintendo (#9986) | ||||
| - streamable (#9122) | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - ard (#10095) | ||||
| - mtv | ||||
| - comedycentral (#10101) | ||||
| - viki (#10098) | ||||
| - spike (#10106) | ||||
|  | ||||
| Miscellaneous | ||||
| - Improved twitter player detection (#10090) | ||||
|  | ||||
|  | ||||
| version 2016.07.16 | ||||
|  | ||||
| New extractors | ||||
| - ninenow (#5181) | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - rtve (#10076) | ||||
| - brightcove | ||||
| - 3qsdn | ||||
| - syfy (#9087, #3820, #2388) | ||||
| - youtube (#10083) | ||||
|  | ||||
| Miscellaneous | ||||
| - Fix subtitle embedding for video-only and audio-only files (#10081) | ||||
|  | ||||
|  | ||||
| version 2016.07.13 | ||||
|  | ||||
| New extractors | ||||
| - rudo | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - biobiochiletv | ||||
| - tvplay | ||||
| - dbtv | ||||
| - brightcove | ||||
| - tmz | ||||
| - youtube (#10059) | ||||
| - shahid (#10062) | ||||
| - vk | ||||
| - ellentv (#10067) | ||||
|  | ||||
|  | ||||
| version 2016.07.11 | ||||
|  | ||||
| New extractors | ||||
| - roosterteeth (#9864) | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - miomio (#9605) | ||||
| - vuclip | ||||
| - youtube | ||||
| - vidzi (#10058) | ||||
|  | ||||
|  | ||||
| version 2016.07.09.2 | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - vimeo (#1638) | ||||
| - facebook (#10048) | ||||
| - lynda (#10047) | ||||
| - animeondemand | ||||
|  | ||||
| Fixed/improved features | ||||
| - Embedding subtitles no longer throws an error with problematic inputs (#9063) | ||||
|  | ||||
|  | ||||
| version 2016.07.09.1 | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - youtube | ||||
| - ard | ||||
| - srmediatek (#9373) | ||||
|  | ||||
|  | ||||
| version 2016.07.09 | ||||
|  | ||||
| New extractors | ||||
| - Flipagram (#9898) | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - telecinco | ||||
| - toutv | ||||
| - radiocanada | ||||
| - tweakers (#9516) | ||||
| - lynda | ||||
| - nick (#7542) | ||||
| - polskieradio (#10028) | ||||
| - le | ||||
| - facebook (#9851) | ||||
| - mgtv | ||||
| - animeondemand (#10031) | ||||
|  | ||||
| Fixed/improved features | ||||
| - `--postprocessor-args` and `--downloader-args` now accept non-ASCII inputs | ||||
|   on non-Windows systems | ||||
|  | ||||
|  | ||||
| version 2016.07.07 | ||||
|  | ||||
| New extractors | ||||
| - kamcord (#10001) | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - spiegel (#10018) | ||||
| - metacafe (#8539, #3253) | ||||
| - onet (#9950) | ||||
| - francetv (#9955) | ||||
| - brightcove (#9965) | ||||
| - daum (#9972) | ||||
|  | ||||
|  | ||||
| version 2016.07.06 | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - youtube (#10007, #10009) | ||||
| - xuite | ||||
| - stitcher | ||||
| - spiegel | ||||
| - slideshare | ||||
| - sandia | ||||
| - rtvnh | ||||
| - prosiebensat1 | ||||
| - onionstudios | ||||
|  | ||||
|  | ||||
| version 2016.07.05 | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - brightcove | ||||
| - yahoo (#9995) | ||||
| - pornhub (#9997) | ||||
| - iqiyi | ||||
| - kaltura (#5557) | ||||
| - la7 | ||||
|  | ||||
| Changed features | ||||
| - Rename --cn-verification-proxy to --geo-verification-proxy | ||||
|  | ||||
| Miscellaneous | ||||
| - Add script for displaying downloads statistics | ||||
|  | ||||
|  | ||||
| version 2016.07.03.1 | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - theplatform | ||||
| - aenetworks | ||||
| - nationalgeographic | ||||
| - hrti (#9482) | ||||
| - facebook (#5701) | ||||
| - buzzfeed (#5701) | ||||
| - rai (#8617, #9157, #9232, #8552, #8551) | ||||
| - nationalgeographic (#9991) | ||||
| - iqiyi | ||||
|  | ||||
|  | ||||
| version 2016.07.03 | ||||
|  | ||||
| New extractors | ||||
| - hrti (#9482) | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - vk (#9981) | ||||
| - facebook (#9938) | ||||
| - xtube (#9953, #9961) | ||||
|  | ||||
|  | ||||
| version 2016.07.02 | ||||
|  | ||||
| New extractors | ||||
| - fusion (#9958) | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - twitch (#9975) | ||||
| - vine (#9970) | ||||
| - periscope (#9967) | ||||
| - pornhub (#8696) | ||||
|  | ||||
|  | ||||
| version 2016.07.01 | ||||
|  | ||||
| New extractors | ||||
| - 9c9media | ||||
| - ctvnews (#2156) | ||||
| - ctv (#4077) | ||||
|  | ||||
| Fixed/improved extractors | ||||
| - rds | ||||
| - meta (#8789) | ||||
| - pornhub (#9964) | ||||
| - sixplay (#2183) | ||||
|  | ||||
| New features | ||||
| - Accept quoted strings across multiple lines (#9940) | ||||
							
								
								
									
										4
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								Makefile
									
									
									
									
									
								
							| @@ -94,7 +94,7 @@ _EXTRACTOR_FILES != find youtube_dl/extractor -iname '*.py' -and -not -iname 'la | ||||
| youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES) | ||||
| 	$(PYTHON) devscripts/make_lazy_extractors.py $@ | ||||
|  | ||||
| youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish | ||||
| youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish ChangeLog | ||||
| 	@tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \ | ||||
| 		--exclude '*.DS_Store' \ | ||||
| 		--exclude '*.kate-swp' \ | ||||
| @@ -107,7 +107,7 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash- | ||||
| 		--exclude 'docs/_build' \ | ||||
| 		-- \ | ||||
| 		bin devscripts test youtube_dl docs \ | ||||
| 		LICENSE README.md README.txt \ | ||||
| 		ChangeLog LICENSE README.md README.txt \ | ||||
| 		Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion \ | ||||
| 		youtube-dl.zsh youtube-dl.fish setup.py \ | ||||
| 		youtube-dl | ||||
|   | ||||
							
								
								
									
										114
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										114
									
								
								README.md
									
									
									
									
									
								
							| @@ -89,6 +89,8 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|     --mark-watched                   Mark videos watched (YouTube only) | ||||
|     --no-mark-watched                Do not mark videos watched (YouTube only) | ||||
|     --no-color                       Do not emit color codes in output | ||||
|     --abort-on-unavailable-fragment  Abort downloading when some fragment is not | ||||
|                                      available | ||||
|  | ||||
| ## Network Options: | ||||
|     --proxy URL                      Use the specified HTTP/HTTPS/SOCKS proxy. | ||||
| @@ -173,7 +175,10 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|     -R, --retries RETRIES            Number of retries (default is 10), or | ||||
|                                      "infinite". | ||||
|     --fragment-retries RETRIES       Number of retries for a fragment (default | ||||
|                                      is 10), or "infinite" (DASH only) | ||||
|                                      is 10), or "infinite" (DASH and hlsnative | ||||
|                                      only) | ||||
|     --skip-unavailable-fragments     Skip unavailable fragments (DASH and | ||||
|                                      hlsnative only) | ||||
|     --buffer-size SIZE               Size of download buffer (e.g. 1024 or 16K) | ||||
|                                      (default is 1024) | ||||
|     --no-resize-buffer               Do not automatically adjust the buffer | ||||
| @@ -201,32 +206,8 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|     -a, --batch-file FILE            File containing URLs to download ('-' for | ||||
|                                      stdin) | ||||
|     --id                             Use only video ID in file name | ||||
|     -o, --output TEMPLATE            Output filename template. Use %(title)s to | ||||
|                                      get the title, %(uploader)s for the | ||||
|                                      uploader name, %(uploader_id)s for the | ||||
|                                      uploader nickname if different, | ||||
|                                      %(autonumber)s to get an automatically | ||||
|                                      incremented number, %(ext)s for the | ||||
|                                      filename extension, %(format)s for the | ||||
|                                      format description (like "22 - 1280x720" or | ||||
|                                      "HD"), %(format_id)s for the unique id of | ||||
|                                      the format (like YouTube's itags: "137"), | ||||
|                                      %(upload_date)s for the upload date | ||||
|                                      (YYYYMMDD), %(extractor)s for the provider | ||||
|                                      (youtube, metacafe, etc), %(id)s for the | ||||
|                                      video id, %(playlist_title)s, | ||||
|                                      %(playlist_id)s, or %(playlist)s (=title if | ||||
|                                      present, ID otherwise) for the playlist the | ||||
|                                      video is in, %(playlist_index)s for the | ||||
|                                      position in the playlist. %(height)s and | ||||
|                                      %(width)s for the width and height of the | ||||
|                                      video format. %(resolution)s for a textual | ||||
|                                      description of the resolution of the video | ||||
|                                      format. %% for a literal percent. Use - to | ||||
|                                      output to stdout. Can also be used to | ||||
|                                      download to a different directory, for | ||||
|                                      example with -o '/my/downloads/%(uploader)s | ||||
|                                      /%(title)s-%(id)s.%(ext)s' . | ||||
|     -o, --output TEMPLATE            Output filename template, see the "OUTPUT | ||||
|                                      TEMPLATE" for all the info | ||||
|     --autonumber-size NUMBER         Specify the number of digits in | ||||
|                                      %(autonumber)s when it is present in output | ||||
|                                      filename template or --auto-number option | ||||
| @@ -330,7 +311,15 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|                                      bidirectional text support. Requires bidiv | ||||
|                                      or fribidi executable in PATH | ||||
|     --sleep-interval SECONDS         Number of seconds to sleep before each | ||||
|                                      download. | ||||
|                                      download when used alone or a lower bound | ||||
|                                      of a range for randomized sleep before each | ||||
|                                      download (minimum possible number of | ||||
|                                      seconds to sleep) when used along with | ||||
|                                      --max-sleep-interval. | ||||
|     --max-sleep-interval SECONDS     Upper bound of a range for randomized sleep | ||||
|                                      before each download (maximum possible | ||||
|                                      number of seconds to sleep). Must only be | ||||
|                                      used along with --min-sleep-interval. | ||||
|  | ||||
| ## Video Format Options: | ||||
|     -f, --format FORMAT              Video format code, see the "FORMAT | ||||
| @@ -369,6 +358,17 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|     -n, --netrc                      Use .netrc authentication data | ||||
|     --video-password PASSWORD        Video password (vimeo, smotri, youku) | ||||
|  | ||||
| ## Adobe Pass Options: | ||||
|     --ap-mso MSO                     Adobe Pass multiple-system operator (TV | ||||
|                                      provider) identifier, use --ap-list-mso for | ||||
|                                      a list of available MSOs | ||||
|     --ap-username USERNAME           Multiple-system operator account login | ||||
|     --ap-password PASSWORD           Multiple-system operator account password. | ||||
|                                      If this option is left out, youtube-dl will | ||||
|                                      ask interactively. | ||||
|     --ap-list-mso                    List all supported multiple-system | ||||
|                                      operators | ||||
|  | ||||
| ## Post-processing Options: | ||||
|     -x, --extract-audio              Convert video files to audio-only files | ||||
|                                      (requires ffmpeg or avconv and ffprobe or | ||||
| @@ -428,11 +428,19 @@ You can configure youtube-dl by placing any supported command line option to a c | ||||
|  | ||||
| For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory: | ||||
| ``` | ||||
| -x | ||||
| --no-mtime | ||||
| --proxy 127.0.0.1:3128 | ||||
| -o ~/Movies/%(title)s.%(ext)s | ||||
| # Lines starting with # are comments | ||||
|  | ||||
| # Always extract audio | ||||
| -x | ||||
|  | ||||
| # Do not copy the mtime | ||||
| --no-mtime | ||||
|  | ||||
| # Use this proxy | ||||
| --proxy 127.0.0.1:3128 | ||||
|  | ||||
| # Save all videos under Movies directory in your home directory | ||||
| -o ~/Movies/%(title)s.%(ext)s | ||||
| ``` | ||||
|  | ||||
| Note that options in configuration file are just the same options aka switches used in regular command line calls thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`. | ||||
| @@ -661,7 +669,11 @@ $ youtube-dl -f 'best[filesize<50M]' | ||||
|  | ||||
| # Download best format available via direct link over HTTP/HTTPS protocol | ||||
| $ youtube-dl -f '(bestvideo+bestaudio/best)[protocol^=http]' | ||||
|  | ||||
| # Download the best video format and the best audio format without merging them | ||||
| $ youtube-dl -f 'bestvideo,bestaudio' -o '%(title)s.f%(format_id)s.%(ext)s' | ||||
| ``` | ||||
| Note that in the last example, an output template is recommended as bestvideo and bestaudio may have the same file name. | ||||
|  | ||||
|  | ||||
| # VIDEO SELECTION | ||||
| @@ -742,7 +754,7 @@ Videos or video formats streamed via RTMP protocol can only be downloaded when [ | ||||
|  | ||||
| ### I have downloaded a video but how can I play it? | ||||
|  | ||||
| Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](http://www.videolan.org) or [mplayer](http://www.mplayerhq.hu/). | ||||
| Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](http://www.videolan.org/) or [mplayer](http://www.mplayerhq.hu/). | ||||
|  | ||||
| ### I extracted a video URL with `-g`, but it does not play on another machine / in my webbrowser. | ||||
|  | ||||
| @@ -824,10 +836,42 @@ Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the opt | ||||
|  | ||||
| ### How do I pass cookies to youtube-dl? | ||||
|  | ||||
| Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`. Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows, `LF` (`\n`) for Linux and `CR` (`\r`) for Mac OS. `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format. | ||||
| Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`. | ||||
|  | ||||
| In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [Export Cookies](https://addons.mozilla.org/en-US/firefox/addon/export-cookies/) (for Firefox). | ||||
|  | ||||
| Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows, `LF` (`\n`) for Linux and `CR` (`\r`) for Mac OS. `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format. | ||||
|  | ||||
| Passing cookies to youtube-dl is a good way to work around login when a particular extractor does not implement it explicitly. Another use case is working around a [CAPTCHA](https://en.wikipedia.org/wiki/CAPTCHA) that some websites require you to solve in particular cases in order to get access (e.g. YouTube, CloudFlare). | ||||
|  | ||||
| ### How do I stream directly to media player? | ||||
|  | ||||
| You will first need to tell youtube-dl to stream media to stdout with `-o -`, and also tell your media player to read from stdin (it must be capable of this for streaming) and then pipe the former to the latter. For example, streaming to [vlc](http://www.videolan.org/) can be achieved with: | ||||
|  | ||||
|     youtube-dl -o - "http://www.youtube.com/watch?v=BaW_jenozKc" | vlc - | ||||
|  | ||||
| ### How do I download only new videos from a playlist? | ||||
|  | ||||
| Use the download-archive feature. With this feature you should initially download the complete playlist with `--download-archive /path/to/download/archive/file.txt` that will record identifiers of all the videos in a special file. Each subsequent run with the same `--download-archive` will download only new videos and skip all videos that have been downloaded before. Note that only successful downloads are recorded in the file. | ||||
|  | ||||
| For example, at first, | ||||
|  | ||||
|     youtube-dl --download-archive archive.txt "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re" | ||||
|  | ||||
| will download the complete `PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re` playlist and create a file `archive.txt`. Each subsequent run will only download new videos if any: | ||||
|  | ||||
|     youtube-dl --download-archive archive.txt "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re" | ||||
|  | ||||
| ### Should I add `--hls-prefer-native` into my config? | ||||
|  | ||||
| When youtube-dl detects an HLS video, it can download it either with the built-in downloader or ffmpeg. Since many HLS streams are slightly invalid and ffmpeg/youtube-dl each handle some invalid cases better than the other, there is an option to switch the downloader if needed. | ||||
|  | ||||
| When youtube-dl knows that one particular downloader works better for a given website, that downloader will be picked. Otherwise, youtube-dl will pick the best downloader for general compatibility, which at the moment happens to be ffmpeg. This choice may change in future versions of youtube-dl, with improvements of the built-in downloader and/or ffmpeg. | ||||
|  | ||||
| In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](http://rg3.github.io/youtube-dl/supportedsites.html)) cannot mandate one specific downloader. | ||||
|  | ||||
| If you put either `--hls-prefer-native` or `--hls-prefer-ffmpeg` into your configuration, a different subset of videos will fail to download correctly. Instead, it is much better to [file an issue](https://yt-dl.org/bug) or a pull request which details why the native or the ffmpeg HLS downloader is a better choice for your use case. | ||||
|  | ||||
| ### Can you add support for this anime video site, or site which shows current movies for free? | ||||
|  | ||||
| As a matter of policy (as well as legality), youtube-dl does not include support for services that specialize in infringing copyright. As a rule of thumb, if you cannot easily find a video that the service is quite obviously allowed to distribute (i.e. that has been uploaded by the creator, the creator's distributor, or is published under a free license), the service is probably unfit for inclusion to youtube-dl. | ||||
| @@ -1196,7 +1240,7 @@ Make sure that someone has not already opened the issue you're trying to open. S | ||||
|  | ||||
| ###  Why are existing options not enough? | ||||
|  | ||||
| Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#synopsis). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem. | ||||
| Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem. | ||||
|  | ||||
| ###  Is there enough context in your bug report? | ||||
|  | ||||
|   | ||||
| @@ -54,7 +54,11 @@ def filter_options(readme): | ||||
|  | ||||
|         if in_options: | ||||
|             if line.lstrip().startswith('-'): | ||||
|                 option, description = re.split(r'\s{2,}', line.lstrip()) | ||||
|                 split = re.split(r'\s{2,}', line.lstrip()) | ||||
|                 # Description string may start with `-` as well. If there is | ||||
|                 # only one piece then it's a description bit not an option. | ||||
|                 if len(split) > 1: | ||||
|                     option, description = split | ||||
|                     split_option = option.split(' ') | ||||
|  | ||||
|                     if not split_option[-1].startswith('-'):  # metavar | ||||
| @@ -63,7 +67,7 @@ def filter_options(readme): | ||||
|                     # Pandoc's definition_lists. See http://pandoc.org/README.html | ||||
|                     # for more information. | ||||
|                     ret += '\n%s\n:   %s\n' % (option, description) | ||||
|             else: | ||||
|                     continue | ||||
|             ret += line.lstrip() + '\n' | ||||
|         else: | ||||
|             ret += line + '\n' | ||||
|   | ||||
| @@ -60,6 +60,9 @@ if ! type pandoc >/dev/null 2>/dev/null; then echo 'ERROR: pandoc is missing'; e | ||||
| if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missing'; exit 1; fi | ||||
| if ! python3 -c 'import wheel' 2>/dev/null; then echo 'ERROR: wheel is missing'; exit 1; fi | ||||
|  | ||||
| read -p "Is ChangeLog up to date? (y/n) " -n 1 | ||||
| if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi | ||||
|  | ||||
| /bin/echo -e "\n### First of all, testing..." | ||||
| make clean | ||||
| if $skip_tests ; then | ||||
| @@ -71,9 +74,12 @@ fi | ||||
| /bin/echo -e "\n### Changing version in version.py..." | ||||
| sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py | ||||
|  | ||||
| /bin/echo -e "\n### Changing version in ChangeLog..." | ||||
| sed -i "s/<unreleased>/$version/" ChangeLog | ||||
|  | ||||
| /bin/echo -e "\n### Committing documentation, templates and youtube_dl/version.py..." | ||||
| make README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md supportedsites | ||||
| git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md docs/supportedsites.md youtube_dl/version.py | ||||
| git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md docs/supportedsites.md youtube_dl/version.py ChangeLog | ||||
| git commit $gpg_sign_commits -m "release $version" | ||||
|  | ||||
| /bin/echo -e "\n### Now tagging, signing and pushing..." | ||||
|   | ||||
| @@ -1,6 +1,7 @@ | ||||
| #!/usr/bin/env python | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import itertools | ||||
| import json | ||||
| import os | ||||
| import re | ||||
| @@ -21,10 +22,15 @@ def format_size(bytes): | ||||
|  | ||||
| total_bytes = 0 | ||||
|  | ||||
| releases = json.loads(compat_urllib_request.urlopen( | ||||
|     'https://api.github.com/repos/rg3/youtube-dl/releases').read().decode('utf-8')) | ||||
| for page in itertools.count(1): | ||||
|     releases = json.loads(compat_urllib_request.urlopen( | ||||
|         'https://api.github.com/repos/rg3/youtube-dl/releases?page=%s' % page | ||||
|     ).read().decode('utf-8')) | ||||
|  | ||||
| for release in releases: | ||||
|     if not releases: | ||||
|         break | ||||
|  | ||||
|     for release in releases: | ||||
|         compat_print(release['name']) | ||||
|         for asset in release['assets']: | ||||
|             asset_name = asset['name'] | ||||
|   | ||||
| @@ -13,12 +13,16 @@ | ||||
|  - **5min** | ||||
|  - **8tracks** | ||||
|  - **91porn** | ||||
|  - **9c9media** | ||||
|  - **9c9media:stack** | ||||
|  - **9gag** | ||||
|  - **9now.com.au** | ||||
|  - **abc.net.au** | ||||
|  - **Abc7News** | ||||
|  - **abc.net.au:iview** | ||||
|  - **abcnews** | ||||
|  - **abcnews:video** | ||||
|  - **abcotvs**: ABC Owned Television Stations | ||||
|  - **abcotvs:clips** | ||||
|  - **AcademicEarth:Course** | ||||
|  - **acast** | ||||
|  - **acast:channel** | ||||
| @@ -35,6 +39,7 @@ | ||||
|  - **AlJazeera** | ||||
|  - **Allocine** | ||||
|  - **AlphaPorno** | ||||
|  - **AMCNetworks** | ||||
|  - **AnimeOnDemand** | ||||
|  - **anitube.se** | ||||
|  - **AnySex** | ||||
| @@ -65,6 +70,10 @@ | ||||
|  - **audiomack** | ||||
|  - **audiomack:album** | ||||
|  - **auroravid**: AuroraVid | ||||
|  - **AWAAN** | ||||
|  - **awaan:live** | ||||
|  - **awaan:season** | ||||
|  - **awaan:video** | ||||
|  - **Azubu** | ||||
|  - **AzubuLive** | ||||
|  - **BaiduVideo**: 百度视频 | ||||
| @@ -80,6 +89,7 @@ | ||||
|  - **BeatportPro** | ||||
|  - **Beeg** | ||||
|  - **BehindKink** | ||||
|  - **BellMedia** | ||||
|  - **Bet** | ||||
|  - **Bigflix** | ||||
|  - **Bild**: Bild.de | ||||
| @@ -109,17 +119,22 @@ | ||||
|  - **Canvas** | ||||
|  - **CarambaTV** | ||||
|  - **CarambaTVPage** | ||||
|  - **CBC** | ||||
|  - **CBCPlayer** | ||||
|  - **CartoonNetwork** | ||||
|  - **cbc.ca** | ||||
|  - **cbc.ca:player** | ||||
|  - **cbc.ca:watch** | ||||
|  - **cbc.ca:watch:video** | ||||
|  - **CBS** | ||||
|  - **CBSInteractive** | ||||
|  - **CBSLocal** | ||||
|  - **CBSNews**: CBS News | ||||
|  - **CBSNewsLiveVideo**: CBS News Live Videos | ||||
|  - **CBSSports** | ||||
|  - **CCTV** | ||||
|  - **CDA** | ||||
|  - **CeskaTelevize** | ||||
|  - **channel9**: Channel 9 | ||||
|  - **CharlieRose** | ||||
|  - **Chaturbate** | ||||
|  - **Chilloutzone** | ||||
|  - **chirbit** | ||||
| @@ -142,7 +157,7 @@ | ||||
|  - **CollegeRama** | ||||
|  - **ComCarCoff** | ||||
|  - **ComedyCentral** | ||||
|  - **ComedyCentralShows**: The Daily Show / The Colbert Report | ||||
|  - **ComedyCentralShortname** | ||||
|  - **ComedyCentralTV** | ||||
|  - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED | ||||
|  - **Coub** | ||||
| @@ -155,10 +170,11 @@ | ||||
|  - **CSNNE** | ||||
|  - **CSpan**: C-SPAN | ||||
|  - **CtsNews**: 華視新聞 | ||||
|  - **CTV** | ||||
|  - **CTVNews** | ||||
|  - **culturebox.francetvinfo.fr** | ||||
|  - **CultureUnplugged** | ||||
|  - **curiositystream** | ||||
|  - **curiositystream:collection** | ||||
|  - **CWTV** | ||||
|  - **DailyMail** | ||||
|  - **dailymotion** | ||||
| @@ -170,10 +186,6 @@ | ||||
|  - **daum.net:playlist** | ||||
|  - **daum.net:user** | ||||
|  - **DBTV** | ||||
|  - **DCN** | ||||
|  - **dcn:live** | ||||
|  - **dcn:season** | ||||
|  - **dcn:video** | ||||
|  - **DctpTv** | ||||
|  - **DeezerPlaylist** | ||||
|  - **defense.gouv.fr** | ||||
| @@ -182,6 +194,7 @@ | ||||
|  - **DigitallySpeaking** | ||||
|  - **Digiteka** | ||||
|  - **Discovery** | ||||
|  - **DiscoveryGo** | ||||
|  - **Dotsub** | ||||
|  - **DouyuTV**: 斗鱼 | ||||
|  - **DPlay** | ||||
| @@ -214,13 +227,14 @@ | ||||
|  - **EsriVideo** | ||||
|  - **Europa** | ||||
|  - **EveryonesMixtape** | ||||
|  - **exfm**: ex.fm | ||||
|  - **ExpoTV** | ||||
|  - **ExtremeTube** | ||||
|  - **EyedoTV** | ||||
|  - **facebook** | ||||
|  - **FacebookPluginsVideo** | ||||
|  - **faz.net** | ||||
|  - **fc2** | ||||
|  - **fc2:embed** | ||||
|  - **Fczenit** | ||||
|  - **features.aol.com** | ||||
|  - **fernsehkritik.tv** | ||||
| @@ -233,11 +247,12 @@ | ||||
|  - **Formula1** | ||||
|  - **FOX** | ||||
|  - **Foxgay** | ||||
|  - **FoxNews**: Fox News and Fox Business Video | ||||
|  - **foxnews**: Fox News and Fox Business Video | ||||
|  - **foxnews:article** | ||||
|  - **foxnews:insider** | ||||
|  - **FoxSports** | ||||
|  - **france2.fr:generation-quoi** | ||||
|  - **FranceCulture** | ||||
|  - **FranceCultureEmission** | ||||
|  - **FranceInter** | ||||
|  - **francetv**: France 2, 3, 4, 5 and Ô | ||||
|  - **francetvinfo.fr** | ||||
| @@ -247,8 +262,8 @@ | ||||
|  - **Funimation** | ||||
|  - **FunnyOrDie** | ||||
|  - **Fusion** | ||||
|  - **FXNetworks** | ||||
|  - **GameInformer** | ||||
|  - **Gamekings** | ||||
|  - **GameOne** | ||||
|  - **gameone:playlist** | ||||
|  - **Gamersyde** | ||||
| @@ -263,9 +278,9 @@ | ||||
|  - **Glide**: Glide mobile video messages (glide.me) | ||||
|  - **Globo** | ||||
|  - **GloboArticle** | ||||
|  - **Go** | ||||
|  - **GodTube** | ||||
|  - **GodTV** | ||||
|  - **GoldenMoustache** | ||||
|  - **Golem** | ||||
|  - **GoogleDrive** | ||||
|  - **Goshgay** | ||||
| @@ -278,6 +293,8 @@ | ||||
|  - **HellPorno** | ||||
|  - **Helsinki**: helsinki.fi | ||||
|  - **HentaiStigma** | ||||
|  - **HGTV** | ||||
|  - **hgtv.com:show** | ||||
|  - **HistoricFilms** | ||||
|  - **history:topic**: History.com Topic | ||||
|  - **hitbox** | ||||
| @@ -310,6 +327,7 @@ | ||||
|  - **ivi**: ivi.ru | ||||
|  - **ivi:compilation**: ivi.ru compilations | ||||
|  - **ivideon**: Ivideon TV | ||||
|  - **Iwara** | ||||
|  - **Izlesene** | ||||
|  - **JeuxVideo** | ||||
|  - **Jove** | ||||
| @@ -323,6 +341,7 @@ | ||||
|  - **KarriereVideos** | ||||
|  - **keek** | ||||
|  - **KeezMovies** | ||||
|  - **Ketnet** | ||||
|  - **KhanAcademy** | ||||
|  - **KickStarter** | ||||
|  - **KonserthusetPlay** | ||||
| @@ -338,6 +357,7 @@ | ||||
|  - **kuwo:song**: 酷我音乐 | ||||
|  - **la7.it** | ||||
|  - **Laola1Tv** | ||||
|  - **LCI** | ||||
|  - **Lcp** | ||||
|  - **LcpPlay** | ||||
|  - **Le**: 乐视网 | ||||
| @@ -376,6 +396,7 @@ | ||||
|  - **Metacritic** | ||||
|  - **Mgoon** | ||||
|  - **MGTV**: 芒果TV | ||||
|  - **MiaoPai** | ||||
|  - **Minhateca** | ||||
|  - **MinistryGrid** | ||||
|  - **Minoto** | ||||
| @@ -397,11 +418,12 @@ | ||||
|  - **MovieClips** | ||||
|  - **MovieFap** | ||||
|  - **Moviezine** | ||||
|  - **MovingImage** | ||||
|  - **MPORA** | ||||
|  - **MSN** | ||||
|  - **mtg**: MTG services | ||||
|  - **MTV** | ||||
|  - **mtv.de** | ||||
|  - **mtviggy.com** | ||||
|  - **mtvservices:embedded** | ||||
|  - **MuenchenTV**: münchen.tv | ||||
|  - **MusicPlayOn** | ||||
| @@ -417,11 +439,13 @@ | ||||
|  - **MyVidster** | ||||
|  - **n-tv.de** | ||||
|  - **natgeo** | ||||
|  - **natgeo:channel** | ||||
|  - **natgeo:episodeguide** | ||||
|  - **natgeo:video** | ||||
|  - **Naver** | ||||
|  - **NBA** | ||||
|  - **NBC** | ||||
|  - **NBCNews** | ||||
|  - **NBCOlympics** | ||||
|  - **NBCSports** | ||||
|  - **NBCSportsVPlayer** | ||||
|  - **ndr**: NDR.de - Norddeutscher Rundfunk | ||||
| @@ -441,9 +465,9 @@ | ||||
|  - **Newstube** | ||||
|  - **NextMedia**: 蘋果日報 | ||||
|  - **NextMediaActionNews**: 蘋果日報 - 動新聞 | ||||
|  - **nextmovie.com** | ||||
|  - **nfb**: National Film Board of Canada | ||||
|  - **nfl.com** | ||||
|  - **NhkVod** | ||||
|  - **nhl.com** | ||||
|  - **nhl.com:news**: NHL news | ||||
|  - **nhl.com:videocenter** | ||||
| @@ -452,7 +476,6 @@ | ||||
|  - **nick.de** | ||||
|  - **niconico**: ニコニコ動画 | ||||
|  - **NiconicoPlaylist** | ||||
|  - **NineCNineMedia** | ||||
|  - **Nintendo** | ||||
|  - **njoy**: N-JOY | ||||
|  - **njoy:embed** | ||||
| @@ -510,7 +533,6 @@ | ||||
|  - **Pinkbike** | ||||
|  - **Pladform** | ||||
|  - **play.fm** | ||||
|  - **played.to** | ||||
|  - **PlaysTV** | ||||
|  - **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz | ||||
|  - **Playvid** | ||||
| @@ -520,7 +542,10 @@ | ||||
|  - **plus.google**: Google Plus | ||||
|  - **pluzz.francetv.fr** | ||||
|  - **podomatic** | ||||
|  - **Pokemon** | ||||
|  - **PolskieRadio** | ||||
|  - **PolskieRadioCategory** | ||||
|  - **PornCom** | ||||
|  - **PornHd** | ||||
|  - **PornHub**: PornHub and Thumbzilla | ||||
|  - **PornHubPlaylist** | ||||
| @@ -560,10 +585,12 @@ | ||||
|  - **revision3:embed** | ||||
|  - **RICE** | ||||
|  - **RingTV** | ||||
|  - **RMCDecouverte** | ||||
|  - **RockstarGames** | ||||
|  - **RoosterTeeth** | ||||
|  - **RottenTomatoes** | ||||
|  - **Roxwel** | ||||
|  - **Rozhlas** | ||||
|  - **RTBF** | ||||
|  - **rte**: Raidió Teilifís Éireann TV | ||||
|  - **rte:radio**: Raidió Teilifís Éireann radio | ||||
| @@ -621,6 +648,7 @@ | ||||
|  - **smotri:user**: Smotri.com user videos | ||||
|  - **Snotr** | ||||
|  - **Sohu** | ||||
|  - **SonyLIV** | ||||
|  - **soundcloud** | ||||
|  - **soundcloud:playlist** | ||||
|  - **soundcloud:search**: Soundcloud search | ||||
| @@ -647,7 +675,6 @@ | ||||
|  - **sr:mediathek**: Saarländischer Rundfunk | ||||
|  - **SRGSSR** | ||||
|  - **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites | ||||
|  - **SSA** | ||||
|  - **stanfordoc**: Stanford Open ClassRoom | ||||
|  - **Steam** | ||||
|  - **Stitcher** | ||||
| @@ -663,8 +690,8 @@ | ||||
|  - **SztvHu** | ||||
|  - **Tagesschau** | ||||
|  - **tagesschau:player** | ||||
|  - **Tapely** | ||||
|  - **Tass** | ||||
|  - **TBS** | ||||
|  - **TDSLifeway** | ||||
|  - **teachertube**: teachertube.com videos | ||||
|  - **teachertube:user:collection**: teachertube.com user and collection videos | ||||
| @@ -679,9 +706,11 @@ | ||||
|  - **Telecinco**: telecinco.es, cuatro.com and mediaset.es | ||||
|  - **Telegraaf** | ||||
|  - **TeleMB** | ||||
|  - **TeleQuebec** | ||||
|  - **TeleTask** | ||||
|  - **Telewebion** | ||||
|  - **TF1** | ||||
|  - **TFO** | ||||
|  - **TheIntercept** | ||||
|  - **ThePlatform** | ||||
|  - **ThePlatformFeed** | ||||
| @@ -690,8 +719,6 @@ | ||||
|  - **TheStar** | ||||
|  - **ThisAmericanLife** | ||||
|  - **ThisAV** | ||||
|  - **THVideo** | ||||
|  - **THVideoPlaylist** | ||||
|  - **tinypic**: tinypic.com videos | ||||
|  - **tlc.de** | ||||
|  - **TMZ** | ||||
| @@ -699,13 +726,13 @@ | ||||
|  - **TNAFlix** | ||||
|  - **TNAFlixNetworkEmbed** | ||||
|  - **toggle** | ||||
|  - **Tosh**: Tosh.0 | ||||
|  - **tou.tv** | ||||
|  - **Toypics**: Toypics user profile | ||||
|  - **ToypicsUser**: Toypics user profile | ||||
|  - **TrailerAddict** (Currently broken) | ||||
|  - **Trilulilu** | ||||
|  - **trollvids** | ||||
|  - **TruTube** | ||||
|  - **TruTV** | ||||
|  - **Tube8** | ||||
|  - **TubiTv** | ||||
|  - **tudou** | ||||
| @@ -727,9 +754,10 @@ | ||||
|  - **TVCArticle** | ||||
|  - **tvigle**: Интернет-телевидение Tvigle.ru | ||||
|  - **tvland.com** | ||||
|  - **TVNoe** | ||||
|  - **tvp**: Telewizja Polska | ||||
|  - **tvp:embed**: Telewizja Polska | ||||
|  - **tvp:series** | ||||
|  - **TVPlay**: TV3Play and related services | ||||
|  - **Tweakers** | ||||
|  - **twitch:chapter** | ||||
|  - **twitch:clips** | ||||
| @@ -745,8 +773,12 @@ | ||||
|  - **udemy:course** | ||||
|  - **UDNEmbed**: 聯合影音 | ||||
|  - **Unistra** | ||||
|  - **uol.com.br** | ||||
|  - **uplynk** | ||||
|  - **uplynk:preplay** | ||||
|  - **Urort**: NRK P3 Urørt | ||||
|  - **URPlay** | ||||
|  - **USANetwork** | ||||
|  - **USAToday** | ||||
|  - **ustream** | ||||
|  - **ustream:channel** | ||||
| @@ -762,7 +794,9 @@ | ||||
|  - **VevoPlaylist** | ||||
|  - **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet | ||||
|  - **vh1.com** | ||||
|  - **Viafree** | ||||
|  - **Vice** | ||||
|  - **Viceland** | ||||
|  - **ViceShow** | ||||
|  - **Vidbit** | ||||
|  - **Viddler** | ||||
| @@ -807,6 +841,7 @@ | ||||
|  - **vk:wallpost** | ||||
|  - **vlive** | ||||
|  - **Vodlocker** | ||||
|  - **VODPlatform** | ||||
|  - **VoiceRepublic** | ||||
|  - **VoxMedia** | ||||
|  - **Vporn** | ||||
| @@ -881,6 +916,4 @@ | ||||
|  - **Zapiks** | ||||
|  - **ZDF** | ||||
|  - **ZDFChannel** | ||||
|  - **zingmp3:album**: mp3.zing.vn albums | ||||
|  - **zingmp3:song**: mp3.zing.vn songs | ||||
|  - **ZippCast** | ||||
|  - **zingmp3**: mp3.zing.vn | ||||
|   | ||||
| @@ -48,6 +48,9 @@ class TestInfoExtractor(unittest.TestCase): | ||||
|         self.assertEqual(ie._og_search_property('foobar', html), 'Foo') | ||||
|         self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar') | ||||
|         self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar') | ||||
|         self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar') | ||||
|         self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True) | ||||
|         self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True) | ||||
|  | ||||
|     def test_html_search_meta(self): | ||||
|         ie = self.ie | ||||
|   | ||||
| @@ -39,9 +39,12 @@ from youtube_dl.utils import ( | ||||
|     is_html, | ||||
|     js_to_json, | ||||
|     limit_length, | ||||
|     mimetype2ext, | ||||
|     month_by_name, | ||||
|     ohdave_rsa_encrypt, | ||||
|     OnDemandPagedList, | ||||
|     orderedSet, | ||||
|     parse_age_limit, | ||||
|     parse_duration, | ||||
|     parse_filesize, | ||||
|     parse_count, | ||||
| @@ -308,6 +311,7 @@ class TestUtil(unittest.TestCase): | ||||
|         self.assertEqual(unified_timestamp('25-09-2014'), 1411603200) | ||||
|         self.assertEqual(unified_timestamp('27.02.2016 17:30'), 1456594200) | ||||
|         self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None) | ||||
|         self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500) | ||||
|  | ||||
|     def test_determine_ext(self): | ||||
|         self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4') | ||||
| @@ -431,6 +435,20 @@ class TestUtil(unittest.TestCase): | ||||
|             url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'), | ||||
|             'trailer.mp4') | ||||
|  | ||||
|     def test_parse_age_limit(self): | ||||
|         self.assertEqual(parse_age_limit(None), None) | ||||
|         self.assertEqual(parse_age_limit(False), None) | ||||
|         self.assertEqual(parse_age_limit('invalid'), None) | ||||
|         self.assertEqual(parse_age_limit(0), 0) | ||||
|         self.assertEqual(parse_age_limit(18), 18) | ||||
|         self.assertEqual(parse_age_limit(21), 21) | ||||
|         self.assertEqual(parse_age_limit(22), None) | ||||
|         self.assertEqual(parse_age_limit('18'), 18) | ||||
|         self.assertEqual(parse_age_limit('18+'), 18) | ||||
|         self.assertEqual(parse_age_limit('PG-13'), 13) | ||||
|         self.assertEqual(parse_age_limit('TV-14'), 14) | ||||
|         self.assertEqual(parse_age_limit('TV-MA'), 17) | ||||
|  | ||||
|     def test_parse_duration(self): | ||||
|         self.assertEqual(parse_duration(None), None) | ||||
|         self.assertEqual(parse_duration(False), None) | ||||
| @@ -609,6 +627,22 @@ class TestUtil(unittest.TestCase): | ||||
|             limit_length('foo bar baz asd', 12).startswith('foo bar')) | ||||
|         self.assertTrue('...' in limit_length('foo bar baz asd', 12)) | ||||
|  | ||||
|     def test_mimetype2ext(self): | ||||
|         self.assertEqual(mimetype2ext(None), None) | ||||
|         self.assertEqual(mimetype2ext('video/x-flv'), 'flv') | ||||
|         self.assertEqual(mimetype2ext('application/x-mpegURL'), 'm3u8') | ||||
|         self.assertEqual(mimetype2ext('text/vtt'), 'vtt') | ||||
|         self.assertEqual(mimetype2ext('text/vtt;charset=utf-8'), 'vtt') | ||||
|         self.assertEqual(mimetype2ext('text/html; charset=utf-8'), 'html') | ||||
|  | ||||
|     def test_month_by_name(self): | ||||
|         self.assertEqual(month_by_name(None), None) | ||||
|         self.assertEqual(month_by_name('December', 'en'), 12) | ||||
|         self.assertEqual(month_by_name('décembre', 'fr'), 12) | ||||
|         self.assertEqual(month_by_name('December'), 12) | ||||
|         self.assertEqual(month_by_name('décembre'), None) | ||||
|         self.assertEqual(month_by_name('Unknown', 'unknown'), None) | ||||
|  | ||||
|     def test_parse_codecs(self): | ||||
|         self.assertEqual(parse_codecs(''), {}) | ||||
|         self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), { | ||||
| @@ -696,6 +730,9 @@ class TestUtil(unittest.TestCase): | ||||
|         inp = '''{"foo":101}''' | ||||
|         self.assertEqual(js_to_json(inp), '''{"foo":101}''') | ||||
|  | ||||
|         inp = '''{"duration": "00:01:07"}''' | ||||
|         self.assertEqual(js_to_json(inp), '''{"duration": "00:01:07"}''') | ||||
|  | ||||
|     def test_js_to_json_edgecases(self): | ||||
|         on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") | ||||
|         self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"}) | ||||
| @@ -801,7 +838,10 @@ class TestUtil(unittest.TestCase): | ||||
|         self.assertEqual(parse_filesize('2 MiB'), 2097152) | ||||
|         self.assertEqual(parse_filesize('5 GB'), 5000000000) | ||||
|         self.assertEqual(parse_filesize('1.2Tb'), 1200000000000) | ||||
|         self.assertEqual(parse_filesize('1.2tb'), 1200000000000) | ||||
|         self.assertEqual(parse_filesize('1,24 KB'), 1240) | ||||
|         self.assertEqual(parse_filesize('1,24 kb'), 1240) | ||||
|         self.assertEqual(parse_filesize('8.5 megabytes'), 8500000) | ||||
|  | ||||
|     def test_parse_count(self): | ||||
|         self.assertEqual(parse_count(None), None) | ||||
| @@ -952,6 +992,7 @@ The first line | ||||
|         self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128']) | ||||
|         self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), []) | ||||
|         self.assertEqual(cli_option({}, '--proxy', 'proxy'), []) | ||||
|         self.assertEqual(cli_option({'retries': 10}, '--retries', 'retries'), ['--retries', '10']) | ||||
|  | ||||
|     def test_cli_valueless_option(self): | ||||
|         self.assertEqual(cli_valueless_option( | ||||
|   | ||||
							
								
								
									
test/test_verbose_output.py (70 lines, normal file)
| @@ -0,0 +1,70 @@ | ||||
| #!/usr/bin/env python | ||||
| # coding: utf-8 | ||||
|  | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import unittest | ||||
|  | ||||
| import sys | ||||
| import os | ||||
| import subprocess | ||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) | ||||
|  | ||||
|  | ||||
| class TestVerboseOutput(unittest.TestCase): | ||||
|     def test_private_info_arg(self): | ||||
|         outp = subprocess.Popen( | ||||
|             [ | ||||
|                 sys.executable, 'youtube_dl/__main__.py', '-v', | ||||
|                 '--username', 'johnsmith@gmail.com', | ||||
|                 '--password', 'secret', | ||||
|             ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | ||||
|         sout, serr = outp.communicate() | ||||
|         self.assertTrue(b'--username' in serr) | ||||
|         self.assertTrue(b'johnsmith' not in serr) | ||||
|         self.assertTrue(b'--password' in serr) | ||||
|         self.assertTrue(b'secret' not in serr) | ||||
|  | ||||
|     def test_private_info_shortarg(self): | ||||
|         outp = subprocess.Popen( | ||||
|             [ | ||||
|                 sys.executable, 'youtube_dl/__main__.py', '-v', | ||||
|                 '-u', 'johnsmith@gmail.com', | ||||
|                 '-p', 'secret', | ||||
|             ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | ||||
|         sout, serr = outp.communicate() | ||||
|         self.assertTrue(b'-u' in serr) | ||||
|         self.assertTrue(b'johnsmith' not in serr) | ||||
|         self.assertTrue(b'-p' in serr) | ||||
|         self.assertTrue(b'secret' not in serr) | ||||
|  | ||||
|     def test_private_info_eq(self): | ||||
|         outp = subprocess.Popen( | ||||
|             [ | ||||
|                 sys.executable, 'youtube_dl/__main__.py', '-v', | ||||
|                 '--username=johnsmith@gmail.com', | ||||
|                 '--password=secret', | ||||
|             ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | ||||
|         sout, serr = outp.communicate() | ||||
|         self.assertTrue(b'--username' in serr) | ||||
|         self.assertTrue(b'johnsmith' not in serr) | ||||
|         self.assertTrue(b'--password' in serr) | ||||
|         self.assertTrue(b'secret' not in serr) | ||||
|  | ||||
|     def test_private_info_shortarg_eq(self): | ||||
|         outp = subprocess.Popen( | ||||
|             [ | ||||
|                 sys.executable, 'youtube_dl/__main__.py', '-v', | ||||
|                 '-u=johnsmith@gmail.com', | ||||
|                 '-p=secret', | ||||
|             ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | ||||
|         sout, serr = outp.communicate() | ||||
|         self.assertTrue(b'-u' in serr) | ||||
|         self.assertTrue(b'johnsmith' not in serr) | ||||
|         self.assertTrue(b'-p' in serr) | ||||
|         self.assertTrue(b'secret' not in serr) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
| @@ -131,6 +131,9 @@ class YoutubeDL(object): | ||||
|     username:          Username for authentication purposes. | ||||
|     password:          Password for authentication purposes. | ||||
|     videopassword:     Password for accessing a video. | ||||
|     ap_mso:            Adobe Pass multiple-system operator identifier. | ||||
|     ap_username:       Multiple-system operator account username. | ||||
|     ap_password:       Multiple-system operator account password. | ||||
|     usenetrc:          Use netrc for authentication instead. | ||||
|     verbose:           Print additional info to stdout. | ||||
|     quiet:             Do not print messages to stdout. | ||||
| @@ -249,7 +252,16 @@ class YoutubeDL(object): | ||||
|     source_address:    (Experimental) Client-side IP address to bind to. | ||||
|     call_home:         Boolean, true iff we are allowed to contact the | ||||
|                        youtube-dl servers for debugging. | ||||
|     sleep_interval:    Number of seconds to sleep before each download. | ||||
|     sleep_interval:    Number of seconds to sleep before each download when | ||||
|                        used alone or a lower bound of a range for randomized | ||||
|                        sleep before each download (minimum possible number | ||||
|                        of seconds to sleep) when used along with | ||||
|                        max_sleep_interval. | ||||
|     max_sleep_interval:Upper bound of a range for randomized sleep before each | ||||
|                        download (maximum possible number of seconds to sleep). | ||||
|                        Must only be used along with sleep_interval. | ||||
|                        Actual sleep time will be a random float from range | ||||
|                        [sleep_interval; max_sleep_interval]. | ||||
|     listformats:       Print an overview of available video formats and exit. | ||||
|     list_thumbnails:   Print a table of all thumbnails and exit. | ||||
|     match_filter:      A function that gets called with the info_dict of | ||||
| @@ -1247,8 +1259,10 @@ class YoutubeDL(object): | ||||
|                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}] | ||||
|         if thumbnails: | ||||
|             thumbnails.sort(key=lambda t: ( | ||||
|                 t.get('preference'), t.get('width'), t.get('height'), | ||||
|                 t.get('id'), t.get('url'))) | ||||
|                 t.get('preference') if t.get('preference') is not None else -1, | ||||
|                 t.get('width') if t.get('width') is not None else -1, | ||||
|                 t.get('height') if t.get('height') is not None else -1, | ||||
|                 t.get('id') if t.get('id') is not None else '', t.get('url'))) | ||||
|             for i, t in enumerate(thumbnails): | ||||
|                 t['url'] = sanitize_url(t['url']) | ||||
|                 if t.get('width') and t.get('height'): | ||||
| @@ -1290,7 +1304,7 @@ class YoutubeDL(object): | ||||
|                 for subtitle_format in subtitle: | ||||
|                     if subtitle_format.get('url'): | ||||
|                         subtitle_format['url'] = sanitize_url(subtitle_format['url']) | ||||
|                     if 'ext' not in subtitle_format: | ||||
|                     if subtitle_format.get('ext') is None: | ||||
|                         subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower() | ||||
|  | ||||
|         if self.params.get('listsubtitles', False): | ||||
| @@ -1345,7 +1359,7 @@ class YoutubeDL(object): | ||||
|                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '', | ||||
|                 ) | ||||
|             # Automatically determine file extension if missing | ||||
|             if 'ext' not in format: | ||||
|             if format.get('ext') is None: | ||||
|                 format['ext'] = determine_ext(format['url']).lower() | ||||
|             # Automatically determine protocol if missing (useful for format | ||||
|             # selection purposes) | ||||
| @@ -1594,7 +1608,9 @@ class YoutubeDL(object): | ||||
|                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format)) | ||||
|                     else: | ||||
|                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename) | ||||
|                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: | ||||
|                         # Use newline='' to prevent conversion of newline characters | ||||
|                         # See https://github.com/rg3/youtube-dl/issues/10268 | ||||
|                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile: | ||||
|                             subfile.write(sub_data) | ||||
|                 except (OSError, IOError): | ||||
|                     self.report_error('Cannot write subtitles file ' + sub_filename) | ||||
|   | ||||
| @@ -34,12 +34,14 @@ from .utils import ( | ||||
|     setproctitle, | ||||
|     std_headers, | ||||
|     write_string, | ||||
|     render_table, | ||||
| ) | ||||
| from .update import update_self | ||||
| from .downloader import ( | ||||
|     FileDownloader, | ||||
| ) | ||||
| from .extractor import gen_extractors, list_extractors | ||||
| from .extractor.adobepass import MSO_INFO | ||||
| from .YoutubeDL import YoutubeDL | ||||
|  | ||||
|  | ||||
| @@ -118,18 +120,26 @@ def _real_main(argv=None): | ||||
|                 desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES)) | ||||
|             write_string(desc + '\n', out=sys.stdout) | ||||
|         sys.exit(0) | ||||
|     if opts.ap_list_mso: | ||||
|         table = [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()] | ||||
|         write_string('Supported TV Providers:\n' + render_table(['mso', 'mso name'], table) + '\n', out=sys.stdout) | ||||
|         sys.exit(0) | ||||
|  | ||||
|     # Conflicting, missing and erroneous options | ||||
|     if opts.usenetrc and (opts.username is not None or opts.password is not None): | ||||
|         parser.error('using .netrc conflicts with giving username/password') | ||||
|     if opts.password is not None and opts.username is None: | ||||
|         parser.error('account username missing\n') | ||||
|     if opts.ap_password is not None and opts.ap_username is None: | ||||
|         parser.error('TV Provider account username missing\n') | ||||
|     if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid): | ||||
|         parser.error('using output template conflicts with using title, video ID or auto number') | ||||
|     if opts.usetitle and opts.useid: | ||||
|         parser.error('using title conflicts with using video ID') | ||||
|     if opts.username is not None and opts.password is None: | ||||
|         opts.password = compat_getpass('Type account password and press [Return]: ') | ||||
|     if opts.ap_username is not None and opts.ap_password is None: | ||||
|         opts.ap_password = compat_getpass('Type TV provider account password and press [Return]: ') | ||||
|     if opts.ratelimit is not None: | ||||
|         numeric_limit = FileDownloader.parse_bytes(opts.ratelimit) | ||||
|         if numeric_limit is None: | ||||
| @@ -145,6 +155,18 @@ def _real_main(argv=None): | ||||
|         if numeric_limit is None: | ||||
|             parser.error('invalid max_filesize specified') | ||||
|         opts.max_filesize = numeric_limit | ||||
|     if opts.sleep_interval is not None: | ||||
|         if opts.sleep_interval < 0: | ||||
|             parser.error('sleep interval must be positive or 0') | ||||
|     if opts.max_sleep_interval is not None: | ||||
|         if opts.max_sleep_interval < 0: | ||||
|             parser.error('max sleep interval must be positive or 0') | ||||
|         if opts.max_sleep_interval < opts.sleep_interval: | ||||
|             parser.error('max sleep interval must be greater than or equal to min sleep interval') | ||||
|     else: | ||||
|         opts.max_sleep_interval = opts.sleep_interval | ||||
|     if opts.ap_mso and opts.ap_mso not in MSO_INFO: | ||||
|         parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers') | ||||
|  | ||||
|     def parse_retries(retries): | ||||
|         if retries in ('inf', 'infinite'): | ||||
| @@ -283,6 +305,9 @@ def _real_main(argv=None): | ||||
|         'password': opts.password, | ||||
|         'twofactor': opts.twofactor, | ||||
|         'videopassword': opts.videopassword, | ||||
|         'ap_mso': opts.ap_mso, | ||||
|         'ap_username': opts.ap_username, | ||||
|         'ap_password': opts.ap_password, | ||||
|         'quiet': (opts.quiet or any_getting or any_printing), | ||||
|         'no_warnings': opts.no_warnings, | ||||
|         'forceurl': opts.geturl, | ||||
| @@ -308,6 +333,7 @@ def _real_main(argv=None): | ||||
|         'nooverwrites': opts.nooverwrites, | ||||
|         'retries': opts.retries, | ||||
|         'fragment_retries': opts.fragment_retries, | ||||
|         'skip_unavailable_fragments': opts.skip_unavailable_fragments, | ||||
|         'buffersize': opts.buffersize, | ||||
|         'noresizebuffer': opts.noresizebuffer, | ||||
|         'continuedl': opts.continue_dl, | ||||
| @@ -370,6 +396,7 @@ def _real_main(argv=None): | ||||
|         'source_address': opts.source_address, | ||||
|         'call_home': opts.call_home, | ||||
|         'sleep_interval': opts.sleep_interval, | ||||
|         'max_sleep_interval': opts.max_sleep_interval, | ||||
|         'external_downloader': opts.external_downloader, | ||||
|         'list_thumbnails': opts.list_thumbnails, | ||||
|         'playlist_items': opts.playlist_items, | ||||
|   | ||||
| @@ -4,6 +4,7 @@ import os | ||||
| import re | ||||
| import sys | ||||
| import time | ||||
| import random | ||||
|  | ||||
| from ..compat import compat_os_name | ||||
| from ..utils import ( | ||||
| @@ -342,8 +343,11 @@ class FileDownloader(object): | ||||
|             }) | ||||
|             return True | ||||
|  | ||||
|         sleep_interval = self.params.get('sleep_interval') | ||||
|         if sleep_interval: | ||||
|         min_sleep_interval = self.params.get('sleep_interval') | ||||
|         if min_sleep_interval: | ||||
|             max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval) | ||||
|             print(min_sleep_interval, max_sleep_interval) | ||||
|             sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval) | ||||
|             self.to_screen('[download] Sleeping %s seconds...' % sleep_interval) | ||||
|             time.sleep(sleep_interval) | ||||
|  | ||||
|   | ||||
| @@ -38,8 +38,10 @@ class DashSegmentsFD(FragmentFD): | ||||
|         segments_filenames = [] | ||||
|  | ||||
|         fragment_retries = self.params.get('fragment_retries', 0) | ||||
|         skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) | ||||
|  | ||||
|         def append_url_to_file(target_url, tmp_filename, segment_name): | ||||
|         def process_segment(segment, tmp_filename, fatal): | ||||
|             target_url, segment_name = segment | ||||
|             target_filename = '%s-%s' % (tmp_filename, segment_name) | ||||
|             count = 0 | ||||
|             while count <= fragment_retries: | ||||
| @@ -52,26 +54,35 @@ class DashSegmentsFD(FragmentFD): | ||||
|                     down.close() | ||||
|                     segments_filenames.append(target_sanitized) | ||||
|                     break | ||||
|                 except (compat_urllib_error.HTTPError, ) as err: | ||||
|                 except compat_urllib_error.HTTPError as err: | ||||
|                     # YouTube may often return 404 HTTP error for a fragment causing the | ||||
|                     # whole download to fail. However if the same fragment is immediately | ||||
|                     # retried with the same request data this usually succeeds (1-2 attempts | ||||
|                     # is usually enough) thus allowing to download the whole file successfully. | ||||
|                     # So, we will retry all fragments that fail with 404 HTTP error for now. | ||||
|                     if err.code != 404: | ||||
|                         raise | ||||
|                     # Retry fragment | ||||
|                     # To be future-proof we will retry all fragments that fail with any | ||||
|                     # HTTP error. | ||||
|                     count += 1 | ||||
|                     if count <= fragment_retries: | ||||
|                         self.report_retry_fragment(segment_name, count, fragment_retries) | ||||
|                         self.report_retry_fragment(err, segment_name, count, fragment_retries) | ||||
|             if count > fragment_retries: | ||||
|                 if not fatal: | ||||
|                     self.report_skip_fragment(segment_name) | ||||
|                     return True | ||||
|                 self.report_error('giving up after %s fragment retries' % fragment_retries) | ||||
|                 return False | ||||
|             return True | ||||
|  | ||||
|         if initialization_url: | ||||
|             append_url_to_file(initialization_url, ctx['tmpfilename'], 'Init') | ||||
|         for i, segment_url in enumerate(segment_urls): | ||||
|             append_url_to_file(segment_url, ctx['tmpfilename'], 'Seg%d' % i) | ||||
|         segments_to_download = [(initialization_url, 'Init')] if initialization_url else [] | ||||
|         segments_to_download.extend([ | ||||
|             (segment_url, 'Seg%d' % i) | ||||
|             for i, segment_url in enumerate(segment_urls)]) | ||||
|  | ||||
|         for i, segment in enumerate(segments_to_download): | ||||
|             # In DASH, the first segment contains necessary headers to | ||||
|             # generate a valid MP4 file, so always abort for the first segment | ||||
|             fatal = i == 0 or not skip_unavailable_fragments | ||||
|             if not process_segment(segment, ctx['tmpfilename'], fatal): | ||||
|                 return False | ||||
|  | ||||
|         self._finish_frag_download(ctx) | ||||
|  | ||||
|   | ||||
| @@ -96,6 +96,12 @@ class CurlFD(ExternalFD): | ||||
|         cmd = [self.exe, '--location', '-o', tmpfilename] | ||||
|         for key, val in info_dict['http_headers'].items(): | ||||
|             cmd += ['--header', '%s: %s' % (key, val)] | ||||
|         cmd += self._bool_option('--continue-at', 'continuedl', '-', '0') | ||||
|         cmd += self._valueless_option('--silent', 'noprogress') | ||||
|         cmd += self._valueless_option('--verbose', 'verbose') | ||||
|         cmd += self._option('--limit-rate', 'ratelimit') | ||||
|         cmd += self._option('--retry', 'retries') | ||||
|         cmd += self._option('--max-filesize', 'max_filesize') | ||||
|         cmd += self._option('--interface', 'source_address') | ||||
|         cmd += self._option('--proxy', 'proxy') | ||||
|         cmd += self._valueless_option('--insecure', 'nocheckcertificate') | ||||
| @@ -103,6 +109,16 @@ class CurlFD(ExternalFD): | ||||
|         cmd += ['--', info_dict['url']] | ||||
|         return cmd | ||||
|  | ||||
|     def _call_downloader(self, tmpfilename, info_dict): | ||||
|         cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)] | ||||
|  | ||||
|         self._debug_cmd(cmd) | ||||
|  | ||||
|         # curl writes the progress to stderr so don't capture it. | ||||
|         p = subprocess.Popen(cmd) | ||||
|         p.communicate() | ||||
|         return p.returncode | ||||
|  | ||||
|  | ||||
| class AxelFD(ExternalFD): | ||||
|     AVAILABLE_OPT = '-V' | ||||
| @@ -204,6 +220,12 @@ class FFmpegFD(ExternalFD): | ||||
|         if proxy: | ||||
|             if not re.match(r'^[\da-zA-Z]+://', proxy): | ||||
|                 proxy = 'http://%s' % proxy | ||||
|  | ||||
|             if proxy.startswith('socks'): | ||||
|                 self.report_warning( | ||||
|                     '%s does not support SOCKS proxies. Downloading is likely to fail. ' | ||||
|                     'Consider adding --hls-prefer-native to your command.' % self.get_basename()) | ||||
|  | ||||
|             # Since December 2015 ffmpeg supports -http_proxy option (see | ||||
|             # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd) | ||||
|             # We could switch to the following code if we are able to detect version properly | ||||
|   | ||||
| @@ -6,6 +6,7 @@ import time | ||||
| from .common import FileDownloader | ||||
| from .http import HttpFD | ||||
| from ..utils import ( | ||||
|     error_to_compat_str, | ||||
|     encodeFilename, | ||||
|     sanitize_open, | ||||
| ) | ||||
| @@ -22,13 +23,19 @@ class FragmentFD(FileDownloader): | ||||
|  | ||||
|     Available options: | ||||
|  | ||||
|     fragment_retries:   Number of times to retry a fragment for HTTP error (DASH only) | ||||
|     fragment_retries:   Number of times to retry a fragment for HTTP error (DASH | ||||
|                         and hlsnative only) | ||||
|     skip_unavailable_fragments: | ||||
|                         Skip unavailable fragments (DASH and hlsnative only) | ||||
|     """ | ||||
|  | ||||
|     def report_retry_fragment(self, fragment_name, count, retries): | ||||
|     def report_retry_fragment(self, err, fragment_name, count, retries): | ||||
|         self.to_screen( | ||||
|             '[download] Got server HTTP error. Retrying fragment %s (attempt %d of %s)...' | ||||
|             % (fragment_name, count, self.format_retries(retries))) | ||||
|             '[download] Got server HTTP error: %s. Retrying fragment %s (attempt %d of %s)...' | ||||
|             % (error_to_compat_str(err), fragment_name, count, self.format_retries(retries))) | ||||
|  | ||||
|     def report_skip_fragment(self, fragment_name): | ||||
|         self.to_screen('[download] Skipping fragment %s...' % fragment_name) | ||||
|  | ||||
|     def _prepare_and_start_frag_download(self, ctx): | ||||
|         self._prepare_frag_download(ctx) | ||||
|   | ||||
| @@ -13,6 +13,7 @@ from .fragment import FragmentFD | ||||
| from .external import FFmpegFD | ||||
|  | ||||
| from ..compat import ( | ||||
|     compat_urllib_error, | ||||
|     compat_urlparse, | ||||
|     compat_struct_pack, | ||||
| ) | ||||
| @@ -20,6 +21,7 @@ from ..utils import ( | ||||
|     encodeFilename, | ||||
|     sanitize_open, | ||||
|     parse_m3u8_attributes, | ||||
|     update_url_query, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -82,6 +84,14 @@ class HlsFD(FragmentFD): | ||||
|  | ||||
|         self._prepare_and_start_frag_download(ctx) | ||||
|  | ||||
|         fragment_retries = self.params.get('fragment_retries', 0) | ||||
|         skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) | ||||
|         test = self.params.get('test', False) | ||||
|  | ||||
|         extra_query = None | ||||
|         extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url') | ||||
|         if extra_param_to_segment_url: | ||||
|             extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url) | ||||
|         i = 0 | ||||
|         media_sequence = 0 | ||||
|         decrypt_info = {'METHOD': 'NONE'} | ||||
| @@ -94,13 +104,37 @@ class HlsFD(FragmentFD): | ||||
|                         line | ||||
|                         if re.match(r'^https?://', line) | ||||
|                         else compat_urlparse.urljoin(man_url, line)) | ||||
|                     frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i) | ||||
|                     frag_name = 'Frag%d' % i | ||||
|                     frag_filename = '%s-%s' % (ctx['tmpfilename'], frag_name) | ||||
|                     if extra_query: | ||||
|                         frag_url = update_url_query(frag_url, extra_query) | ||||
|                     count = 0 | ||||
|                     while count <= fragment_retries: | ||||
|                         try: | ||||
|                             success = ctx['dl'].download(frag_filename, {'url': frag_url}) | ||||
|                             if not success: | ||||
|                                 return False | ||||
|                             down, frag_sanitized = sanitize_open(frag_filename, 'rb') | ||||
|                             frag_content = down.read() | ||||
|                             down.close() | ||||
|                             break | ||||
|                         except compat_urllib_error.HTTPError as err: | ||||
|                             # Unavailable (possibly temporary) fragments may be served. | ||||
|                             # First we retry, then either skip or abort | ||||
|                             # (see https://github.com/rg3/youtube-dl/issues/10165, | ||||
|                             # https://github.com/rg3/youtube-dl/issues/10448). | ||||
|                             count += 1 | ||||
|                             if count <= fragment_retries: | ||||
|                                 self.report_retry_fragment(err, frag_name, count, fragment_retries) | ||||
|                     if count > fragment_retries: | ||||
|                         if skip_unavailable_fragments: | ||||
|                             i += 1 | ||||
|                             media_sequence += 1 | ||||
|                             self.report_skip_fragment(frag_name) | ||||
|                             continue | ||||
|                         self.report_error( | ||||
|                             'giving up after %s fragment retries' % fragment_retries) | ||||
|                         return False | ||||
|                     if decrypt_info['METHOD'] == 'AES-128': | ||||
|                         iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence) | ||||
|                         frag_content = AES.new( | ||||
| @@ -108,7 +142,7 @@ class HlsFD(FragmentFD): | ||||
|                     ctx['dest_stream'].write(frag_content) | ||||
|                     frags_filenames.append(frag_sanitized) | ||||
|                     # We only download the first fragment during the test | ||||
|                     if self.params.get('test', False): | ||||
|                     if test: | ||||
|                         break | ||||
|                     i += 1 | ||||
|                     media_sequence += 1 | ||||
| @@ -116,10 +150,12 @@ class HlsFD(FragmentFD): | ||||
|                     decrypt_info = parse_m3u8_attributes(line[11:]) | ||||
|                     if decrypt_info['METHOD'] == 'AES-128': | ||||
|                         if 'IV' in decrypt_info: | ||||
|                             decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:]) | ||||
|                             decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32)) | ||||
|                         if not re.match(r'^https?://', decrypt_info['URI']): | ||||
|                             decrypt_info['URI'] = compat_urlparse.urljoin( | ||||
|                                 man_url, decrypt_info['URI']) | ||||
|                         if extra_query: | ||||
|                             decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query) | ||||
|                         decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read() | ||||
|                 elif line.startswith('#EXT-X-MEDIA-SEQUENCE'): | ||||
|                     media_sequence = int(line[22:]) | ||||
|   | ||||
| @@ -7,12 +7,13 @@ from ..utils import ( | ||||
|     ExtractorError, | ||||
|     js_to_json, | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ABCIE(InfoExtractor): | ||||
|     IE_NAME = 'abc.net.au' | ||||
|     _VALID_URL = r'https?://www\.abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334', | ||||
| @@ -93,3 +94,59 @@ class ABCIE(InfoExtractor): | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|         } | ||||
|  | ||||
|  | ||||
| class ABCIViewIE(InfoExtractor): | ||||
|     IE_NAME = 'abc.net.au:iview' | ||||
|     _VALID_URL = r'https?://iview\.abc\.net\.au/programs/[^/]+/(?P<id>[^/?#]+)' | ||||
|  | ||||
|     # ABC iview programs are normally available for 14 days only. | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://iview.abc.net.au/programs/gardening-australia/FA1505V024S00', | ||||
|         'md5': '979d10b2939101f0d27a06b79edad536', | ||||
|         'info_dict': { | ||||
|             'id': 'FA1505V024S00', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Series 27 Ep 24', | ||||
|             'description': 'md5:b28baeae7504d1148e1d2f0e3ed3c15d', | ||||
|             'upload_date': '20160820', | ||||
|             'uploader_id': 'abc1', | ||||
|             'timestamp': 1471719600, | ||||
|         }, | ||||
|         'skip': 'Video gone', | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         video_params = self._parse_json(self._search_regex( | ||||
|             r'videoParams\s*=\s*({.+?});', webpage, 'video params'), video_id) | ||||
|         title = video_params['title'] | ||||
|         stream = next(s for s in video_params['playlist'] if s.get('type') == 'program') | ||||
|  | ||||
|         formats = self._extract_akamai_formats(stream['hds-unmetered'], video_id) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         subtitles = {} | ||||
|         src_vtt = stream.get('captions', {}).get('src-vtt') | ||||
|         if src_vtt: | ||||
|             subtitles['en'] = [{ | ||||
|                 'url': src_vtt, | ||||
|                 'ext': 'vtt', | ||||
|             }] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': self._html_search_meta(['og:description', 'twitter:description'], webpage), | ||||
|             'thumbnail': self._html_search_meta(['og:image', 'twitter:image:src'], webpage), | ||||
|             'duration': int_or_none(video_params.get('eventDuration')), | ||||
|             'timestamp': parse_iso8601(video_params.get('pubDate'), ' '), | ||||
|             'series': video_params.get('seriesTitle'), | ||||
|             'series_id': video_params.get('seriesHouseNumber') or video_id[:7], | ||||
|             'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage)), | ||||
|             'episode': self._html_search_meta('episode_title', webpage), | ||||
|             'uploader_id': video_params.get('channel'), | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|   | ||||
| @@ -12,7 +12,7 @@ from ..compat import compat_urlparse | ||||
|  | ||||
| class AbcNewsVideoIE(AMPIE): | ||||
|     IE_NAME = 'abcnews:video' | ||||
|     _VALID_URL = 'http://abcnews.go.com/[^/]+/video/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)' | ||||
|     _VALID_URL = r'https?://abcnews\.go\.com/[^/]+/video/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932', | ||||
| @@ -49,7 +49,7 @@ class AbcNewsVideoIE(AMPIE): | ||||
|  | ||||
| class AbcNewsIE(InfoExtractor): | ||||
|     IE_NAME = 'abcnews' | ||||
|     _VALID_URL = 'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)' | ||||
|     _VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY', | ||||
|   | ||||
| @@ -1,13 +1,19 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
| 
 | ||||
| import re | ||||
| 
 | ||||
| from .common import InfoExtractor | ||||
| from ..utils import parse_iso8601 | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
| ) | ||||
| 
 | ||||
| 
 | ||||
| class Abc7NewsIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://abc7news\.com(?:/[^/]+/(?P<display_id>[^/]+))?/(?P<id>\d+)' | ||||
| class ABCOTVSIE(InfoExtractor): | ||||
|     IE_NAME = 'abcotvs' | ||||
|     IE_DESC = 'ABC Owned Television Stations' | ||||
|     _VALID_URL = r'https?://(?:abc(?:7(?:news|ny|chicago)?|11|13|30)|6abc)\.com(?:/[^/]+/(?P<display_id>[^/]+))?/(?P<id>\d+)' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://abc7news.com/entertainment/east-bay-museum-celebrates-vintage-synthesizers/472581/', | ||||
| @@ -15,7 +21,7 @@ class Abc7NewsIE(InfoExtractor): | ||||
|                 'id': '472581', | ||||
|                 'display_id': 'east-bay-museum-celebrates-vintage-synthesizers', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'East Bay museum celebrates history of synthesized music', | ||||
|                 'title': 'East Bay museum celebrates vintage synthesizers', | ||||
|                 'description': 'md5:a4f10fb2f2a02565c1749d4adbab4b10', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|                 'timestamp': 1421123075, | ||||
| @@ -41,7 +47,7 @@ class Abc7NewsIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
| 
 | ||||
|         m3u8 = self._html_search_meta( | ||||
|             'contentURL', webpage, 'm3u8 url', fatal=True) | ||||
|             'contentURL', webpage, 'm3u8 url', fatal=True).split('?')[0] | ||||
| 
 | ||||
|         formats = self._extract_m3u8_formats(m3u8, display_id, 'mp4') | ||||
|         self._sort_formats(formats) | ||||
| @@ -66,3 +72,41 @@ class Abc7NewsIE(InfoExtractor): | ||||
|             'uploader': uploader, | ||||
|             'formats': formats, | ||||
|         } | ||||
| 
 | ||||
| 
 | ||||
| class ABCOTVSClipsIE(InfoExtractor): | ||||
|     IE_NAME = 'abcotvs:clips' | ||||
|     _VALID_URL = r'https?://clips\.abcotvs\.com/(?:[^/]+/)*video/(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|         'url': 'https://clips.abcotvs.com/kabc/video/214814', | ||||
|         'info_dict': { | ||||
|             'id': '214814', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'SpaceX launch pad explosion destroys rocket, satellite', | ||||
|             'description': 'md5:9f186e5ad8f490f65409965ee9c7be1b', | ||||
|             'upload_date': '20160901', | ||||
|             'timestamp': 1472756695, | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     } | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         video_data = self._download_json('https://clips.abcotvs.com/vogo/video/getByIds?ids=' + video_id, video_id)['results'][0] | ||||
|         title = video_data['title'] | ||||
|         formats = self._extract_m3u8_formats( | ||||
|             video_data['videoURL'].split('?')[0], video_id, 'mp4') | ||||
|         self._sort_formats(formats) | ||||
| 
 | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': video_data.get('description'), | ||||
|             'thumbnail': video_data.get('thumbnailURL'), | ||||
|             'duration': int_or_none(video_data.get('duration')), | ||||
|             'timestamp': int_or_none(video_data.get('pubDate')), | ||||
|             'formats': formats, | ||||
|         } | ||||
							
								
								
									
										174
									
								
								youtube_dl/extractor/adobepass.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										174
									
								
								youtube_dl/extractor/adobepass.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,174 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import time | ||||
| import xml.etree.ElementTree as etree | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_urlparse | ||||
| from ..utils import ( | ||||
|     unescapeHTML, | ||||
|     urlencode_postdata, | ||||
|     unified_timestamp, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| MSO_INFO = { | ||||
|     'DTV': { | ||||
|         'name': 'DirecTV', | ||||
|         'username_field': 'username', | ||||
|         'password_field': 'password', | ||||
|     }, | ||||
|     'Rogers': { | ||||
|         'name': 'Rogers Cable', | ||||
|         'username_field': 'UserName', | ||||
|         'password_field': 'UserPassword', | ||||
|     }, | ||||
| } | ||||
|  | ||||
|  | ||||
| class AdobePassIE(InfoExtractor): | ||||
|     _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s' | ||||
|     _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0' | ||||
|     _MVPD_CACHE = 'ap-mvpd' | ||||
|  | ||||
|     @staticmethod | ||||
|     def _get_mvpd_resource(provider_id, title, guid, rating): | ||||
|         channel = etree.Element('channel') | ||||
|         channel_title = etree.SubElement(channel, 'title') | ||||
|         channel_title.text = provider_id | ||||
|         item = etree.SubElement(channel, 'item') | ||||
|         resource_title = etree.SubElement(item, 'title') | ||||
|         resource_title.text = title | ||||
|         resource_guid = etree.SubElement(item, 'guid') | ||||
|         resource_guid.text = guid | ||||
|         resource_rating = etree.SubElement(item, 'media:rating') | ||||
|         resource_rating.attrib = {'scheme': 'urn:v-chip'} | ||||
|         resource_rating.text = rating | ||||
|         return '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">' + etree.tostring(channel).decode() + '</rss>' | ||||
|  | ||||
|     def _extract_mvpd_auth(self, url, video_id, requestor_id, resource): | ||||
|         def xml_text(xml_str, tag): | ||||
|             return self._search_regex( | ||||
|                 '<%s>(.+?)</%s>' % (tag, tag), xml_str, tag) | ||||
|  | ||||
|         def is_expired(token, date_ele): | ||||
|             token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele))) | ||||
|             return token_expires and token_expires <= int(time.time()) | ||||
|  | ||||
|         def post_form(form_page_res, note, data={}): | ||||
|             form_page, urlh = form_page_res | ||||
|             post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url') | ||||
|             if not re.match(r'https?://', post_url): | ||||
|                 post_url = compat_urlparse.urljoin(urlh.geturl(), post_url) | ||||
|             form_data = self._hidden_inputs(form_page) | ||||
|             form_data.update(data) | ||||
|             return self._download_webpage_handle( | ||||
|                 post_url, video_id, note, data=urlencode_postdata(form_data), headers={ | ||||
|                     'Content-Type': 'application/x-www-form-urlencoded', | ||||
|                 }) | ||||
|  | ||||
|         def raise_mvpd_required(): | ||||
|             raise ExtractorError( | ||||
|                 'This video is only available for users of participating TV providers. ' | ||||
|                 'Use --ap-mso to specify Adobe Pass Multiple-system operator Identifier ' | ||||
|                 'and --ap-username and --ap-password or --netrc to provide account credentials.', expected=True) | ||||
|  | ||||
|         mvpd_headers = { | ||||
|             'ap_42': 'anonymous', | ||||
|             'ap_11': 'Linux i686', | ||||
|             'ap_z': self._USER_AGENT, | ||||
|             'User-Agent': self._USER_AGENT, | ||||
|         } | ||||
|  | ||||
|         guid = xml_text(resource, 'guid') | ||||
|         count = 0 | ||||
|         while count < 2: | ||||
|             requestor_info = self._downloader.cache.load(self._MVPD_CACHE, requestor_id) or {} | ||||
|             authn_token = requestor_info.get('authn_token') | ||||
|             if authn_token and is_expired(authn_token, 'simpleTokenExpires'): | ||||
|                 authn_token = None | ||||
|             if not authn_token: | ||||
|                 # TODO add support for other TV Providers | ||||
|                 mso_id = self._downloader.params.get('ap_mso') | ||||
|                 if not mso_id: | ||||
|                     raise_mvpd_required() | ||||
|                 username, password = self._get_login_info('ap_username', 'ap_password', mso_id) | ||||
|                 if not username or not password: | ||||
|                     raise_mvpd_required() | ||||
|                 mso_info = MSO_INFO[mso_id] | ||||
|  | ||||
|                 provider_redirect_page_res = self._download_webpage_handle( | ||||
|                     self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id, | ||||
|                     'Downloading Provider Redirect Page', query={ | ||||
|                         'noflash': 'true', | ||||
|                         'mso_id': mso_id, | ||||
|                         'requestor_id': requestor_id, | ||||
|                         'no_iframe': 'false', | ||||
|                         'domain_name': 'adobe.com', | ||||
|                         'redirect_url': url, | ||||
|                     }) | ||||
|                 provider_login_page_res = post_form( | ||||
|                     provider_redirect_page_res, 'Downloading Provider Login Page') | ||||
|                 mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', { | ||||
|                     mso_info['username_field']: username, | ||||
|                     mso_info['password_field']: password, | ||||
|                 }) | ||||
|                 if mso_id == 'DTV': | ||||
|                     post_form(mvpd_confirm_page_res, 'Confirming Login') | ||||
|  | ||||
|                 session = self._download_webpage( | ||||
|                     self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id, | ||||
|                     'Retrieving Session', data=urlencode_postdata({ | ||||
|                         '_method': 'GET', | ||||
|                         'requestor_id': requestor_id, | ||||
|                     }), headers=mvpd_headers) | ||||
|                 if '<pendingLogout' in session: | ||||
|                     self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {}) | ||||
|                     count += 1 | ||||
|                     continue | ||||
|                 authn_token = unescapeHTML(xml_text(session, 'authnToken')) | ||||
|                 requestor_info['authn_token'] = authn_token | ||||
|                 self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info) | ||||
|  | ||||
|             authz_token = requestor_info.get(guid) | ||||
|             if authz_token and is_expired(authz_token, 'simpleTokenTTL'): | ||||
|                 authz_token = None | ||||
|             if not authz_token: | ||||
|                 authorize = self._download_webpage( | ||||
|                     self._SERVICE_PROVIDER_TEMPLATE % 'authorize', video_id, | ||||
|                     'Retrieving Authorization Token', data=urlencode_postdata({ | ||||
|                         'resource_id': resource, | ||||
|                         'requestor_id': requestor_id, | ||||
|                         'authentication_token': authn_token, | ||||
|                         'mso_id': xml_text(authn_token, 'simpleTokenMsoID'), | ||||
|                         'userMeta': '1', | ||||
|                     }), headers=mvpd_headers) | ||||
|                 if '<pendingLogout' in authorize: | ||||
|                     self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {}) | ||||
|                     count += 1 | ||||
|                     continue | ||||
|                 authz_token = unescapeHTML(xml_text(authorize, 'authzToken')) | ||||
|                 requestor_info[guid] = authz_token | ||||
|                 self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info) | ||||
|  | ||||
|             mvpd_headers.update({ | ||||
|                 'ap_19': xml_text(authn_token, 'simpleSamlNameID'), | ||||
|                 'ap_23': xml_text(authn_token, 'simpleSamlSessionIndex'), | ||||
|             }) | ||||
|  | ||||
|             short_authorize = self._download_webpage( | ||||
|                 self._SERVICE_PROVIDER_TEMPLATE % 'shortAuthorize', | ||||
|                 video_id, 'Retrieving Media Token', data=urlencode_postdata({ | ||||
|                     'authz_token': authz_token, | ||||
|                     'requestor_id': requestor_id, | ||||
|                     'session_guid': xml_text(authn_token, 'simpleTokenAuthenticationGuid'), | ||||
|                     'hashed_guid': 'false', | ||||
|                 }), headers=mvpd_headers) | ||||
|             if '<pendingLogout' in short_authorize: | ||||
|                 self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {}) | ||||
|                 count += 1 | ||||
|                 continue | ||||
|             return short_authorize | ||||
| @@ -3,16 +3,14 @@ from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .turner import TurnerBaseIE | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     ExtractorError, | ||||
|     float_or_none, | ||||
|     xpath_text, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class AdultSwimIE(InfoExtractor): | ||||
| class AdultSwimIE(TurnerBaseIE): | ||||
|     _VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<is_playlist>playlists/)?(?P<show_path>[^/]+)/(?P<episode_path>[^/?#]+)/?' | ||||
|  | ||||
|     _TESTS = [{ | ||||
| @@ -83,6 +81,21 @@ class AdultSwimIE(InfoExtractor): | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         } | ||||
|     }, { | ||||
|         # heroMetadata.trailer | ||||
|         'url': 'http://www.adultswim.com/videos/decker/inside-decker-a-new-hero/', | ||||
|         'info_dict': { | ||||
|             'id': 'I0LQFQkaSUaFp8PnAWHhoQ', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Decker - Inside Decker: A New Hero', | ||||
|             'description': 'md5:c916df071d425d62d70c86d4399d3ee0', | ||||
|             'duration': 249.008, | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'expected_warnings': ['Unable to download f4m manifest'], | ||||
|     }] | ||||
|  | ||||
|     @staticmethod | ||||
| @@ -133,79 +146,56 @@ class AdultSwimIE(InfoExtractor): | ||||
|             if video_info is None: | ||||
|                 if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path: | ||||
|                     video_info = bootstrapped_data['slugged_video'] | ||||
|                 else: | ||||
|             if not video_info: | ||||
|                 video_info = bootstrapped_data.get( | ||||
|                     'heroMetadata', {}).get('trailer', {}).get('video') | ||||
|             if not video_info: | ||||
|                 video_info = bootstrapped_data.get('onlineOriginals', [None])[0] | ||||
|             if not video_info: | ||||
|                 raise ExtractorError('Unable to find video info') | ||||
|  | ||||
|             show = bootstrapped_data['show'] | ||||
|             show_title = show['title'] | ||||
|             stream = video_info.get('stream') | ||||
|             clips = [stream] if stream else video_info.get('clips') | ||||
|             if not clips: | ||||
|             if stream and stream.get('videoPlaybackID'): | ||||
|                 segment_ids = [stream['videoPlaybackID']] | ||||
|             elif video_info.get('clips'): | ||||
|                 segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']] | ||||
|             elif video_info.get('videoPlaybackID'): | ||||
|                 segment_ids = [video_info['videoPlaybackID']] | ||||
|             else: | ||||
|                 if video_info.get('auth') is True: | ||||
|                     raise ExtractorError( | ||||
|                         'This video is only available via cable service provider subscription that' | ||||
|                     ' is not currently supported. You may want to use --cookies.' | ||||
|                     if video_info.get('auth') is True else 'Unable to find stream or clips', | ||||
|                     expected=True) | ||||
|             segment_ids = [clip['videoPlaybackID'] for clip in clips] | ||||
|                         ' is not currently supported. You may want to use --cookies.', expected=True) | ||||
|                 else: | ||||
|                     raise ExtractorError('Unable to find stream or clips') | ||||
|  | ||||
|         episode_id = video_info['id'] | ||||
|         episode_title = video_info['title'] | ||||
|         episode_description = video_info['description'] | ||||
|         episode_duration = video_info.get('duration') | ||||
|         episode_description = video_info.get('description') | ||||
|         episode_duration = int_or_none(video_info.get('duration')) | ||||
|         view_count = int_or_none(video_info.get('views')) | ||||
|  | ||||
|         entries = [] | ||||
|         for part_num, segment_id in enumerate(segment_ids): | ||||
|             segment_url = 'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=desktop' % segment_id | ||||
|  | ||||
|             segement_info = self._extract_cvp_info( | ||||
|                 'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=desktop' % segment_id, | ||||
|                 segment_id, { | ||||
|                     'secure': { | ||||
|                         'media_src': 'http://androidhls-secure.cdn.turner.com/adultswim/big', | ||||
|                         'tokenizer_src': 'http://www.adultswim.com/astv/mvpd/processors/services/token_ipadAdobe.do', | ||||
|                     }, | ||||
|                 }) | ||||
|             segment_title = '%s - %s' % (show_title, episode_title) | ||||
|             if len(segment_ids) > 1: | ||||
|                 segment_title += ' Part %d' % (part_num + 1) | ||||
|  | ||||
|             idoc = self._download_xml( | ||||
|                 segment_url, segment_title, | ||||
|                 'Downloading segment information', 'Unable to download segment information') | ||||
|  | ||||
|             segment_duration = float_or_none( | ||||
|                 xpath_text(idoc, './/trt', 'segment duration').strip()) | ||||
|  | ||||
|             formats = [] | ||||
|             file_els = idoc.findall('.//files/file') or idoc.findall('./files/file') | ||||
|  | ||||
|             unique_urls = [] | ||||
|             unique_file_els = [] | ||||
|             for file_el in file_els: | ||||
|                 media_url = file_el.text | ||||
|                 if not media_url or determine_ext(media_url) == 'f4m': | ||||
|                     continue | ||||
|                 if file_el.text not in unique_urls: | ||||
|                     unique_urls.append(file_el.text) | ||||
|                     unique_file_els.append(file_el) | ||||
|  | ||||
|             for file_el in unique_file_els: | ||||
|                 bitrate = file_el.attrib.get('bitrate') | ||||
|                 ftype = file_el.attrib.get('type') | ||||
|                 media_url = file_el.text | ||||
|                 if determine_ext(media_url) == 'm3u8': | ||||
|                     formats.extend(self._extract_m3u8_formats( | ||||
|                         media_url, segment_title, 'mp4', preference=0, | ||||
|                         m3u8_id='hls', fatal=False)) | ||||
|                 else: | ||||
|                     formats.append({ | ||||
|                         'format_id': '%s_%s' % (bitrate, ftype), | ||||
|                         'url': file_el.text.strip(), | ||||
|                         # The bitrate may not be a number (for example: 'iphone') | ||||
|                         'tbr': int(bitrate) if bitrate.isdigit() else None, | ||||
|                     }) | ||||
|  | ||||
|             self._sort_formats(formats) | ||||
|  | ||||
|             entries.append({ | ||||
|             segement_info.update({ | ||||
|                 'id': segment_id, | ||||
|                 'title': segment_title, | ||||
|                 'formats': formats, | ||||
|                 'duration': segment_duration, | ||||
|                 'description': episode_description | ||||
|                 'description': episode_description, | ||||
|             }) | ||||
|             entries.append(segement_info) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
| @@ -214,5 +204,6 @@ class AdultSwimIE(InfoExtractor): | ||||
|             'entries': entries, | ||||
|             'title': '%s - %s' % (show_title, episode_title), | ||||
|             'description': episode_description, | ||||
|             'duration': episode_duration | ||||
|             'duration': episode_duration, | ||||
|             'view_count': view_count, | ||||
|         } | ||||
|   | ||||
| @@ -109,7 +109,10 @@ class AENetworksIE(AENetworksBaseIE): | ||||
|         info = self._parse_theplatform_metadata(theplatform_metadata) | ||||
|         if theplatform_metadata.get('AETN$isBehindWall'): | ||||
|             requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain] | ||||
|             resource = '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/"><channel><title>%s</title><item><title>%s</title><guid>%s</guid><media:rating scheme="urn:v-chip">%s</media:rating></item></channel></rss>' % (requestor_id, theplatform_metadata['title'], theplatform_metadata['AETN$PPL_pplProgramId'], theplatform_metadata['ratings'][0]['rating']) | ||||
|             resource = self._get_mvpd_resource( | ||||
|                 requestor_id, theplatform_metadata['title'], | ||||
|                 theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'), | ||||
|                 theplatform_metadata['ratings'][0]['rating']) | ||||
|             query['auth'] = self._extract_mvpd_auth( | ||||
|                 url, video_id, requestor_id, resource) | ||||
|         info.update(self._search_json_ld(webpage, video_id, fatal=False)) | ||||
|   | ||||
| @@ -4,7 +4,7 @@ from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class AlJazeeraIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.aljazeera\.com/programmes/.*?/(?P<id>[^/]+)\.html' | ||||
|     _VALID_URL = r'https?://(?:www\.)?aljazeera\.com/programmes/.*?/(?P<id>[^/]+)\.html' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html', | ||||
|   | ||||
							
								
								
									
youtube_dl/extractor/amcnetworks.py (Normal file, 91 lines changed)
									
								
							| @@ -0,0 +1,91 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .theplatform import ThePlatformIE | ||||
| from ..utils import ( | ||||
|     update_url_query, | ||||
|     parse_age_limit, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class AMCNetworksIE(ThePlatformIE): | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies/|shows/[^/]+/(?:full-episodes/)?season-\d+/episode-\d+(?:-(?:[^/]+/)?|/))(?P<id>[^/?#]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1', | ||||
|         'md5': '', | ||||
|         'info_dict': { | ||||
|             'id': 's3MX01Nl4vPH', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Maron - Season 4 - Step 1', | ||||
|             'description': 'In denial about his current situation, Marc is reluctantly convinced by his friends to enter rehab. Starring Marc Maron and Constance Zimmer.', | ||||
|             'age_limit': 17, | ||||
|             'upload_date': '20160505', | ||||
|             'timestamp': 1462468831, | ||||
|             'uploader': 'AMCN', | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.amc.com/shows/preacher/full-episodes/season-01/episode-00/pilot', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.wetv.com/shows/million-dollar-matchmaker/season-01/episode-06-the-dumped-dj-and-shallow-hal', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.ifc.com/movies/chaos', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         query = { | ||||
|             'mbr': 'true', | ||||
|             'manifest': 'm3u', | ||||
|         } | ||||
|         media_url = self._search_regex(r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)', webpage, 'media url') | ||||
|         theplatform_metadata = self._download_theplatform_metadata(self._search_regex( | ||||
|             r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), display_id) | ||||
|         info = self._parse_theplatform_metadata(theplatform_metadata) | ||||
|         video_id = theplatform_metadata['pid'] | ||||
|         title = theplatform_metadata['title'] | ||||
|         rating = theplatform_metadata['ratings'][0]['rating'] | ||||
|         auth_required = self._search_regex(r'window\.authRequired\s*=\s*(true|false);', webpage, 'auth required') | ||||
|         if auth_required == 'true': | ||||
|             requestor_id = self._search_regex(r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)', webpage, 'requestor id') | ||||
|             resource = self._get_mvpd_resource(requestor_id, title, video_id, rating) | ||||
|             query['auth'] = self._extract_mvpd_auth(url, video_id, requestor_id, resource) | ||||
|         media_url = update_url_query(media_url, query) | ||||
|         formats, subtitles = self._extract_theplatform_smil(media_url, video_id) | ||||
|         self._sort_formats(formats) | ||||
|         info.update({ | ||||
|             'id': video_id, | ||||
|             'subtitles': subtitles, | ||||
|             'formats': formats, | ||||
|             'age_limit': parse_age_limit(parse_age_limit(rating)), | ||||
|         }) | ||||
|         ns_keys = theplatform_metadata.get('$xmlns', {}).keys() | ||||
|         if ns_keys: | ||||
|             ns = list(ns_keys)[0] | ||||
|             series = theplatform_metadata.get(ns + '$show') | ||||
|             season_number = int_or_none(theplatform_metadata.get(ns + '$season')) | ||||
|             episode = theplatform_metadata.get(ns + '$episodeTitle') | ||||
|             episode_number = int_or_none(theplatform_metadata.get(ns + '$episode')) | ||||
|             if season_number: | ||||
|                 title = 'Season %d - %s' % (season_number, title) | ||||
|             if series: | ||||
|                 title = '%s - %s' % (series, title) | ||||
|             info.update({ | ||||
|                 'title': title, | ||||
|                 'series': series, | ||||
|                 'season_number': season_number, | ||||
|                 'episode': episode, | ||||
|                 'episode_number': episode_number, | ||||
|             }) | ||||
|         return info | ||||
| @@ -123,6 +123,10 @@ class AolFeaturesIE(InfoExtractor): | ||||
|             'title': 'What To Watch - February 17, 2016', | ||||
|         }, | ||||
|         'add_ie': ['FiveMin'], | ||||
|         'params': { | ||||
|             # encrypted m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
| @@ -1,8 +1,6 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
| @@ -15,7 +13,7 @@ class AparatIE(InfoExtractor): | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.aparat.com/v/wP8On', | ||||
|         'md5': '6714e0af7e0d875c5a39c4dc4ab46ad1', | ||||
|         'md5': '131aca2e14fe7c4dcb3c4877ba300c89', | ||||
|         'info_dict': { | ||||
|             'id': 'wP8On', | ||||
|             'ext': 'mp4', | ||||
| @@ -31,13 +29,13 @@ class AparatIE(InfoExtractor): | ||||
|         # Note: There is an easier-to-parse configuration at | ||||
|         # http://www.aparat.com/video/video/config/videohash/%video_id | ||||
|         # but the URL in there does not work | ||||
|         embed_url = ('http://www.aparat.com/video/video/embed/videohash/' + | ||||
|                      video_id + '/vt/frame') | ||||
|         embed_url = 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id | ||||
|         webpage = self._download_webpage(embed_url, video_id) | ||||
|  | ||||
|         video_urls = [video_url.replace('\\/', '/') for video_url in re.findall( | ||||
|             r'(?:fileList\[[0-9]+\]\s*=|"file"\s*:)\s*"([^"]+)"', webpage)] | ||||
|         for i, video_url in enumerate(video_urls): | ||||
|         file_list = self._parse_json(self._search_regex( | ||||
|             r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage, 'file list'), video_id) | ||||
|         for i, item in enumerate(file_list[0]): | ||||
|             video_url = item['file'] | ||||
|             req = HEADRequest(video_url) | ||||
|             res = self._request_webpage( | ||||
|                 req, video_id, note='Testing video URL %d' % i, errnote=False) | ||||
|   | ||||
| @@ -1,67 +1,65 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import unified_strdate | ||||
| from .jwplatform import JWPlatformBaseIE | ||||
| from ..utils import ( | ||||
|     unified_strdate, | ||||
|     clean_html, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ArchiveOrgIE(InfoExtractor): | ||||
| class ArchiveOrgIE(JWPlatformBaseIE): | ||||
|     IE_NAME = 'archive.org' | ||||
|     IE_DESC = 'archive.org videos' | ||||
|     _VALID_URL = r'https?://(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$' | ||||
|     _VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#]+)(?:[?].*)?$' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect', | ||||
|         'md5': '8af1d4cf447933ed3c7f4871162602db', | ||||
|         'info_dict': { | ||||
|             'id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect', | ||||
|             'ext': 'ogv', | ||||
|             'ext': 'ogg', | ||||
|             'title': '1968 Demo - FJCC Conference Presentation Reel #1', | ||||
|             'description': 'md5:1780b464abaca9991d8968c877bb53ed', | ||||
|             'description': 'md5:da45c349df039f1cc8075268eb1b5c25', | ||||
|             'upload_date': '19681210', | ||||
|             'uploader': 'SRI International' | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'https://archive.org/details/Cops1922', | ||||
|         'md5': '18f2a19e6d89af8425671da1cf3d4e04', | ||||
|         'md5': 'bc73c8ab3838b5a8fc6c6651fa7b58ba', | ||||
|         'info_dict': { | ||||
|             'id': 'Cops1922', | ||||
|             'ext': 'ogv', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Buster Keaton\'s "Cops" (1922)', | ||||
|             'description': 'md5:70f72ee70882f713d4578725461ffcc3', | ||||
|             'description': 'md5:b4544662605877edd99df22f9620d858', | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage( | ||||
|             'http://archive.org/embed/' + video_id, video_id) | ||||
|         jwplayer_playlist = self._parse_json(self._search_regex( | ||||
|             r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\);", | ||||
|             webpage, 'jwplayer playlist'), video_id) | ||||
|         info = self._parse_jwplayer_data( | ||||
|             {'playlist': jwplayer_playlist}, video_id, base_url=url) | ||||
|  | ||||
|         json_url = url + ('&' if '?' in url else '?') + 'output=json' | ||||
|         data = self._download_json(json_url, video_id) | ||||
|         def get_optional(metadata, field): | ||||
|             return metadata.get(field, [None])[0] | ||||
|  | ||||
|         def get_optional(data_dict, field): | ||||
|             return data_dict['metadata'].get(field, [None])[0] | ||||
|  | ||||
|         title = get_optional(data, 'title') | ||||
|         description = get_optional(data, 'description') | ||||
|         uploader = get_optional(data, 'creator') | ||||
|         upload_date = unified_strdate(get_optional(data, 'date')) | ||||
|  | ||||
|         formats = [ | ||||
|             { | ||||
|                 'format': fdata['format'], | ||||
|                 'url': 'http://' + data['server'] + data['dir'] + fn, | ||||
|                 'file_size': int(fdata['size']), | ||||
|             } | ||||
|             for fn, fdata in data['files'].items() | ||||
|             if 'Video' in fdata['format']] | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'video', | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'formats': formats, | ||||
|             'description': description, | ||||
|             'uploader': uploader, | ||||
|             'upload_date': upload_date, | ||||
|             'thumbnail': data.get('misc', {}).get('image'), | ||||
|         } | ||||
|         metadata = self._download_json( | ||||
|             'http://archive.org/details/' + video_id, video_id, query={ | ||||
|                 'output': 'json', | ||||
|             })['metadata'] | ||||
|         info.update({ | ||||
|             'title': get_optional(metadata, 'title') or info.get('title'), | ||||
|             'description': clean_html(get_optional(metadata, 'description')), | ||||
|         }) | ||||
|         if info.get('_type') != 'playlist': | ||||
|             info.update({ | ||||
|                 'uploader': get_optional(metadata, 'creator'), | ||||
|                 'upload_date': unified_strdate(get_optional(metadata, 'date')), | ||||
|             }) | ||||
|         return info | ||||
|   | ||||
| @@ -73,6 +73,7 @@ class ARDMediathekIE(InfoExtractor): | ||||
|             'description': 'md5:c0c1c8048514deaed2a73b3a60eecacb', | ||||
|             'duration': 3287, | ||||
|         }, | ||||
|         'skip': 'Video is no longer available', | ||||
|     }] | ||||
|  | ||||
|     def _extract_media_info(self, media_info_url, webpage, video_id): | ||||
| @@ -237,7 +238,7 @@ class ARDMediathekIE(InfoExtractor): | ||||
|  | ||||
|  | ||||
| class ARDIE(InfoExtractor): | ||||
|     _VALID_URL = '(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html' | ||||
|     _VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html', | ||||
|         'md5': 'd216c3a86493f9322545e045ddc3eb35', | ||||
|   | ||||
| @@ -12,46 +12,41 @@ from ..compat import ( | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
|     sanitized_Request, | ||||
|     smuggle_url, | ||||
|     unsmuggle_url, | ||||
|     urlencode_postdata, | ||||
| ) | ||||
| 
 | ||||
| 
 | ||||
| class DCNIE(InfoExtractor): | ||||
| class AWAANIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?' | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         show_id, video_id, season_id = re.match(self._VALID_URL, url).groups() | ||||
|         if video_id and int(video_id) > 0: | ||||
|             return self.url_result( | ||||
|                 'http://www.dcndigital.ae/media/%s' % video_id, 'DCNVideo') | ||||
|                 'http://awaan.ae/media/%s' % video_id, 'AWAANVideo') | ||||
|         elif season_id and int(season_id) > 0: | ||||
|             return self.url_result(smuggle_url( | ||||
|                 'http://www.dcndigital.ae/program/season/%s' % season_id, | ||||
|                 {'show_id': show_id}), 'DCNSeason') | ||||
|                 'http://awaan.ae/program/season/%s' % season_id, | ||||
|                 {'show_id': show_id}), 'AWAANSeason') | ||||
|         else: | ||||
|             return self.url_result( | ||||
|                 'http://www.dcndigital.ae/program/%s' % show_id, 'DCNSeason') | ||||
|                 'http://awaan.ae/program/%s' % show_id, 'AWAANSeason') | ||||
| 
 | ||||
| 
 | ||||
| class DCNBaseIE(InfoExtractor): | ||||
|     def _extract_video_info(self, video_data, video_id, is_live): | ||||
| class AWAANBaseIE(InfoExtractor): | ||||
|     def _parse_video_data(self, video_data, video_id, is_live): | ||||
|         title = video_data.get('title_en') or video_data['title_ar'] | ||||
|         img = video_data.get('img') | ||||
|         thumbnail = 'http://admin.mangomolo.com/analytics/%s' % img if img else None | ||||
|         duration = int_or_none(video_data.get('duration')) | ||||
|         description = video_data.get('description_en') or video_data.get('description_ar') | ||||
|         timestamp = parse_iso8601(video_data.get('create_time'), ' ') | ||||
| 
 | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': self._live_title(title) if is_live else title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'timestamp': timestamp, | ||||
|             'description': video_data.get('description_en') or video_data.get('description_ar'), | ||||
|             'thumbnail': 'http://admin.mangomolo.com/analytics/%s' % img if img else None, | ||||
|             'duration': int_or_none(video_data.get('duration')), | ||||
|             'timestamp': parse_iso8601(video_data.get('create_time'), ' '), | ||||
|             'is_live': is_live, | ||||
|         } | ||||
| 
 | ||||
| @@ -75,11 +70,12 @@ class DCNBaseIE(InfoExtractor): | ||||
|         return formats | ||||
| 
 | ||||
| 
 | ||||
| class DCNVideoIE(DCNBaseIE): | ||||
|     IE_NAME = 'dcn:video' | ||||
| class AWAANVideoIE(AWAANBaseIE): | ||||
|     IE_NAME = 'awaan:video' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?(?:video(?:/[^/]+)?|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.dcndigital.ae/#/video/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375', | ||||
|         'md5': '5f61c33bfc7794315c671a62d43116aa', | ||||
|         'info_dict': | ||||
|         { | ||||
|             'id': '17375', | ||||
| @@ -90,10 +86,6 @@ class DCNVideoIE(DCNBaseIE): | ||||
|             'timestamp': 1227504126, | ||||
|             'upload_date': '20081124', | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://awaan.ae/video/26723981/%D8%AF%D8%A7%D8%B1-%D8%A7%D9%84%D8%B3%D9%84%D8%A7%D9%85:-%D8%AE%D9%8A%D8%B1-%D8%AF%D9%88%D8%B1-%D8%A7%D9%84%D8%A3%D9%86%D8%B5%D8%A7%D8%B1', | ||||
|         'only_matching': True, | ||||
| @@ -102,11 +94,10 @@ class DCNVideoIE(DCNBaseIE): | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
| 
 | ||||
|         request = sanitized_Request( | ||||
|         video_data = self._download_json( | ||||
|             'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id, | ||||
|             headers={'Origin': 'http://www.dcndigital.ae'}) | ||||
|         video_data = self._download_json(request, video_id) | ||||
|         info = self._extract_video_info(video_data, video_id, False) | ||||
|             video_id, headers={'Origin': 'http://awaan.ae'}) | ||||
|         info = self._parse_video_data(video_data, video_id, False) | ||||
| 
 | ||||
|         webpage = self._download_webpage( | ||||
|             'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + | ||||
| @@ -121,19 +112,31 @@ class DCNVideoIE(DCNBaseIE): | ||||
|         return info | ||||
| 
 | ||||
| 
 | ||||
| class DCNLiveIE(DCNBaseIE): | ||||
|     IE_NAME = 'dcn:live' | ||||
| class AWAANLiveIE(AWAANBaseIE): | ||||
|     IE_NAME = 'awaan:live' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?live/(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://awaan.ae/live/6/dubai-tv', | ||||
|         'info_dict': { | ||||
|             'id': '6', | ||||
|             'ext': 'mp4', | ||||
|             'title': 're:Dubai Al Oula [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', | ||||
|             'upload_date': '20150107', | ||||
|             'timestamp': 1420588800, | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     } | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         channel_id = self._match_id(url) | ||||
| 
 | ||||
|         request = sanitized_Request( | ||||
|         channel_data = self._download_json( | ||||
|             'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id=%s' % channel_id, | ||||
|             headers={'Origin': 'http://www.dcndigital.ae'}) | ||||
| 
 | ||||
|         channel_data = self._download_json(request, channel_id) | ||||
|         info = self._extract_video_info(channel_data, channel_id, True) | ||||
|             channel_id, headers={'Origin': 'http://awaan.ae'}) | ||||
|         info = self._parse_video_data(channel_data, channel_id, True) | ||||
| 
 | ||||
|         webpage = self._download_webpage( | ||||
|             'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + | ||||
| @@ -148,8 +151,8 @@ class DCNLiveIE(DCNBaseIE): | ||||
|         return info | ||||
| 
 | ||||
| 
 | ||||
| class DCNSeasonIE(InfoExtractor): | ||||
|     IE_NAME = 'dcn:season' | ||||
| class AWAANSeasonIE(InfoExtractor): | ||||
|     IE_NAME = 'awaan:season' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))' | ||||
|     _TEST = { | ||||
|         'url': 'http://dcndigital.ae/#/program/205024/%D9%85%D8%AD%D8%A7%D8%B6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%B4%D9%8A%D8%AE-%D8%A7%D9%84%D8%B4%D8%B9%D8%B1%D8%A7%D9%88%D9%8A', | ||||
| @@ -170,21 +173,17 @@ class DCNSeasonIE(InfoExtractor): | ||||
|             data['season'] = season_id | ||||
|             show_id = smuggled_data.get('show_id') | ||||
|             if show_id is None: | ||||
|                 request = sanitized_Request( | ||||
|                 season = self._download_json( | ||||
|                     'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id=%s' % season_id, | ||||
|                     headers={'Origin': 'http://www.dcndigital.ae'}) | ||||
|                 season = self._download_json(request, season_id) | ||||
|                     season_id, headers={'Origin': 'http://awaan.ae'}) | ||||
|                 show_id = season['id'] | ||||
|         data['show_id'] = show_id | ||||
|         request = sanitized_Request( | ||||
|         show = self._download_json( | ||||
|             'http://admin.mangomolo.com/analytics/index.php/plus/show', | ||||
|             urlencode_postdata(data), | ||||
|             { | ||||
|                 'Origin': 'http://www.dcndigital.ae', | ||||
|             show_id, data=urlencode_postdata(data), headers={ | ||||
|                 'Origin': 'http://awaan.ae', | ||||
|                 'Content-Type': 'application/x-www-form-urlencoded' | ||||
|             }) | ||||
| 
 | ||||
|         show = self._download_json(request, show_id) | ||||
|         if not season_id: | ||||
|             season_id = show['default_season'] | ||||
|         for season in show['seasons']: | ||||
| @@ -195,6 +194,6 @@ class DCNSeasonIE(InfoExtractor): | ||||
|                 for video in show['videos']: | ||||
|                     video_id = compat_str(video['id']) | ||||
|                     entries.append(self.url_result( | ||||
|                         'http://www.dcndigital.ae/media/%s' % video_id, 'DCNVideo', video_id)) | ||||
|                         'http://awaan.ae/media/%s' % video_id, 'AWAANVideo', video_id)) | ||||
| 
 | ||||
|                 return self.playlist_result(entries, season_id, title) | ||||
| @@ -103,7 +103,7 @@ class AzubuIE(InfoExtractor): | ||||
|  | ||||
|  | ||||
| class AzubuLiveIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www.azubu.tv/(?P<id>[^/]+)$' | ||||
|     _VALID_URL = r'https?://(?:www\.)?azubu\.tv/(?P<id>[^/]+)$' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.azubu.tv/MarsTVMDLen', | ||||
|   | ||||
| @@ -162,6 +162,15 @@ class BandcampAlbumIE(InfoExtractor): | ||||
|             'uploader_id': 'dotscale', | ||||
|         }, | ||||
|         'playlist_mincount': 7, | ||||
|     }, { | ||||
|         # with escaped quote in title | ||||
|         'url': 'https://jstrecords.bandcamp.com/album/entropy-ep', | ||||
|         'info_dict': { | ||||
|             'title': '"Entropy" EP', | ||||
|             'uploader_id': 'jstrecords', | ||||
|             'id': 'entropy-ep', | ||||
|         }, | ||||
|         'playlist_mincount': 3, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -176,8 +185,11 @@ class BandcampAlbumIE(InfoExtractor): | ||||
|         entries = [ | ||||
|             self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key()) | ||||
|             for t_path in tracks_paths] | ||||
|         title = self._search_regex( | ||||
|             r'album_title\s*:\s*"(.*?)"', webpage, 'title', fatal=False) | ||||
|         title = self._html_search_regex( | ||||
|             r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"', | ||||
|             webpage, 'title', fatal=False) | ||||
|         if title: | ||||
|             title = title.replace(r'\"', '"') | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|             'uploader_id': uploader_id, | ||||
|   | ||||
| @@ -2,19 +2,23 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import itertools | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     dict_get, | ||||
|     ExtractorError, | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
|     parse_duration, | ||||
|     parse_iso8601, | ||||
|     try_get, | ||||
|     unescapeHTML, | ||||
| ) | ||||
| from ..compat import ( | ||||
|     compat_etree_fromstring, | ||||
|     compat_HTTPError, | ||||
|     compat_urlparse, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -229,51 +233,6 @@ class BBCCoUkIE(InfoExtractor): | ||||
|         asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist') | ||||
|         return [ref.get('href') for ref in asx.findall('./Entry/ref')] | ||||
|  | ||||
|     def _extract_connection(self, connection, programme_id): | ||||
|         formats = [] | ||||
|         kind = connection.get('kind') | ||||
|         protocol = connection.get('protocol') | ||||
|         supplier = connection.get('supplier') | ||||
|         if protocol == 'http': | ||||
|             href = connection.get('href') | ||||
|             transfer_format = connection.get('transferFormat') | ||||
|             # ASX playlist | ||||
|             if supplier == 'asx': | ||||
|                 for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)): | ||||
|                     formats.append({ | ||||
|                         'url': ref, | ||||
|                         'format_id': 'ref%s_%s' % (i, supplier), | ||||
|                     }) | ||||
|             # Skip DASH until supported | ||||
|             elif transfer_format == 'dash': | ||||
|                 pass | ||||
|             elif transfer_format == 'hls': | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     href, programme_id, ext='mp4', entry_protocol='m3u8_native', | ||||
|                     m3u8_id=supplier, fatal=False)) | ||||
|             # Direct link | ||||
|             else: | ||||
|                 formats.append({ | ||||
|                     'url': href, | ||||
|                     'format_id': supplier or kind or protocol, | ||||
|                 }) | ||||
|         elif protocol == 'rtmp': | ||||
|             application = connection.get('application', 'ondemand') | ||||
|             auth_string = connection.get('authString') | ||||
|             identifier = connection.get('identifier') | ||||
|             server = connection.get('server') | ||||
|             formats.append({ | ||||
|                 'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string), | ||||
|                 'play_path': identifier, | ||||
|                 'app': '%s?%s' % (application, auth_string), | ||||
|                 'page_url': 'http://www.bbc.co.uk', | ||||
|                 'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf', | ||||
|                 'rtmp_live': False, | ||||
|                 'ext': 'flv', | ||||
|                 'format_id': supplier, | ||||
|             }) | ||||
|         return formats | ||||
|  | ||||
|     def _extract_items(self, playlist): | ||||
|         return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS) | ||||
|  | ||||
| @@ -294,46 +253,6 @@ class BBCCoUkIE(InfoExtractor): | ||||
|     def _extract_connections(self, media): | ||||
|         return self._findall_ns(media, './{%s}connection') | ||||
|  | ||||
|     def _extract_video(self, media, programme_id): | ||||
|         formats = [] | ||||
|         vbr = int_or_none(media.get('bitrate')) | ||||
|         vcodec = media.get('encoding') | ||||
|         service = media.get('service') | ||||
|         width = int_or_none(media.get('width')) | ||||
|         height = int_or_none(media.get('height')) | ||||
|         file_size = int_or_none(media.get('media_file_size')) | ||||
|         for connection in self._extract_connections(media): | ||||
|             conn_formats = self._extract_connection(connection, programme_id) | ||||
|             for format in conn_formats: | ||||
|                 format.update({ | ||||
|                     'width': width, | ||||
|                     'height': height, | ||||
|                     'vbr': vbr, | ||||
|                     'vcodec': vcodec, | ||||
|                     'filesize': file_size, | ||||
|                 }) | ||||
|                 if service: | ||||
|                     format['format_id'] = '%s_%s' % (service, format['format_id']) | ||||
|             formats.extend(conn_formats) | ||||
|         return formats | ||||
|  | ||||
|     def _extract_audio(self, media, programme_id): | ||||
|         formats = [] | ||||
|         abr = int_or_none(media.get('bitrate')) | ||||
|         acodec = media.get('encoding') | ||||
|         service = media.get('service') | ||||
|         for connection in self._extract_connections(media): | ||||
|             conn_formats = self._extract_connection(connection, programme_id) | ||||
|             for format in conn_formats: | ||||
|                 format.update({ | ||||
|                     'format_id': '%s_%s' % (service, format['format_id']), | ||||
|                     'abr': abr, | ||||
|                     'acodec': acodec, | ||||
|                     'vcodec': 'none', | ||||
|                 }) | ||||
|             formats.extend(conn_formats) | ||||
|         return formats | ||||
|  | ||||
|     def _get_subtitles(self, media, programme_id): | ||||
|         subtitles = {} | ||||
|         for connection in self._extract_connections(media): | ||||
| @@ -379,13 +298,87 @@ class BBCCoUkIE(InfoExtractor): | ||||
|     def _process_media_selector(self, media_selection, programme_id): | ||||
|         formats = [] | ||||
|         subtitles = None | ||||
|         urls = [] | ||||
|  | ||||
|         for media in self._extract_medias(media_selection): | ||||
|             kind = media.get('kind') | ||||
|             if kind == 'audio': | ||||
|                 formats.extend(self._extract_audio(media, programme_id)) | ||||
|             elif kind == 'video': | ||||
|                 formats.extend(self._extract_video(media, programme_id)) | ||||
|             if kind in ('video', 'audio'): | ||||
|                 bitrate = int_or_none(media.get('bitrate')) | ||||
|                 encoding = media.get('encoding') | ||||
|                 service = media.get('service') | ||||
|                 width = int_or_none(media.get('width')) | ||||
|                 height = int_or_none(media.get('height')) | ||||
|                 file_size = int_or_none(media.get('media_file_size')) | ||||
|                 for connection in self._extract_connections(media): | ||||
|                     href = connection.get('href') | ||||
|                     if href in urls: | ||||
|                         continue | ||||
|                     if href: | ||||
|                         urls.append(href) | ||||
|                     conn_kind = connection.get('kind') | ||||
|                     protocol = connection.get('protocol') | ||||
|                     supplier = connection.get('supplier') | ||||
|                     transfer_format = connection.get('transferFormat') | ||||
|                     format_id = supplier or conn_kind or protocol | ||||
|                     if service: | ||||
|                         format_id = '%s_%s' % (service, format_id) | ||||
|                     # ASX playlist | ||||
|                     if supplier == 'asx': | ||||
|                         for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)): | ||||
|                             formats.append({ | ||||
|                                 'url': ref, | ||||
|                                 'format_id': 'ref%s_%s' % (i, format_id), | ||||
|                             }) | ||||
|                     elif transfer_format == 'dash': | ||||
|                         formats.extend(self._extract_mpd_formats( | ||||
|                             href, programme_id, mpd_id=format_id, fatal=False)) | ||||
|                     elif transfer_format == 'hls': | ||||
|                         formats.extend(self._extract_m3u8_formats( | ||||
|                             href, programme_id, ext='mp4', entry_protocol='m3u8_native', | ||||
|                             m3u8_id=format_id, fatal=False)) | ||||
|                     elif transfer_format == 'hds': | ||||
|                         formats.extend(self._extract_f4m_formats( | ||||
|                             href, programme_id, f4m_id=format_id, fatal=False)) | ||||
|                     else: | ||||
|                         if not service and not supplier and bitrate: | ||||
|                             format_id += '-%d' % bitrate | ||||
|                         fmt = { | ||||
|                             'format_id': format_id, | ||||
|                             'filesize': file_size, | ||||
|                         } | ||||
|                         if kind == 'video': | ||||
|                             fmt.update({ | ||||
|                                 'width': width, | ||||
|                                 'height': height, | ||||
|                                 'vbr': bitrate, | ||||
|                                 'vcodec': encoding, | ||||
|                             }) | ||||
|                         else: | ||||
|                             fmt.update({ | ||||
|                                 'abr': bitrate, | ||||
|                                 'acodec': encoding, | ||||
|                                 'vcodec': 'none', | ||||
|                             }) | ||||
|                         if protocol == 'http': | ||||
|                             # Direct link | ||||
|                             fmt.update({ | ||||
|                                 'url': href, | ||||
|                             }) | ||||
|                         elif protocol == 'rtmp': | ||||
|                             application = connection.get('application', 'ondemand') | ||||
|                             auth_string = connection.get('authString') | ||||
|                             identifier = connection.get('identifier') | ||||
|                             server = connection.get('server') | ||||
|                             fmt.update({ | ||||
|                                 'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string), | ||||
|                                 'play_path': identifier, | ||||
|                                 'app': '%s?%s' % (application, auth_string), | ||||
|                                 'page_url': 'http://www.bbc.co.uk', | ||||
|                                 'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf', | ||||
|                                 'rtmp_live': False, | ||||
|                                 'ext': 'flv', | ||||
|                             }) | ||||
|                         formats.append(fmt) | ||||
|             elif kind == 'captions': | ||||
|                 subtitles = self.extract_subtitles(media, programme_id) | ||||
|         return formats, subtitles | ||||
| @@ -589,7 +582,7 @@ class BBCIE(BBCCoUkIE): | ||||
|         'info_dict': { | ||||
|             'id': '150615_telabyad_kentin_cogu', | ||||
|             'ext': 'mp4', | ||||
|             'title': "Tel Abyad'da IŞİD bayrağı indirildi YPG bayrağı çekildi", | ||||
|             'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde", | ||||
|             'description': 'md5:33a4805a855c9baf7115fcbde57e7025', | ||||
|             'timestamp': 1434397334, | ||||
|             'upload_date': '20150615', | ||||
| @@ -654,6 +647,23 @@ class BBCIE(BBCCoUkIE): | ||||
|             # rtmp download | ||||
|             'skip_download': True, | ||||
|         } | ||||
|     }, { | ||||
|         # single video embedded with Morph | ||||
|         'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975', | ||||
|         'info_dict': { | ||||
|             'id': 'p041vhd0', | ||||
|             'ext': 'mp4', | ||||
|             'title': "Nigeria v Japan - Men's First Round", | ||||
|             'description': 'Live coverage of the first round from Group B at the Amazonia Arena.', | ||||
|             'duration': 7980, | ||||
|             'uploader': 'BBC Sport', | ||||
|             'uploader_id': 'bbc_sport', | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'skip': 'Georestricted to UK', | ||||
|     }, { | ||||
|         # single video with playlist.sxml URL in playlist param | ||||
|         'url': 'http://www.bbc.com/sport/0/football/33653409', | ||||
| @@ -751,7 +761,7 @@ class BBCIE(BBCCoUkIE): | ||||
|  | ||||
|         webpage = self._download_webpage(url, playlist_id) | ||||
|  | ||||
|         json_ld_info = self._search_json_ld(webpage, playlist_id, default=None) | ||||
|         json_ld_info = self._search_json_ld(webpage, playlist_id, default={}) | ||||
|         timestamp = json_ld_info.get('timestamp') | ||||
|  | ||||
|         playlist_title = json_ld_info.get('title') | ||||
| @@ -820,13 +830,19 @@ class BBCIE(BBCCoUkIE): | ||||
|                         # http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani) | ||||
|                         playlist = data_playable.get('otherSettings', {}).get('playlist', {}) | ||||
|                         if playlist: | ||||
|                             for key in ('progressiveDownload', 'streaming'): | ||||
|                             entry = None | ||||
|                             for key in ('streaming', 'progressiveDownload'): | ||||
|                                 playlist_url = playlist.get('%sUrl' % key) | ||||
|                                 if not playlist_url: | ||||
|                                     continue | ||||
|                                 try: | ||||
|                                     entries.append(self._extract_from_playlist_sxml( | ||||
|                                         playlist_url, playlist_id, timestamp)) | ||||
|                                     info = self._extract_from_playlist_sxml( | ||||
|                                         playlist_url, playlist_id, timestamp) | ||||
|                                     if not entry: | ||||
|                                         entry = info | ||||
|                                     else: | ||||
|                                         entry['title'] = info['title'] | ||||
|                                         entry['formats'].extend(info['formats']) | ||||
|                                 except Exception as e: | ||||
|                                     # Some playlist URL may fail with 500, at the same time | ||||
|                                     # the other one may work fine (e.g. | ||||
| @@ -834,6 +850,9 @@ class BBCIE(BBCCoUkIE): | ||||
|                                     if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500: | ||||
|                                         continue | ||||
|                                     raise | ||||
|                             if entry: | ||||
|                                 self._sort_formats(entry['formats']) | ||||
|                                 entries.append(entry) | ||||
|  | ||||
|         if entries: | ||||
|             return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) | ||||
| @@ -866,6 +885,50 @@ class BBCIE(BBCCoUkIE): | ||||
|                 'subtitles': subtitles, | ||||
|             } | ||||
|  | ||||
|         # Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975) | ||||
|         # There are several setPayload calls may be present but the video | ||||
|         # seems to be always related to the first one | ||||
|         morph_payload = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 r'Morph\.setPayload\([^,]+,\s*({.+?})\);', | ||||
|                 webpage, 'morph payload', default='{}'), | ||||
|             playlist_id, fatal=False) | ||||
|         if morph_payload: | ||||
|             components = try_get(morph_payload, lambda x: x['body']['components'], list) or [] | ||||
|             for component in components: | ||||
|                 if not isinstance(component, dict): | ||||
|                     continue | ||||
|                 lead_media = try_get(component, lambda x: x['props']['leadMedia'], dict) | ||||
|                 if not lead_media: | ||||
|                     continue | ||||
|                 identifiers = lead_media.get('identifiers') | ||||
|                 if not identifiers or not isinstance(identifiers, dict): | ||||
|                     continue | ||||
|                 programme_id = identifiers.get('vpid') or identifiers.get('playablePid') | ||||
|                 if not programme_id: | ||||
|                     continue | ||||
|                 title = lead_media.get('title') or self._og_search_title(webpage) | ||||
|                 formats, subtitles = self._download_media_selector(programme_id) | ||||
|                 self._sort_formats(formats) | ||||
|                 description = lead_media.get('summary') | ||||
|                 uploader = lead_media.get('masterBrand') | ||||
|                 uploader_id = lead_media.get('mid') | ||||
|                 duration = None | ||||
|                 duration_d = lead_media.get('duration') | ||||
|                 if isinstance(duration_d, dict): | ||||
|                     duration = parse_duration(dict_get( | ||||
|                         duration_d, ('rawDuration', 'formattedDuration', 'spokenDuration'))) | ||||
|                 return { | ||||
|                     'id': programme_id, | ||||
|                     'title': title, | ||||
|                     'description': description, | ||||
|                     'duration': duration, | ||||
|                     'uploader': uploader, | ||||
|                     'uploader_id': uploader_id, | ||||
|                     'formats': formats, | ||||
|                     'subtitles': subtitles, | ||||
|                 } | ||||
|  | ||||
|         def extract_all(pattern): | ||||
|             return list(filter(None, map( | ||||
|                 lambda s: self._parse_json(s, playlist_id, fatal=False), | ||||
| @@ -883,7 +946,7 @@ class BBCIE(BBCCoUkIE): | ||||
|             r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage)) | ||||
|         if entries: | ||||
|             return self.playlist_result( | ||||
|                 [self.url_result(entry, 'BBCCoUk') for entry in entries], | ||||
|                 [self.url_result(entry_, 'BBCCoUk') for entry_ in entries], | ||||
|                 playlist_id, playlist_title, playlist_description) | ||||
|  | ||||
|         # Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511) | ||||
| @@ -965,7 +1028,7 @@ class BBCIE(BBCCoUkIE): | ||||
|  | ||||
|  | ||||
| class BBCCoUkArticleIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www.bbc.co.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)' | ||||
|     IE_NAME = 'bbc.co.uk:article' | ||||
|     IE_DESC = 'BBC articles' | ||||
|  | ||||
| @@ -995,19 +1058,35 @@ class BBCCoUkArticleIE(InfoExtractor): | ||||
|  | ||||
|  | ||||
| class BBCCoUkPlaylistBaseIE(InfoExtractor): | ||||
|     def _entries(self, webpage, url, playlist_id): | ||||
|         single_page = 'page' in compat_urlparse.parse_qs( | ||||
|             compat_urlparse.urlparse(url).query) | ||||
|         for page_num in itertools.count(2): | ||||
|             for video_id in re.findall( | ||||
|                     self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage): | ||||
|                 yield self.url_result( | ||||
|                     self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key()) | ||||
|             if single_page: | ||||
|                 return | ||||
|             next_page = self._search_regex( | ||||
|                 r'<li[^>]+class=(["\'])pagination_+next\1[^>]*><a[^>]+href=(["\'])(?P<url>(?:(?!\2).)+)\2', | ||||
|                 webpage, 'next page url', default=None, group='url') | ||||
|             if not next_page: | ||||
|                 break | ||||
|             webpage = self._download_webpage( | ||||
|                 compat_urlparse.urljoin(url, next_page), playlist_id, | ||||
|                 'Downloading page %d' % page_num, page_num) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         playlist_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, playlist_id) | ||||
|  | ||||
|         entries = [ | ||||
|             self.url_result(self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key()) | ||||
|             for video_id in re.findall( | ||||
|                 self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage)] | ||||
|  | ||||
|         title, description = self._extract_title_and_description(webpage) | ||||
|  | ||||
|         return self.playlist_result(entries, playlist_id, title, description) | ||||
|         return self.playlist_result( | ||||
|             self._entries(webpage, url, playlist_id), | ||||
|             playlist_id, title, description) | ||||
|  | ||||
|  | ||||
| class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE): | ||||
| @@ -1056,6 +1135,24 @@ class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE): | ||||
|             'description': 'French thriller serial about a missing teenager.', | ||||
|         }, | ||||
|         'playlist_mincount': 7, | ||||
|     }, { | ||||
|         # multipage playlist, explicit page | ||||
|         'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips?page=1', | ||||
|         'info_dict': { | ||||
|             'id': 'b00mfl7n', | ||||
|             'title': 'Frozen Planet - Clips - BBC One', | ||||
|             'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c', | ||||
|         }, | ||||
|         'playlist_mincount': 24, | ||||
|     }, { | ||||
|         # multipage playlist, all pages | ||||
|         'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips', | ||||
|         'info_dict': { | ||||
|             'id': 'b00mfl7n', | ||||
|             'title': 'Frozen Planet - Clips - BBC One', | ||||
|             'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c', | ||||
|         }, | ||||
|         'playlist_mincount': 142, | ||||
|     }, { | ||||
|         'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/broadcasts/2016/06', | ||||
|         'only_matching': True, | ||||
|   | ||||
							
								
								
									
										75
									
								
								youtube_dl/extractor/bellmedia.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										75
									
								
								youtube_dl/extractor/bellmedia.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,75 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class BellMediaIE(InfoExtractor): | ||||
|     _VALID_URL = r'''(?x)https?://(?:www\.)? | ||||
|         (?P<domain> | ||||
|             (?: | ||||
|                 ctv| | ||||
|                 tsn| | ||||
|                 bnn| | ||||
|                 thecomedynetwork| | ||||
|                 discovery| | ||||
|                 discoveryvelocity| | ||||
|                 sciencechannel| | ||||
|                 investigationdiscovery| | ||||
|                 animalplanet| | ||||
|                 bravo| | ||||
|                 mtv| | ||||
|                 space | ||||
|             )\.ca| | ||||
|             much\.com | ||||
|         )/.*?(?:\bvid=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6})''' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.ctv.ca/video/player?vid=706966', | ||||
|         'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0', | ||||
|         'info_dict': { | ||||
|             'id': '706966', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Larry Day and Richard Jutras on the TIFF red carpet of \'Stonewall\'', | ||||
|             'description': 'etalk catches up with Larry Day and Richard Jutras on the TIFF red carpet of "Stonewall”.', | ||||
|             'upload_date': '20150919', | ||||
|             'timestamp': 1442624700, | ||||
|         }, | ||||
|         'expected_warnings': ['HTTP Error 404'], | ||||
|     }, { | ||||
|         'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.tsn.ca/video/expectations-high-for-milos-raonic-at-us-open~939549', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.bnn.ca/video/berman-s-call-part-two-viewer-questions~939654', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.ctv.ca/YourMorning/Video/S1E6-Monday-August-29-2016-vid938009', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.much.com/shows/atmidnight/episode948007/tuesday-september-13-2016', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.much.com/shows/the-almost-impossible-gameshow/928979/episode-6', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|     _DOMAINS = { | ||||
|         'thecomedynetwork': 'comedy', | ||||
|         'discoveryvelocity': 'discvel', | ||||
|         'sciencechannel': 'discsci', | ||||
|         'investigationdiscovery': 'invdisc', | ||||
|         'animalplanet': 'aniplan', | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         domain, video_id = re.match(self._VALID_URL, url).groups() | ||||
|         domain = domain.split('.')[0] | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'id': video_id, | ||||
|             'url': '9c9media:%s_web:%s' % (self._DOMAINS.get(domain, domain), video_id), | ||||
|             'ie_key': 'NineCNineMedia', | ||||
|         } | ||||
| @@ -2,7 +2,6 @@ from __future__ import unicode_literals | ||||
|  | ||||
| from .mtv import MTVServicesInfoExtractor | ||||
| from ..utils import unified_strdate | ||||
| from ..compat import compat_urllib_parse_urlencode | ||||
|  | ||||
|  | ||||
| class BetIE(MTVServicesInfoExtractor): | ||||
| @@ -53,9 +52,9 @@ class BetIE(MTVServicesInfoExtractor): | ||||
|     _FEED_URL = "http://feeds.mtvnservices.com/od/feed/bet-mrss-player" | ||||
|  | ||||
|     def _get_feed_query(self, uri): | ||||
|         return compat_urllib_parse_urlencode({ | ||||
|         return { | ||||
|             'uuid': uri, | ||||
|         }) | ||||
|         } | ||||
|  | ||||
|     def _extract_mgid(self, webpage): | ||||
|         return self._search_regex(r'data-uri="([^"]+)', webpage, 'mgid') | ||||
|   | ||||
| @@ -11,15 +11,6 @@ from ..compat import compat_urllib_parse_unquote | ||||
| class BigflixIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?bigflix\.com/.+/(?P<id>[0-9]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.bigflix.com/Hindi-movies/Action-movies/Singham-Returns/16537', | ||||
|         'md5': 'dc1b4aebb46e3a7077ecc0d9f43f61e3', | ||||
|         'info_dict': { | ||||
|             'id': '16537', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Singham Returns', | ||||
|             'description': 'md5:3d2ba5815f14911d5cc6a501ae0cf65d', | ||||
|         } | ||||
|     }, { | ||||
|         # 2 formats | ||||
|         'url': 'http://www.bigflix.com/Tamil-movies/Drama-movies/Madarasapatinam/16070', | ||||
|         'info_dict': { | ||||
|   | ||||
| @@ -1,205 +1,101 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import calendar | ||||
| import datetime | ||||
| import hashlib | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_etree_fromstring, | ||||
|     compat_str, | ||||
|     compat_parse_qs, | ||||
|     compat_xml_parse_error, | ||||
| ) | ||||
| from ..compat import compat_parse_qs | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     float_or_none, | ||||
|     xpath_text, | ||||
|     unified_timestamp, | ||||
|     urlencode_postdata, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class BiliBiliIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)' | ||||
|     _VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/v/)(?P<id>\d+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|     _TEST = { | ||||
|         'url': 'http://www.bilibili.tv/video/av1074402/', | ||||
|         'md5': '5f7d29e1a2872f3df0cf76b1f87d3788', | ||||
|         'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e', | ||||
|         'info_dict': { | ||||
|             'id': '1554319', | ||||
|             'ext': 'flv', | ||||
|             'id': '1074402', | ||||
|             'ext': 'mp4', | ||||
|             'title': '【金坷垃】金泡沫', | ||||
|             'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923', | ||||
|             'duration': 308.067, | ||||
|             'duration': 308.315, | ||||
|             'timestamp': 1398012660, | ||||
|             'upload_date': '20140420', | ||||
|             'thumbnail': 're:^https?://.+\.jpg', | ||||
|             'uploader': '菊子桑', | ||||
|             'uploader_id': '156160', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.bilibili.com/video/av1041170/', | ||||
|         'info_dict': { | ||||
|             'id': '1041170', | ||||
|             'title': '【BD1080P】刀语【诸神&异域】', | ||||
|             'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~', | ||||
|         }, | ||||
|         'playlist_count': 9, | ||||
|     }, { | ||||
|         'url': 'http://www.bilibili.com/video/av4808130/', | ||||
|         'info_dict': { | ||||
|             'id': '4808130', | ||||
|             'title': '【长篇】哆啦A梦443【钉铛】', | ||||
|             'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929', | ||||
|         }, | ||||
|         'playlist': [{ | ||||
|             'md5': '55cdadedf3254caaa0d5d27cf20a8f9c', | ||||
|             'info_dict': { | ||||
|                 'id': '4808130_part1', | ||||
|                 'ext': 'flv', | ||||
|                 'title': '【长篇】哆啦A梦443【钉铛】', | ||||
|                 'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929', | ||||
|                 'timestamp': 1464564180, | ||||
|                 'upload_date': '20160529', | ||||
|                 'uploader': '喜欢拉面', | ||||
|                 'uploader_id': '151066', | ||||
|             }, | ||||
|         }, { | ||||
|             'md5': '926f9f67d0c482091872fbd8eca7ea3d', | ||||
|             'info_dict': { | ||||
|                 'id': '4808130_part2', | ||||
|                 'ext': 'flv', | ||||
|                 'title': '【长篇】哆啦A梦443【钉铛】', | ||||
|                 'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929', | ||||
|                 'timestamp': 1464564180, | ||||
|                 'upload_date': '20160529', | ||||
|                 'uploader': '喜欢拉面', | ||||
|                 'uploader_id': '151066', | ||||
|             }, | ||||
|         }, { | ||||
|             'md5': '4b7b225b968402d7c32348c646f1fd83', | ||||
|             'info_dict': { | ||||
|                 'id': '4808130_part3', | ||||
|                 'ext': 'flv', | ||||
|                 'title': '【长篇】哆啦A梦443【钉铛】', | ||||
|                 'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929', | ||||
|                 'timestamp': 1464564180, | ||||
|                 'upload_date': '20160529', | ||||
|                 'uploader': '喜欢拉面', | ||||
|                 'uploader_id': '151066', | ||||
|             }, | ||||
|         }, { | ||||
|             'md5': '7b795e214166501e9141139eea236e91', | ||||
|             'info_dict': { | ||||
|                 'id': '4808130_part4', | ||||
|                 'ext': 'flv', | ||||
|                 'title': '【长篇】哆啦A梦443【钉铛】', | ||||
|                 'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929', | ||||
|                 'timestamp': 1464564180, | ||||
|                 'upload_date': '20160529', | ||||
|                 'uploader': '喜欢拉面', | ||||
|                 'uploader_id': '151066', | ||||
|             }, | ||||
|         }], | ||||
|     }, { | ||||
|         # Missing upload time | ||||
|         'url': 'http://www.bilibili.com/video/av1867637/', | ||||
|         'info_dict': { | ||||
|             'id': '2880301', | ||||
|             'ext': 'flv', | ||||
|             'title': '【HDTV】【喜剧】岳父岳母真难当 (2014)【法国票房冠军】', | ||||
|             'description': '一个信奉天主教的法国旧式传统资产阶级家庭中有四个女儿。三个女儿却分别找了阿拉伯、犹太、中国丈夫,老夫老妻唯独期盼剩下未嫁的小女儿能找一个信奉天主教的法国白人,结果没想到小女儿找了一位非裔黑人……【这次应该不会跳帧了】', | ||||
|             'uploader': '黑夜为猫', | ||||
|             'uploader_id': '610729', | ||||
|         }, | ||||
|         'params': { | ||||
|             # Just to test metadata extraction | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'expected_warnings': ['upload time'], | ||||
|     }] | ||||
|     } | ||||
|  | ||||
|     # BiliBili blocks keys from time to time. The current key is extracted from | ||||
|     # the Android client | ||||
|     # TODO: find the sign algorithm used in the flash player | ||||
|     _APP_KEY = '86385cdc024c0f6c' | ||||
|     _APP_KEY = '6f90a59ac58a4123' | ||||
|     _BILIBILI_KEY = '0bfd84cc3940035173f35e6777508326' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         params = compat_parse_qs(self._search_regex( | ||||
|         if 'anime/v' not in url: | ||||
|             cid = compat_parse_qs(self._search_regex( | ||||
|                 [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)', | ||||
|                  r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'], | ||||
|             webpage, 'player parameters')) | ||||
|         cid = params['cid'][0] | ||||
|  | ||||
|         info_xml_str = self._download_webpage( | ||||
|             'http://interface.bilibili.com/v_cdn_play', | ||||
|             cid, query={'appkey': self._APP_KEY, 'cid': cid}, | ||||
|             note='Downloading video info page') | ||||
|  | ||||
|         err_msg = None | ||||
|         durls = None | ||||
|         info_xml = None | ||||
|         try: | ||||
|             info_xml = compat_etree_fromstring(info_xml_str.encode('utf-8')) | ||||
|         except compat_xml_parse_error: | ||||
|             info_json = self._parse_json(info_xml_str, video_id, fatal=False) | ||||
|             err_msg = (info_json or {}).get('error_text') | ||||
|                 webpage, 'player parameters'))['cid'][0] | ||||
|         else: | ||||
|             err_msg = xpath_text(info_xml, './message') | ||||
|             js = self._download_json( | ||||
|                 'http://bangumi.bilibili.com/web_api/get_source', video_id, | ||||
|                 data=urlencode_postdata({'episode_id': video_id}), | ||||
|                 headers={'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'}) | ||||
|             cid = js['result']['cid'] | ||||
|  | ||||
|         if info_xml is not None: | ||||
|             durls = info_xml.findall('./durl') | ||||
|         if not durls: | ||||
|             if err_msg: | ||||
|                 raise ExtractorError('%s said: %s' % (self.IE_NAME, err_msg), expected=True) | ||||
|             else: | ||||
|                 raise ExtractorError('No videos found!') | ||||
|         payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self._APP_KEY, cid) | ||||
|         sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest() | ||||
|  | ||||
|         video_info = self._download_json( | ||||
|             'http://interface.bilibili.com/playurl?%s&sign=%s' % (payload, sign), | ||||
|             video_id, note='Downloading video info page') | ||||
|  | ||||
|         entries = [] | ||||
|  | ||||
|         for durl in durls: | ||||
|             size = xpath_text(durl, ['./filesize', './size']) | ||||
|         for idx, durl in enumerate(video_info['durl']): | ||||
|             formats = [{ | ||||
|                 'url': durl.find('./url').text, | ||||
|                 'filesize': int_or_none(size), | ||||
|                 'url': durl['url'], | ||||
|                 'filesize': int_or_none(durl['size']), | ||||
|             }] | ||||
|             for backup_url in durl.findall('./backup_url/url'): | ||||
|             for backup_url in durl.get('backup_url', []): | ||||
|                 formats.append({ | ||||
|                     'url': backup_url.text, | ||||
|                     'url': backup_url, | ||||
|                     # backup URLs have lower priorities | ||||
|                     'preference': -2 if 'hd.mp4' in backup_url.text else -3, | ||||
|                     'preference': -2 if 'hd.mp4' in backup_url else -3, | ||||
|                 }) | ||||
|  | ||||
|             self._sort_formats(formats) | ||||
|  | ||||
|             entries.append({ | ||||
|                 'id': '%s_part%s' % (cid, xpath_text(durl, './order')), | ||||
|                 'duration': int_or_none(xpath_text(durl, './length'), 1000), | ||||
|                 'id': '%s_part%s' % (video_id, idx), | ||||
|                 'duration': float_or_none(durl.get('length'), 1000), | ||||
|                 'formats': formats, | ||||
|             }) | ||||
|  | ||||
|         title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title') | ||||
|         description = self._html_search_meta('description', webpage) | ||||
|         datetime_str = self._html_search_regex( | ||||
|             r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False) | ||||
|         timestamp = None | ||||
|         if datetime_str: | ||||
|             timestamp = calendar.timegm(datetime.datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M').timetuple()) | ||||
|         timestamp = unified_timestamp(self._html_search_regex( | ||||
|             r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False)) | ||||
|         thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage) | ||||
|  | ||||
|         # TODO 'view_count' requires deobfuscating Javascript | ||||
|         info = { | ||||
|             'id': compat_str(cid), | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'timestamp': timestamp, | ||||
|             'thumbnail': self._html_search_meta('thumbnailUrl', webpage), | ||||
|             'duration': float_or_none(xpath_text(info_xml, './timelength'), scale=1000), | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': float_or_none(video_info.get('timelength'), scale=1000), | ||||
|         } | ||||
|  | ||||
|         uploader_mobj = re.search( | ||||
|   | ||||
| @@ -24,7 +24,8 @@ class BIQLEIE(InfoExtractor): | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Ребенок в шоке от автоматической мойки', | ||||
|             'uploader': 'Dmitry Kotov', | ||||
|         } | ||||
|         }, | ||||
|         'skip': ' This video was marked as adult.  Embedding adult videos on external sites is prohibited.', | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
| @@ -1,3 +1,4 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| @@ -20,6 +21,18 @@ class BloombergIE(InfoExtractor): | ||||
|         'params': { | ||||
|             'format': 'best[format_id^=hds]', | ||||
|         }, | ||||
|     }, { | ||||
|         # video ID in BPlayer(...) | ||||
|         'url': 'http://www.bloomberg.com/features/2016-hello-world-new-zealand/', | ||||
|         'info_dict': { | ||||
|             'id': '938c7e72-3f25-4ddb-8b85-a9be731baa74', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Meet the Real-Life Tech Wizards of Middle Earth', | ||||
|             'description': 'Hello World, Episode 1: New Zealand’s freaky AI babies, robot exoskeletons, and a virtual you.', | ||||
|         }, | ||||
|         'params': { | ||||
|             'format': 'best[format_id^=hds]', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets', | ||||
|         'only_matching': True, | ||||
| @@ -33,7 +46,11 @@ class BloombergIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, name) | ||||
|         video_id = self._search_regex( | ||||
|             r'["\']bmmrId["\']\s*:\s*(["\'])(?P<url>.+?)\1', | ||||
|             webpage, 'id', group='url') | ||||
|             webpage, 'id', group='url', default=None) | ||||
|         if not video_id: | ||||
|             bplayer_data = self._parse_json(self._search_regex( | ||||
|                 r'BPlayer\(null,\s*({[^;]+})\);', webpage, 'id'), name) | ||||
|             video_id = bplayer_data['id'] | ||||
|         title = re.sub(': Video$', '', self._og_search_title(webpage)) | ||||
|  | ||||
|         embed_info = self._download_json( | ||||
|   | ||||
| @@ -12,7 +12,7 @@ from ..utils import ( | ||||
|  | ||||
| class BpbIE(InfoExtractor): | ||||
|     IE_DESC = 'Bundeszentrale für politische Bildung' | ||||
|     _VALID_URL = r'https?://www\.bpb\.de/mediathek/(?P<id>[0-9]+)/' | ||||
|     _VALID_URL = r'https?://(?:www\.)?bpb\.de/mediathek/(?P<id>[0-9]+)/' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr', | ||||
|   | ||||
| @@ -1,31 +1,74 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import smuggle_url | ||||
| from .adobepass import AdobePassIE | ||||
| from ..utils import ( | ||||
|     smuggle_url, | ||||
|     update_url_query, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class BravoTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+videos/(?P<id>[^/?]+)' | ||||
|     _TEST = { | ||||
| class BravoTVIE(AdobePassIE): | ||||
|     _VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+(?P<id>[^/?#]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.bravotv.com/last-chance-kitchen/season-5/videos/lck-ep-12-fishy-finale', | ||||
|         'md5': 'd60cdf68904e854fac669bd26cccf801', | ||||
|         'md5': '9086d0b7ef0ea2aabc4781d75f4e5863', | ||||
|         'info_dict': { | ||||
|             'id': 'LitrBdX64qLn', | ||||
|             'id': 'zHyk1_HU_mPy', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Last Chance Kitchen Returns', | ||||
|             'description': 'S13: Last Chance Kitchen Returns for Top Chef Season 13', | ||||
|             'timestamp': 1448926740, | ||||
|             'upload_date': '20151130', | ||||
|             'title': 'LCK Ep 12: Fishy Finale', | ||||
|             'description': 'S13/E12: Two eliminated chefs have just 12 minutes to cook up a delicious fish dish.', | ||||
|             'uploader': 'NBCU-BRAV', | ||||
|             'upload_date': '20160302', | ||||
|             'timestamp': 1456945320, | ||||
|         } | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         account_pid = self._search_regex(r'"account_pid"\s*:\s*"([^"]+)"', webpage, 'account pid') | ||||
|         release_pid = self._search_regex(r'"release_pid"\s*:\s*"([^"]+)"', webpage, 'release pid') | ||||
|         return self.url_result(smuggle_url( | ||||
|             'http://link.theplatform.com/s/%s/%s?mbr=true&switch=progressive' % (account_pid, release_pid), | ||||
|             {'force_smil_url': True}), 'ThePlatform', release_pid) | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         settings = self._parse_json(self._search_regex( | ||||
|             r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', webpage, 'drupal settings'), | ||||
|             display_id) | ||||
|         info = {} | ||||
|         query = { | ||||
|             'mbr': 'true', | ||||
|         } | ||||
|         account_pid, release_pid = [None] * 2 | ||||
|         tve = settings.get('sharedTVE') | ||||
|         if tve: | ||||
|             query['manifest'] = 'm3u' | ||||
|             account_pid = 'HNK2IC' | ||||
|             release_pid = tve['release_pid'] | ||||
|             if tve.get('entitlement') == 'auth': | ||||
|                 adobe_pass = settings.get('adobePass', {}) | ||||
|                 resource = self._get_mvpd_resource( | ||||
|                     adobe_pass.get('adobePassResourceId', 'bravo'), | ||||
|                     tve['title'], release_pid, tve.get('rating')) | ||||
|                 query['auth'] = self._extract_mvpd_auth( | ||||
|                     url, release_pid, adobe_pass.get('adobePassRequestorId', 'bravo'), resource) | ||||
|         else: | ||||
|             shared_playlist = settings['shared_playlist'] | ||||
|             account_pid = shared_playlist['account_pid'] | ||||
|             metadata = shared_playlist['video_metadata'][shared_playlist['default_clip']] | ||||
|             release_pid = metadata['release_pid'] | ||||
|             info.update({ | ||||
|                 'title': metadata['title'], | ||||
|                 'description': metadata.get('description'), | ||||
|                 'season_number': int_or_none(metadata.get('season_num')), | ||||
|                 'episode_number': int_or_none(metadata.get('episode_num')), | ||||
|             }) | ||||
|             query['switch'] = 'progressive' | ||||
|         info.update({ | ||||
|             '_type': 'url_transparent', | ||||
|             'id': release_pid, | ||||
|             'url': smuggle_url(update_url_query( | ||||
|                 'http://link.theplatform.com/s/%s/%s' % (account_pid, release_pid), | ||||
|                 query), {'force_smil_url': True}), | ||||
|             'ie_key': 'ThePlatform', | ||||
|         }) | ||||
|         return info | ||||
|   | ||||
| @@ -112,7 +112,7 @@ class CamdemyIE(InfoExtractor): | ||||
|  | ||||
|  | ||||
| class CamdemyFolderIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www.camdemy.com/folder/(?P<id>\d+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?camdemy\.com/folder/(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         # links with trailing slash | ||||
|         'url': 'http://www.camdemy.com/folder/450', | ||||
|   | ||||
| @@ -23,6 +23,7 @@ class CanalplusIE(InfoExtractor): | ||||
|                                     (?:(?:www|m)\.)?canalplus\.fr| | ||||
|                                     (?:www\.)?piwiplus\.fr| | ||||
|                                     (?:www\.)?d8\.tv| | ||||
|                                     (?:www\.)?c8\.fr| | ||||
|                                     (?:www\.)?d17\.tv| | ||||
|                                     (?:www\.)?itele\.fr | ||||
|                                 )/(?:(?:[^/]+/)*(?P<display_id>[^/?#&]+))?(?:\?.*\bvid=(?P<vid>\d+))?| | ||||
| @@ -35,6 +36,7 @@ class CanalplusIE(InfoExtractor): | ||||
|         'canalplus': 'cplus', | ||||
|         'piwiplus': 'teletoon', | ||||
|         'd8': 'd8', | ||||
|         'c8': 'd8', | ||||
|         'd17': 'd17', | ||||
|         'itele': 'itele', | ||||
|     } | ||||
|   | ||||
| @@ -1,11 +1,13 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import float_or_none | ||||
|  | ||||
|  | ||||
| class CanvasIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?canvas\.be/video/(?:[^/]+/)*(?P<id>[^/?#&]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week', | ||||
|         'md5': 'ea838375a547ac787d4064d8c7860a6c', | ||||
| @@ -38,22 +40,42 @@ class CanvasIE(InfoExtractor): | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'https://www.een.be/sorry-voor-alles/herbekijk-sorry-voor-alles', | ||||
|         'info_dict': { | ||||
|             'id': 'mz-ast-11a587f8-b921-4266-82e2-0bce3e80d07f', | ||||
|             'display_id': 'herbekijk-sorry-voor-alles', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Herbekijk Sorry voor alles', | ||||
|             'description': 'md5:8bb2805df8164e5eb95d6a7a29dc0dd3', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'duration': 3788.06, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         site_id, display_id = mobj.group('site_id'), mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         title = self._search_regex( | ||||
|         title = (self._search_regex( | ||||
|             r'<h1[^>]+class="video__body__header__title"[^>]*>(.+?)</h1>', | ||||
|             webpage, 'title', default=None) or self._og_search_title(webpage) | ||||
|             webpage, 'title', default=None) or self._og_search_title( | ||||
|             webpage)).strip() | ||||
|  | ||||
|         video_id = self._html_search_regex( | ||||
|             r'data-video=(["\'])(?P<id>.+?)\1', webpage, 'video id', group='id') | ||||
|             r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', group='id') | ||||
|  | ||||
|         data = self._download_json( | ||||
|             'https://mediazone.vrt.be/api/v1/canvas/assets/%s' % video_id, display_id) | ||||
|             'https://mediazone.vrt.be/api/v1/%s/assets/%s' | ||||
|             % (site_id, video_id), display_id) | ||||
|  | ||||
|         formats = [] | ||||
|         for target in data['targetUrls']: | ||||
|   | ||||
							
								
								
									
										36
									
								
								youtube_dl/extractor/cartoonnetwork.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								youtube_dl/extractor/cartoonnetwork.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,36 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .turner import TurnerBaseIE | ||||
|  | ||||
|  | ||||
| class CartoonNetworkIE(TurnerBaseIE): | ||||
|     _VALID_URL = r'https?://(?:www\.)?cartoonnetwork\.com/video/(?:[^/]+/)+(?P<id>[^/?#]+)-(?:clip|episode)\.html' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.cartoonnetwork.com/video/teen-titans-go/starfire-the-cat-lady-clip.html', | ||||
|         'info_dict': { | ||||
|             'id': '8a250ab04ed07e6c014ef3f1e2f9016c', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Starfire the Cat Lady', | ||||
|             'description': 'Robin decides to become a cat so that Starfire will finally love him.', | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         id_type, video_id = re.search(r"_cnglobal\.cvp(Video|Title)Id\s*=\s*'([^']+)';", webpage).groups() | ||||
|         query = ('id' if id_type == 'Video' else 'titleId') + '=' + video_id | ||||
|         return self._extract_cvp_info( | ||||
|             'http://www.cartoonnetwork.com/video-seo-svc/episodeservices/getCvpPlaylist?networkName=CN2&' + query, video_id, { | ||||
|                 'secure': { | ||||
|                     'media_src': 'http://androidhls-secure.cdn.turner.com/toon/big', | ||||
|                     'tokenizer_src': 'http://www.cartoonnetwork.com/cntv/mvpd/processors/services/token_ipadAdobe.do', | ||||
|                 }, | ||||
|             }) | ||||
| @@ -9,10 +9,19 @@ from ..utils import ( | ||||
|     js_to_json, | ||||
|     smuggle_url, | ||||
|     try_get, | ||||
|     xpath_text, | ||||
|     xpath_element, | ||||
|     xpath_with_ns, | ||||
|     find_xpath_attr, | ||||
|     parse_iso8601, | ||||
|     parse_age_limit, | ||||
|     int_or_none, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class CBCIE(InfoExtractor): | ||||
|     IE_NAME = 'cbc.ca' | ||||
|     _VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?!player/)(?:[^/]+/)+(?P<id>[^/?#]+)' | ||||
|     _TESTS = [{ | ||||
|         # with mediaId | ||||
| @@ -114,6 +123,7 @@ class CBCIE(InfoExtractor): | ||||
|  | ||||
|  | ||||
| class CBCPlayerIE(InfoExtractor): | ||||
|     IE_NAME = 'cbc.ca:player' | ||||
|     _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.cbc.ca/player/play/2683190193', | ||||
| @@ -167,3 +177,165 @@ class CBCPlayerIE(InfoExtractor): | ||||
|                 }), | ||||
|             'id': video_id, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class CBCWatchBaseIE(InfoExtractor): | ||||
|     _device_id = None | ||||
|     _device_token = None | ||||
|     _API_BASE_URL = 'https://api-cbc.cloud.clearleap.com/cloffice/client/' | ||||
|     _NS_MAP = { | ||||
|         'media': 'http://search.yahoo.com/mrss/', | ||||
|         'clearleap': 'http://www.clearleap.com/namespace/clearleap/1.0/', | ||||
|     } | ||||
|  | ||||
|     def _call_api(self, path, video_id): | ||||
|         url = path if path.startswith('http') else self._API_BASE_URL + path | ||||
|         result = self._download_xml(url, video_id, headers={ | ||||
|             'X-Clearleap-DeviceId': self._device_id, | ||||
|             'X-Clearleap-DeviceToken': self._device_token, | ||||
|         }) | ||||
|         error_message = xpath_text(result, 'userMessage') or xpath_text(result, 'systemMessage') | ||||
|         if error_message: | ||||
|             raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message)) | ||||
|         return result | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         if not self._device_id or not self._device_token: | ||||
|             device = self._downloader.cache.load('cbcwatch', 'device') or {} | ||||
|             self._device_id, self._device_token = device.get('id'), device.get('token') | ||||
|             if not self._device_id or not self._device_token: | ||||
|                 result = self._download_xml( | ||||
|                     self._API_BASE_URL + 'device/register', | ||||
|                     None, data=b'<device><type>web</type></device>') | ||||
|                 self._device_id = xpath_text(result, 'deviceId', fatal=True) | ||||
|                 self._device_token = xpath_text(result, 'deviceToken', fatal=True) | ||||
|                 self._downloader.cache.store( | ||||
|                     'cbcwatch', 'device', { | ||||
|                         'id': self._device_id, | ||||
|                         'token': self._device_token, | ||||
|                     }) | ||||
|  | ||||
|     def _parse_rss_feed(self, rss): | ||||
|         channel = xpath_element(rss, 'channel', fatal=True) | ||||
|  | ||||
|         def _add_ns(path): | ||||
|             return xpath_with_ns(path, self._NS_MAP) | ||||
|  | ||||
|         entries = [] | ||||
|         for item in channel.findall('item'): | ||||
|             guid = xpath_text(item, 'guid', fatal=True) | ||||
|             title = xpath_text(item, 'title', fatal=True) | ||||
|  | ||||
|             media_group = xpath_element(item, _add_ns('media:group'), fatal=True) | ||||
|             content = xpath_element(media_group, _add_ns('media:content'), fatal=True) | ||||
|             content_url = content.attrib['url'] | ||||
|  | ||||
|             thumbnails = [] | ||||
|             for thumbnail in media_group.findall(_add_ns('media:thumbnail')): | ||||
|                 thumbnail_url = thumbnail.get('url') | ||||
|                 if not thumbnail_url: | ||||
|                     continue | ||||
|                 thumbnails.append({ | ||||
|                     'id': thumbnail.get('profile'), | ||||
|                     'url': thumbnail_url, | ||||
|                     'width': int_or_none(thumbnail.get('width')), | ||||
|                     'height': int_or_none(thumbnail.get('height')), | ||||
|                 }) | ||||
|  | ||||
|             timestamp = None | ||||
|             release_date = find_xpath_attr( | ||||
|                 item, _add_ns('media:credit'), 'role', 'releaseDate') | ||||
|             if release_date is not None: | ||||
|                 timestamp = parse_iso8601(release_date.text) | ||||
|  | ||||
|             entries.append({ | ||||
|                 '_type': 'url_transparent', | ||||
|                 'url': content_url, | ||||
|                 'id': guid, | ||||
|                 'title': title, | ||||
|                 'description': xpath_text(item, 'description'), | ||||
|                 'timestamp': timestamp, | ||||
|                 'duration': int_or_none(content.get('duration')), | ||||
|                 'age_limit': parse_age_limit(xpath_text(item, _add_ns('media:rating'))), | ||||
|                 'episode': xpath_text(item, _add_ns('clearleap:episode')), | ||||
|                 'episode_number': int_or_none(xpath_text(item, _add_ns('clearleap:episodeInSeason'))), | ||||
|                 'series': xpath_text(item, _add_ns('clearleap:series')), | ||||
|                 'season_number': int_or_none(xpath_text(item, _add_ns('clearleap:season'))), | ||||
|                 'thumbnails': thumbnails, | ||||
|                 'ie_key': 'CBCWatchVideo', | ||||
|             }) | ||||
|  | ||||
|         return self.playlist_result( | ||||
|             entries, xpath_text(channel, 'guid'), | ||||
|             xpath_text(channel, 'title'), | ||||
|             xpath_text(channel, 'description')) | ||||
|  | ||||
|  | ||||
| class CBCWatchVideoIE(CBCWatchBaseIE): | ||||
|     IE_NAME = 'cbc.ca:watch:video' | ||||
|     _VALID_URL = r'https?://api-cbc\.cloud\.clearleap\.com/cloffice/client/web/play/?\?.*?\bcontentId=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         result = self._call_api(url, video_id) | ||||
|  | ||||
|         m3u8_url = xpath_text(result, 'url', fatal=True) | ||||
|         formats = self._extract_m3u8_formats(re.sub(r'/([^/]+)/[^/?]+\.m3u8', r'/\1/\1.m3u8', m3u8_url), video_id, 'mp4', fatal=False) | ||||
|         if len(formats) < 2: | ||||
|             formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4') | ||||
|         # Despite metadata in m3u8 all video+audio formats are | ||||
|         # actually video-only (no audio) | ||||
|         for f in formats: | ||||
|             if f.get('acodec') != 'none' and f.get('vcodec') != 'none': | ||||
|                 f['acodec'] = 'none' | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         info = { | ||||
|             'id': video_id, | ||||
|             'title': video_id, | ||||
|             'formats': formats, | ||||
|         } | ||||
|  | ||||
|         rss = xpath_element(result, 'rss') | ||||
|         if rss: | ||||
|             info.update(self._parse_rss_feed(rss)['entries'][0]) | ||||
|             del info['url'] | ||||
|             del info['_type'] | ||||
|             del info['ie_key'] | ||||
|         return info | ||||
|  | ||||
|  | ||||
| class CBCWatchIE(CBCWatchBaseIE): | ||||
|     IE_NAME = 'cbc.ca:watch' | ||||
|     _VALID_URL = r'https?://watch\.cbc\.ca/(?:[^/]+/)+(?P<id>[0-9a-f-]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://watch.cbc.ca/doc-zone/season-6/customer-disservice/38e815a-009e3ab12e4', | ||||
|         'info_dict': { | ||||
|             'id': '38e815a-009e3ab12e4', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Customer (Dis)Service', | ||||
|             'description': 'md5:8bdd6913a0fe03d4b2a17ebe169c7c87', | ||||
|             'upload_date': '20160219', | ||||
|             'timestamp': 1455840000, | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|             'format': 'bestvideo', | ||||
|         }, | ||||
|         'skip': 'Geo-restricted to Canada', | ||||
|     }, { | ||||
|         'url': 'http://watch.cbc.ca/arthur/all/1ed4b385-cd84-49cf-95f0-80f004680057', | ||||
|         'info_dict': { | ||||
|             'id': '1ed4b385-cd84-49cf-95f0-80f004680057', | ||||
|             'title': 'Arthur', | ||||
|             'description': 'Arthur, the sweetest 8-year-old aardvark, and his pals solve all kinds of problems with humour, kindness and teamwork.', | ||||
|         }, | ||||
|         'playlist_mincount': 30, | ||||
|         'skip': 'Geo-restricted to Canada', | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         rss = self._call_api('web/browse/' + video_id, video_id) | ||||
|         return self._parse_rss_feed(rss) | ||||
|   | ||||
| @@ -4,6 +4,7 @@ from .theplatform import ThePlatformFeedIE | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     find_xpath_attr, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -17,19 +18,6 @@ class CBSBaseIE(ThePlatformFeedIE): | ||||
|             }] | ||||
|         } if closed_caption_e is not None and closed_caption_e.attrib.get('value') else [] | ||||
|  | ||||
|     def _extract_video_info(self, filter_query, video_id): | ||||
|         return self._extract_feed_info( | ||||
|             'dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id, lambda entry: { | ||||
|                 'series': entry.get('cbs$SeriesTitle'), | ||||
|                 'season_number': int_or_none(entry.get('cbs$SeasonNumber')), | ||||
|                 'episode': entry.get('cbs$EpisodeTitle'), | ||||
|                 'episode_number': int_or_none(entry.get('cbs$EpisodeNumber')), | ||||
|             }, { | ||||
|                 'StreamPack': { | ||||
|                     'manifest': 'm3u', | ||||
|                 } | ||||
|             }) | ||||
|  | ||||
|  | ||||
| class CBSIE(CBSBaseIE): | ||||
|     _VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)' | ||||
| @@ -38,7 +26,6 @@ class CBSIE(CBSBaseIE): | ||||
|         'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/', | ||||
|         'info_dict': { | ||||
|             'id': '_u7W953k6la293J7EPTd9oHkSPs6Xn6_', | ||||
|             'display_id': 'connect-chat-feat-garth-brooks', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Connect Chat feat. Garth Brooks', | ||||
|             'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!', | ||||
| @@ -47,7 +34,10 @@ class CBSIE(CBSBaseIE): | ||||
|             'upload_date': '20131127', | ||||
|             'uploader': 'CBSI-NEW', | ||||
|         }, | ||||
|         'expected_warnings': ['Failed to download m3u8 information'], | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         '_skip': 'Blocked outside the US', | ||||
|     }, { | ||||
|         'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/', | ||||
| @@ -56,8 +46,31 @@ class CBSIE(CBSBaseIE): | ||||
|         'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|     TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true' | ||||
|  | ||||
|     def _extract_video_info(self, guid): | ||||
|         path = 'dJ5BDC/media/guid/2198311517/' + guid | ||||
|         smil_url = 'http://link.theplatform.com/s/%s?mbr=true' % path | ||||
|         formats, subtitles = self._extract_theplatform_smil(smil_url + '&manifest=m3u', guid) | ||||
|         for r in ('OnceURL&formats=M3U', 'HLS&formats=M3U', 'RTMP', 'WIFI', '3G'): | ||||
|             try: | ||||
|                 tp_formats, _ = self._extract_theplatform_smil(smil_url + '&assetTypes=' + r, guid, 'Downloading %s SMIL data' % r.split('&')[0]) | ||||
|                 formats.extend(tp_formats) | ||||
|             except ExtractorError: | ||||
|                 continue | ||||
|         self._sort_formats(formats) | ||||
|         metadata = self._download_theplatform_metadata(path, guid) | ||||
|         info = self._parse_theplatform_metadata(metadata) | ||||
|         info.update({ | ||||
|             'id': guid, | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|             'series': metadata.get('cbs$SeriesTitle'), | ||||
|             'season_number': int_or_none(metadata.get('cbs$SeasonNumber')), | ||||
|             'episode': metadata.get('cbs$EpisodeTitle'), | ||||
|             'episode_number': int_or_none(metadata.get('cbs$EpisodeNumber')), | ||||
|         }) | ||||
|         return info | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         content_id = self._match_id(url) | ||||
|         return self._extract_video_info('byGuid=%s' % content_id, content_id) | ||||
|         return self._extract_video_info(content_id) | ||||
|   | ||||
| @@ -1,12 +1,10 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import calendar | ||||
| import datetime | ||||
|  | ||||
| from .anvato import AnvatoIE | ||||
| from .sendtonews import SendtoNewsIE | ||||
| from ..compat import compat_urlparse | ||||
| from ..utils import unified_timestamp | ||||
|  | ||||
|  | ||||
| class CBSLocalIE(AnvatoIE): | ||||
| @@ -43,13 +41,8 @@ class CBSLocalIE(AnvatoIE): | ||||
|         'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/', | ||||
|         'info_dict': { | ||||
|             'id': 'GxfCe0Zo7D-175909-5588', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Recap: CLE 15, CIN 6', | ||||
|             'description': '5/16/16: Indians\' bats explode for 15 runs in a win', | ||||
|             'upload_date': '20160516', | ||||
|             'timestamp': 1463433840, | ||||
|             'duration': 49, | ||||
|         }, | ||||
|         'playlist_count': 9, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
| @@ -62,19 +55,15 @@ class CBSLocalIE(AnvatoIE): | ||||
|  | ||||
|         sendtonews_url = SendtoNewsIE._extract_url(webpage) | ||||
|         if sendtonews_url: | ||||
|             info_dict = { | ||||
|                 '_type': 'url_transparent', | ||||
|                 'url': compat_urlparse.urljoin(url, sendtonews_url), | ||||
|             } | ||||
|         else: | ||||
|             return self.url_result( | ||||
|                 compat_urlparse.urljoin(url, sendtonews_url), | ||||
|                 ie=SendtoNewsIE.ie_key()) | ||||
|  | ||||
|         info_dict = self._extract_anvato_videos(webpage, display_id) | ||||
|  | ||||
|         time_str = self._html_search_regex( | ||||
|             r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False) | ||||
|         timestamp = None | ||||
|         if time_str: | ||||
|             timestamp = calendar.timegm(datetime.datetime.strptime( | ||||
|                 time_str, '%b %d, %Y %I:%M %p').timetuple()) | ||||
|         timestamp = unified_timestamp(time_str) | ||||
|  | ||||
|         info_dict.update({ | ||||
|             'display_id': display_id, | ||||
|   | ||||
| @@ -2,13 +2,13 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .cbs import CBSBaseIE | ||||
| from .cbs import CBSIE | ||||
| from ..utils import ( | ||||
|     parse_duration, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class CBSNewsIE(CBSBaseIE): | ||||
| class CBSNewsIE(CBSIE): | ||||
|     IE_DESC = 'CBS News' | ||||
|     _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)' | ||||
|  | ||||
| @@ -35,7 +35,8 @@ class CBSNewsIE(CBSBaseIE): | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack', | ||||
|                 'description': 'md5:4a6983e480542d8b333a947bfc64ddc7', | ||||
|                 'upload_date': '19700101', | ||||
|                 'upload_date': '20140404', | ||||
|                 'timestamp': 1396650660, | ||||
|                 'uploader': 'CBSI-NEW', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|                 'duration': 205, | ||||
| @@ -63,14 +64,15 @@ class CBSNewsIE(CBSBaseIE): | ||||
|  | ||||
|         item = video_info['item'] if 'item' in video_info else video_info | ||||
|         guid = item['mpxRefId'] | ||||
|         return self._extract_video_info('byGuid=%s' % guid, guid) | ||||
|         return self._extract_video_info(guid) | ||||
|  | ||||
|  | ||||
| class CBSNewsLiveVideoIE(InfoExtractor): | ||||
|     IE_DESC = 'CBS News Live Videos' | ||||
|     _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|     # Live videos get deleted soon. See http://www.cbsnews.com/live/ for the latest examples | ||||
|     _TEST = { | ||||
|         'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/', | ||||
|         'info_dict': { | ||||
|             'id': 'clinton-sanders-prepare-to-face-off-in-nh', | ||||
| @@ -78,15 +80,8 @@ class CBSNewsLiveVideoIE(InfoExtractor): | ||||
|             'title': 'Clinton, Sanders Prepare To Face Off In NH', | ||||
|             'duration': 334, | ||||
|         }, | ||||
|         'skip': 'Video gone, redirected to http://www.cbsnews.com/live/', | ||||
|     }, { | ||||
|         'url': 'http://www.cbsnews.com/live/video/video-shows-intense-paragliding-accident/', | ||||
|         'info_dict': { | ||||
|             'id': 'video-shows-intense-paragliding-accident', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Video Shows Intense Paragliding Accident', | ||||
|         }, | ||||
|     }] | ||||
|         'skip': 'Video gone', | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|   | ||||
| @@ -4,7 +4,7 @@ from .cbs import CBSBaseIE | ||||
|  | ||||
|  | ||||
| class CBSSportsIE(CBSBaseIE): | ||||
|     _VALID_URL = r'https?://www\.cbssports\.com/video/player/[^/]+/(?P<id>\d+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?cbssports\.com/video/player/[^/]+/(?P<id>\d+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.cbssports.com/video/player/videos/708337219968/0/ben-simmons-the-next-lebron?-not-so-fast', | ||||
| @@ -23,6 +23,9 @@ class CBSSportsIE(CBSBaseIE): | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|     def _extract_video_info(self, filter_query, video_id): | ||||
|         return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         return self._extract_video_info('byId=%s' % video_id, video_id) | ||||
|   | ||||
							
								
								
									
										53
									
								
								youtube_dl/extractor/cctv.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								youtube_dl/extractor/cctv.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,53 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import float_or_none | ||||
|  | ||||
|  | ||||
| class CCTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'''(?x)https?://(?:.+?\.)? | ||||
|         (?: | ||||
|             cctv\.(?:com|cn)| | ||||
|             cntv\.cn | ||||
|         )/ | ||||
|         (?: | ||||
|             video/[^/]+/(?P<id>[0-9a-f]{32})| | ||||
|             \d{4}/\d{2}/\d{2}/(?P<display_id>VID[0-9A-Za-z]+) | ||||
|         )''' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://english.cntv.cn/2016/09/03/VIDEhnkB5y9AgHyIEVphCEz1160903.shtml', | ||||
|         'md5': '819c7b49fc3927d529fb4cd555621823', | ||||
|         'info_dict': { | ||||
|             'id': '454368eb19ad44a1925bf1eb96140a61', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Portrait of Real Current Life 09/03/2016 Modern Inventors Part 1', | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://tv.cctv.com/2016/09/07/VIDE5C1FnlX5bUywlrjhxXOV160907.shtml', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://tv.cntv.cn/video/C39296/95cfac44cabd3ddc4a9438780a4e5c44', | ||||
|         'only_matching': True | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id, display_id = re.match(self._VALID_URL, url).groups() | ||||
|         if not video_id: | ||||
|             webpage = self._download_webpage(url, display_id) | ||||
|             video_id = self._search_regex( | ||||
|                 r'(?:fo\.addVariable\("videoCenterId",\s*|guid\s*=\s*)"([0-9a-f]{32})', | ||||
|                 webpage, 'video_id') | ||||
|         api_data = self._download_json( | ||||
|             'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid=' + video_id, video_id) | ||||
|         m3u8_url = re.sub(r'maxbr=\d+&?', '', api_data['hls_url']) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': api_data['title'], | ||||
|             'formats': self._extract_m3u8_formats( | ||||
|                 m3u8_url, video_id, 'mp4', 'm3u8_native', fatal=False), | ||||
|             'duration': float_or_none(api_data.get('video', {}).get('totalLength')), | ||||
|         } | ||||
| @@ -17,7 +17,7 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class CeskaTelevizeIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(?:[^/]+/)*(?P<id>[^/#?]+)/*(?:[#?].*)?$' | ||||
|     _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(porady|ivysilani)/(?:[^/]+/)*(?P<id>[^/#?]+)/*(?:[#?].*)?$' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220', | ||||
|         'info_dict': { | ||||
|   | ||||
							
								
								
									
										51
									
								
								youtube_dl/extractor/charlierose.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										51
									
								
								youtube_dl/extractor/charlierose.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,51 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import remove_end | ||||
|  | ||||
|  | ||||
| class CharlieRoseIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?charlierose\.com/video(?:s|/player)/(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://charlierose.com/videos/27996', | ||||
|         'md5': 'fda41d49e67d4ce7c2411fd2c4702e09', | ||||
|         'info_dict': { | ||||
|             'id': '27996', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Remembering Zaha Hadid', | ||||
|             'thumbnail': 're:^https?://.*\.jpg\?\d+', | ||||
|             'description': 'We revisit past conversations with Zaha Hadid, in memory of the world renowned Iraqi architect.', | ||||
|             'subtitles': { | ||||
|                 'en': [{ | ||||
|                     'ext': 'vtt', | ||||
|                 }], | ||||
|             }, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'https://charlierose.com/videos/27996', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     _PLAYER_BASE = 'https://charlierose.com/video/player/%s' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(self._PLAYER_BASE % video_id, video_id) | ||||
|  | ||||
|         title = remove_end(self._og_search_title(webpage), ' - Charlie Rose') | ||||
|  | ||||
|         info_dict = self._parse_html5_media_entries( | ||||
|             self._PLAYER_BASE % video_id, webpage, video_id, | ||||
|             m3u8_entry_protocol='m3u8_native')[0] | ||||
|  | ||||
|         self._sort_formats(info_dict['formats']) | ||||
|         self._remove_duplicate_formats(info_dict['formats']) | ||||
|  | ||||
|         info_dict.update({ | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'description': self._og_search_description(webpage), | ||||
|         }) | ||||
|  | ||||
|         return info_dict | ||||
| @@ -17,7 +17,8 @@ class ChaturbateIE(InfoExtractor): | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         } | ||||
|         }, | ||||
|         'skip': 'Room is offline', | ||||
|     }, { | ||||
|         'url': 'https://en.chaturbate.com/siswet19/', | ||||
|         'only_matching': True, | ||||
|   | ||||
| @@ -1,30 +1,33 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import base64 | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     parse_duration, | ||||
|     int_or_none, | ||||
| ) | ||||
| from ..utils import parse_duration | ||||
|  | ||||
|  | ||||
| class ChirbitIE(InfoExtractor): | ||||
|     IE_NAME = 'chirbit' | ||||
|     _VALID_URL = r'https?://(?:www\.)?chirb\.it/(?:(?:wp|pl)/|fb_chirbit_player\.swf\?key=)?(?P<id>[\da-zA-Z]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://chirb.it/PrIPv5', | ||||
|         'md5': '9847b0dad6ac3e074568bf2cfb197de8', | ||||
|         'url': 'http://chirb.it/be2abG', | ||||
|         'info_dict': { | ||||
|             'id': 'PrIPv5', | ||||
|             'id': 'be2abG', | ||||
|             'ext': 'mp3', | ||||
|             'title': 'Фасадстрой', | ||||
|             'duration': 52, | ||||
|             'view_count': int, | ||||
|             'comment_count': int, | ||||
|             'title': 'md5:f542ea253f5255240be4da375c6a5d7e', | ||||
|             'description': 'md5:f24a4e22a71763e32da5fed59e47c770', | ||||
|             'duration': 306, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'https://chirb.it/fb_chirbit_player.swf?key=PrIPv5', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://chirb.it/wp/MN58c2', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -33,33 +36,36 @@ class ChirbitIE(InfoExtractor): | ||||
|         webpage = self._download_webpage( | ||||
|             'http://chirb.it/%s' % audio_id, audio_id) | ||||
|  | ||||
|         audio_url = self._search_regex( | ||||
|             r'"setFile"\s*,\s*"([^"]+)"', webpage, 'audio url') | ||||
|         data_fd = self._search_regex( | ||||
|             r'data-fd=(["\'])(?P<url>(?:(?!\1).)+)\1', | ||||
|             webpage, 'data fd', group='url') | ||||
|  | ||||
|         # Reverse engineered from https://chirb.it/js/chirbit.player.js (look | ||||
|         # for soundURL) | ||||
|         audio_url = base64.b64decode( | ||||
|             data_fd[::-1].encode('ascii')).decode('utf-8') | ||||
|  | ||||
|         title = self._search_regex( | ||||
|             r'itemprop="name">([^<]+)', webpage, 'title') | ||||
|         duration = parse_duration(self._html_search_meta( | ||||
|             'duration', webpage, 'duration', fatal=False)) | ||||
|         view_count = int_or_none(self._search_regex( | ||||
|             r'itemprop="playCount"\s*>(\d+)', webpage, | ||||
|             'listen count', fatal=False)) | ||||
|         comment_count = int_or_none(self._search_regex( | ||||
|             r'>(\d+) Comments?:', webpage, | ||||
|             'comment count', fatal=False)) | ||||
|             r'class=["\']chirbit-title["\'][^>]*>([^<]+)', webpage, 'title') | ||||
|         description = self._search_regex( | ||||
|             r'<h3>Description</h3>\s*<pre[^>]*>([^<]+)</pre>', | ||||
|             webpage, 'description', default=None) | ||||
|         duration = parse_duration(self._search_regex( | ||||
|             r'class=["\']c-length["\'][^>]*>([^<]+)', | ||||
|             webpage, 'duration', fatal=False)) | ||||
|  | ||||
|         return { | ||||
|             'id': audio_id, | ||||
|             'url': audio_url, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'duration': duration, | ||||
|             'view_count': view_count, | ||||
|             'comment_count': comment_count, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class ChirbitProfileIE(InfoExtractor): | ||||
|     IE_NAME = 'chirbit:profile' | ||||
|     _VALID_URL = r'https?://(?:www\.)?chirbit.com/(?:rss/)?(?P<id>[^/]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?chirbit\.com/(?:rss/)?(?P<id>[^/]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://chirbit.com/ScarletBeauty', | ||||
|         'info_dict': { | ||||
|   | ||||
| @@ -6,7 +6,7 @@ from ..utils import ExtractorError | ||||
|  | ||||
| class CMTIE(MTVIE): | ||||
|     IE_NAME = 'cmt.com' | ||||
|     _VALID_URL = r'https?://www\.cmt\.com/(?:videos|shows)/(?:[^/]+/)*(?P<videoid>\d+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows)/(?:[^/]+/)*(?P<videoid>\d+)' | ||||
|     _FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|   | ||||
| @@ -3,15 +3,12 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     parse_duration, | ||||
|     url_basename, | ||||
| ) | ||||
| from .turner import TurnerBaseIE | ||||
| from ..utils import url_basename | ||||
|  | ||||
|  | ||||
| class CNNIE(InfoExtractor): | ||||
|     _VALID_URL = r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/ | ||||
| class CNNIE(TurnerBaseIE): | ||||
|     _VALID_URL = r'''(?x)https?://(?:(?P<sub_domain>edition|www|money)\.)?cnn\.com/(?:video/(?:data/.+?|\?)/)?videos?/ | ||||
|         (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z\-]+)|(?=&)))''' | ||||
|  | ||||
|     _TESTS = [{ | ||||
| @@ -25,6 +22,7 @@ class CNNIE(InfoExtractor): | ||||
|             'duration': 135, | ||||
|             'upload_date': '20130609', | ||||
|         }, | ||||
|         'expected_warnings': ['Failed to download m3u8 information'], | ||||
|     }, { | ||||
|         'url': 'http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29', | ||||
|         'md5': 'b5cc60c60a3477d185af8f19a2a26f4e', | ||||
| @@ -34,7 +32,8 @@ class CNNIE(InfoExtractor): | ||||
|             'title': "Student's epic speech stuns new freshmen", | ||||
|             'description': "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"", | ||||
|             'upload_date': '20130821', | ||||
|         } | ||||
|         }, | ||||
|         'expected_warnings': ['Failed to download m3u8 information'], | ||||
|     }, { | ||||
|         'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html', | ||||
|         'md5': 'f14d02ebd264df951feb2400e2c25a1b', | ||||
| @@ -44,80 +43,61 @@ class CNNIE(InfoExtractor): | ||||
|             'title': 'Nashville Ep. 1: Hand crafted skateboards', | ||||
|             'description': 'md5:e7223a503315c9f150acac52e76de086', | ||||
|             'upload_date': '20141222', | ||||
|         } | ||||
|         }, | ||||
|         'expected_warnings': ['Failed to download m3u8 information'], | ||||
|     }, { | ||||
|         'url': 'http://money.cnn.com/video/news/2016/08/19/netflix-stunning-stats.cnnmoney/index.html', | ||||
|         'md5': '52a515dc1b0f001cd82e4ceda32be9d1', | ||||
|         'info_dict': { | ||||
|             'id': '/video/news/2016/08/19/netflix-stunning-stats.cnnmoney', | ||||
|             'ext': 'mp4', | ||||
|             'title': '5 stunning stats about Netflix', | ||||
|             'description': 'Did you know that Netflix has more than 80 million members? Here are five facts about the online video distributor that you probably didn\'t know.', | ||||
|             'upload_date': '20160819', | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://edition.cnn.com/videos/arts/2016/04/21/olympic-games-cultural-a-z-brazil.cnn', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     _CONFIG = { | ||||
|         # http://edition.cnn.com/.element/apps/cvp/3.0/cfg/spider/cnn/expansion/config.xml | ||||
|         'edition': { | ||||
|             'data_src': 'http://edition.cnn.com/video/data/3.0/video/%s/index.xml', | ||||
|             'media_src': 'http://pmd.cdn.turner.com/cnn/big', | ||||
|         }, | ||||
|         # http://money.cnn.com/.element/apps/cvp2/cfg/config.xml | ||||
|         'money': { | ||||
|             'data_src': 'http://money.cnn.com/video/data/4.0/video/%s.xml', | ||||
|             'media_src': 'http://ht3.cdn.turner.com/money/big', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _extract_timestamp(self, video_data): | ||||
|         # TODO: fix timestamp extraction | ||||
|         return None | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         path = mobj.group('path') | ||||
|         page_title = mobj.group('title') | ||||
|         info_url = 'http://edition.cnn.com/video/data/3.0/%s/index.xml' % path | ||||
|         info = self._download_xml(info_url, page_title) | ||||
|  | ||||
|         formats = [] | ||||
|         rex = re.compile(r'''(?x) | ||||
|             (?P<width>[0-9]+)x(?P<height>[0-9]+) | ||||
|             (?:_(?P<bitrate>[0-9]+)k)? | ||||
|         ''') | ||||
|         for f in info.findall('files/file'): | ||||
|             video_url = 'http://ht.cdn.turner.com/cnn/big%s' % (f.text.strip()) | ||||
|             fdct = { | ||||
|                 'format_id': f.attrib['bitrate'], | ||||
|                 'url': video_url, | ||||
|             } | ||||
|  | ||||
|             mf = rex.match(f.attrib['bitrate']) | ||||
|             if mf: | ||||
|                 fdct['width'] = int(mf.group('width')) | ||||
|                 fdct['height'] = int(mf.group('height')) | ||||
|                 fdct['tbr'] = int_or_none(mf.group('bitrate')) | ||||
|             else: | ||||
|                 mf = rex.search(f.text) | ||||
|                 if mf: | ||||
|                     fdct['width'] = int(mf.group('width')) | ||||
|                     fdct['height'] = int(mf.group('height')) | ||||
|                     fdct['tbr'] = int_or_none(mf.group('bitrate')) | ||||
|                 else: | ||||
|                     mi = re.match(r'ios_(audio|[0-9]+)$', f.attrib['bitrate']) | ||||
|                     if mi: | ||||
|                         if mi.group(1) == 'audio': | ||||
|                             fdct['vcodec'] = 'none' | ||||
|                             fdct['ext'] = 'm4a' | ||||
|                         else: | ||||
|                             fdct['tbr'] = int(mi.group(1)) | ||||
|  | ||||
|             formats.append(fdct) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         thumbnails = [{ | ||||
|             'height': int(t.attrib['height']), | ||||
|             'width': int(t.attrib['width']), | ||||
|             'url': t.text, | ||||
|         } for t in info.findall('images/image')] | ||||
|  | ||||
|         metas_el = info.find('metas') | ||||
|         upload_date = ( | ||||
|             metas_el.attrib.get('version') if metas_el is not None else None) | ||||
|  | ||||
|         duration_el = info.find('length') | ||||
|         duration = parse_duration(duration_el.text) | ||||
|  | ||||
|         return { | ||||
|             'id': info.attrib['id'], | ||||
|             'title': info.find('headline').text, | ||||
|             'formats': formats, | ||||
|             'thumbnails': thumbnails, | ||||
|             'description': info.find('description').text, | ||||
|             'duration': duration, | ||||
|             'upload_date': upload_date, | ||||
|         sub_domain, path, page_title = re.match(self._VALID_URL, url).groups() | ||||
|         if sub_domain not in ('money', 'edition'): | ||||
|             sub_domain = 'edition' | ||||
|         config = self._CONFIG[sub_domain] | ||||
|         return self._extract_cvp_info( | ||||
|             config['data_src'] % path, page_title, { | ||||
|                 'default': { | ||||
|                     'media_src': config['media_src'], | ||||
|                 } | ||||
|             }) | ||||
|  | ||||
|  | ||||
| class CNNBlogsIE(InfoExtractor): | ||||
| @@ -132,6 +112,7 @@ class CNNBlogsIE(InfoExtractor): | ||||
|             'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.', | ||||
|             'upload_date': '20140209', | ||||
|         }, | ||||
|         'expected_warnings': ['Failed to download m3u8 information'], | ||||
|         'add_ie': ['CNN'], | ||||
|     } | ||||
|  | ||||
| @@ -146,7 +127,7 @@ class CNNBlogsIE(InfoExtractor): | ||||
|  | ||||
|  | ||||
| class CNNArticleIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!video/)' | ||||
|     _VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!videos?/)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/', | ||||
|         'md5': '689034c2a3d9c6dc4aa72d65a81efd01', | ||||
| @@ -154,9 +135,10 @@ class CNNArticleIE(InfoExtractor): | ||||
|             'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Obama: Cyberattack not an act of war', | ||||
|             'description': 'md5:51ce6750450603795cad0cdfbd7d05c5', | ||||
|             'description': 'md5:0a802a40d2376f60e6b04c8d5bcebc4b', | ||||
|             'upload_date': '20141221', | ||||
|         }, | ||||
|         'expected_warnings': ['Failed to download m3u8 information'], | ||||
|         'add_ie': ['CNN'], | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -1,6 +1,7 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .mtv import MTVServicesInfoExtractor | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class ComedyCentralIE(MTVServicesInfoExtractor): | ||||
| @@ -96,3 +97,22 @@ class ComedyCentralTVIE(MTVServicesInfoExtractor): | ||||
|             webpage, 'mrss url', group='url') | ||||
|  | ||||
|         return self._get_videos_info_from_url(mrss_url, video_id) | ||||
|  | ||||
|  | ||||
| class ComedyCentralShortnameIE(InfoExtractor): | ||||
|     _VALID_URL = r'^:(?P<id>tds|thedailyshow)$' | ||||
|     _TESTS = [{ | ||||
|         'url': ':tds', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': ':thedailyshow', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         shortcut_map = { | ||||
|             'tds': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes', | ||||
|             'thedailyshow': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes', | ||||
|         } | ||||
|         return self.url_result(shortcut_map[video_id]) | ||||
|   | ||||
| @@ -662,35 +662,48 @@ class InfoExtractor(object): | ||||
|         else: | ||||
|             return res | ||||
|  | ||||
|     def _get_login_info(self): | ||||
|     def _get_netrc_login_info(self, netrc_machine=None): | ||||
|         username = None | ||||
|         password = None | ||||
|         netrc_machine = netrc_machine or self._NETRC_MACHINE | ||||
|  | ||||
|         if self._downloader.params.get('usenetrc', False): | ||||
|             try: | ||||
|                 info = netrc.netrc().authenticators(netrc_machine) | ||||
|                 if info is not None: | ||||
|                     username = info[0] | ||||
|                     password = info[2] | ||||
|                 else: | ||||
|                     raise netrc.NetrcParseError( | ||||
|                         'No authenticators for %s' % netrc_machine) | ||||
|             except (IOError, netrc.NetrcParseError) as err: | ||||
|                 self._downloader.report_warning( | ||||
|                     'parsing .netrc: %s' % error_to_compat_str(err)) | ||||
|  | ||||
|         return username, password | ||||
|  | ||||
|     def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None): | ||||
|         """ | ||||
|         Get the login info as (username, password) | ||||
|         It will look in the netrc file using the _NETRC_MACHINE value | ||||
|         First look for the manually specified credentials using username_option | ||||
|         and password_option as keys in params dictionary. If no such credentials | ||||
|         available look in the netrc file using the netrc_machine or _NETRC_MACHINE | ||||
|         value. | ||||
|         If there's no info available, return (None, None) | ||||
|         """ | ||||
|         if self._downloader is None: | ||||
|             return (None, None) | ||||
|  | ||||
|         username = None | ||||
|         password = None | ||||
|         downloader_params = self._downloader.params | ||||
|  | ||||
|         # Attempt to use provided username and password or .netrc data | ||||
|         if downloader_params.get('username') is not None: | ||||
|             username = downloader_params['username'] | ||||
|             password = downloader_params['password'] | ||||
|         elif downloader_params.get('usenetrc', False): | ||||
|             try: | ||||
|                 info = netrc.netrc().authenticators(self._NETRC_MACHINE) | ||||
|                 if info is not None: | ||||
|                     username = info[0] | ||||
|                     password = info[2] | ||||
|         if downloader_params.get(username_option) is not None: | ||||
|             username = downloader_params[username_option] | ||||
|             password = downloader_params[password_option] | ||||
|         else: | ||||
|                     raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) | ||||
|             except (IOError, netrc.NetrcParseError) as err: | ||||
|                 self._downloader.report_warning('parsing .netrc: %s' % error_to_compat_str(err)) | ||||
|             username, password = self._get_netrc_login_info(netrc_machine) | ||||
|  | ||||
|         return (username, password) | ||||
|         return username, password | ||||
|  | ||||
|     def _get_tfa_info(self, note='two-factor verification code'): | ||||
|         """ | ||||
| @@ -727,9 +740,14 @@ class InfoExtractor(object): | ||||
|                     [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop) | ||||
|  | ||||
|     def _og_search_property(self, prop, html, name=None, **kargs): | ||||
|         if not isinstance(prop, (list, tuple)): | ||||
|             prop = [prop] | ||||
|         if name is None: | ||||
|             name = 'OpenGraph %s' % prop | ||||
|         escaped = self._search_regex(self._og_regexes(prop), html, name, flags=re.DOTALL, **kargs) | ||||
|             name = 'OpenGraph %s' % prop[0] | ||||
|         og_regexes = [] | ||||
|         for p in prop: | ||||
|             og_regexes.extend(self._og_regexes(p)) | ||||
|         escaped = self._search_regex(og_regexes, html, name, flags=re.DOTALL, **kargs) | ||||
|         if escaped is None: | ||||
|             return None | ||||
|         return unescapeHTML(escaped) | ||||
| @@ -811,11 +829,14 @@ class InfoExtractor(object): | ||||
|         json_ld = self._search_regex( | ||||
|             r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>', | ||||
|             html, 'JSON-LD', group='json_ld', **kwargs) | ||||
|         default = kwargs.get('default', NO_DEFAULT) | ||||
|         if not json_ld: | ||||
|             return {} | ||||
|         return self._json_ld( | ||||
|             json_ld, video_id, fatal=kwargs.get('fatal', True), | ||||
|             expected_type=expected_type) | ||||
|             return default if default is not NO_DEFAULT else {} | ||||
|         # JSON-LD may be malformed and thus `fatal` should be respected. | ||||
|         # At the same time `default` may be passed that assumes `fatal=False` | ||||
|         # for _search_regex. Let's simulate the same behavior here as well. | ||||
|         fatal = kwargs.get('fatal', True) if default == NO_DEFAULT else False | ||||
|         return self._json_ld(json_ld, video_id, fatal=fatal, expected_type=expected_type) | ||||
|  | ||||
|     def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None): | ||||
|         if isinstance(json_ld, compat_str): | ||||
| @@ -823,57 +844,63 @@ class InfoExtractor(object): | ||||
|         if not json_ld: | ||||
|             return {} | ||||
|         info = {} | ||||
|         if json_ld.get('@context') == 'http://schema.org': | ||||
|             item_type = json_ld.get('@type') | ||||
|         if not isinstance(json_ld, (list, tuple, dict)): | ||||
|             return info | ||||
|         if isinstance(json_ld, dict): | ||||
|             json_ld = [json_ld] | ||||
|         for e in json_ld: | ||||
|             if e.get('@context') == 'http://schema.org': | ||||
|                 item_type = e.get('@type') | ||||
|                 if expected_type is not None and expected_type != item_type: | ||||
|                     return info | ||||
|                 if item_type == 'TVEpisode': | ||||
|                     info.update({ | ||||
|                     'episode': unescapeHTML(json_ld.get('name')), | ||||
|                     'episode_number': int_or_none(json_ld.get('episodeNumber')), | ||||
|                     'description': unescapeHTML(json_ld.get('description')), | ||||
|                         'episode': unescapeHTML(e.get('name')), | ||||
|                         'episode_number': int_or_none(e.get('episodeNumber')), | ||||
|                         'description': unescapeHTML(e.get('description')), | ||||
|                     }) | ||||
|                 part_of_season = json_ld.get('partOfSeason') | ||||
|                     part_of_season = e.get('partOfSeason') | ||||
|                     if isinstance(part_of_season, dict) and part_of_season.get('@type') == 'TVSeason': | ||||
|                         info['season_number'] = int_or_none(part_of_season.get('seasonNumber')) | ||||
|                 part_of_series = json_ld.get('partOfSeries') | ||||
|                     part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries') | ||||
|                     if isinstance(part_of_series, dict) and part_of_series.get('@type') == 'TVSeries': | ||||
|                         info['series'] = unescapeHTML(part_of_series.get('name')) | ||||
|                 elif item_type == 'Article': | ||||
|                     info.update({ | ||||
|                     'timestamp': parse_iso8601(json_ld.get('datePublished')), | ||||
|                     'title': unescapeHTML(json_ld.get('headline')), | ||||
|                     'description': unescapeHTML(json_ld.get('articleBody')), | ||||
|                         'timestamp': parse_iso8601(e.get('datePublished')), | ||||
|                         'title': unescapeHTML(e.get('headline')), | ||||
|                         'description': unescapeHTML(e.get('articleBody')), | ||||
|                     }) | ||||
|                 elif item_type == 'VideoObject': | ||||
|                     info.update({ | ||||
|                     'url': json_ld.get('contentUrl'), | ||||
|                     'title': unescapeHTML(json_ld.get('name')), | ||||
|                     'description': unescapeHTML(json_ld.get('description')), | ||||
|                     'thumbnail': json_ld.get('thumbnailUrl'), | ||||
|                     'duration': parse_duration(json_ld.get('duration')), | ||||
|                     'timestamp': unified_timestamp(json_ld.get('uploadDate')), | ||||
|                     'filesize': float_or_none(json_ld.get('contentSize')), | ||||
|                     'tbr': int_or_none(json_ld.get('bitrate')), | ||||
|                     'width': int_or_none(json_ld.get('width')), | ||||
|                     'height': int_or_none(json_ld.get('height')), | ||||
|                         'url': e.get('contentUrl'), | ||||
|                         'title': unescapeHTML(e.get('name')), | ||||
|                         'description': unescapeHTML(e.get('description')), | ||||
|                         'thumbnail': e.get('thumbnailUrl'), | ||||
|                         'duration': parse_duration(e.get('duration')), | ||||
|                         'timestamp': unified_timestamp(e.get('uploadDate')), | ||||
|                         'filesize': float_or_none(e.get('contentSize')), | ||||
|                         'tbr': int_or_none(e.get('bitrate')), | ||||
|                         'width': int_or_none(e.get('width')), | ||||
|                         'height': int_or_none(e.get('height')), | ||||
|                     }) | ||||
|                 break | ||||
|         return dict((k, v) for k, v in info.items() if v is not None) | ||||
|  | ||||
|     @staticmethod | ||||
|     def _hidden_inputs(html): | ||||
|         html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html) | ||||
|         hidden_inputs = {} | ||||
|         for input in re.findall(r'(?i)<input([^>]+)>', html): | ||||
|             if not re.search(r'type=(["\'])(?:hidden|submit)\1', input): | ||||
|         for input in re.findall(r'(?i)(<input[^>]+>)', html): | ||||
|             attrs = extract_attributes(input) | ||||
|             if not input: | ||||
|                 continue | ||||
|             name = re.search(r'(?:name|id)=(["\'])(?P<value>.+?)\1', input) | ||||
|             if not name: | ||||
|             if attrs.get('type') not in ('hidden', 'submit'): | ||||
|                 continue | ||||
|             value = re.search(r'value=(["\'])(?P<value>.*?)\1', input) | ||||
|             if not value: | ||||
|                 continue | ||||
|             hidden_inputs[name.group('value')] = value.group('value') | ||||
|             name = attrs.get('name') or attrs.get('id') | ||||
|             value = attrs.get('value') | ||||
|             if name and value is not None: | ||||
|                 hidden_inputs[name] = value | ||||
|         return hidden_inputs | ||||
|  | ||||
|     def _form_hidden_inputs(self, form_id, html): | ||||
| @@ -911,7 +938,8 @@ class InfoExtractor(object): | ||||
|                 if f.get('ext') in ['f4f', 'f4m']:  # Not yet supported | ||||
|                     preference -= 0.5 | ||||
|  | ||||
|             proto_preference = 0 if determine_protocol(f) in ['http', 'https'] else -0.1 | ||||
|             protocol = f.get('protocol') or determine_protocol(f) | ||||
|             proto_preference = 0 if protocol in ['http', 'https'] else (-0.5 if protocol == 'rtsp' else -0.1) | ||||
|  | ||||
|             if f.get('vcodec') == 'none':  # audio only | ||||
|                 preference -= 50 | ||||
| @@ -1128,7 +1156,7 @@ class InfoExtractor(object): | ||||
|             'url': m3u8_url, | ||||
|             'ext': ext, | ||||
|             'protocol': 'm3u8', | ||||
|             'preference': preference - 1 if preference else -1, | ||||
|             'preference': preference - 100 if preference else -100, | ||||
|             'resolution': 'multiple', | ||||
|             'format_note': 'Quality selection URL', | ||||
|         } | ||||
| @@ -1138,13 +1166,6 @@ class InfoExtractor(object): | ||||
|                               m3u8_id=None, note=None, errnote=None, | ||||
|                               fatal=True, live=False): | ||||
|  | ||||
|         formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)] | ||||
|  | ||||
|         format_url = lambda u: ( | ||||
|             u | ||||
|             if re.match(r'^https?://', u) | ||||
|             else compat_urlparse.urljoin(m3u8_url, u)) | ||||
|  | ||||
|         res = self._download_webpage_handle( | ||||
|             m3u8_url, video_id, | ||||
|             note=note or 'Downloading m3u8 information', | ||||
| @@ -1155,6 +1176,13 @@ class InfoExtractor(object): | ||||
|         m3u8_doc, urlh = res | ||||
|         m3u8_url = urlh.geturl() | ||||
|  | ||||
|         formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)] | ||||
|  | ||||
|         format_url = lambda u: ( | ||||
|             u | ||||
|             if re.match(r'^https?://', u) | ||||
|             else compat_urlparse.urljoin(m3u8_url, u)) | ||||
|  | ||||
|         # We should try extracting formats only from master playlists [1], i.e. | ||||
|         # playlists that describe available qualities. On the other hand media | ||||
|         # playlists [2] should be returned as is since they contain just the media | ||||
| @@ -1176,27 +1204,44 @@ class InfoExtractor(object): | ||||
|                 'protocol': entry_protocol, | ||||
|                 'preference': preference, | ||||
|             }] | ||||
|         last_info = None | ||||
|         last_media = None | ||||
|         last_info = {} | ||||
|         last_media = {} | ||||
|         for line in m3u8_doc.splitlines(): | ||||
|             if line.startswith('#EXT-X-STREAM-INF:'): | ||||
|                 last_info = parse_m3u8_attributes(line) | ||||
|             elif line.startswith('#EXT-X-MEDIA:'): | ||||
|                 last_media = parse_m3u8_attributes(line) | ||||
|                 media = parse_m3u8_attributes(line) | ||||
|                 media_type = media.get('TYPE') | ||||
|                 if media_type in ('VIDEO', 'AUDIO'): | ||||
|                     media_url = media.get('URI') | ||||
|                     if media_url: | ||||
|                         format_id = [] | ||||
|                         for v in (media.get('GROUP-ID'), media.get('NAME')): | ||||
|                             if v: | ||||
|                                 format_id.append(v) | ||||
|                         formats.append({ | ||||
|                             'format_id': '-'.join(format_id), | ||||
|                             'url': format_url(media_url), | ||||
|                             'language': media.get('LANGUAGE'), | ||||
|                             'vcodec': 'none' if media_type == 'AUDIO' else None, | ||||
|                             'ext': ext, | ||||
|                             'protocol': entry_protocol, | ||||
|                             'preference': preference, | ||||
|                         }) | ||||
|                     else: | ||||
|                         # When there is no URI in EXT-X-MEDIA let this tag's | ||||
|                         # data be used by regular URI lines below | ||||
|                         last_media = media | ||||
|             elif line.startswith('#') or not line.strip(): | ||||
|                 continue | ||||
|             else: | ||||
|                 if last_info is None: | ||||
|                     formats.append({'url': format_url(line)}) | ||||
|                     continue | ||||
|                 tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000) | ||||
|                 tbr = int_or_none(last_info.get('AVERAGE-BANDWIDTH') or last_info.get('BANDWIDTH'), scale=1000) | ||||
|                 format_id = [] | ||||
|                 if m3u8_id: | ||||
|                     format_id.append(m3u8_id) | ||||
|                 last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') not in ('SUBTITLES', 'CLOSED-CAPTIONS') else None | ||||
|                 # Despite specification does not mention NAME attribute for | ||||
|                 # EXT-X-STREAM-INF it still sometimes may be present | ||||
|                 stream_name = last_info.get('NAME') or last_media_name | ||||
|                 stream_name = last_info.get('NAME') or last_media.get('NAME') | ||||
|                 # Bandwidth of live streams may differ over time thus making | ||||
|                 # format_id unpredictable. So it's better to keep provided | ||||
|                 # format_id intact. | ||||
| @@ -1227,11 +1272,9 @@ class InfoExtractor(object): | ||||
|                         'abr': abr, | ||||
|                     }) | ||||
|                 f.update(parse_codecs(last_info.get('CODECS'))) | ||||
|                 if last_media is not None: | ||||
|                     f['m3u8_media'] = last_media | ||||
|                     last_media = None | ||||
|                 formats.append(f) | ||||
|                 last_info = {} | ||||
|                 last_media = {} | ||||
|         return formats | ||||
|  | ||||
|     @staticmethod | ||||
| @@ -1670,7 +1713,7 @@ class InfoExtractor(object): | ||||
|                         self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) | ||||
|         return formats | ||||
|  | ||||
|     def _parse_html5_media_entries(self, base_url, webpage): | ||||
|     def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8'): | ||||
|         def absolute_url(video_url): | ||||
|             return compat_urlparse.urljoin(base_url, video_url) | ||||
|  | ||||
| @@ -1685,6 +1728,21 @@ class InfoExtractor(object): | ||||
|                 return f | ||||
|             return {} | ||||
|  | ||||
|         def _media_formats(src, cur_media_type): | ||||
|             full_url = absolute_url(src) | ||||
|             if determine_ext(full_url) == 'm3u8': | ||||
|                 is_plain_url = False | ||||
|                 formats = self._extract_m3u8_formats( | ||||
|                     full_url, video_id, ext='mp4', | ||||
|                     entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id) | ||||
|             else: | ||||
|                 is_plain_url = True | ||||
|                 formats = [{ | ||||
|                     'url': full_url, | ||||
|                     'vcodec': 'none' if cur_media_type == 'audio' else None, | ||||
|                 }] | ||||
|             return is_plain_url, formats | ||||
|  | ||||
|         entries = [] | ||||
|         for media_tag, media_type, media_content in re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage): | ||||
|             media_info = { | ||||
| @@ -1694,10 +1752,8 @@ class InfoExtractor(object): | ||||
|             media_attributes = extract_attributes(media_tag) | ||||
|             src = media_attributes.get('src') | ||||
|             if src: | ||||
|                 media_info['formats'].append({ | ||||
|                     'url': absolute_url(src), | ||||
|                     'vcodec': 'none' if media_type == 'audio' else None, | ||||
|                 }) | ||||
|                 _, formats = _media_formats(src, media_type) | ||||
|                 media_info['formats'].extend(formats) | ||||
|             media_info['thumbnail'] = media_attributes.get('poster') | ||||
|             if media_content: | ||||
|                 for source_tag in re.findall(r'<source[^>]+>', media_content): | ||||
| @@ -1705,12 +1761,13 @@ class InfoExtractor(object): | ||||
|                     src = source_attributes.get('src') | ||||
|                     if not src: | ||||
|                         continue | ||||
|                     is_plain_url, formats = _media_formats(src, media_type) | ||||
|                     if is_plain_url: | ||||
|                         f = parse_content_type(source_attributes.get('type')) | ||||
|                     f.update({ | ||||
|                         'url': absolute_url(src), | ||||
|                         'vcodec': 'none' if media_type == 'audio' else None, | ||||
|                     }) | ||||
|                         f.update(formats[0]) | ||||
|                         media_info['formats'].append(f) | ||||
|                     else: | ||||
|                         media_info['formats'].extend(formats) | ||||
|                 for track_tag in re.findall(r'<track[^>]+>', media_content): | ||||
|                     track_attributes = extract_attributes(track_tag) | ||||
|                     kind = track_attributes.get('kind') | ||||
| @@ -1726,6 +1783,18 @@ class InfoExtractor(object): | ||||
|                 entries.append(media_info) | ||||
|         return entries | ||||
|  | ||||
|     def _extract_akamai_formats(self, manifest_url, video_id): | ||||
|         formats = [] | ||||
|         f4m_url = re.sub(r'(https?://.+?)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m') | ||||
|         formats.extend(self._extract_f4m_formats( | ||||
|             update_url_query(f4m_url, {'hdcore': '3.7.0'}), | ||||
|             video_id, f4m_id='hds', fatal=False)) | ||||
|         m3u8_url = re.sub(r'(https?://.+?)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8') | ||||
|         formats.extend(self._extract_m3u8_formats( | ||||
|             m3u8_url, video_id, 'mp4', 'm3u8_native', | ||||
|             m3u8_id='hls', fatal=False)) | ||||
|         return formats | ||||
|  | ||||
|     def _live_title(self, name): | ||||
|         """ Generate the title for a live video """ | ||||
|         now = datetime.datetime.now() | ||||
| @@ -1786,7 +1855,7 @@ class InfoExtractor(object): | ||||
|  | ||||
|         any_restricted = False | ||||
|         for tc in self.get_testcases(include_onlymatching=False): | ||||
|             if 'playlist' in tc: | ||||
|             if tc.get('playlist', []): | ||||
|                 tc = tc['playlist'][0] | ||||
|             is_restricted = age_restricted( | ||||
|                 tc.get('info_dict', {}).get('age_limit'), age_limit) | ||||
|   | ||||
| @@ -5,13 +5,17 @@ import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse_urlencode, | ||||
|     compat_urllib_parse_urlparse, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     orderedSet, | ||||
|     remove_end, | ||||
|     extract_attributes, | ||||
|     mimetype2ext, | ||||
|     determine_ext, | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -58,6 +62,9 @@ class CondeNastIE(InfoExtractor): | ||||
|             'ext': 'mp4', | ||||
|             'title': '3D Printed Speakers Lit With LED', | ||||
|             'description': 'Check out these beautiful 3D printed LED speakers.  You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.', | ||||
|             'uploader': 'wired', | ||||
|             'upload_date': '20130314', | ||||
|             'timestamp': 1363219200, | ||||
|         } | ||||
|     }, { | ||||
|         # JS embed | ||||
| @@ -67,70 +74,93 @@ class CondeNastIE(InfoExtractor): | ||||
|             'id': '55f9cf8b61646d1acf00000c', | ||||
|             'ext': 'mp4', | ||||
|             'title': '3D printed TSA Travel Sentry keys really do open TSA locks', | ||||
|             'uploader': 'arstechnica', | ||||
|             'upload_date': '20150916', | ||||
|             'timestamp': 1442434955, | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|     def _extract_series(self, url, webpage): | ||||
|         title = self._html_search_regex(r'<div class="cne-series-info">.*?<h1>(.+?)</h1>', | ||||
|                                         webpage, 'series title', flags=re.DOTALL) | ||||
|         title = self._html_search_regex( | ||||
|             r'(?s)<div class="cne-series-info">.*?<h1>(.+?)</h1>', | ||||
|             webpage, 'series title') | ||||
|         url_object = compat_urllib_parse_urlparse(url) | ||||
|         base_url = '%s://%s' % (url_object.scheme, url_object.netloc) | ||||
|         m_paths = re.finditer(r'<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]', | ||||
|                               webpage, flags=re.DOTALL) | ||||
|         m_paths = re.finditer( | ||||
|             r'(?s)<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]', webpage) | ||||
|         paths = orderedSet(m.group(1) for m in m_paths) | ||||
|         build_url = lambda path: compat_urlparse.urljoin(base_url, path) | ||||
|         entries = [self.url_result(build_url(path), 'CondeNast') for path in paths] | ||||
|         return self.playlist_result(entries, playlist_title=title) | ||||
|  | ||||
|     def _extract_video(self, webpage, url_type): | ||||
|         if url_type != 'embed': | ||||
|             description = self._html_search_regex( | ||||
|                 [ | ||||
|                     r'<div class="cne-video-description">(.+?)</div>', | ||||
|                     r'<div class="video-post-content">(.+?)</div>', | ||||
|                 ], | ||||
|                 webpage, 'description', fatal=False, flags=re.DOTALL) | ||||
|         else: | ||||
|             description = None | ||||
|         params = self._search_regex(r'var params = {(.+?)}[;,]', webpage, | ||||
|                                     'player params', flags=re.DOTALL) | ||||
|         video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id') | ||||
|         player_id = self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, 'player id') | ||||
|         target = self._search_regex(r'target: [\'"](.+?)[\'"]', params, 'target') | ||||
|         data = compat_urllib_parse_urlencode({'videoId': video_id, | ||||
|                                               'playerId': player_id, | ||||
|                                               'target': target, | ||||
|         query = {} | ||||
|         params = self._search_regex( | ||||
|             r'(?s)var params = {(.+?)}[;,]', webpage, 'player params', default=None) | ||||
|         if params: | ||||
|             query.update({ | ||||
|                 'videoId': self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id'), | ||||
|                 'playerId': self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, 'player id'), | ||||
|                 'target': self._search_regex(r'target: [\'"](.+?)[\'"]', params, 'target'), | ||||
|             }) | ||||
|         base_info_url = self._search_regex(r'url = [\'"](.+?)[\'"][,;]', | ||||
|                                            webpage, 'base info url', | ||||
|                                            default='http://player.cnevids.com/player/loader.js?') | ||||
|         info_url = base_info_url + data | ||||
|         info_page = self._download_webpage(info_url, video_id, | ||||
|                                            'Downloading video info') | ||||
|         video_info = self._search_regex(r'var\s+video\s*=\s*({.+?});', info_page, 'video info') | ||||
|         video_info = self._parse_json(video_info, video_id) | ||||
|         else: | ||||
|             params = extract_attributes(self._search_regex( | ||||
|                 r'(<[^>]+data-js="video-player"[^>]+>)', | ||||
|                 webpage, 'player params element')) | ||||
|             query.update({ | ||||
|                 'videoId': params['data-video'], | ||||
|                 'playerId': params['data-player'], | ||||
|                 'target': params['id'], | ||||
|             }) | ||||
|         video_id = query['videoId'] | ||||
|         video_info = None | ||||
|         info_page = self._download_webpage( | ||||
|             'http://player.cnevids.com/player/video.js', | ||||
|             video_id, 'Downloading video info', query=query, fatal=False) | ||||
|         if info_page: | ||||
|             video_info = self._parse_json(self._search_regex( | ||||
|                 r'loadCallback\(({.+})\)', info_page, 'video info'), video_id)['video'] | ||||
|         else: | ||||
|             info_page = self._download_webpage( | ||||
|                 'http://player.cnevids.com/player/loader.js', | ||||
|                 video_id, 'Downloading loader info', query=query) | ||||
|             video_info = self._parse_json(self._search_regex( | ||||
|                 r'var\s+video\s*=\s*({.+?});', info_page, 'video info'), video_id) | ||||
|         title = video_info['title'] | ||||
|  | ||||
|         formats = [{ | ||||
|             'format_id': '%s-%s' % (fdata['type'].split('/')[-1], fdata['quality']), | ||||
|             'url': fdata['src'], | ||||
|             'ext': fdata['type'].split('/')[-1], | ||||
|             'quality': 1 if fdata['quality'] == 'high' else 0, | ||||
|         } for fdata in video_info['sources'][0]] | ||||
|         formats = [] | ||||
|         for fdata in video_info.get('sources', [{}])[0]: | ||||
|             src = fdata.get('src') | ||||
|             if not src: | ||||
|                 continue | ||||
|             ext = mimetype2ext(fdata.get('type')) or determine_ext(src) | ||||
|             quality = fdata.get('quality') | ||||
|             formats.append({ | ||||
|                 'format_id': ext + ('-%s' % quality if quality else ''), | ||||
|                 'url': src, | ||||
|                 'ext': ext, | ||||
|                 'quality': 1 if quality == 'high' else 0, | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|         info = self._search_json_ld( | ||||
|             webpage, video_id, fatal=False) if url_type != 'embed' else {} | ||||
|         info.update({ | ||||
|             'id': video_id, | ||||
|             'formats': formats, | ||||
|             'title': video_info['title'], | ||||
|             'thumbnail': video_info['poster_frame'], | ||||
|             'description': description, | ||||
|         } | ||||
|             'title': title, | ||||
|             'thumbnail': video_info.get('poster_frame'), | ||||
|             'uploader': video_info.get('brand'), | ||||
|             'duration': int_or_none(video_info.get('duration')), | ||||
|             'tags': video_info.get('tags'), | ||||
|             'series': video_info.get('series_title'), | ||||
|             'season': video_info.get('season_title'), | ||||
|             'timestamp': parse_iso8601(video_info.get('premiere_date')), | ||||
|         }) | ||||
|         return info | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         site = mobj.group('site') | ||||
|         url_type = mobj.group('type') | ||||
|         item_id = mobj.group('id') | ||||
|         site, url_type, item_id = re.match(self._VALID_URL, url).groups() | ||||
|  | ||||
|         # Convert JS embed to regular embed | ||||
|         if url_type == 'embedjs': | ||||
|   | ||||
| @@ -1,5 +1,5 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
| from __future__ import unicode_literals, division | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import int_or_none | ||||
| @@ -8,12 +8,22 @@ from ..utils import int_or_none | ||||
| class CrackleIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:crackle:|https?://(?:www\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.crackle.com/the-art-of-more/2496419', | ||||
|         'url': 'http://www.crackle.com/comedians-in-cars-getting-coffee/2498934', | ||||
|         'info_dict': { | ||||
|             'id': '2496419', | ||||
|             'id': '2498934', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Heavy Lies the Head', | ||||
|             'description': 'md5:bb56aa0708fe7b9a4861535f15c3abca', | ||||
|             'title': 'Everybody Respects A Bloody Nose', | ||||
|             'description': 'Jerry is kaffeeklatsching in L.A. with funnyman J.B. Smoove (Saturday Night Live, Real Husbands of Hollywood). They’re headed for brew at 10 Speed Coffee in a 1964 Studebaker Avanti.', | ||||
|             'thumbnail': 're:^https?://.*\.jpg', | ||||
|             'duration': 906, | ||||
|             'series': 'Comedians In Cars Getting Coffee', | ||||
|             'season_number': 8, | ||||
|             'episode_number': 4, | ||||
|             'subtitles': { | ||||
|                 'en-US': [{ | ||||
|                     'ext': 'ttml', | ||||
|                 }] | ||||
|             }, | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
| @@ -21,12 +31,8 @@ class CrackleIE(InfoExtractor): | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     # extracted from http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx | ||||
|     _SUBTITLE_SERVER = 'http://web-us-az.crackle.com' | ||||
|     _UPLYNK_OWNER_ID = 'e8773f7770a44dbd886eee4fca16a66b' | ||||
|     _THUMBNAIL_TEMPLATE = 'http://images-us-am.crackle.com/%stnl_1920x1080.jpg?ts=20140107233116?c=635333335057637614' | ||||
|  | ||||
|     # extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx | ||||
|     _THUMBNAIL_TEMPLATE = 'http://images-us-am.crackle.com/%stnl_1920x1080.jpg?ts=20140107233116?c=635333335057637614' | ||||
|     _MEDIA_FILE_SLOTS = { | ||||
|         'c544.flv': { | ||||
|             'width': 544, | ||||
| @@ -48,16 +54,21 @@ class CrackleIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         config_doc = self._download_xml( | ||||
|             'http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx?site=16', | ||||
|             video_id, 'Downloading config') | ||||
|  | ||||
|         item = self._download_xml( | ||||
|             'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id, | ||||
|             video_id).find('i') | ||||
|         title = item.attrib['t'] | ||||
|  | ||||
|         thumbnail = None | ||||
|         subtitles = {} | ||||
|         formats = self._extract_m3u8_formats( | ||||
|             'http://content.uplynk.com/ext/%s/%s.m3u8' % (self._UPLYNK_OWNER_ID, video_id), | ||||
|             'http://content.uplynk.com/ext/%s/%s.m3u8' % (config_doc.attrib['strUplynkOwnerId'], video_id), | ||||
|             video_id, 'mp4', m3u8_id='hls', fatal=None) | ||||
|         thumbnail = None | ||||
|         path = item.attrib.get('p') | ||||
|         if path: | ||||
|             thumbnail = self._THUMBNAIL_TEMPLATE % path | ||||
| @@ -76,7 +87,7 @@ class CrackleIE(InfoExtractor): | ||||
|                     if locale not in subtitles: | ||||
|                         subtitles[locale] = [] | ||||
|                     subtitles[locale] = [{ | ||||
|                         'url': '%s/%s%s_%s.xml' % (self._SUBTITLE_SERVER, path, locale, v), | ||||
|                         'url': '%s/%s%s_%s.xml' % (config_doc.attrib['strSubtitleServer'], path, locale, v), | ||||
|                         'ext': 'ttml', | ||||
|                     }] | ||||
|         self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id')) | ||||
| @@ -85,7 +96,7 @@ class CrackleIE(InfoExtractor): | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': item.attrib.get('d'), | ||||
|             'duration': int(item.attrib.get('r'), 16) if item.attrib.get('r') else None, | ||||
|             'duration': int(item.attrib.get('r'), 16) / 1000 if item.attrib.get('r') else None, | ||||
|             'series': item.attrib.get('sn'), | ||||
|             'season_number': int_or_none(item.attrib.get('se')), | ||||
|             'episode_number': int_or_none(item.attrib.get('ep')), | ||||
|   | ||||
| @@ -7,7 +7,7 @@ from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class CriterionIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.criterion\.com/films/(?P<id>[0-9]+)-.+' | ||||
|     _VALID_URL = r'https?://(?:www\.)?criterion\.com/films/(?P<id>[0-9]+)-.+' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.criterion.com/films/184-le-samourai', | ||||
|         'md5': 'bc51beba55685509883a9a7830919ec3', | ||||
|   | ||||
| @@ -34,22 +34,51 @@ from ..aes import ( | ||||
|  | ||||
|  | ||||
| class CrunchyrollBaseIE(InfoExtractor): | ||||
|     _LOGIN_URL = 'https://www.crunchyroll.com/login' | ||||
|     _LOGIN_FORM = 'login_form' | ||||
|     _NETRC_MACHINE = 'crunchyroll' | ||||
|  | ||||
|     def _login(self): | ||||
|         (username, password) = self._get_login_info() | ||||
|         if username is None: | ||||
|             return | ||||
|         self.report_login() | ||||
|         login_url = 'https://www.crunchyroll.com/?a=formhandler' | ||||
|         data = urlencode_postdata({ | ||||
|             'formname': 'RpcApiUser_Login', | ||||
|             'name': username, | ||||
|             'password': password, | ||||
|  | ||||
|         login_page = self._download_webpage( | ||||
|             self._LOGIN_URL, None, 'Downloading login page') | ||||
|  | ||||
|         login_form_str = self._search_regex( | ||||
|             r'(?P<form><form[^>]+?id=(["\'])%s\2[^>]*>)' % self._LOGIN_FORM, | ||||
|             login_page, 'login form', group='form') | ||||
|  | ||||
|         post_url = extract_attributes(login_form_str).get('action') | ||||
|         if not post_url: | ||||
|             post_url = self._LOGIN_URL | ||||
|         elif not post_url.startswith('http'): | ||||
|             post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) | ||||
|  | ||||
|         login_form = self._form_hidden_inputs(self._LOGIN_FORM, login_page) | ||||
|  | ||||
|         login_form.update({ | ||||
|             'login_form[name]': username, | ||||
|             'login_form[password]': password, | ||||
|         }) | ||||
|         login_request = sanitized_Request(login_url, data) | ||||
|         login_request.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||
|         self._download_webpage(login_request, None, False, 'Wrong login info') | ||||
|  | ||||
|         response = self._download_webpage( | ||||
|             post_url, None, 'Logging in', 'Wrong login info', | ||||
|             data=urlencode_postdata(login_form), | ||||
|             headers={'Content-Type': 'application/x-www-form-urlencoded'}) | ||||
|  | ||||
|         # Successful login | ||||
|         if '<title>Redirecting' in response: | ||||
|             return | ||||
|  | ||||
|         error = self._html_search_regex( | ||||
|             '(?s)<ul[^>]+class=["\']messages["\'][^>]*>(.+?)</ul>', | ||||
|             response, 'error message', default=None) | ||||
|         if error: | ||||
|             raise ExtractorError('Unable to login: %s' % error, expected=True) | ||||
|  | ||||
|         raise ExtractorError('Unable to log in') | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         self._login() | ||||
| @@ -114,6 +143,21 @@ class CrunchyrollIE(CrunchyrollBaseIE): | ||||
|             # rtmp | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.crunchyroll.com/rezero-starting-life-in-another-world-/episode-5-the-morning-of-our-promise-is-still-distant-702409', | ||||
|         'info_dict': { | ||||
|             'id': '702409', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Re:ZERO -Starting Life in Another World- Episode 5 – The Morning of Our Promise Is Still Distant', | ||||
|             'description': 'md5:97664de1ab24bbf77a9c01918cb7dca9', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'uploader': 'TV TOKYO', | ||||
|             'upload_date': '20160508', | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697', | ||||
|         'only_matching': True, | ||||
| @@ -336,9 +380,18 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text | ||||
|             if video_encode_id in video_encode_ids: | ||||
|                 continue | ||||
|             video_encode_ids.append(video_encode_id) | ||||
|  | ||||
|             video_file = xpath_text(stream_info, './file') | ||||
|             if not video_file: | ||||
|                 continue | ||||
|             if video_file.startswith('http'): | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     video_file, video_id, 'mp4', entry_protocol='m3u8_native', | ||||
|                     m3u8_id='hls', fatal=False)) | ||||
|                 continue | ||||
|  | ||||
|             video_url = xpath_text(stream_info, './host') | ||||
|             video_play_path = xpath_text(stream_info, './file') | ||||
|             if not video_url or not video_play_path: | ||||
|             if not video_url: | ||||
|                 continue | ||||
|             metadata = stream_info.find('./metadata') | ||||
|             format_info = { | ||||
| @@ -353,7 +406,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text | ||||
|                 parsed_video_url = compat_urlparse.urlparse(video_url) | ||||
|                 direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace( | ||||
|                     netloc='v.lvlt.crcdn.net', | ||||
|                     path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_play_path.split(':')[-1]))) | ||||
|                     path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1]))) | ||||
|                 if self._is_valid_url(direct_video_url, video_id, video_format): | ||||
|                     format_info.update({ | ||||
|                         'url': direct_video_url, | ||||
| @@ -363,7 +416,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text | ||||
|  | ||||
|             format_info.update({ | ||||
|                 'url': video_url, | ||||
|                 'play_path': video_play_path, | ||||
|                 'play_path': video_file, | ||||
|                 'ext': 'flv', | ||||
|             }) | ||||
|             formats.append(format_info) | ||||
|   | ||||
| @@ -1,13 +1,12 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import parse_iso8601, ExtractorError | ||||
| from ..utils import unified_timestamp | ||||
|  | ||||
|  | ||||
| class CtsNewsIE(InfoExtractor): | ||||
|     IE_DESC = '華視新聞' | ||||
|     # https connection failed (Connection reset) | ||||
|     _VALID_URL = r'https?://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://news.cts.com.tw/cts/international/201501/201501291578109.html', | ||||
| @@ -16,7 +15,7 @@ class CtsNewsIE(InfoExtractor): | ||||
|             'id': '201501291578109', | ||||
|             'ext': 'mp4', | ||||
|             'title': '以色列.真主黨交火 3人死亡', | ||||
|             'description': 'md5:95e9b295c898b7ff294f09d450178d7d', | ||||
|             'description': '以色列和黎巴嫩真主黨,爆發五年最嚴重衝突,雙方砲轟交火,兩名以軍死亡,還有一名西班牙籍的聯合國維和人...', | ||||
|             'timestamp': 1422528540, | ||||
|             'upload_date': '20150129', | ||||
|         } | ||||
| @@ -28,7 +27,7 @@ class CtsNewsIE(InfoExtractor): | ||||
|             'id': '201309031304098', | ||||
|             'ext': 'mp4', | ||||
|             'title': '韓國31歲童顏男 貌如十多歲小孩', | ||||
|             'description': 'md5:f183feeba3752b683827aab71adad584', | ||||
|             'description': '越有年紀的人,越希望看起來年輕一點,而南韓卻有一位31歲的男子,看起來像是11、12歲的小孩,身...', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'timestamp': 1378205880, | ||||
|             'upload_date': '20130903', | ||||
| @@ -36,8 +35,7 @@ class CtsNewsIE(InfoExtractor): | ||||
|     }, { | ||||
|         # With Youtube embedded video | ||||
|         'url': 'http://news.cts.com.tw/cts/money/201501/201501291578003.html', | ||||
|         'md5': '1d842c771dc94c8c3bca5af2cc1db9c5', | ||||
|         'add_ie': ['Youtube'], | ||||
|         'md5': 'e4726b2ccd70ba2c319865e28f0a91d1', | ||||
|         'info_dict': { | ||||
|             'id': 'OVbfO7d0_hQ', | ||||
|             'ext': 'mp4', | ||||
| @@ -47,42 +45,37 @@ class CtsNewsIE(InfoExtractor): | ||||
|             'upload_date': '20150128', | ||||
|             'uploader_id': 'TBSCTS', | ||||
|             'uploader': '中華電視公司', | ||||
|         } | ||||
|         }, | ||||
|         'add_ie': ['Youtube'], | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         news_id = self._match_id(url) | ||||
|         page = self._download_webpage(url, news_id) | ||||
|  | ||||
|         if self._search_regex(r'(CTSPlayer2)', page, 'CTSPlayer2 identifier', default=None): | ||||
|             feed_url = self._html_search_regex( | ||||
|                 r'(http://news\.cts\.com\.tw/action/mp4feed\.php\?news_id=\d+)', | ||||
|                 page, 'feed url') | ||||
|             video_url = self._download_webpage( | ||||
|                 feed_url, news_id, note='Fetching feed') | ||||
|         news_id = self._hidden_inputs(page).get('get_id') | ||||
|  | ||||
|         if news_id: | ||||
|             mp4_feed = self._download_json( | ||||
|                 'http://news.cts.com.tw/action/test_mp4feed.php', | ||||
|                 news_id, note='Fetching feed', query={'news_id': news_id}) | ||||
|             video_url = mp4_feed['source_url'] | ||||
|         else: | ||||
|             self.to_screen('Not CTSPlayer video, trying Youtube...') | ||||
|             youtube_url = self._search_regex( | ||||
|                 r'src="(//www\.youtube\.com/embed/[^"]+)"', page, 'youtube url', | ||||
|                 default=None) | ||||
|             if not youtube_url: | ||||
|                 raise ExtractorError('The news includes no videos!', expected=True) | ||||
|                 r'src="(//www\.youtube\.com/embed/[^"]+)"', page, 'youtube url') | ||||
|  | ||||
|             return { | ||||
|                 '_type': 'url', | ||||
|                 'url': youtube_url, | ||||
|                 'ie_key': 'Youtube', | ||||
|             } | ||||
|             return self.url_result(youtube_url, ie='Youtube') | ||||
|  | ||||
|         description = self._html_search_meta('description', page) | ||||
|         title = self._html_search_meta('title', page) | ||||
|         title = self._html_search_meta('title', page, fatal=True) | ||||
|         thumbnail = self._html_search_meta('image', page) | ||||
|  | ||||
|         datetime_str = self._html_search_regex( | ||||
|             r'(\d{4}/\d{2}/\d{2} \d{2}:\d{2})', page, 'date and time') | ||||
|         # Transform into ISO 8601 format with timezone info | ||||
|         datetime_str = datetime_str.replace('/', '-') + ':00+0800' | ||||
|         timestamp = parse_iso8601(datetime_str, delimiter=' ') | ||||
|             r'(\d{4}/\d{2}/\d{2} \d{2}:\d{2})', page, 'date and time', fatal=False) | ||||
|         timestamp = None | ||||
|         if datetime_str: | ||||
|             timestamp = unified_timestamp(datetime_str) - 8 * 3600 | ||||
|  | ||||
|         return { | ||||
|             'id': news_id, | ||||
|   | ||||
| @@ -1,30 +0,0 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class CTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?ctv\.ca/video/player\?vid=(?P<id>[0-9.]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.ctv.ca/video/player?vid=706966', | ||||
|         'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0', | ||||
|         'info_dict': { | ||||
|             'id': '706966', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Larry Day and Richard Jutras on the TIFF red carpet of \'Stonewall\'', | ||||
|             'description': 'etalk catches up with Larry Day and Richard Jutras on the TIFF red carpet of "Stonewall”.', | ||||
|             'upload_date': '20150919', | ||||
|             'timestamp': 1442624700, | ||||
|         }, | ||||
|         'expected_warnings': ['HTTP Error 404'], | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'id': video_id, | ||||
|             'url': '9c9media:ctv_web:%s' % video_id, | ||||
|             'ie_key': 'NineCNineMedia', | ||||
|         } | ||||
| @@ -1,9 +1,13 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import time | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import int_or_none | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     HEADRequest, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class CultureUnpluggedIE(InfoExtractor): | ||||
| @@ -32,6 +36,9 @@ class CultureUnpluggedIE(InfoExtractor): | ||||
|         video_id = mobj.group('id') | ||||
|         display_id = mobj.group('display_id') or video_id | ||||
|  | ||||
|         # request setClientTimezone.php to get PHPSESSID cookie which is need to get valid json data in the next request | ||||
|         self._request_webpage(HEADRequest( | ||||
|             'http://www.cultureunplugged.com/setClientTimezone.php?timeOffset=%d' % -(time.timezone / 3600)), display_id) | ||||
|         movie_data = self._download_json( | ||||
|             'http://www.cultureunplugged.com/movie-data/cu-%s.json' % video_id, display_id) | ||||
|  | ||||
|   | ||||
							
								
								
									
										120
									
								
								youtube_dl/extractor/curiositystream.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										120
									
								
								youtube_dl/extractor/curiositystream.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,120 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     urlencode_postdata, | ||||
|     compat_str, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class CuriosityStreamBaseIE(InfoExtractor): | ||||
|     _NETRC_MACHINE = 'curiositystream' | ||||
|     _auth_token = None | ||||
|     _API_BASE_URL = 'https://api.curiositystream.com/v1/' | ||||
|  | ||||
|     def _handle_errors(self, result): | ||||
|         error = result.get('error', {}).get('message') | ||||
|         if error: | ||||
|             if isinstance(error, dict): | ||||
|                 error = ', '.join(error.values()) | ||||
|             raise ExtractorError( | ||||
|                 '%s said: %s' % (self.IE_NAME, error), expected=True) | ||||
|  | ||||
|     def _call_api(self, path, video_id): | ||||
|         headers = {} | ||||
|         if self._auth_token: | ||||
|             headers['X-Auth-Token'] = self._auth_token | ||||
|         result = self._download_json( | ||||
|             self._API_BASE_URL + path, video_id, headers=headers) | ||||
|         self._handle_errors(result) | ||||
|         return result['data'] | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         (email, password) = self._get_login_info() | ||||
|         if email is None: | ||||
|             return | ||||
|         result = self._download_json( | ||||
|             self._API_BASE_URL + 'login', None, data=urlencode_postdata({ | ||||
|                 'email': email, | ||||
|                 'password': password, | ||||
|             })) | ||||
|         self._handle_errors(result) | ||||
|         self._auth_token = result['message']['auth_token'] | ||||
|  | ||||
|     def _extract_media_info(self, media): | ||||
|         video_id = compat_str(media['id']) | ||||
|         limelight_media_id = media['limelight_media_id'] | ||||
|         title = media['title'] | ||||
|  | ||||
|         subtitles = {} | ||||
|         for closed_caption in media.get('closed_captions', []): | ||||
|             sub_url = closed_caption.get('file') | ||||
|             if not sub_url: | ||||
|                 continue | ||||
|             lang = closed_caption.get('code') or closed_caption.get('language') or 'en' | ||||
|             subtitles.setdefault(lang, []).append({ | ||||
|                 'url': sub_url, | ||||
|             }) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'id': video_id, | ||||
|             'url': 'limelight:media:' + limelight_media_id, | ||||
|             'title': title, | ||||
|             'description': media.get('description'), | ||||
|             'thumbnail': media.get('image_large') or media.get('image_medium') or media.get('image_small'), | ||||
|             'duration': int_or_none(media.get('duration')), | ||||
|             'tags': media.get('tags'), | ||||
|             'subtitles': subtitles, | ||||
|             'ie_key': 'LimelightMedia', | ||||
|         } | ||||
|  | ||||
|  | ||||
| class CuriosityStreamIE(CuriosityStreamBaseIE): | ||||
|     IE_NAME = 'curiositystream' | ||||
|     _VALID_URL = r'https?://app\.curiositystream\.com/video/(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|         'url': 'https://app.curiositystream.com/video/2', | ||||
|         'md5': 'a0074c190e6cddaf86900b28d3e9ee7a', | ||||
|         'info_dict': { | ||||
|             'id': '2', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'How Did You Develop The Internet?', | ||||
|             'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.', | ||||
|             'timestamp': 1448388615, | ||||
|             'upload_date': '20151124', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         media = self._call_api('media/' + video_id, video_id) | ||||
|         return self._extract_media_info(media) | ||||
|  | ||||
|  | ||||
| class CuriosityStreamCollectionIE(CuriosityStreamBaseIE): | ||||
|     IE_NAME = 'curiositystream:collection' | ||||
|     _VALID_URL = r'https?://app\.curiositystream\.com/collection/(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|         'url': 'https://app.curiositystream.com/collection/2', | ||||
|         'info_dict': { | ||||
|             'id': '2', | ||||
|             'title': 'Curious Minds: The Internet', | ||||
|             'description': 'How is the internet shaping our lives in the 21st Century?', | ||||
|         }, | ||||
|         'playlist_mincount': 17, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         collection_id = self._match_id(url) | ||||
|         collection = self._call_api( | ||||
|             'collections/' + collection_id, collection_id) | ||||
|         entries = [] | ||||
|         for media in collection.get('media', []): | ||||
|             entries.append(self._extract_media_info(media)) | ||||
|         return self.playlist_result( | ||||
|             entries, collection_id, | ||||
|             collection.get('title'), collection.get('description')) | ||||
| @@ -9,7 +9,7 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class CWTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?cw(?:tv|seed)\.com/(?:shows/)?(?:[^/]+/){2}\?.*\bplay=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})' | ||||
|     _VALID_URL = r'https?://(?:www\.)?cw(?:tv(?:pr)?|seed)\.com/(?:shows/)?(?:[^/]+/)+[^?]*\?.*\b(?:play|watch)=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?play=6b15e985-9345-4f60-baf8-56e96be57c63', | ||||
|         'info_dict': { | ||||
| @@ -28,7 +28,8 @@ class CWTVIE(InfoExtractor): | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         } | ||||
|         }, | ||||
|         'skip': 'redirect to http://cwtv.com/shows/arrow/', | ||||
|     }, { | ||||
|         'url': 'http://www.cwseed.com/shows/whose-line-is-it-anyway/jeff-davis-4/?play=24282b12-ead2-42f2-95ad-26770c2c6088', | ||||
|         'info_dict': { | ||||
| @@ -44,22 +45,43 @@ class CWTVIE(InfoExtractor): | ||||
|             'upload_date': '20151006', | ||||
|             'timestamp': 1444107300, | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://cwtv.com/thecw/chroniclesofcisco/?play=8adebe35-f447-465f-ab52-e863506ff6d6', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://cwtvpr.com/the-cw/video?watch=9eee3f60-ef4e-440b-b3b2-49428ac9c54e', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?watch=6b15e985-9345-4f60-baf8-56e96be57c63', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         video_data = self._download_json( | ||||
|             'http://metaframe.digitalsmiths.tv/v2/CWtv/assets/%s/partner/132?format=json' % video_id, video_id) | ||||
|  | ||||
|         formats = self._extract_m3u8_formats( | ||||
|             video_data['videos']['variantplaylist']['uri'], video_id, 'mp4') | ||||
|         video_data = None | ||||
|         formats = [] | ||||
|         for partner in (154, 213): | ||||
|             vdata = self._download_json( | ||||
|                 'http://metaframe.digitalsmiths.tv/v2/CWtv/assets/%s/partner/%d?format=json' % (video_id, partner), video_id, fatal=False) | ||||
|             if not vdata: | ||||
|                 continue | ||||
|             video_data = vdata | ||||
|             for quality, quality_data in vdata.get('videos', {}).items(): | ||||
|                 quality_url = quality_data.get('uri') | ||||
|                 if not quality_url: | ||||
|                     continue | ||||
|                 if quality == 'variantplaylist': | ||||
|                     formats.extend(self._extract_m3u8_formats( | ||||
|                         quality_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) | ||||
|                 else: | ||||
|                     tbr = int_or_none(quality_data.get('bitrate')) | ||||
|                     format_id = 'http' + ('-%d' % tbr if tbr else '') | ||||
|                     if self._is_valid_url(quality_url, video_id, format_id): | ||||
|                         formats.append({ | ||||
|                             'format_id': format_id, | ||||
|                             'url': quality_url, | ||||
|                             'tbr': tbr, | ||||
|                         }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         thumbnails = [{ | ||||
|   | ||||
| @@ -331,7 +331,9 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): | ||||
|  | ||||
|             for video_id in re.findall(r'data-xid="(.+?)"', webpage): | ||||
|                 if video_id not in video_ids: | ||||
|                     yield self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion') | ||||
|                     yield self.url_result( | ||||
|                         'http://www.dailymotion.com/video/%s' % video_id, | ||||
|                         DailymotionIE.ie_key(), video_id) | ||||
|                     video_ids.add(video_id) | ||||
|  | ||||
|             if re.search(self._MORE_PAGES_INDICATOR, webpage) is None: | ||||
| @@ -392,7 +394,7 @@ class DailymotionUserIE(DailymotionPlaylistIE): | ||||
|  | ||||
|  | ||||
| class DailymotionCloudIE(DailymotionBaseInfoExtractor): | ||||
|     _VALID_URL_PREFIX = r'http://api\.dmcloud\.net/(?:player/)?embed/' | ||||
|     _VALID_URL_PREFIX = r'https?://api\.dmcloud\.net/(?:player/)?embed/' | ||||
|     _VALID_URL = r'%s[^/]+/(?P<id>[^/?]+)' % _VALID_URL_PREFIX | ||||
|     _VALID_EMBED_URL = r'%s[^/]+/[^\'"]+' % _VALID_URL_PREFIX | ||||
|  | ||||
|   | ||||
| @@ -38,6 +38,12 @@ class DBTVIE(InfoExtractor): | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     @staticmethod | ||||
|     def _extract_urls(webpage): | ||||
|         return [url for _, url in re.findall( | ||||
|             r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?dbtv\.no/(?:lazy)?player/\d+.*?)\1', | ||||
|             webpage)] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id, display_id = re.match(self._VALID_URL, url).groups() | ||||
|  | ||||
|   | ||||
| @@ -6,7 +6,7 @@ from ..compat import compat_str | ||||
|  | ||||
|  | ||||
| class DctpTvIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www.dctp.tv/(#/)?filme/(?P<id>.+?)/$' | ||||
|     _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(#/)?filme/(?P<id>.+?)/$' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/', | ||||
|         'info_dict': { | ||||
|   | ||||
| @@ -13,7 +13,7 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class DemocracynowIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?democracynow.org/(?P<id>[^\?]*)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?democracynow\.org/(?P<id>[^\?]*)' | ||||
|     IE_NAME = 'democracynow' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.democracynow.org/shows/2015/7/3', | ||||
|   | ||||
							
								
								
									
										116
									
								
								youtube_dl/extractor/discoverygo.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										116
									
								
								youtube_dl/extractor/discoverygo.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,116 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     extract_attributes, | ||||
|     int_or_none, | ||||
|     parse_age_limit, | ||||
|     unescapeHTML, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class DiscoveryGoIE(InfoExtractor): | ||||
|     _VALID_URL = r'''(?x)https?://(?:www\.)?(?: | ||||
|             discovery| | ||||
|             investigationdiscovery| | ||||
|             discoverylife| | ||||
|             animalplanet| | ||||
|             ahctv| | ||||
|             destinationamerica| | ||||
|             sciencechannel| | ||||
|             tlc| | ||||
|             velocitychannel | ||||
|         )go\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)''' | ||||
|     _TEST = { | ||||
|         'url': 'https://www.discoverygo.com/love-at-first-kiss/kiss-first-ask-questions-later/', | ||||
|         'info_dict': { | ||||
|             'id': '57a33c536b66d1cd0345eeb1', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Kiss First, Ask Questions Later!', | ||||
|             'description': 'md5:fe923ba34050eae468bffae10831cb22', | ||||
|             'duration': 2579, | ||||
|             'series': 'Love at First Kiss', | ||||
|             'season_number': 1, | ||||
|             'episode_number': 1, | ||||
|             'age_limit': 14, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         container = extract_attributes( | ||||
|             self._search_regex( | ||||
|                 r'(<div[^>]+class=["\']video-player-container[^>]+>)', | ||||
|                 webpage, 'video container')) | ||||
|  | ||||
|         video = self._parse_json( | ||||
|             unescapeHTML(container.get('data-video') or container.get('data-json')), | ||||
|             display_id) | ||||
|  | ||||
|         title = video['name'] | ||||
|  | ||||
|         stream = video.get('stream') | ||||
|         if not stream: | ||||
|             if video.get('authenticated') is True: | ||||
|                 raise ExtractorError( | ||||
|                     'This video is only available via cable service provider subscription that' | ||||
|                     ' is not currently supported. You may want to use --cookies.', expected=True) | ||||
|             else: | ||||
|                 raise ExtractorError('Unable to find stream') | ||||
|         STREAM_URL_SUFFIX = 'streamUrl' | ||||
|         formats = [] | ||||
|         for stream_kind in ('', 'hds'): | ||||
|             suffix = STREAM_URL_SUFFIX.capitalize() if stream_kind else STREAM_URL_SUFFIX | ||||
|             stream_url = stream.get('%s%s' % (stream_kind, suffix)) | ||||
|             if not stream_url: | ||||
|                 continue | ||||
|             if stream_kind == '': | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     stream_url, display_id, 'mp4', entry_protocol='m3u8_native', | ||||
|                     m3u8_id='hls', fatal=False)) | ||||
|             elif stream_kind == 'hds': | ||||
|                 formats.extend(self._extract_f4m_formats( | ||||
|                     stream_url, display_id, f4m_id=stream_kind, fatal=False)) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         video_id = video.get('id') or display_id | ||||
|         description = video.get('description', {}).get('detailed') | ||||
|         duration = int_or_none(video.get('duration')) | ||||
|  | ||||
|         series = video.get('show', {}).get('name') | ||||
|         season_number = int_or_none(video.get('season', {}).get('number')) | ||||
|         episode_number = int_or_none(video.get('episodeNumber')) | ||||
|  | ||||
|         tags = video.get('tags') | ||||
|         age_limit = parse_age_limit(video.get('parental', {}).get('rating')) | ||||
|  | ||||
|         subtitles = {} | ||||
|         captions = stream.get('captions') | ||||
|         if isinstance(captions, list): | ||||
|             for caption in captions: | ||||
|                 subtitle_url = caption.get('fileUrl') | ||||
|                 if (not subtitle_url or not isinstance(subtitle_url, compat_str) or | ||||
|                         not subtitle_url.startswith('http')): | ||||
|                     continue | ||||
|                 lang = caption.get('fileLang', 'en') | ||||
|                 subtitles.setdefault(lang, []).append({'url': subtitle_url}) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'duration': duration, | ||||
|             'series': series, | ||||
|             'season_number': season_number, | ||||
|             'episode_number': episode_number, | ||||
|             'tags': tags, | ||||
|             'age_limit': age_limit, | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
| @@ -10,18 +10,18 @@ from ..utils import ( | ||||
| class DotsubIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://dotsub.com/view/aed3b8b2-1889-4df5-ae63-ad85f5572f27', | ||||
|         'md5': '0914d4d69605090f623b7ac329fea66e', | ||||
|         'url': 'https://dotsub.com/view/9c63db2a-fa95-4838-8e6e-13deafe47f09', | ||||
|         'md5': '21c7ff600f545358134fea762a6d42b6', | ||||
|         'info_dict': { | ||||
|             'id': 'aed3b8b2-1889-4df5-ae63-ad85f5572f27', | ||||
|             'id': '9c63db2a-fa95-4838-8e6e-13deafe47f09', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence documentary', | ||||
|             'description': 'md5:699a0f7f50aeec6042cb3b1db2d0d074', | ||||
|             'thumbnail': 're:^https?://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p', | ||||
|             'duration': 3169, | ||||
|             'uploader': '4v4l0n42', | ||||
|             'timestamp': 1292248482.625, | ||||
|             'upload_date': '20101213', | ||||
|             'title': 'MOTIVATION - "It\'s Possible" Best Inspirational Video Ever', | ||||
|             'description': 'md5:41af1e273edbbdfe4e216a78b9d34ac6', | ||||
|             'thumbnail': 're:^https?://dotsub.com/media/9c63db2a-fa95-4838-8e6e-13deafe47f09/p', | ||||
|             'duration': 198, | ||||
|             'uploader': 'liuxt', | ||||
|             'timestamp': 1385778501.104, | ||||
|             'upload_date': '20131130', | ||||
|             'view_count': int, | ||||
|         } | ||||
|     } | ||||
|   | ||||
| @@ -3,9 +3,17 @@ from __future__ import unicode_literals | ||||
|  | ||||
| import hashlib | ||||
| import time | ||||
| import uuid | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import (ExtractorError, unescapeHTML) | ||||
| from ..compat import (compat_str, compat_basestring) | ||||
| from ..compat import ( | ||||
|     compat_str, | ||||
|     compat_urllib_parse_urlencode, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     unescapeHTML, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class DouyuTVIE(InfoExtractor): | ||||
| @@ -21,7 +29,6 @@ class DouyuTVIE(InfoExtractor): | ||||
|             'description': 're:.*m7show@163\.com.*', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'uploader': '7师傅', | ||||
|             'uploader_id': '431925', | ||||
|             'is_live': True, | ||||
|         }, | ||||
|         'params': { | ||||
| @@ -37,7 +44,6 @@ class DouyuTVIE(InfoExtractor): | ||||
|             'description': 'md5:746a2f7a253966a06755a912f0acc0d2', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'uploader': 'douyu小漠', | ||||
|             'uploader_id': '3769985', | ||||
|             'is_live': True, | ||||
|         }, | ||||
|         'params': { | ||||
| @@ -54,7 +60,6 @@ class DouyuTVIE(InfoExtractor): | ||||
|             'description': 're:.*m7show@163\.com.*', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'uploader': '7师傅', | ||||
|             'uploader_id': '431925', | ||||
|             'is_live': True, | ||||
|         }, | ||||
|         'params': { | ||||
| @@ -65,6 +70,10 @@ class DouyuTVIE(InfoExtractor): | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     # Decompile core.swf in webpage by ffdec "Search SWFs in memory". core.swf | ||||
|     # is encrypted originally, but ffdec can dump memory to get the decrypted one. | ||||
|     _API_KEY = 'A12Svb&%1UUmf@hC' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
| @@ -75,74 +84,56 @@ class DouyuTVIE(InfoExtractor): | ||||
|             room_id = self._html_search_regex( | ||||
|                 r'"room_id"\s*:\s*(\d+),', page, 'room id') | ||||
|  | ||||
|         config = None | ||||
|         # Douyu API sometimes returns error "Unable to load the requested class: eticket_redis_cache" | ||||
|         # Retry with different parameters - same parameters cause same errors | ||||
|         for i in range(5): | ||||
|             prefix = 'room/%s?aid=android&client_sys=android&time=%d' % ( | ||||
|                 room_id, int(time.time())) | ||||
|             auth = hashlib.md5((prefix + '1231').encode('ascii')).hexdigest() | ||||
|         room = self._download_json( | ||||
|             'http://m.douyu.com/html5/live?roomId=%s' % room_id, video_id, | ||||
|             note='Downloading room info')['data'] | ||||
|  | ||||
|             config_page = self._download_webpage( | ||||
|                 'http://www.douyutv.com/api/v1/%s&auth=%s' % (prefix, auth), | ||||
|                 video_id) | ||||
|             try: | ||||
|                 config = self._parse_json(config_page, video_id, fatal=False) | ||||
|             except ExtractorError: | ||||
|                 # Wait some time before retrying to get a different time() value | ||||
|                 self._sleep(1, video_id, msg_template='%(video_id)s: Error occurs. ' | ||||
|                                                       'Waiting for %(timeout)s seconds before retrying') | ||||
|                 continue | ||||
|             else: | ||||
|                 break | ||||
|         if config is None: | ||||
|             raise ExtractorError('Unable to fetch API result') | ||||
|  | ||||
|         data = config['data'] | ||||
|  | ||||
|         error_code = config.get('error', 0) | ||||
|         if error_code is not 0: | ||||
|             error_desc = 'Server reported error %i' % error_code | ||||
|             if isinstance(data, (compat_str, compat_basestring)): | ||||
|                 error_desc += ': ' + data | ||||
|             raise ExtractorError(error_desc, expected=True) | ||||
|  | ||||
|         show_status = data.get('show_status') | ||||
|         # 1 = live, 2 = offline | ||||
|         if show_status == '2': | ||||
|         if room.get('show_status') == '2': | ||||
|             raise ExtractorError('Live stream is offline', expected=True) | ||||
|  | ||||
|         tt = compat_str(int(time.time() / 60)) | ||||
|         did = uuid.uuid4().hex.upper() | ||||
|  | ||||
|         sign_content = ''.join((room_id, did, self._API_KEY, tt)) | ||||
|         sign = hashlib.md5((sign_content).encode('utf-8')).hexdigest() | ||||
|  | ||||
|         flv_data = compat_urllib_parse_urlencode({ | ||||
|             'cdn': 'ws', | ||||
|             'rate': '0', | ||||
|             'tt': tt, | ||||
|             'did': did, | ||||
|             'sign': sign, | ||||
|         }) | ||||
|  | ||||
|         video_info = self._download_json( | ||||
|             'http://www.douyu.com/lapi/live/getPlay/%s' % room_id, video_id, | ||||
|             data=flv_data, note='Downloading video info', | ||||
|             headers={'Content-Type': 'application/x-www-form-urlencoded'}) | ||||
|  | ||||
|         error_code = video_info.get('error', 0) | ||||
|         if error_code is not 0: | ||||
|             raise ExtractorError( | ||||
|                 'Live stream is offline', expected=True) | ||||
|                 '%s reported error %i' % (self.IE_NAME, error_code), | ||||
|                 expected=True) | ||||
|  | ||||
|         base_url = data['rtmp_url'] | ||||
|         live_path = data['rtmp_live'] | ||||
|         base_url = video_info['data']['rtmp_url'] | ||||
|         live_path = video_info['data']['rtmp_live'] | ||||
|  | ||||
|         title = self._live_title(unescapeHTML(data['room_name'])) | ||||
|         description = data.get('show_details') | ||||
|         thumbnail = data.get('room_src') | ||||
|         video_url = '%s/%s' % (base_url, live_path) | ||||
|  | ||||
|         uploader = data.get('nickname') | ||||
|         uploader_id = data.get('owner_uid') | ||||
|  | ||||
|         multi_formats = data.get('rtmp_multi_bitrate') | ||||
|         if not isinstance(multi_formats, dict): | ||||
|             multi_formats = {} | ||||
|         multi_formats['live'] = live_path | ||||
|  | ||||
|         formats = [{ | ||||
|             'url': '%s/%s' % (base_url, format_path), | ||||
|             'format_id': format_id, | ||||
|             'preference': 1 if format_id == 'live' else 0, | ||||
|         } for format_id, format_path in multi_formats.items()] | ||||
|         self._sort_formats(formats) | ||||
|         title = self._live_title(unescapeHTML(room['room_name'])) | ||||
|         description = room.get('notice') | ||||
|         thumbnail = room.get('room_src') | ||||
|         uploader = room.get('nickname') | ||||
|  | ||||
|         return { | ||||
|             'id': room_id, | ||||
|             'display_id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'uploader': uploader, | ||||
|             'uploader_id': uploader_id, | ||||
|             'formats': formats, | ||||
|             'is_live': True, | ||||
|         } | ||||
|   | ||||
| @@ -3,7 +3,10 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import str_to_int | ||||
| from ..utils import ( | ||||
|     NO_DEFAULT, | ||||
|     str_to_int, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class DrTuberIE(InfoExtractor): | ||||
| @@ -17,7 +20,6 @@ class DrTuberIE(InfoExtractor): | ||||
|             'ext': 'mp4', | ||||
|             'title': 'hot perky blonde naked golf', | ||||
|             'like_count': int, | ||||
|             'dislike_count': int, | ||||
|             'comment_count': int, | ||||
|             'categories': ['Babe', 'Blonde', 'Erotic', 'Outdoor', 'Softcore', 'Solo'], | ||||
|             'thumbnail': 're:https?://.*\.jpg$', | ||||
| @@ -36,25 +38,29 @@ class DrTuberIE(InfoExtractor): | ||||
|             r'<source src="([^"]+)"', webpage, 'video URL') | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             [r'<p[^>]+class="title_substrate">([^<]+)</p>', r'<title>([^<]+) - \d+'], | ||||
|             (r'class="title_watch"[^>]*><p>([^<]+)<', | ||||
|              r'<p[^>]+class="title_substrate">([^<]+)</p>', | ||||
|              r'<title>([^<]+) - \d+'), | ||||
|             webpage, 'title') | ||||
|  | ||||
|         thumbnail = self._html_search_regex( | ||||
|             r'poster="([^"]+)"', | ||||
|             webpage, 'thumbnail', fatal=False) | ||||
|  | ||||
|         def extract_count(id_, name): | ||||
|         def extract_count(id_, name, default=NO_DEFAULT): | ||||
|             return str_to_int(self._html_search_regex( | ||||
|                 r'<span[^>]+(?:class|id)="%s"[^>]*>([\d,\.]+)</span>' % id_, | ||||
|                 webpage, '%s count' % name, fatal=False)) | ||||
|                 webpage, '%s count' % name, default=default, fatal=False)) | ||||
|  | ||||
|         like_count = extract_count('rate_likes', 'like') | ||||
|         dislike_count = extract_count('rate_dislikes', 'dislike') | ||||
|         dislike_count = extract_count('rate_dislikes', 'dislike', default=None) | ||||
|         comment_count = extract_count('comments_count', 'comment') | ||||
|  | ||||
|         cats_str = self._search_regex( | ||||
|             r'<div[^>]+class="categories_list">(.+?)</div>', webpage, 'categories', fatal=False) | ||||
|         categories = [] if not cats_str else re.findall(r'<a title="([^"]+)"', cats_str) | ||||
|             r'<div[^>]+class="categories_list">(.+?)</div>', | ||||
|             webpage, 'categories', fatal=False) | ||||
|         categories = [] if not cats_str else re.findall( | ||||
|             r'<a title="([^"]+)"', cats_str) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|   | ||||
| @@ -4,26 +4,45 @@ from __future__ import unicode_literals | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     float_or_none, | ||||
|     mimetype2ext, | ||||
|     parse_iso8601, | ||||
|     remove_end, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class DRTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'https://www.dr.dk/tv/se/boern/ultra/panisk-paske/panisk-paske-5', | ||||
|         'md5': 'dc515a9ab50577fa14cc4e4b0265168f', | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10', | ||||
|         'md5': '25e659cccc9a2ed956110a299fdf5983', | ||||
|         'info_dict': { | ||||
|             'id': 'panisk-paske-5', | ||||
|             'id': 'klassen-darlig-taber-10', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Panisk Påske (5)', | ||||
|             'description': 'md5:ca14173c5ab24cd26b0fcc074dff391c', | ||||
|             'timestamp': 1426984612, | ||||
|             'upload_date': '20150322', | ||||
|             'duration': 1455, | ||||
|             'title': 'Klassen - Dårlig taber (10)', | ||||
|             'description': 'md5:815fe1b7fa656ed80580f31e8b3c79aa', | ||||
|             'timestamp': 1471991907, | ||||
|             'upload_date': '20160823', | ||||
|             'duration': 606.84, | ||||
|         }, | ||||
|     } | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang', | ||||
|         'md5': '2c37175c718155930f939ef59952474a', | ||||
|         'info_dict': { | ||||
|             'id': 'christiania-pusher-street-ryddes-drdkrjpo', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'LIVE Christianias rydning af Pusher Street er i gang', | ||||
|             'description': '- Det er det fedeste, der er sket i 20 år, fortæller christianit til DR Nyheder.', | ||||
|             'timestamp': 1472800279, | ||||
|             'upload_date': '20160902', | ||||
|             'duration': 131.4, | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
| @@ -35,7 +54,8 @@ class DRTVIE(InfoExtractor): | ||||
|                 'Video %s is not available' % video_id, expected=True) | ||||
|  | ||||
|         video_id = self._search_regex( | ||||
|             r'data-(?:material-identifier|episode-slug)="([^"]+)"', | ||||
|             (r'data-(?:material-identifier|episode-slug)="([^"]+)"', | ||||
|                 r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'), | ||||
|             webpage, 'video id') | ||||
|  | ||||
|         programcard = self._download_json( | ||||
| @@ -43,9 +63,12 @@ class DRTVIE(InfoExtractor): | ||||
|             video_id, 'Downloading video JSON') | ||||
|         data = programcard['Data'][0] | ||||
|  | ||||
|         title = data['Title'] | ||||
|         description = data['Description'] | ||||
|         timestamp = parse_iso8601(data['CreatedTime']) | ||||
|         title = remove_end(self._og_search_title( | ||||
|             webpage, default=None), ' | TV | DR') or data['Title'] | ||||
|         description = self._og_search_description( | ||||
|             webpage, default=None) or data.get('Description') | ||||
|  | ||||
|         timestamp = parse_iso8601(data.get('CreatedTime')) | ||||
|  | ||||
|         thumbnail = None | ||||
|         duration = None | ||||
| @@ -56,16 +79,18 @@ class DRTVIE(InfoExtractor): | ||||
|         subtitles = {} | ||||
|  | ||||
|         for asset in data['Assets']: | ||||
|             if asset['Kind'] == 'Image': | ||||
|                 thumbnail = asset['Uri'] | ||||
|             elif asset['Kind'] == 'VideoResource': | ||||
|                 duration = asset['DurationInMilliseconds'] / 1000.0 | ||||
|                 restricted_to_denmark = asset['RestrictedToDenmark'] | ||||
|                 spoken_subtitles = asset['Target'] == 'SpokenSubtitles' | ||||
|                 for link in asset['Links']: | ||||
|                     uri = link['Uri'] | ||||
|                     target = link['Target'] | ||||
|                     format_id = target | ||||
|             if asset.get('Kind') == 'Image': | ||||
|                 thumbnail = asset.get('Uri') | ||||
|             elif asset.get('Kind') == 'VideoResource': | ||||
|                 duration = float_or_none(asset.get('DurationInMilliseconds'), 1000) | ||||
|                 restricted_to_denmark = asset.get('RestrictedToDenmark') | ||||
|                 spoken_subtitles = asset.get('Target') == 'SpokenSubtitles' | ||||
|                 for link in asset.get('Links', []): | ||||
|                     uri = link.get('Uri') | ||||
|                     if not uri: | ||||
|                         continue | ||||
|                     target = link.get('Target') | ||||
|                     format_id = target or '' | ||||
|                     preference = None | ||||
|                     if spoken_subtitles: | ||||
|                         preference = -1 | ||||
| @@ -76,8 +101,8 @@ class DRTVIE(InfoExtractor): | ||||
|                             video_id, preference, f4m_id=format_id)) | ||||
|                     elif target == 'HLS': | ||||
|                         formats.extend(self._extract_m3u8_formats( | ||||
|                             uri, video_id, 'mp4', preference=preference, | ||||
|                             m3u8_id=format_id)) | ||||
|                             uri, video_id, 'mp4', entry_protocol='m3u8_native', | ||||
|                             preference=preference, m3u8_id=format_id)) | ||||
|                     else: | ||||
|                         bitrate = link.get('Bitrate') | ||||
|                         if bitrate: | ||||
| @@ -85,7 +110,7 @@ class DRTVIE(InfoExtractor): | ||||
|                         formats.append({ | ||||
|                             'url': uri, | ||||
|                             'format_id': format_id, | ||||
|                             'tbr': bitrate, | ||||
|                             'tbr': int_or_none(bitrate), | ||||
|                             'ext': link.get('FileFormat'), | ||||
|                         }) | ||||
|                 subtitles_list = asset.get('SubtitlesList') | ||||
| @@ -94,12 +119,18 @@ class DRTVIE(InfoExtractor): | ||||
|                         'Danish': 'da', | ||||
|                     } | ||||
|                     for subs in subtitles_list: | ||||
|                         lang = subs['Language'] | ||||
|                         subtitles[LANGS.get(lang, lang)] = [{'url': subs['Uri'], 'ext': 'vtt'}] | ||||
|                         if not subs.get('Uri'): | ||||
|                             continue | ||||
|                         lang = subs.get('Language') or 'da' | ||||
|                         subtitles.setdefault(LANGS.get(lang, lang), []).append({ | ||||
|                             'url': subs['Uri'], | ||||
|                             'ext': mimetype2ext(subs.get('MimeType')) or 'vtt' | ||||
|                         }) | ||||
|  | ||||
|         if not formats and restricted_to_denmark: | ||||
|             raise ExtractorError( | ||||
|                 'Unfortunately, DR is not allowed to show this program outside Denmark.', expected=True) | ||||
|             self.raise_geo_restricted( | ||||
|                 'Unfortunately, DR is not allowed to show this program outside Denmark.', | ||||
|                 expected=True) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|   | ||||
| @@ -52,11 +52,24 @@ class EaglePlatformIE(InfoExtractor): | ||||
|  | ||||
|     @staticmethod | ||||
|     def _extract_url(webpage): | ||||
|         # Regular iframe embedding | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1', | ||||
|             webpage) | ||||
|         if mobj is not None: | ||||
|             return mobj.group('url') | ||||
|         # Basic usage embedding (see http://dultonmedia.github.io/eplayer/) | ||||
|         mobj = re.search( | ||||
|             r'''(?xs) | ||||
|                     <script[^>]+ | ||||
|                         src=(?P<q1>["\'])(?:https?:)?//(?P<host>.+?\.media\.eagleplatform\.com)/player/player\.js(?P=q1) | ||||
|                     .+? | ||||
|                     <div[^>]+ | ||||
|                         class=(?P<q2>["\'])eagleplayer(?P=q2)[^>]+ | ||||
|                         data-id=["\'](?P<id>\d+) | ||||
|             ''', webpage) | ||||
|         if mobj is not None: | ||||
|             return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict() | ||||
|  | ||||
|     @staticmethod | ||||
|     def _handle_error(response): | ||||
|   | ||||
| @@ -4,9 +4,10 @@ from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class EngadgetIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www.engadget.com/video/(?P<id>\d+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?engadget\.com/video/(?P<id>[^/?#]+)' | ||||
|  | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         # video with 5min ID | ||||
|         'url': 'http://www.engadget.com/video/518153925/', | ||||
|         'md5': 'c6820d4828a5064447a4d9fc73f312c9', | ||||
|         'info_dict': { | ||||
| @@ -15,8 +16,12 @@ class EngadgetIE(InfoExtractor): | ||||
|             'title': 'Samsung Galaxy Tab Pro 8.4 Review', | ||||
|         }, | ||||
|         'add_ie': ['FiveMin'], | ||||
|     } | ||||
|     }, { | ||||
|         # video with vidible ID | ||||
|         'url': 'https://www.engadget.com/video/57a28462134aa15a39f0421a/', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         return self.url_result('5min:%s' % video_id) | ||||
|         return self.url_result('aol-video:%s' % video_id) | ||||
|   | ||||
| @@ -5,7 +5,7 @@ from ..utils import remove_end | ||||
|  | ||||
|  | ||||
| class ESPNIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P<id>[^/]+)' | ||||
|     _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://espn.go.com/video/clip?id=10365079', | ||||
|         'md5': '60e5d097a523e767d06479335d1bdc58', | ||||
| @@ -47,6 +47,9 @@ class ESPNIE(InfoExtractor): | ||||
|     }, { | ||||
|         'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.espn.com/video/clip?id=10365079', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
| @@ -1,58 +0,0 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class ExfmIE(InfoExtractor): | ||||
|     IE_NAME = 'exfm' | ||||
|     IE_DESC = 'ex.fm' | ||||
|     _VALID_URL = r'https?://(?:www\.)?ex\.fm/song/(?P<id>[^/]+)' | ||||
|     _SOUNDCLOUD_URL = r'http://(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://ex.fm/song/eh359', | ||||
|             'md5': 'e45513df5631e6d760970b14cc0c11e7', | ||||
|             'info_dict': { | ||||
|                 'id': '44216187', | ||||
|                 'ext': 'mp3', | ||||
|                 'title': 'Test House "Love Is Not Enough" (Extended Mix) DeadJournalist Exclusive', | ||||
|                 'uploader': 'deadjournalist', | ||||
|                 'upload_date': '20120424', | ||||
|                 'description': 'Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive', | ||||
|             }, | ||||
|             'note': 'Soundcloud song', | ||||
|             'skip': 'The site is down too often', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://ex.fm/song/wddt8', | ||||
|             'md5': '966bd70741ac5b8570d8e45bfaed3643', | ||||
|             'info_dict': { | ||||
|                 'id': 'wddt8', | ||||
|                 'ext': 'mp3', | ||||
|                 'title': 'Safe and Sound', | ||||
|                 'uploader': 'Capital Cities', | ||||
|             }, | ||||
|             'skip': 'The site is down too often', | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         song_id = mobj.group('id') | ||||
|         info_url = 'http://ex.fm/api/v3/song/%s' % song_id | ||||
|         info = self._download_json(info_url, song_id)['song'] | ||||
|         song_url = info['url'] | ||||
|         if re.match(self._SOUNDCLOUD_URL, song_url) is not None: | ||||
|             self.to_screen('Soundcloud song detected') | ||||
|             return self.url_result(song_url.replace('/stream', ''), 'Soundcloud') | ||||
|         return { | ||||
|             'id': song_id, | ||||
|             'url': song_url, | ||||
|             'ext': 'mp3', | ||||
|             'title': info['title'], | ||||
|             'thumbnail': info['image']['large'], | ||||
|             'uploader': info['artist'], | ||||
|             'view_count': info['loved_count'], | ||||
|         } | ||||
| @@ -1,7 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
| @@ -10,25 +8,24 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class ExpoTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.expotv\.com/videos/[^?#]*/(?P<id>[0-9]+)($|[?#])' | ||||
|     _VALID_URL = r'https?://(?:www\.)?expotv\.com/videos/[^?#]*/(?P<id>[0-9]+)($|[?#])' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.expotv.com/videos/reviews/1/24/LinneCardscom/17561', | ||||
|         'md5': '2985e6d7a392b2f7a05e0ca350fe41d0', | ||||
|         'url': 'http://www.expotv.com/videos/reviews/3/40/NYX-Butter-lipstick/667916', | ||||
|         'md5': 'fe1d728c3a813ff78f595bc8b7a707a8', | ||||
|         'info_dict': { | ||||
|             'id': '17561', | ||||
|             'id': '667916', | ||||
|             'ext': 'mp4', | ||||
|             'upload_date': '20060212', | ||||
|             'title': 'My Favorite Online Scrapbook Store', | ||||
|             'view_count': int, | ||||
|             'description': 'You\'ll find most everything you need at this virtual store front.', | ||||
|             'uploader': 'Anna T.', | ||||
|             'title': 'NYX Butter Lipstick Little Susie', | ||||
|             'description': 'Goes on like butter, but looks better!', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'uploader': 'Stephanie S.', | ||||
|             'upload_date': '20150520', | ||||
|             'view_count': int, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         player_key = self._search_regex( | ||||
| @@ -66,7 +63,7 @@ class ExpoTVIE(InfoExtractor): | ||||
|             fatal=False) | ||||
|         upload_date = unified_strdate(self._search_regex( | ||||
|             r'<h5>Reviewed on ([0-9/.]+)</h5>', webpage, 'upload date', | ||||
|             fatal=False)) | ||||
|             fatal=False), day_first=False) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|   | ||||
| @@ -1,12 +1,18 @@ | ||||
| # flake8: noqa | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .abc import ABCIE | ||||
| from .abc7news import Abc7NewsIE | ||||
| from .abc import ( | ||||
|     ABCIE, | ||||
|     ABCIViewIE, | ||||
| ) | ||||
| from .abcnews import ( | ||||
|     AbcNewsIE, | ||||
|     AbcNewsVideoIE, | ||||
| ) | ||||
| from .abcotvs import ( | ||||
|     ABCOTVSIE, | ||||
|     ABCOTVSClipsIE, | ||||
| ) | ||||
| from .academicearth import AcademicEarthCourseIE | ||||
| from .acast import ( | ||||
|     ACastIE, | ||||
| @@ -29,6 +35,7 @@ from .aftonbladet import AftonbladetIE | ||||
| from .airmozilla import AirMozillaIE | ||||
| from .aljazeera import AlJazeeraIE | ||||
| from .alphaporno import AlphaPornoIE | ||||
| from .amcnetworks import AMCNetworksIE | ||||
| from .animeondemand import AnimeOnDemandIE | ||||
| from .anitube import AnitubeIE | ||||
| from .anysex import AnySexIE | ||||
| @@ -67,6 +74,12 @@ from .atttechchannel import ATTTechChannelIE | ||||
| from .audimedia import AudiMediaIE | ||||
| from .audioboom import AudioBoomIE | ||||
| from .audiomack import AudiomackIE, AudiomackAlbumIE | ||||
| from .awaan import ( | ||||
|     AWAANIE, | ||||
|     AWAANVideoIE, | ||||
|     AWAANLiveIE, | ||||
|     AWAANSeasonIE, | ||||
| ) | ||||
| from .azubu import AzubuIE, AzubuLiveIE | ||||
| from .baidu import BaiduVideoIE | ||||
| from .bambuser import BambuserIE, BambuserChannelIE | ||||
| @@ -80,6 +93,7 @@ from .bbc import ( | ||||
| ) | ||||
| from .beeg import BeegIE | ||||
| from .behindkink import BehindKinkIE | ||||
| from .bellmedia import BellMediaIE | ||||
| from .beatportpro import BeatportProIE | ||||
| from .bet import BetIE | ||||
| from .bigflix import BigflixIE | ||||
| @@ -117,9 +131,12 @@ from .carambatv import ( | ||||
|     CarambaTVIE, | ||||
|     CarambaTVPageIE, | ||||
| ) | ||||
| from .cartoonnetwork import CartoonNetworkIE | ||||
| from .cbc import ( | ||||
|     CBCIE, | ||||
|     CBCPlayerIE, | ||||
|     CBCWatchVideoIE, | ||||
|     CBCWatchIE, | ||||
| ) | ||||
| from .cbs import CBSIE | ||||
| from .cbslocal import CBSLocalIE | ||||
| @@ -130,9 +147,11 @@ from .cbsnews import ( | ||||
| ) | ||||
| from .cbssports import CBSSportsIE | ||||
| from .ccc import CCCIE | ||||
| from .cctv import CCTVIE | ||||
| from .cda import CDAIE | ||||
| from .ceskatelevize import CeskaTelevizeIE | ||||
| from .channel9 import Channel9IE | ||||
| from .charlierose import CharlieRoseIE | ||||
| from .chaturbate import ChaturbateIE | ||||
| from .chilloutzone import ChilloutzoneIE | ||||
| from .chirbit import ( | ||||
| @@ -159,6 +178,7 @@ from .coub import CoubIE | ||||
| from .collegerama import CollegeRamaIE | ||||
| from .comedycentral import ( | ||||
|     ComedyCentralIE, | ||||
|     ComedyCentralShortnameIE, | ||||
|     ComedyCentralTVIE, | ||||
|     ToshIE, | ||||
| ) | ||||
| @@ -176,9 +196,12 @@ from .crunchyroll import ( | ||||
| ) | ||||
| from .cspan import CSpanIE | ||||
| from .ctsnews import CtsNewsIE | ||||
| from .ctv import CTVIE | ||||
| from .ctvnews import CTVNewsIE | ||||
| from .cultureunplugged import CultureUnpluggedIE | ||||
| from .curiositystream import ( | ||||
|     CuriosityStreamIE, | ||||
|     CuriosityStreamCollectionIE, | ||||
| ) | ||||
| from .cwtv import CWTVIE | ||||
| from .dailymail import DailyMailIE | ||||
| from .dailymotion import ( | ||||
| @@ -194,12 +217,6 @@ from .daum import ( | ||||
|     DaumUserIE, | ||||
| ) | ||||
| from .dbtv import DBTVIE | ||||
| from .dcn import ( | ||||
|     DCNIE, | ||||
|     DCNVideoIE, | ||||
|     DCNLiveIE, | ||||
|     DCNSeasonIE, | ||||
| ) | ||||
| from .dctp import DctpTvIE | ||||
| from .deezer import DeezerPlaylistIE | ||||
| from .democracynow import DemocracynowIE | ||||
| @@ -220,6 +237,7 @@ from .dvtv import DVTVIE | ||||
| from .dumpert import DumpertIE | ||||
| from .defense import DefenseGouvFrIE | ||||
| from .discovery import DiscoveryIE | ||||
| from .discoverygo import DiscoveryGoIE | ||||
| from .dispeak import DigitallySpeakingIE | ||||
| from .dropbox import DropboxIE | ||||
| from .dw import ( | ||||
| @@ -247,13 +265,18 @@ from .espn import ESPNIE | ||||
| from .esri import EsriVideoIE | ||||
| from .europa import EuropaIE | ||||
| from .everyonesmixtape import EveryonesMixtapeIE | ||||
| from .exfm import ExfmIE | ||||
| from .expotv import ExpoTVIE | ||||
| from .extremetube import ExtremeTubeIE | ||||
| from .eyedotv import EyedoTVIE | ||||
| from .facebook import FacebookIE | ||||
| from .facebook import ( | ||||
|     FacebookIE, | ||||
|     FacebookPluginsVideoIE, | ||||
| ) | ||||
| from .faz import FazIE | ||||
| from .fc2 import FC2IE | ||||
| from .fc2 import ( | ||||
|     FC2IE, | ||||
|     FC2EmbedIE, | ||||
| ) | ||||
| from .fczenit import FczenitIE | ||||
| from .firstpost import FirstpostIE | ||||
| from .firsttv import FirstTVIE | ||||
| @@ -268,12 +291,13 @@ from .formula1 import Formula1IE | ||||
| from .fourtube import FourTubeIE | ||||
| from .fox import FOXIE | ||||
| from .foxgay import FoxgayIE | ||||
| from .foxnews import FoxNewsIE | ||||
| from .foxsports import FoxSportsIE | ||||
| from .franceculture import ( | ||||
|     FranceCultureIE, | ||||
|     FranceCultureEmissionIE, | ||||
| from .foxnews import ( | ||||
|     FoxNewsIE, | ||||
|     FoxNewsArticleIE, | ||||
|     FoxNewsInsiderIE, | ||||
| ) | ||||
| from .foxsports import FoxSportsIE | ||||
| from .franceculture import FranceCultureIE | ||||
| from .franceinter import FranceInterIE | ||||
| from .francetv import ( | ||||
|     PluzzIE, | ||||
| @@ -288,8 +312,8 @@ from .freevideo import FreeVideoIE | ||||
| from .funimation import FunimationIE | ||||
| from .funnyordie import FunnyOrDieIE | ||||
| from .fusion import FusionIE | ||||
| from .fxnetworks import FXNetworksIE | ||||
| from .gameinformer import GameInformerIE | ||||
| from .gamekings import GamekingsIE | ||||
| from .gameone import ( | ||||
|     GameOneIE, | ||||
|     GameOnePlaylistIE, | ||||
| @@ -308,9 +332,9 @@ from .globo import ( | ||||
|     GloboIE, | ||||
|     GloboArticleIE, | ||||
| ) | ||||
| from .go import GoIE | ||||
| from .godtube import GodTubeIE | ||||
| from .godtv import GodTVIE | ||||
| from .goldenmoustache import GoldenMoustacheIE | ||||
| from .golem import GolemIE | ||||
| from .googledrive import GoogleDriveIE | ||||
| from .googleplus import GooglePlusIE | ||||
| @@ -325,6 +349,10 @@ from .heise import HeiseIE | ||||
| from .hellporno import HellPornoIE | ||||
| from .helsinki import HelsinkiIE | ||||
| from .hentaistigma import HentaiStigmaIE | ||||
| from .hgtv import ( | ||||
|     HGTVIE, | ||||
|     HGTVComShowIE, | ||||
| ) | ||||
| from .historicfilms import HistoricFilmsIE | ||||
| from .hitbox import HitboxIE, HitboxLiveIE | ||||
| from .hornbunny import HornBunnyIE | ||||
| @@ -368,6 +396,7 @@ from .ivi import ( | ||||
|     IviCompilationIE | ||||
| ) | ||||
| from .ivideon import IvideonIE | ||||
| from .iwara import IwaraIE | ||||
| from .izlesene import IzleseneIE | ||||
| from .jeuxvideo import JeuxVideoIE | ||||
| from .jove import JoveIE | ||||
| @@ -380,6 +409,7 @@ from .kankan import KankanIE | ||||
| from .karaoketv import KaraoketvIE | ||||
| from .karrierevideos import KarriereVideosIE | ||||
| from .keezmovies import KeezMoviesIE | ||||
| from .ketnet import KetnetIE | ||||
| from .khanacademy import KhanAcademyIE | ||||
| from .kickstarter import KickStarterIE | ||||
| from .keek import KeekIE | ||||
| @@ -398,6 +428,7 @@ from .kuwo import ( | ||||
| ) | ||||
| from .la7 import LA7IE | ||||
| from .laola1tv import Laola1TvIE | ||||
| from .lci import LCIIE | ||||
| from .lcp import ( | ||||
|     LcpPlayIE, | ||||
|     LcpIE, | ||||
| @@ -448,6 +479,7 @@ from .metacafe import MetacafeIE | ||||
| from .metacritic import MetacriticIE | ||||
| from .mgoon import MgoonIE | ||||
| from .mgtv import MGTVIE | ||||
| from .miaopai import MiaoPaiIE | ||||
| from .microsoftvirtualacademy import ( | ||||
|     MicrosoftVirtualAcademyIE, | ||||
|     MicrosoftVirtualAcademyCourseIE, | ||||
| @@ -476,6 +508,7 @@ from .motherless import MotherlessIE | ||||
| from .motorsport import MotorsportIE | ||||
| from .movieclips import MovieClipsIE | ||||
| from .moviezine import MoviezineIE | ||||
| from .movingimage import MovingImageIE | ||||
| from .msn import MSNIE | ||||
| from .mtv import ( | ||||
|     MTVIE, | ||||
| @@ -491,8 +524,9 @@ from .myvi import MyviIE | ||||
| from .myvideo import MyVideoIE | ||||
| from .myvidster import MyVidsterIE | ||||
| from .nationalgeographic import ( | ||||
|     NationalGeographicVideoIE, | ||||
|     NationalGeographicIE, | ||||
|     NationalGeographicChannelIE, | ||||
|     NationalGeographicEpisodeGuideIE, | ||||
| ) | ||||
| from .naver import NaverIE | ||||
| from .nba import NBAIE | ||||
| @@ -500,6 +534,7 @@ from .nbc import ( | ||||
|     CSNNEIE, | ||||
|     NBCIE, | ||||
|     NBCNewsIE, | ||||
|     NBCOlympicsIE, | ||||
|     NBCSportsIE, | ||||
|     NBCSportsVPlayerIE, | ||||
| ) | ||||
| @@ -531,6 +566,7 @@ from .nextmedia import ( | ||||
| ) | ||||
| from .nfb import NFBIE | ||||
| from .nfl import NFLIE | ||||
| from .nhk import NhkVodIE | ||||
| from .nhl import ( | ||||
|     NHLVideocenterIE, | ||||
|     NHLNewsIE, | ||||
| @@ -542,7 +578,10 @@ from .nick import ( | ||||
|     NickDeIE, | ||||
| ) | ||||
| from .niconico import NiconicoIE, NiconicoPlaylistIE | ||||
| from .ninecninemedia import NineCNineMediaIE | ||||
| from .ninecninemedia import ( | ||||
|     NineCNineMediaStackIE, | ||||
|     NineCNineMediaIE, | ||||
| ) | ||||
| from .ninegag import NineGagIE | ||||
| from .ninenow import NineNowIE | ||||
| from .nintendo import NintendoIE | ||||
| @@ -624,7 +663,6 @@ from .phoenix import PhoenixIE | ||||
| from .photobucket import PhotobucketIE | ||||
| from .pinkbike import PinkbikeIE | ||||
| from .pladform import PladformIE | ||||
| from .played import PlayedIE | ||||
| from .playfm import PlayFMIE | ||||
| from .plays import PlaysTVIE | ||||
| from .playtvak import PlaytvakIE | ||||
| @@ -635,8 +673,13 @@ from .pluralsight import ( | ||||
|     PluralsightCourseIE, | ||||
| ) | ||||
| from .podomatic import PodomaticIE | ||||
| from .polskieradio import PolskieRadioIE | ||||
| from .pokemon import PokemonIE | ||||
| from .polskieradio import ( | ||||
|     PolskieRadioIE, | ||||
|     PolskieRadioCategoryIE, | ||||
| ) | ||||
| from .porn91 import Porn91IE | ||||
| from .porncom import PornComIE | ||||
| from .pornhd import PornHdIE | ||||
| from .pornhub import ( | ||||
|     PornHubIE, | ||||
| @@ -688,11 +731,13 @@ from .revision3 import ( | ||||
| ) | ||||
| from .rice import RICEIE | ||||
| from .ringtv import RingTVIE | ||||
| from .rmcdecouverte import RMCDecouverteIE | ||||
| from .ro220 import Ro220IE | ||||
| from .rockstargames import RockstarGamesIE | ||||
| from .roosterteeth import RoosterTeethIE | ||||
| from .rottentomatoes import RottenTomatoesIE | ||||
| from .roxwel import RoxwelIE | ||||
| from .rozhlas import RozhlasIE | ||||
| from .rtbf import RTBFIE | ||||
| from .rte import RteIE, RteRadioIE | ||||
| from .rtlnl import RtlNlIE | ||||
| @@ -752,6 +797,7 @@ from .smotri import ( | ||||
| ) | ||||
| from .snotr import SnotrIE | ||||
| from .sohu import SohuIE | ||||
| from .sonyliv import SonyLIVIE | ||||
| from .soundcloud import ( | ||||
|     SoundcloudIE, | ||||
|     SoundcloudSetIE, | ||||
| @@ -788,7 +834,6 @@ from .srgssr import ( | ||||
|     SRGSSRPlayIE, | ||||
| ) | ||||
| from .srmediathek import SRMediathekIE | ||||
| from .ssa import SSAIE | ||||
| from .stanfordoc import StanfordOpenClassroomIE | ||||
| from .steam import SteamIE | ||||
| from .streamable import StreamableIE | ||||
| @@ -807,8 +852,8 @@ from .tagesschau import ( | ||||
|     TagesschauPlayerIE, | ||||
|     TagesschauIE, | ||||
| ) | ||||
| from .tapely import TapelyIE | ||||
| from .tass import TassIE | ||||
| from .tbs import TBSIE | ||||
| from .tdslifeway import TDSLifewayIE | ||||
| from .teachertube import ( | ||||
|     TeacherTubeIE, | ||||
| @@ -823,10 +868,12 @@ from .telebruxelles import TeleBruxellesIE | ||||
| from .telecinco import TelecincoIE | ||||
| from .telegraaf import TelegraafIE | ||||
| from .telemb import TeleMBIE | ||||
| from .telequebec import TeleQuebecIE | ||||
| from .teletask import TeleTaskIE | ||||
| from .telewebion import TelewebionIE | ||||
| from .testurl import TestURLIE | ||||
| from .tf1 import TF1IE | ||||
| from .tfo import TFOIE | ||||
| from .theintercept import TheInterceptIE | ||||
| from .theplatform import ( | ||||
|     ThePlatformIE, | ||||
| @@ -851,16 +898,11 @@ from .tnaflix import ( | ||||
|     MovieFapIE, | ||||
| ) | ||||
| from .toggle import ToggleIE | ||||
| from .thvideo import ( | ||||
|     THVideoIE, | ||||
|     THVideoPlaylistIE | ||||
| ) | ||||
| from .toutv import TouTvIE | ||||
| from .toypics import ToypicsUserIE, ToypicsIE | ||||
| from .traileraddict import TrailerAddictIE | ||||
| from .trilulilu import TriluliluIE | ||||
| from .trollvids import TrollvidsIE | ||||
| from .trutube import TruTubeIE | ||||
| from .trutv import TruTVIE | ||||
| from .tube8 import Tube8IE | ||||
| from .tubitv import TubiTvIE | ||||
| from .tudou import ( | ||||
| @@ -890,11 +932,16 @@ from .tvc import ( | ||||
| ) | ||||
| from .tvigle import TvigleIE | ||||
| from .tvland import TVLandIE | ||||
| from .tvnoe import TVNoeIE | ||||
| from .tvp import ( | ||||
|     TVPEmbedIE, | ||||
|     TVPIE, | ||||
|     TVPSeriesIE, | ||||
| ) | ||||
| from .tvplay import TVPlayIE | ||||
| from .tvplay import ( | ||||
|     TVPlayIE, | ||||
|     ViafreeIE, | ||||
| ) | ||||
| from .tweakers import TweakersIE | ||||
| from .twentyfourvideo import TwentyFourVideoIE | ||||
| from .twentymin import TwentyMinutenIE | ||||
| @@ -923,8 +970,14 @@ from .udemy import ( | ||||
| from .udn import UDNEmbedIE | ||||
| from .digiteka import DigitekaIE | ||||
| from .unistra import UnistraIE | ||||
| from .uol import UOLIE | ||||
| from .uplynk import ( | ||||
|     UplynkIE, | ||||
|     UplynkPreplayIE, | ||||
| ) | ||||
| from .urort import UrortIE | ||||
| from .urplay import URPlayIE | ||||
| from .usanetwork import USANetworkIE | ||||
| from .usatoday import USATodayIE | ||||
| from .ustream import UstreamIE, UstreamChannelIE | ||||
| from .ustudio import ( | ||||
| @@ -951,6 +1004,7 @@ from .vice import ( | ||||
|     ViceIE, | ||||
|     ViceShowIE, | ||||
| ) | ||||
| from .viceland import VicelandIE | ||||
| from .vidbit import VidbitIE | ||||
| from .viddler import ViddlerIE | ||||
| from .videodetective import VideoDetectiveIE | ||||
| @@ -1004,6 +1058,7 @@ from .vk import ( | ||||
| ) | ||||
| from .vlive import VLiveIE | ||||
| from .vodlocker import VodlockerIE | ||||
| from .vodplatform import VODPlatformIE | ||||
| from .voicerepublic import VoiceRepublicIE | ||||
| from .voxmedia import VoxMediaIE | ||||
| from .vporn import VpornIE | ||||
| @@ -1096,8 +1151,4 @@ from .youtube import ( | ||||
| ) | ||||
| from .zapiks import ZapiksIE | ||||
| from .zdf import ZDFIE, ZDFChannelIE | ||||
| from .zingmp3 import ( | ||||
|     ZingMp3SongIE, | ||||
|     ZingMp3AlbumIE, | ||||
| ) | ||||
| from .zippcast import ZippCastIE | ||||
| from .zingmp3 import ZingMp3IE | ||||
|   | ||||
| @@ -1,20 +1,14 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     sanitized_Request, | ||||
|     str_to_int, | ||||
| ) | ||||
| from ..utils import str_to_int | ||||
| from .keezmovies import KeezMoviesIE | ||||
|  | ||||
|  | ||||
| class ExtremeTubeIE(InfoExtractor): | ||||
| class ExtremeTubeIE(KeezMoviesIE): | ||||
|     _VALID_URL = r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?P<id>[^/#?&]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431', | ||||
|         'md5': '344d0c6d50e2f16b06e49ca011d8ac69', | ||||
|         'md5': '1fb9228f5e3332ec8c057d6ac36f33e0', | ||||
|         'info_dict': { | ||||
|             'id': 'music-video-14-british-euro-brit-european-cumshots-swallow-652431', | ||||
|             'ext': 'mp4', | ||||
| @@ -35,58 +29,22 @@ class ExtremeTubeIE(InfoExtractor): | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage, info = self._extract_info(url) | ||||
|  | ||||
|         req = sanitized_Request(url) | ||||
|         req.add_header('Cookie', 'age_verified=1') | ||||
|         webpage = self._download_webpage(req, video_id) | ||||
|         if not info['title']: | ||||
|             info['title'] = self._search_regex( | ||||
|                 r'<h1[^>]+title="([^"]+)"[^>]*>', webpage, 'title') | ||||
|  | ||||
|         video_title = self._html_search_regex( | ||||
|             r'<h1 [^>]*?title="([^"]+)"[^>]*>', webpage, 'title') | ||||
|         uploader = self._html_search_regex( | ||||
|             r'Uploaded by:\s*</strong>\s*(.+?)\s*</div>', | ||||
|             webpage, 'uploader', fatal=False) | ||||
|         view_count = str_to_int(self._html_search_regex( | ||||
|         view_count = str_to_int(self._search_regex( | ||||
|             r'Views:\s*</strong>\s*<span>([\d,\.]+)</span>', | ||||
|             webpage, 'view count', fatal=False)) | ||||
|  | ||||
|         flash_vars = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 r'var\s+flashvars\s*=\s*({.+?});', webpage, 'flash vars'), | ||||
|             video_id) | ||||
|  | ||||
|         formats = [] | ||||
|         for quality_key, video_url in flash_vars.items(): | ||||
|             height = int_or_none(self._search_regex( | ||||
|                 r'quality_(\d+)[pP]$', quality_key, 'height', default=None)) | ||||
|             if not height: | ||||
|                 continue | ||||
|             f = { | ||||
|                 'url': video_url, | ||||
|             } | ||||
|             mobj = re.search( | ||||
|                 r'/(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url) | ||||
|             if mobj: | ||||
|                 height = int(mobj.group('height')) | ||||
|                 bitrate = int(mobj.group('bitrate')) | ||||
|                 f.update({ | ||||
|                     'format_id': '%dp-%dk' % (height, bitrate), | ||||
|                     'height': height, | ||||
|                     'tbr': bitrate, | ||||
|                 }) | ||||
|             else: | ||||
|                 f.update({ | ||||
|                     'format_id': '%dp' % height, | ||||
|                     'height': height, | ||||
|                 }) | ||||
|             formats.append(f) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': video_title, | ||||
|             'formats': formats, | ||||
|         info.update({ | ||||
|             'uploader': uploader, | ||||
|             'view_count': view_count, | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|         }) | ||||
|  | ||||
|         return info | ||||
|   | ||||
| @@ -15,6 +15,7 @@ from ..compat import ( | ||||
| from ..utils import ( | ||||
|     error_to_compat_str, | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     limit_length, | ||||
|     sanitized_Request, | ||||
|     urlencode_postdata, | ||||
| @@ -62,6 +63,8 @@ class FacebookIE(InfoExtractor): | ||||
|             'ext': 'mp4', | ||||
|             'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam', | ||||
|             'uploader': 'Tennis on Facebook', | ||||
|             'upload_date': '20140908', | ||||
|             'timestamp': 1410199200, | ||||
|         } | ||||
|     }, { | ||||
|         'note': 'Video without discernible title', | ||||
| @@ -71,6 +74,8 @@ class FacebookIE(InfoExtractor): | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Facebook video #274175099429670', | ||||
|             'uploader': 'Asif Nawab Butt', | ||||
|             'upload_date': '20140506', | ||||
|             'timestamp': 1399398998, | ||||
|         }, | ||||
|         'expected_warnings': [ | ||||
|             'title' | ||||
| @@ -78,12 +83,14 @@ class FacebookIE(InfoExtractor): | ||||
|     }, { | ||||
|         'note': 'Video with DASH manifest', | ||||
|         'url': 'https://www.facebook.com/video.php?v=957955867617029', | ||||
|         'md5': '54706e4db4f5ad58fbad82dde1f1213f', | ||||
|         'md5': 'b2c28d528273b323abe5c6ab59f0f030', | ||||
|         'info_dict': { | ||||
|             'id': '957955867617029', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'When you post epic content on instagram.com/433 8 million followers, this is ...', | ||||
|             'uploader': 'Demy de Zeeuw', | ||||
|             'upload_date': '20160110', | ||||
|             'timestamp': 1452431627, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570', | ||||
| @@ -306,12 +313,16 @@ class FacebookIE(InfoExtractor): | ||||
|         if not video_title: | ||||
|             video_title = 'Facebook video #%s' % video_id | ||||
|         uploader = clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage)) | ||||
|         timestamp = int_or_none(self._search_regex( | ||||
|             r'<abbr[^>]+data-utime=["\'](\d+)', webpage, | ||||
|             'timestamp', default=None)) | ||||
|  | ||||
|         info_dict = { | ||||
|             'id': video_id, | ||||
|             'title': video_title, | ||||
|             'formats': formats, | ||||
|             'uploader': uploader, | ||||
|             'timestamp': timestamp, | ||||
|         } | ||||
|  | ||||
|         return webpage, info_dict | ||||
| @@ -340,3 +351,32 @@ class FacebookIE(InfoExtractor): | ||||
|                 self._VIDEO_PAGE_TEMPLATE % video_id, | ||||
|                 video_id, fatal_if_no_video=True) | ||||
|             return info_dict | ||||
|  | ||||
|  | ||||
| class FacebookPluginsVideoIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/plugins/video\.php\?.*?\bhref=(?P<id>https.+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fgov.sg%2Fvideos%2F10154383743583686%2F&show_text=0&width=560', | ||||
|         'md5': '5954e92cdfe51fe5782ae9bda7058a07', | ||||
|         'info_dict': { | ||||
|             'id': '10154383743583686', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'What to do during the haze?', | ||||
|             'uploader': 'Gov.sg', | ||||
|             'upload_date': '20160826', | ||||
|             'timestamp': 1472184808, | ||||
|         }, | ||||
|         'add_ie': [FacebookIE.ie_key()], | ||||
|     }, { | ||||
|         'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fvideo.php%3Fv%3D10204634152394104', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.facebook.com/plugins/video.php?href=https://www.facebook.com/gov.sg/videos/10154383743583686/&show_text=0&width=560', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         return self.url_result( | ||||
|             compat_urllib_parse_unquote(self._match_id(url)), | ||||
|             FacebookIE.ie_key()) | ||||
|   | ||||
| @@ -1,10 +1,12 @@ | ||||
| #! -*- coding: utf-8 -*- | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import hashlib | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_parse_qs, | ||||
|     compat_urllib_request, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| @@ -16,7 +18,7 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class FC2IE(InfoExtractor): | ||||
|     _VALID_URL = r'^https?://video\.fc2\.com/(?:[^/]+/)*content/(?P<id>[^/]+)' | ||||
|     _VALID_URL = r'^(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)' | ||||
|     IE_NAME = 'fc2' | ||||
|     _NETRC_MACHINE = 'fc2' | ||||
|     _TESTS = [{ | ||||
| @@ -75,10 +77,15 @@ class FC2IE(InfoExtractor): | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         self._login() | ||||
|         webpage = None | ||||
|         if not url.startswith('fc2:'): | ||||
|             webpage = self._download_webpage(url, video_id) | ||||
|             self._downloader.cookiejar.clear_session_cookies()  # must clear | ||||
|             self._login() | ||||
|  | ||||
|         title = 'FC2 video %s' % video_id | ||||
|         thumbnail = None | ||||
|         if webpage is not None: | ||||
|             title = self._og_search_title(webpage) | ||||
|             thumbnail = self._og_search_thumbnail(webpage) | ||||
|         refer = url.replace('/content/', '/a/content/') if '/a/content/' not in url else url | ||||
| @@ -113,3 +120,41 @@ class FC2IE(InfoExtractor): | ||||
|             'ext': 'flv', | ||||
|             'thumbnail': thumbnail, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class FC2EmbedIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://video\.fc2\.com/flv2\.swf\?(?P<query>.+)' | ||||
|     IE_NAME = 'fc2:embed' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://video.fc2.com/flv2.swf?t=201404182936758512407645&i=20130316kwishtfitaknmcgd76kjd864hso93htfjcnaogz629mcgfs6rbfk0hsycma7shkf85937cbchfygd74&i=201403223kCqB3Ez&d=2625&sj=11&lang=ja&rel=1&from=11&cmt=1&tk=TlRBM09EQTNNekU9&tl=プリズン・ブレイク%20S1-01%20マイケル%20【吹替】', | ||||
|         'md5': 'b8aae5334cb691bdb1193a88a6ab5d5a', | ||||
|         'info_dict': { | ||||
|             'id': '201403223kCqB3Ez', | ||||
|             'ext': 'flv', | ||||
|             'title': 'プリズン・ブレイク S1-01 マイケル 【吹替】', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         query = compat_parse_qs(mobj.group('query')) | ||||
|  | ||||
|         video_id = query['i'][-1] | ||||
|         title = query.get('tl', ['FC2 video %s' % video_id])[0] | ||||
|  | ||||
|         sj = query.get('sj', [None])[0] | ||||
|         thumbnail = None | ||||
|         if sj: | ||||
|             # See thumbnailImagePath() in ServerConst.as of flv2.swf | ||||
|             thumbnail = 'http://video%s-thumbnail.fc2.com/up/pic/%s.jpg' % ( | ||||
|                 sj, '/'.join((video_id[:6], video_id[6:8], video_id[-2], video_id[-1], video_id))) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'ie_key': FC2IE.ie_key(), | ||||
|             'url': 'fc2:%s' % video_id, | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
|         } | ||||
|   | ||||
| @@ -2,44 +2,40 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_xpath | ||||
| from ..compat import compat_urlparse | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     qualities, | ||||
|     unified_strdate, | ||||
|     xpath_attr, | ||||
|     xpath_element, | ||||
|     xpath_text, | ||||
|     xpath_with_ns, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class FirstTVIE(InfoExtractor): | ||||
|     IE_NAME = '1tv' | ||||
|     IE_DESC = 'Первый канал' | ||||
|     _VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+p?(?P<id>\d+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>[^/?#]+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         # single format via video_materials.json API | ||||
|         'url': 'http://www.1tv.ru/prj/inprivate/vypusk/35930', | ||||
|         'md5': '82a2777648acae812d58b3f5bd42882b', | ||||
|         # single format | ||||
|         'url': 'http://www.1tv.ru/shows/naedine-so-vsemi/vypuski/gost-lyudmila-senchina-naedine-so-vsemi-vypusk-ot-12-02-2015', | ||||
|         'md5': 'a1b6b60d530ebcf8daacf4565762bbaf', | ||||
|         'info_dict': { | ||||
|             'id': '35930', | ||||
|             'id': '40049', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015', | ||||
|             'description': 'md5:357933adeede13b202c7c21f91b871b2', | ||||
|             'description': 'md5:36a39c1d19618fec57d12efe212a8370', | ||||
|             'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', | ||||
|             'upload_date': '20150212', | ||||
|             'duration': 2694, | ||||
|         }, | ||||
|     }, { | ||||
|         # multiple formats via video_materials.json API | ||||
|         'url': 'http://www.1tv.ru/video_archive/projects/dobroeutro/p113641', | ||||
|         # multiple formats | ||||
|         'url': 'http://www.1tv.ru/shows/dobroe-utro/pro-zdorove/vesennyaya-allergiya-dobroe-utro-fragment-vypuska-ot-07042016', | ||||
|         'info_dict': { | ||||
|             'id': '113641', | ||||
|             'id': '364746', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Весенняя аллергия. Доброе утро. Фрагмент выпуска от 07.04.2016', | ||||
|             'description': 'md5:8dcebb3dded0ff20fade39087fd1fee2', | ||||
|             'description': 'md5:a242eea0031fd180a4497d52640a9572', | ||||
|             'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', | ||||
|             'upload_date': '20160407', | ||||
|             'duration': 179, | ||||
| @@ -48,76 +44,39 @@ class FirstTVIE(InfoExtractor): | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         # single format only available via ONE_ONLINE_VIDEOS.archive_single_xml API | ||||
|         'url': 'http://www.1tv.ru/video_archive/series/f7552/p47038', | ||||
|         'md5': '519d306c5b5669761fd8906c39dbee23', | ||||
|         'info_dict': { | ||||
|             'id': '47038', | ||||
|             'ext': 'mp4', | ||||
|             'title': '"Побег". Второй сезон. 3 серия', | ||||
|             'description': 'md5:3abf8f6b9bce88201c33e9a3d794a00b', | ||||
|             'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', | ||||
|             'upload_date': '20120516', | ||||
|             'duration': 3080, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.1tv.ru/videoarchive/9967', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         display_id = self._match_id(url) | ||||
|  | ||||
|         # Videos with multiple formats only available via this API | ||||
|         video = self._download_json( | ||||
|             'http://www.1tv.ru/video_materials.json?legacy_id=%s' % video_id, | ||||
|             video_id, fatal=False) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         playlist_url = compat_urlparse.urljoin(url, self._search_regex( | ||||
|             r'data-playlist-url="([^"]+)', webpage, 'playlist url')) | ||||
|  | ||||
|         description, thumbnail, upload_date, duration = [None] * 4 | ||||
|  | ||||
|         if video: | ||||
|             item = video[0] | ||||
|             title = item['title'] | ||||
|         item = self._download_json(playlist_url, display_id)[0] | ||||
|         video_id = item['id'] | ||||
|         quality = qualities(('ld', 'sd', 'hd', )) | ||||
|             formats = [{ | ||||
|                 'url': f['src'], | ||||
|                 'format_id': f.get('name'), | ||||
|                 'quality': quality(f.get('name')), | ||||
|             } for f in item['mbr'] if f.get('src')] | ||||
|             thumbnail = item.get('poster') | ||||
|         else: | ||||
|             # Some videos are not available via video_materials.json | ||||
|             video = self._download_xml( | ||||
|                 'http://www.1tv.ru/owa/win/ONE_ONLINE_VIDEOS.archive_single_xml?pid=%s' % video_id, | ||||
|                 video_id) | ||||
|  | ||||
|             NS_MAP = { | ||||
|                 'media': 'http://search.yahoo.com/mrss/', | ||||
|             } | ||||
|  | ||||
|             item = xpath_element(video, './channel/item', fatal=True) | ||||
|             title = xpath_text(item, './title', fatal=True) | ||||
|             formats = [{ | ||||
|                 'url': content.attrib['url'], | ||||
|             } for content in item.findall( | ||||
|                 compat_xpath(xpath_with_ns('./media:content', NS_MAP))) if content.attrib.get('url')] | ||||
|             thumbnail = xpath_attr( | ||||
|                 item, xpath_with_ns('./media:thumbnail', NS_MAP), 'url') | ||||
|  | ||||
|         formats = [] | ||||
|         for f in item.get('mbr', []): | ||||
|             src = f.get('src') | ||||
|             if not src: | ||||
|                 continue | ||||
|             fname = f.get('name') | ||||
|             formats.append({ | ||||
|                 'url': src, | ||||
|                 'format_id': fname, | ||||
|                 'quality': quality(fname), | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id, 'Downloading page', fatal=False) | ||||
|         if webpage: | ||||
|         title = self._html_search_regex( | ||||
|             (r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>', | ||||
|              r"'title'\s*:\s*'([^']+)'"), | ||||
|                 webpage, 'title', default=None) or title | ||||
|             webpage, 'title', default=None) or item['title'] | ||||
|         description = self._html_search_regex( | ||||
|             r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>', | ||||
|             webpage, 'description', default=None) or self._html_search_meta( | ||||
|             'description', webpage, 'description') | ||||
|             thumbnail = thumbnail or self._og_search_thumbnail(webpage) | ||||
|         duration = int_or_none(self._html_search_meta( | ||||
|             'video:duration', webpage, 'video duration', fatal=False)) | ||||
|         upload_date = unified_strdate(self._html_search_meta( | ||||
| @@ -125,7 +84,7 @@ class FirstTVIE(InfoExtractor): | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'thumbnail': thumbnail, | ||||
|             'thumbnail': item.get('poster') or self._og_search_thumbnail(webpage), | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'upload_date': upload_date, | ||||
|   | ||||
| @@ -1,24 +1,11 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_parse_qs, | ||||
|     compat_urllib_parse_urlencode, | ||||
|     compat_urllib_parse_urlparse, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     parse_duration, | ||||
|     replace_extension, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class FiveMinIE(InfoExtractor): | ||||
|     IE_NAME = '5min' | ||||
|     _VALID_URL = r'(?:5min:(?P<id>\d+)(?::(?P<sid>\d+))?|https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?P<query>.*))' | ||||
|     _VALID_URL = r'(?:5min:|https?://(?:[^/]*?5min\.com/|delivery\.vidible\.tv/aol)(?:(?:Scripts/PlayerSeed\.js|playerseed/?)?\?.*?playList=)?)(?P<id>\d+)' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
| @@ -29,8 +16,16 @@ class FiveMinIE(InfoExtractor): | ||||
|                 'id': '518013791', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'iPad Mini with Retina Display Review', | ||||
|                 'description': 'iPad mini with Retina Display review', | ||||
|                 'duration': 177, | ||||
|                 'uploader': 'engadget', | ||||
|                 'upload_date': '20131115', | ||||
|                 'timestamp': 1384515288, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # m3u8 download | ||||
|                 'skip_download': True, | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             # From http://on.aol.com/video/how-to-make-a-next-level-fruit-salad-518086247 | ||||
| @@ -44,108 +39,16 @@ class FiveMinIE(InfoExtractor): | ||||
|             }, | ||||
|             'skip': 'no longer available', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://embed.5min.com/518726732/', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://delivery.vidible.tv/aol?playList=518013791', | ||||
|             'only_matching': True, | ||||
|         } | ||||
|     ] | ||||
|     _ERRORS = { | ||||
|         'ErrorVideoNotExist': 'We\'re sorry, but the video you are trying to watch does not exist.', | ||||
|         'ErrorVideoNoLongerAvailable': 'We\'re sorry, but the video you are trying to watch is no longer available.', | ||||
|         'ErrorVideoRejected': 'We\'re sorry, but the video you are trying to watch has been removed.', | ||||
|         'ErrorVideoUserNotGeo': 'We\'re sorry, but the video you are trying to watch cannot be viewed from your current location.', | ||||
|         'ErrorVideoLibraryRestriction': 'We\'re sorry, but the video you are trying to watch is currently unavailable for viewing at this domain.', | ||||
|         'ErrorExposurePermission': 'We\'re sorry, but the video you are trying to watch is currently unavailable for viewing at this domain.', | ||||
|     } | ||||
|     _QUALITIES = { | ||||
|         1: { | ||||
|             'width': 640, | ||||
|             'height': 360, | ||||
|         }, | ||||
|         2: { | ||||
|             'width': 854, | ||||
|             'height': 480, | ||||
|         }, | ||||
|         4: { | ||||
|             'width': 1280, | ||||
|             'height': 720, | ||||
|         }, | ||||
|         8: { | ||||
|             'width': 1920, | ||||
|             'height': 1080, | ||||
|         }, | ||||
|         16: { | ||||
|             'width': 640, | ||||
|             'height': 360, | ||||
|         }, | ||||
|         32: { | ||||
|             'width': 854, | ||||
|             'height': 480, | ||||
|         }, | ||||
|         64: { | ||||
|             'width': 1280, | ||||
|             'height': 720, | ||||
|         }, | ||||
|         128: { | ||||
|             'width': 640, | ||||
|             'height': 360, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         sid = mobj.group('sid') | ||||
|  | ||||
|         if mobj.group('query'): | ||||
|             qs = compat_parse_qs(mobj.group('query')) | ||||
|             if not qs.get('playList'): | ||||
|                 raise ExtractorError('Invalid URL', expected=True) | ||||
|             video_id = qs['playList'][0] | ||||
|             if qs.get('sid'): | ||||
|                 sid = qs['sid'][0] | ||||
|  | ||||
|         embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id | ||||
|         if not sid: | ||||
|             embed_page = self._download_webpage(embed_url, video_id, | ||||
|                                                 'Downloading embed page') | ||||
|             sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid') | ||||
|  | ||||
|         response = self._download_json( | ||||
|             'https://syn.5min.com/handlers/SenseHandler.ashx?' + | ||||
|             compat_urllib_parse_urlencode({ | ||||
|                 'func': 'GetResults', | ||||
|                 'playlist': video_id, | ||||
|                 'sid': sid, | ||||
|                 'isPlayerSeed': 'true', | ||||
|                 'url': embed_url, | ||||
|             }), | ||||
|             video_id) | ||||
|         if not response['success']: | ||||
|             raise ExtractorError( | ||||
|                 '%s said: %s' % ( | ||||
|                     self.IE_NAME, | ||||
|                     self._ERRORS.get(response['errorMessage'], response['errorMessage'])), | ||||
|                 expected=True) | ||||
|         info = response['binding'][0] | ||||
|  | ||||
|         formats = [] | ||||
|         parsed_video_url = compat_urllib_parse_urlparse(compat_parse_qs( | ||||
|             compat_urllib_parse_urlparse(info['EmbededURL']).query)['videoUrl'][0]) | ||||
|         for rendition in info['Renditions']: | ||||
|             if rendition['RenditionType'] == 'aac' or rendition['RenditionType'] == 'm3u8': | ||||
|                 continue | ||||
|             else: | ||||
|                 rendition_url = compat_urlparse.urlunparse(parsed_video_url._replace(path=replace_extension(parsed_video_url.path.replace('//', '/%s/' % rendition['ID']), rendition['RenditionType']))) | ||||
|                 quality = self._QUALITIES.get(rendition['ID'], {}) | ||||
|                 formats.append({ | ||||
|                     'format_id': '%s-%d' % (rendition['RenditionType'], rendition['ID']), | ||||
|                     'url': rendition_url, | ||||
|                     'width': quality.get('width'), | ||||
|                     'height': quality.get('height'), | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': info['Title'], | ||||
|             'thumbnail': info.get('ThumbURL'), | ||||
|             'duration': parse_duration(info.get('Duration')), | ||||
|             'formats': formats, | ||||
|         } | ||||
|         video_id = self._match_id(url) | ||||
|         return self.url_result('aol-video:%s' % video_id) | ||||
|   | ||||
| @@ -48,7 +48,7 @@ class FlipagramIE(InfoExtractor): | ||||
|         flipagram = video_data['flipagram'] | ||||
|         video = flipagram['video'] | ||||
|  | ||||
|         json_ld = self._search_json_ld(webpage, video_id, default=False) | ||||
|         json_ld = self._search_json_ld(webpage, video_id, default={}) | ||||
|         title = json_ld.get('title') or flipagram['captionText'] | ||||
|         description = json_ld.get('description') or flipagram.get('captionText') | ||||
|  | ||||
|   | ||||
| @@ -5,8 +5,8 @@ from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class Formula1IE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?formula1\.com/content/fom-website/en/video/\d{4}/\d{1,2}/(?P<id>.+?)\.html' | ||||
|     _TEST = { | ||||
|     _VALID_URL = r'https?://(?:www\.)?formula1\.com/(?:content/fom-website/)?en/video/\d{4}/\d{1,2}/(?P<id>.+?)\.html' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.formula1.com/content/fom-website/en/video/2016/5/Race_highlights_-_Spain_2016.html', | ||||
|         'md5': '8c79e54be72078b26b89e0e111c0502b', | ||||
|         'info_dict': { | ||||
| @@ -15,7 +15,10 @@ class Formula1IE(InfoExtractor): | ||||
|             'title': 'Race highlights - Spain 2016', | ||||
|         }, | ||||
|         'add_ie': ['Ooyala'], | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'http://www.formula1.com/en/video/2016/5/Race_highlights_-_Spain_2016.html', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|   | ||||
| @@ -43,14 +43,14 @@ class FourTubeIE(InfoExtractor): | ||||
|             'uploadDate', webpage)) | ||||
|         thumbnail = self._html_search_meta('thumbnailUrl', webpage) | ||||
|         uploader_id = self._html_search_regex( | ||||
|             r'<a class="img-avatar" href="[^"]+/channels/([^/"]+)" title="Go to [^"]+ page">', | ||||
|             r'<a class="item-to-subscribe" href="[^"]+/channels/([^/"]+)" title="Go to [^"]+ page">', | ||||
|             webpage, 'uploader id', fatal=False) | ||||
|         uploader = self._html_search_regex( | ||||
|             r'<a class="img-avatar" href="[^"]+/channels/[^/"]+" title="Go to ([^"]+) page">', | ||||
|             r'<a class="item-to-subscribe" href="[^"]+/channels/[^/"]+" title="Go to ([^"]+) page">', | ||||
|             webpage, 'uploader', fatal=False) | ||||
|  | ||||
|         categories_html = self._search_regex( | ||||
|             r'(?s)><i class="icon icon-tag"></i>\s*Categories / Tags\s*.*?<ul class="list">(.*?)</ul>', | ||||
|             r'(?s)><i class="icon icon-tag"></i>\s*Categories / Tags\s*.*?<ul class="[^"]*?list[^"]*?">(.*?)</ul>', | ||||
|             webpage, 'categories', fatal=False) | ||||
|         categories = None | ||||
|         if categories_html: | ||||
| @@ -59,10 +59,10 @@ class FourTubeIE(InfoExtractor): | ||||
|                     r'(?s)<li><a.*?>(.*?)</a>', categories_html)] | ||||
|  | ||||
|         view_count = str_to_int(self._search_regex( | ||||
|             r'<meta itemprop="interactionCount" content="UserPlays:([0-9,]+)">', | ||||
|             r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserPlays:([0-9,]+)">', | ||||
|             webpage, 'view count', fatal=False)) | ||||
|         like_count = str_to_int(self._search_regex( | ||||
|             r'<meta itemprop="interactionCount" content="UserLikes:([0-9,]+)">', | ||||
|             r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserLikes:([0-9,]+)">', | ||||
|             webpage, 'like count', fatal=False)) | ||||
|         duration = parse_duration(self._html_search_meta('duration', webpage)) | ||||
|  | ||||
|   | ||||
| @@ -2,7 +2,10 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import smuggle_url | ||||
| from ..utils import ( | ||||
|     smuggle_url, | ||||
|     update_url_query, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class FOXIE(InfoExtractor): | ||||
| @@ -29,11 +32,12 @@ class FOXIE(InfoExtractor): | ||||
|  | ||||
|         release_url = self._parse_json(self._search_regex( | ||||
|             r'"fox_pdk_player"\s*:\s*({[^}]+?})', webpage, 'fox_pdk_player'), | ||||
|             video_id)['release_url'] + '&switch=http' | ||||
|             video_id)['release_url'] | ||||
|  | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'ie_key': 'ThePlatform', | ||||
|             'url': smuggle_url(release_url, {'force_smil_url': True}), | ||||
|             'url': smuggle_url(update_url_query( | ||||
|                 release_url, {'switch': 'http'}), {'force_smil_url': True}), | ||||
|             'id': video_id, | ||||
|         } | ||||
|   | ||||
| @@ -1,18 +1,24 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import itertools | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     get_element_by_id, | ||||
|     remove_end, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class FoxgayIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P<id>\d+)\.shtml' | ||||
|     _TEST = { | ||||
|         'url': 'http://foxgay.com/videos/fuck-turkish-style-2582.shtml', | ||||
|         'md5': '80d72beab5d04e1655a56ad37afe6841', | ||||
|         'md5': '344558ccfea74d33b7adbce22e577f54', | ||||
|         'info_dict': { | ||||
|             'id': '2582', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'md5:6122f7ae0fc6b21ebdf59c5e083ce25a', | ||||
|             'description': 'md5:5e51dc4405f1fd315f7927daed2ce5cf', | ||||
|             'title': 'Fuck Turkish-style', | ||||
|             'description': 'md5:6ae2d9486921891efe89231ace13ffdf', | ||||
|             'age_limit': 18, | ||||
|             'thumbnail': 're:https?://.*\.jpg$', | ||||
|         }, | ||||
| @@ -22,27 +28,35 @@ class FoxgayIE(InfoExtractor): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r'<title>(?P<title>.*?)</title>', | ||||
|             webpage, 'title', fatal=False) | ||||
|         description = self._html_search_regex( | ||||
|             r'<div class="ico_desc"><h2>(?P<description>.*?)</h2>', | ||||
|             webpage, 'description', fatal=False) | ||||
|         title = remove_end(self._html_search_regex( | ||||
|             r'<title>([^<]+)</title>', webpage, 'title'), ' - Foxgay.com') | ||||
|         description = get_element_by_id('inf_tit', webpage) | ||||
|  | ||||
|         # The default user-agent with foxgay cookies leads to pages without videos | ||||
|         self._downloader.cookiejar.clear('.foxgay.com') | ||||
|         # Find the URL for the iFrame which contains the actual video. | ||||
|         iframe_url = self._html_search_regex( | ||||
|             r'<iframe[^>]+src=([\'"])(?P<url>[^\'"]+)\1', webpage, | ||||
|             'video frame', group='url') | ||||
|         iframe = self._download_webpage( | ||||
|             self._html_search_regex(r'iframe src="(?P<frame>.*?)"', webpage, 'video frame'), | ||||
|             video_id) | ||||
|         video_url = self._html_search_regex( | ||||
|             r"v_path = '(?P<vid>http://.*?)'", iframe, 'url') | ||||
|         thumb_url = self._html_search_regex( | ||||
|             r"t_path = '(?P<thumb>http://.*?)'", iframe, 'thumbnail', fatal=False) | ||||
|             iframe_url, video_id, headers={'User-Agent': 'curl/7.50.1'}, | ||||
|             note='Downloading video frame') | ||||
|         video_data = self._parse_json(self._search_regex( | ||||
|             r'video_data\s*=\s*([^;]+);', iframe, 'video data'), video_id) | ||||
|  | ||||
|         formats = [{ | ||||
|             'url': source, | ||||
|             'height': resolution, | ||||
|         } for source, resolution in zip( | ||||
|             video_data['sources'], video_data.get('resolutions', itertools.repeat(None)))] | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'url': video_url, | ||||
|             'formats': formats, | ||||
|             'description': description, | ||||
|             'thumbnail': thumb_url, | ||||
|             'thumbnail': video_data.get('act_vid', {}).get('thumb'), | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|   | ||||
| @@ -3,11 +3,13 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .amp import AMPIE | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class FoxNewsIE(AMPIE): | ||||
|     IE_NAME = 'foxnews' | ||||
|     IE_DESC = 'Fox News and Fox Business Video' | ||||
|     _VALID_URL = r'https?://(?P<host>video\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)' | ||||
|     _VALID_URL = r'https?://(?P<host>video\.(?:insider\.)?fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips', | ||||
| @@ -49,6 +51,11 @@ class FoxNewsIE(AMPIE): | ||||
|             'url': 'http://video.foxbusiness.com/v/4442309889001', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|         { | ||||
|             # From http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words | ||||
|             'url': 'http://video.insider.foxnews.com/v/video-embed.html?video_id=5099377331001&autoplay=true&share_url=http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words&share_title=Student%20Group:%20Saying%20%27Politically%20Correct,%27%20%27Trash%27%20and%20%27Lame%27%20Is%20Offensive&share=true', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -58,3 +65,76 @@ class FoxNewsIE(AMPIE): | ||||
|             'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id)) | ||||
|         info['id'] = video_id | ||||
|         return info | ||||
|  | ||||
|  | ||||
| class FoxNewsArticleIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?foxnews\.com/(?!v)([^/]+/)+(?P<id>[a-z-]+)' | ||||
|     IE_NAME = 'foxnews:article' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html', | ||||
|         'md5': '62aa5a781b308fdee212ebb6f33ae7ef', | ||||
|         'info_dict': { | ||||
|             'id': '5116295019001', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Trump and Clinton asked to defend positions on Iraq War', | ||||
|             'description': 'Veterans react on \'The Kelly File\'', | ||||
|             'timestamp': 1473299755, | ||||
|             'upload_date': '20160908', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         video_id = self._html_search_regex( | ||||
|             r'data-video-id=([\'"])(?P<id>[^\'"]+)\1', | ||||
|             webpage, 'video ID', group='id') | ||||
|         return self.url_result( | ||||
|             'http://video.foxnews.com/v/' + video_id, | ||||
|             FoxNewsIE.ie_key()) | ||||
|  | ||||
|  | ||||
| class FoxNewsInsiderIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://insider\.foxnews\.com/([^/]+/)+(?P<id>[a-z-]+)' | ||||
|     IE_NAME = 'foxnews:insider' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words', | ||||
|         'md5': 'a10c755e582d28120c62749b4feb4c0c', | ||||
|         'info_dict': { | ||||
|             'id': '5099377331001', | ||||
|             'display_id': 'univ-wisconsin-student-group-pushing-silence-certain-words', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Student Group: Saying \'Politically Correct,\' \'Trash\' and \'Lame\' Is Offensive', | ||||
|             'description': 'Is campus censorship getting out of control?', | ||||
|             'timestamp': 1472168725, | ||||
|             'upload_date': '20160825', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'add_ie': [FoxNewsIE.ie_key()], | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         embed_url = self._html_search_meta('embedUrl', webpage, 'embed URL') | ||||
|  | ||||
|         title = self._og_search_title(webpage) | ||||
|         description = self._og_search_description(webpage) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'ie_key': FoxNewsIE.ie_key(), | ||||
|             'url': embed_url, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|         } | ||||
|   | ||||
| @@ -2,104 +2,56 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     int_or_none, | ||||
|     ExtractorError, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class FranceCultureIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?franceculture\.fr/player/reecouter\?play=(?P<id>[0-9]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P<id>[^/?#&]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.franceculture.fr/player/reecouter?play=4795174', | ||||
|         'url': 'http://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks', | ||||
|         'info_dict': { | ||||
|             'id': '4795174', | ||||
|             'id': 'rendez-vous-au-pays-des-geeks', | ||||
|             'display_id': 'rendez-vous-au-pays-des-geeks', | ||||
|             'ext': 'mp3', | ||||
|             'title': 'Rendez-vous au pays des geeks', | ||||
|             'alt_title': 'Carnet nomade | 13-14', | ||||
|             'vcodec': 'none', | ||||
|             'thumbnail': 're:^https?://.*\\.jpg$', | ||||
|             'upload_date': '20140301', | ||||
|             'thumbnail': r're:^http://static\.franceculture\.fr/.*/images/player/Carnet-nomade\.jpg$', | ||||
|             'description': 'startswith:Avec :Jean-Baptiste Péretié pour son documentaire sur Arte "La revanche', | ||||
|             'timestamp': 1393700400, | ||||
|             'vcodec': 'none', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _extract_from_player(self, url, video_id): | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|  | ||||
|         video_path = self._search_regex( | ||||
|             r'<a id="player".*?href="([^"]+)"', webpage, 'video path') | ||||
|         video_url = compat_urlparse.urljoin(url, video_path) | ||||
|         timestamp = int_or_none(self._search_regex( | ||||
|             r'<a id="player".*?data-date="([0-9]+)"', | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         video_url = self._search_regex( | ||||
|             r'(?s)<div[^>]+class="[^"]*?title-zone-diffusion[^"]*?"[^>]*>.*?<a[^>]+href="([^"]+)"', | ||||
|             webpage, 'video path') | ||||
|  | ||||
|         title = self._og_search_title(webpage) | ||||
|  | ||||
|         upload_date = unified_strdate(self._search_regex( | ||||
|             '(?s)<div[^>]+class="date"[^>]*>.*?<span[^>]+class="inner"[^>]*>([^<]+)<', | ||||
|             webpage, 'upload date', fatal=False)) | ||||
|         thumbnail = self._search_regex( | ||||
|             r'<a id="player".*?>\s+<img src="([^"]+)"', | ||||
|             r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+data-pagespeed-(?:lazy|high-res)-src="([^"]+)"', | ||||
|             webpage, 'thumbnail', fatal=False) | ||||
|  | ||||
|         display_id = self._search_regex( | ||||
|             r'<span class="path-diffusion">emission-(.*?)</span>', webpage, 'display_id') | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r'<span class="title-diffusion">(.*?)</span>', webpage, 'title') | ||||
|         alt_title = self._html_search_regex( | ||||
|             r'<span class="title">(.*?)</span>', | ||||
|             webpage, 'alt_title', fatal=False) | ||||
|         description = self._html_search_regex( | ||||
|             r'<span class="description">(.*?)</span>', | ||||
|             webpage, 'description', fatal=False) | ||||
|  | ||||
|         uploader = self._html_search_regex( | ||||
|             r'(?s)<div id="emission".*?<span class="author">(.*?)</span>', | ||||
|             webpage, 'uploader', default=None) | ||||
|         vcodec = 'none' if determine_ext(video_url.lower()) == 'mp3' else None | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'id': display_id, | ||||
|             'display_id': display_id, | ||||
|             'url': video_url, | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'vcodec': vcodec, | ||||
|             'uploader': uploader, | ||||
|             'timestamp': timestamp, | ||||
|             'title': title, | ||||
|             'alt_title': alt_title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'description': description, | ||||
|             'display_id': display_id, | ||||
|             'upload_date': upload_date, | ||||
|         } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         return self._extract_from_player(url, video_id) | ||||
|  | ||||
|  | ||||
| class FranceCultureEmissionIE(FranceCultureIE): | ||||
|     _VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emission-(?P<id>[^?#]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.franceculture.fr/emission-les-carnets-de-la-creation-jean-gabriel-periot-cineaste-2015-10-13', | ||||
|         'info_dict': { | ||||
|             'title': 'Jean-Gabriel Périot, cinéaste', | ||||
|             'alt_title': 'Les Carnets de la création', | ||||
|             'id': '5093239', | ||||
|             'display_id': 'les-carnets-de-la-creation-jean-gabriel-periot-cineaste-2015-10-13', | ||||
|             'ext': 'mp3', | ||||
|             'timestamp': 1444762500, | ||||
|             'upload_date': '20151013', | ||||
|             'description': 'startswith:Aujourd\'hui dans "Les carnets de la création", le cinéaste', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         video_path = self._html_search_regex( | ||||
|             r'<a class="rf-player-open".*?href="([^"]+)"', webpage, 'video path', 'no_path_player') | ||||
|         if video_path == 'no_path_player': | ||||
|             raise ExtractorError('no player : no sound in this page.', expected=True) | ||||
|         new_id = self._search_regex('play=(?P<id>[0-9]+)', video_path, 'new_id', group='id') | ||||
|         video_url = compat_urlparse.urljoin(url, video_path) | ||||
|         return self._extract_from_player(video_url, new_id) | ||||
|   | ||||
| @@ -2,20 +2,21 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import int_or_none | ||||
| from ..compat import compat_str | ||||
| from ..utils import month_by_name | ||||
|  | ||||
|  | ||||
| class FranceInterIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/emissions/(?P<id>[^?#]+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.franceinter.fr/player/reecouter?play=793962', | ||||
|         'url': 'https://www.franceinter.fr/emissions/la-marche-de-l-histoire/la-marche-de-l-histoire-18-decembre-2013', | ||||
|         'md5': '4764932e466e6f6c79c317d2e74f6884', | ||||
|         'info_dict': { | ||||
|             'id': '793962', | ||||
|             'id': 'la-marche-de-l-histoire/la-marche-de-l-histoire-18-decembre-2013', | ||||
|             'ext': 'mp3', | ||||
|             'title': 'L’Histoire dans les jeux vidéo', | ||||
|             'description': 'md5:7e93ddb4451e7530022792240a3049c7', | ||||
|             'timestamp': 1387369800, | ||||
|             'title': 'L’Histoire dans les jeux vidéo du 18 décembre 2013 - France Inter', | ||||
|             'description': 'md5:7f2ce449894d1e585932273080fb410d', | ||||
|             'upload_date': '20131218', | ||||
|         }, | ||||
|     } | ||||
| @@ -25,23 +26,29 @@ class FranceInterIE(InfoExtractor): | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         path = self._search_regex( | ||||
|             r'<a id="player".+?href="([^"]+)"', webpage, 'video url') | ||||
|         video_url = 'http://www.franceinter.fr/' + path | ||||
|         video_url = self._search_regex( | ||||
|             r'(?s)<div[^>]+class=["\']page-diffusion["\'][^>]*>.*?<button[^>]+data-url=(["\'])(?P<url>(?:(?!\1).)+)\1', | ||||
|             webpage, 'video url', group='url') | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r'<span class="title-diffusion">(.+?)</span>', webpage, 'title') | ||||
|         description = self._html_search_regex( | ||||
|             r'<span class="description">(.*?)</span>', | ||||
|             webpage, 'description', fatal=False) | ||||
|         timestamp = int_or_none(self._search_regex( | ||||
|             r'data-date="(\d+)"', webpage, 'upload date', fatal=False)) | ||||
|         title = self._og_search_title(webpage) | ||||
|         description = self._og_search_description(webpage) | ||||
|  | ||||
|         upload_date_str = self._search_regex( | ||||
|             r'class=["\']cover-emission-period["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<', | ||||
|             webpage, 'upload date', fatal=False) | ||||
|         if upload_date_str: | ||||
|             upload_date_list = upload_date_str.split() | ||||
|             upload_date_list.reverse() | ||||
|             upload_date_list[1] = compat_str(month_by_name(upload_date_list[1], lang='fr')) | ||||
|             upload_date = ''.join(upload_date_list) | ||||
|         else: | ||||
|             upload_date = None | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'timestamp': timestamp, | ||||
|             'upload_date': upload_date, | ||||
|             'formats': [{ | ||||
|                 'url': video_url, | ||||
|                 'vcodec': 'none', | ||||
|   | ||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user