mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2025-10-29 09:26:20 -07:00
Compare commits
113 Commits
2016.06.04
...
2016.06.11
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
33751818d3 | ||
|
|
698f127c1a | ||
|
|
fe458b6596 | ||
|
|
21ac1a8ac3 | ||
|
|
79027c0ea0 | ||
|
|
4cad2929cd | ||
|
|
62666af99f | ||
|
|
9ddc289f88 | ||
|
|
6626c214e1 | ||
|
|
d845622b2e | ||
|
|
1058f56e96 | ||
|
|
0434358823 | ||
|
|
3841256c2c | ||
|
|
bdf16f8140 | ||
|
|
836ab0c554 | ||
|
|
6c0376fe4f | ||
|
|
1fa309da40 | ||
|
|
daa0df9e8b | ||
|
|
09728d5fbc | ||
|
|
c16f8a4659 | ||
|
|
a225238530 | ||
|
|
55b2f099c0 | ||
|
|
9631a94fb5 | ||
|
|
cc4444662c | ||
|
|
de3eb07ed6 | ||
|
|
5de008e8c3 | ||
|
|
3e74b444e7 | ||
|
|
e1e0a10c56 | ||
|
|
436214baf7 | ||
|
|
506d0e9693 | ||
|
|
55290788d3 | ||
|
|
bc7e7adf51 | ||
|
|
b0aebe702c | ||
|
|
416878f41f | ||
|
|
c0fed3bda5 | ||
|
|
bb1e44cc8e | ||
|
|
21efee5f8b | ||
|
|
e2713d32f4 | ||
|
|
e21c26daf9 | ||
|
|
1594a4932f | ||
|
|
6869d634c6 | ||
|
|
50918c4ee0 | ||
|
|
6c33d24b46 | ||
|
|
be6217b261 | ||
|
|
9d51a0a9a1 | ||
|
|
39da509f67 | ||
|
|
a479b8f687 | ||
|
|
48a5eabc48 | ||
|
|
11380753b5 | ||
|
|
411c590a1f | ||
|
|
6da8d7de69 | ||
|
|
c6308b3153 | ||
|
|
fc0a45fa41 | ||
|
|
e6e90515db | ||
|
|
22a0a95247 | ||
|
|
50ce1c331c | ||
|
|
7264e38591 | ||
|
|
33d9f3707c | ||
|
|
a26a9d6239 | ||
|
|
a4a8201c02 | ||
|
|
a6571f1073 | ||
|
|
57b6e9652e | ||
|
|
3d9b3605a3 | ||
|
|
74193838f7 | ||
|
|
fb94e260b5 | ||
|
|
345dec937f | ||
|
|
4315f74fa8 | ||
|
|
e67f688025 | ||
|
|
db59b37d0b | ||
|
|
244fe977fe | ||
|
|
7b0d1c2859 | ||
|
|
21d0a8e48b | ||
|
|
47f12ad3e3 | ||
|
|
8f1aaa97a1 | ||
|
|
9d78524cbe | ||
|
|
bc270284b5 | ||
|
|
c93b4eaceb | ||
|
|
71b9cb3107 | ||
|
|
633b444fd2 | ||
|
|
51c4d85ce7 | ||
|
|
631d4c87ee | ||
|
|
1e236d7e23 | ||
|
|
2c34735267 | ||
|
|
39b32571df | ||
|
|
db56f281d9 | ||
|
|
e92b552a10 | ||
|
|
1ae6c83bce | ||
|
|
0fc832e1b2 | ||
|
|
7def35712a | ||
|
|
cad88f96dc | ||
|
|
762d44c956 | ||
|
|
4d8856d511 | ||
|
|
c917106be4 | ||
|
|
76e9cd7f24 | ||
|
|
e7d85c4ef7 | ||
|
|
3a686853e1 | ||
|
|
949fc42e00 | ||
|
|
33a1ff7113 | ||
|
|
bec2c14f2c | ||
|
|
37f972954d | ||
|
|
3874e6ea66 | ||
|
|
93fdb14177 | ||
|
|
370d4eb8ad | ||
|
|
3452c3a27c | ||
|
|
81f35fee2f | ||
|
|
0fdbe3146c | ||
|
|
8d93c21466 | ||
|
|
1dbfd78754 | ||
|
|
22e35adefd | ||
|
|
833b644fff | ||
|
|
57cf9b7f06 | ||
|
|
14f7a2b8af | ||
|
|
c0837a12c8 |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.03*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.11.2*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.03**
|
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.11.2**
|
||||||
|
|
||||||
### Before submitting an *issue* make sure you have:
|
### Before submitting an *issue* make sure you have:
|
||||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2016.06.03
|
[debug] youtube-dl version 2016.06.11.2
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
|||||||
25
README.md
25
README.md
@@ -17,7 +17,7 @@ youtube-dl - download videos from youtube.com or other video platforms
|
|||||||
|
|
||||||
To install it right away for all UNIX users (Linux, OS X, etc.), type:
|
To install it right away for all UNIX users (Linux, OS X, etc.), type:
|
||||||
|
|
||||||
sudo curl https://yt-dl.org/latest/youtube-dl -o /usr/local/bin/youtube-dl
|
sudo curl -L https://yt-dl.org/latest/youtube-dl -o /usr/local/bin/youtube-dl
|
||||||
sudo chmod a+rx /usr/local/bin/youtube-dl
|
sudo chmod a+rx /usr/local/bin/youtube-dl
|
||||||
|
|
||||||
If you do not have curl, you can alternatively use a recent wget:
|
If you do not have curl, you can alternatively use a recent wget:
|
||||||
@@ -27,13 +27,19 @@ If you do not have curl, you can alternatively use a recent wget:
|
|||||||
|
|
||||||
Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`).
|
Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`).
|
||||||
|
|
||||||
OS X users can install **youtube-dl** with [Homebrew](http://brew.sh/).
|
You can also use pip:
|
||||||
|
|
||||||
|
sudo pip install --upgrade youtube-dl
|
||||||
|
|
||||||
|
This command will update youtube-dl if you have already installed it. See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information.
|
||||||
|
|
||||||
|
OS X users can install youtube-dl with [Homebrew](http://brew.sh/):
|
||||||
|
|
||||||
brew install youtube-dl
|
brew install youtube-dl
|
||||||
|
|
||||||
You can also use pip:
|
Or with [MacPorts](https://www.macports.org/):
|
||||||
|
|
||||||
sudo pip install youtube-dl
|
sudo port install youtube-dl
|
||||||
|
|
||||||
Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://rg3.github.io/youtube-dl/download.html).
|
Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://rg3.github.io/youtube-dl/download.html).
|
||||||
|
|
||||||
@@ -249,7 +255,7 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
--write-info-json Write video metadata to a .info.json file
|
--write-info-json Write video metadata to a .info.json file
|
||||||
--write-annotations Write video annotations to a
|
--write-annotations Write video annotations to a
|
||||||
.annotations.xml file
|
.annotations.xml file
|
||||||
--load-info FILE JSON file containing the video information
|
--load-info-json FILE JSON file containing the video information
|
||||||
(created with the "--write-info-json"
|
(created with the "--write-info-json"
|
||||||
option)
|
option)
|
||||||
--cookies FILE File to read cookies from and dump cookie
|
--cookies FILE File to read cookies from and dump cookie
|
||||||
@@ -505,6 +511,9 @@ The basic usage is not to set any template arguments when downloading a single f
|
|||||||
- `autonumber`: Five-digit number that will be increased with each download, starting at zero
|
- `autonumber`: Five-digit number that will be increased with each download, starting at zero
|
||||||
- `playlist`: Name or id of the playlist that contains the video
|
- `playlist`: Name or id of the playlist that contains the video
|
||||||
- `playlist_index`: Index of the video in the playlist padded with leading zeros according to the total length of the playlist
|
- `playlist_index`: Index of the video in the playlist padded with leading zeros according to the total length of the playlist
|
||||||
|
- `playlist_id`: Playlist identifier
|
||||||
|
- `playlist_title`: Playlist title
|
||||||
|
|
||||||
|
|
||||||
Available for the video that belongs to some logical chapter or section:
|
Available for the video that belongs to some logical chapter or section:
|
||||||
- `chapter`: Name or title of the chapter the video belongs to
|
- `chapter`: Name or title of the chapter the video belongs to
|
||||||
@@ -842,6 +851,12 @@ It is *not* possible to detect whether a URL is supported or not. That's because
|
|||||||
|
|
||||||
If you want to find out whether a given URL is supported, simply call youtube-dl with it. If you get no videos back, chances are the URL is either not referring to a video or unsupported. You can find out which by examining the output (if you run youtube-dl on the console) or catching an `UnsupportedError` exception if you run it from a Python program.
|
If you want to find out whether a given URL is supported, simply call youtube-dl with it. If you get no videos back, chances are the URL is either not referring to a video or unsupported. You can find out which by examining the output (if you run youtube-dl on the console) or catching an `UnsupportedError` exception if you run it from a Python program.
|
||||||
|
|
||||||
|
# Why do I need to go through that much red tape when filing bugs?
|
||||||
|
|
||||||
|
Before we had the issue template, despite our extensive [bug reporting instructions](#bugs), about 80% of the issue reports we got were useless, for instance because people used ancient versions hundreds of releases old, because of simple syntactic errors (not in youtube-dl but in general shell usage), because the problem was already reported multiple times before, because people did not actually read an error message, even if it said "please install ffmpeg", because people did not mention the URL they were trying to download and many more simple, easy-to-avoid problems, many of which were totally unrelated to youtube-dl.
|
||||||
|
|
||||||
|
youtube-dl is an open-source project manned by too few volunteers, so we'd rather spend time fixing bugs where we are certain none of those simple problems apply, and where we can be reasonably confident to be able to reproduce the issue without asking the reporter repeatedly. As such, the output of `youtube-dl -v YOUR_URL_HERE` is really all that's required to file an issue. The issue template also guides you through some basic steps you can do, such as checking that your version of youtube-dl is current.
|
||||||
|
|
||||||
# DEVELOPER INSTRUCTIONS
|
# DEVELOPER INSTRUCTIONS
|
||||||
|
|
||||||
Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
|
Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ import os.path
|
|||||||
|
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname((os.path.abspath(__file__)))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname((os.path.abspath(__file__)))))
|
||||||
from youtube_dl.compat import (
|
from youtube_dl.compat import (
|
||||||
|
compat_input,
|
||||||
compat_http_server,
|
compat_http_server,
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
@@ -30,11 +31,6 @@ try:
|
|||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
import SocketServer as compat_socketserver
|
import SocketServer as compat_socketserver
|
||||||
|
|
||||||
try:
|
|
||||||
compat_input = raw_input
|
|
||||||
except NameError: # Python 3
|
|
||||||
compat_input = input
|
|
||||||
|
|
||||||
|
|
||||||
class BuildHTTPServer(compat_socketserver.ThreadingMixIn, compat_http_server.HTTPServer):
|
class BuildHTTPServer(compat_socketserver.ThreadingMixIn, compat_http_server.HTTPServer):
|
||||||
allow_reuse_address = True
|
allow_reuse_address = True
|
||||||
|
|||||||
111
devscripts/create-github-release.py
Normal file
111
devscripts/create-github-release.py
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import json
|
||||||
|
import mimetypes
|
||||||
|
import netrc
|
||||||
|
import optparse
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
from youtube_dl.compat import (
|
||||||
|
compat_basestring,
|
||||||
|
compat_input,
|
||||||
|
compat_getpass,
|
||||||
|
compat_print,
|
||||||
|
compat_urllib_request,
|
||||||
|
)
|
||||||
|
from youtube_dl.utils import (
|
||||||
|
make_HTTPS_handler,
|
||||||
|
sanitized_Request,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class GitHubReleaser(object):
    """Small client for the GitHub releases API of the rg3/youtube-dl repository.

    Credentials are taken from the ``github.com`` entry of ``.netrc`` when one
    is found; otherwise they are requested interactively. Every request is
    sent with an explicit HTTP Basic ``Authorization`` header and the response
    body is decoded as UTF-8 JSON.
    """

    _API_URL = 'https://api.github.com/repos/rg3/youtube-dl/releases'
    # Filled with (release id, asset file name).
    _UPLOADS_URL = 'https://uploads.github.com/repos/rg3/youtube-dl/releases/%s/assets?name=%s'
    _NETRC_MACHINE = 'github.com'

    def __init__(self, debuglevel=0):
        """Obtain credentials and build the HTTPS opener used by all calls.

        ``debuglevel`` is forwarded to ``make_HTTPS_handler``.
        """
        self._init_github_account()
        https_handler = make_HTTPS_handler({}, debuglevel=debuglevel)
        self._opener = compat_urllib_request.build_opener(https_handler)

    def _init_github_account(self):
        """Set ``self._username`` / ``self._password``.

        Tries ``.netrc`` first; when it has no matching entry, cannot be read
        or cannot be parsed, falls back to prompting on the terminal.
        """
        try:
            info = netrc.netrc().authenticators(self._NETRC_MACHINE)
            if info is not None:
                # authenticators() yields (login, account, password).
                self._username = info[0]
                self._password = info[2]
                compat_print('Using GitHub credentials found in .netrc...')
                return
            else:
                compat_print('No GitHub credentials found in .netrc')
        except (IOError, netrc.NetrcParseError):
            compat_print('Unable to parse .netrc')
        self._username = compat_input(
            'Type your GitHub username or email address and press [Return]: ')
        self._password = compat_getpass(
            'Type your GitHub password and press [Return]: ')

    def _call(self, req):
        """Send ``req`` with Basic authentication and return the parsed JSON.

        ``req`` may be a URL string (wrapped with ``sanitized_Request``) or an
        already constructed request object.
        """
        if isinstance(req, compat_basestring):
            req = sanitized_Request(req)
        # Authorizing manually since GitHub does not respond with 401 with
        # WWW-Authenticate header set (see
        # https://developer.github.com/v3/#basic-authentication)
        b64 = base64.b64encode(
            ('%s:%s' % (self._username, self._password)).encode('utf-8')).decode('ascii')
        req.add_header('Authorization', 'Basic %s' % b64)
        handle = self._opener.open(req)
        try:
            response = handle.read().decode('utf-8')
        finally:
            # Release the connection even when reading or decoding fails.
            handle.close()
        return json.loads(response)

    def list_releases(self):
        """Return the decoded JSON response of a GET on the releases URL."""
        return self._call(self._API_URL)

    def create_release(self, tag_name, name=None, body='', draft=False, prerelease=False):
        """Create a release for ``tag_name`` targeting ``master``.

        The remaining arguments are sent unchanged as the ``name``, ``body``,
        ``draft`` and ``prerelease`` fields of the JSON payload. Returns the
        decoded JSON response.
        """
        data = {
            'tag_name': tag_name,
            'target_commitish': 'master',
            'name': name,
            'body': body,
            'draft': draft,
            'prerelease': prerelease,
        }
        req = sanitized_Request(self._API_URL, json.dumps(data).encode('utf-8'))
        return self._call(req)

    def create_asset(self, release_id, asset):
        """Upload the file at path ``asset`` to the release ``release_id``.

        The asset is named after the file's base name and its Content-Type is
        guessed from that name, defaulting to ``application/octet-stream``.
        Returns the decoded JSON response.
        """
        asset_name = os.path.basename(asset)
        url = self._UPLOADS_URL % (release_id, asset_name)
        # Our files are small enough to be loaded directly into memory.
        # The context manager closes the file handle deterministically.
        with open(asset, 'rb') as asset_file:
            data = asset_file.read()
        req = sanitized_Request(url, data)
        mime_type, _ = mimetypes.guess_type(asset_name)
        req.add_header('Content-Type', mime_type or 'application/octet-stream')
        return self._call(req)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Create a GitHub release for VERSION and upload each entry of BUILDPATH.

    Expects exactly two positional command-line arguments: the version and
    the build directory. Any other count is reported through the option
    parser's error handling.
    """
    arg_parser = optparse.OptionParser(usage='%prog VERSION BUILDPATH')
    _, positional = arg_parser.parse_args()
    if len(positional) != 2:
        arg_parser.error('Expected a version and a build directory')

    version, build_path = positional

    releaser = GitHubReleaser()

    created = releaser.create_release(version, name='youtube-dl %s' % version)
    release_id = created['id']

    # Every directory entry is uploaded as a separate release asset.
    for file_name in os.listdir(build_path):
        compat_print('Uploading %s...' % file_name)
        asset_path = os.path.join(build_path, file_name)
        releaser.create_asset(release_id, asset_path)


if __name__ == '__main__':
    main()
|
||||||
@@ -95,15 +95,16 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
|
|||||||
(cd build/$version/ && sha256sum $RELEASE_FILES > SHA2-256SUMS)
|
(cd build/$version/ && sha256sum $RELEASE_FILES > SHA2-256SUMS)
|
||||||
(cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS)
|
(cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS)
|
||||||
|
|
||||||
/bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..."
|
/bin/echo -e "\n### Signing and uploading the new binaries to GitHub..."
|
||||||
for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done
|
for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done
|
||||||
scp -r "build/$version" ytdl@yt-dl.org:html/tmp/
|
|
||||||
ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/"
|
ROOT=$(pwd)
|
||||||
|
python devscripts/create-github-release.py $version "$ROOT/build/$version"
|
||||||
|
|
||||||
ssh ytdl@yt-dl.org "sh html/update_latest.sh $version"
|
ssh ytdl@yt-dl.org "sh html/update_latest.sh $version"
|
||||||
|
|
||||||
/bin/echo -e "\n### Now switching to gh-pages..."
|
/bin/echo -e "\n### Now switching to gh-pages..."
|
||||||
git clone --branch gh-pages --single-branch . build/gh-pages
|
git clone --branch gh-pages --single-branch . build/gh-pages
|
||||||
ROOT=$(pwd)
|
|
||||||
(
|
(
|
||||||
set -e
|
set -e
|
||||||
ORIGIN_URL=$(git config --get remote.origin.url)
|
ORIGIN_URL=$(git config --get remote.origin.url)
|
||||||
|
|||||||
@@ -28,6 +28,7 @@
|
|||||||
- **AdobeTVVideo**
|
- **AdobeTVVideo**
|
||||||
- **AdultSwim**
|
- **AdultSwim**
|
||||||
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network
|
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network
|
||||||
|
- **AfreecaTV**: afreecatv.com
|
||||||
- **Aftonbladet**
|
- **Aftonbladet**
|
||||||
- **AirMozilla**
|
- **AirMozilla**
|
||||||
- **AlJazeera**
|
- **AlJazeera**
|
||||||
@@ -43,8 +44,8 @@
|
|||||||
- **appletrailers:section**
|
- **appletrailers:section**
|
||||||
- **archive.org**: archive.org videos
|
- **archive.org**: archive.org videos
|
||||||
- **ARD**
|
- **ARD**
|
||||||
- **ARD:mediathek**: Saarländischer Rundfunk
|
|
||||||
- **ARD:mediathek**
|
- **ARD:mediathek**
|
||||||
|
- **ARD:mediathek**: Saarländischer Rundfunk
|
||||||
- **arte.tv**
|
- **arte.tv**
|
||||||
- **arte.tv:+7**
|
- **arte.tv:+7**
|
||||||
- **arte.tv:cinema**
|
- **arte.tv:cinema**
|
||||||
@@ -253,6 +254,7 @@
|
|||||||
- **Globo**
|
- **Globo**
|
||||||
- **GloboArticle**
|
- **GloboArticle**
|
||||||
- **GodTube**
|
- **GodTube**
|
||||||
|
- **GodTV**
|
||||||
- **GoldenMoustache**
|
- **GoldenMoustache**
|
||||||
- **Golem**
|
- **Golem**
|
||||||
- **GoogleDrive**
|
- **GoogleDrive**
|
||||||
@@ -738,6 +740,7 @@
|
|||||||
- **VideoPremium**
|
- **VideoPremium**
|
||||||
- **VideoTt**: video.tt - Your True Tube (Currently broken)
|
- **VideoTt**: video.tt - Your True Tube (Currently broken)
|
||||||
- **videoweed**: VideoWeed
|
- **videoweed**: VideoWeed
|
||||||
|
- **Vidio**
|
||||||
- **vidme**
|
- **vidme**
|
||||||
- **vidme:user**
|
- **vidme:user**
|
||||||
- **vidme:user:likes**
|
- **vidme:user:likes**
|
||||||
@@ -773,7 +776,6 @@
|
|||||||
- **VRT**
|
- **VRT**
|
||||||
- **vube**: Vube.com
|
- **vube**: Vube.com
|
||||||
- **VuClip**
|
- **VuClip**
|
||||||
- **vulture.com**
|
|
||||||
- **Walla**
|
- **Walla**
|
||||||
- **washingtonpost**
|
- **washingtonpost**
|
||||||
- **washingtonpost:article**
|
- **washingtonpost:article**
|
||||||
@@ -781,10 +783,8 @@
|
|||||||
- **WatchIndianPorn**: Watch Indian Porn
|
- **WatchIndianPorn**: Watch Indian Porn
|
||||||
- **WDR**
|
- **WDR**
|
||||||
- **wdr:mobile**
|
- **wdr:mobile**
|
||||||
- **WDRMaus**: Sendung mit der Maus
|
|
||||||
- **WebOfStories**
|
- **WebOfStories**
|
||||||
- **WebOfStoriesPlaylist**
|
- **WebOfStoriesPlaylist**
|
||||||
- **Weibo**
|
|
||||||
- **WeiqiTV**: WQTV
|
- **WeiqiTV**: WQTV
|
||||||
- **wholecloud**: WholeCloud
|
- **wholecloud**: WholeCloud
|
||||||
- **Wimp**
|
- **Wimp**
|
||||||
@@ -820,6 +820,7 @@
|
|||||||
- **Ynet**
|
- **Ynet**
|
||||||
- **YouJizz**
|
- **YouJizz**
|
||||||
- **youku**: 优酷
|
- **youku**: 优酷
|
||||||
|
- **youku:show**
|
||||||
- **YouPorn**
|
- **YouPorn**
|
||||||
- **YourUpload**
|
- **YourUpload**
|
||||||
- **youtube**: YouTube.com
|
- **youtube**: YouTube.com
|
||||||
|
|||||||
1
setup.py
1
setup.py
@@ -122,6 +122,7 @@ setup(
|
|||||||
"Programming Language :: Python :: 3.2",
|
"Programming Language :: Python :: 3.2",
|
||||||
"Programming Language :: Python :: 3.3",
|
"Programming Language :: Python :: 3.3",
|
||||||
"Programming Language :: Python :: 3.4",
|
"Programming Language :: Python :: 3.4",
|
||||||
|
"Programming Language :: Python :: 3.5",
|
||||||
],
|
],
|
||||||
|
|
||||||
cmdclass={'build_lazy_extractors': build_lazy_extractors},
|
cmdclass={'build_lazy_extractors': build_lazy_extractors},
|
||||||
|
|||||||
@@ -249,6 +249,8 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(unescapeHTML('/'), '/')
|
self.assertEqual(unescapeHTML('/'), '/')
|
||||||
self.assertEqual(unescapeHTML('é'), 'é')
|
self.assertEqual(unescapeHTML('é'), 'é')
|
||||||
self.assertEqual(unescapeHTML('�'), '�')
|
self.assertEqual(unescapeHTML('�'), '�')
|
||||||
|
# HTML5 entities
|
||||||
|
self.assertEqual(unescapeHTML('.''), '.\'')
|
||||||
|
|
||||||
def test_date_from_str(self):
|
def test_date_from_str(self):
|
||||||
self.assertEqual(date_from_str('yesterday'), date_from_str('now-1day'))
|
self.assertEqual(date_from_str('yesterday'), date_from_str('now-1day'))
|
||||||
|
|||||||
@@ -1223,6 +1223,10 @@ class YoutubeDL(object):
|
|||||||
if 'title' not in info_dict:
|
if 'title' not in info_dict:
|
||||||
raise ExtractorError('Missing "title" field in extractor result')
|
raise ExtractorError('Missing "title" field in extractor result')
|
||||||
|
|
||||||
|
if not isinstance(info_dict['id'], compat_str):
|
||||||
|
self.report_warning('"id" field is not a string - forcing string conversion')
|
||||||
|
info_dict['id'] = compat_str(info_dict['id'])
|
||||||
|
|
||||||
if 'playlist' not in info_dict:
|
if 'playlist' not in info_dict:
|
||||||
# It isn't part of a playlist
|
# It isn't part of a playlist
|
||||||
info_dict['playlist'] = None
|
info_dict['playlist'] = None
|
||||||
|
|||||||
@@ -18,7 +18,6 @@ from .options import (
|
|||||||
from .compat import (
|
from .compat import (
|
||||||
compat_expanduser,
|
compat_expanduser,
|
||||||
compat_getpass,
|
compat_getpass,
|
||||||
compat_print,
|
|
||||||
compat_shlex_split,
|
compat_shlex_split,
|
||||||
workaround_optparse_bug9161,
|
workaround_optparse_bug9161,
|
||||||
)
|
)
|
||||||
@@ -76,7 +75,7 @@ def _real_main(argv=None):
|
|||||||
|
|
||||||
# Dump user agent
|
# Dump user agent
|
||||||
if opts.dump_user_agent:
|
if opts.dump_user_agent:
|
||||||
compat_print(std_headers['User-Agent'])
|
write_string(std_headers['User-Agent'] + '\n', out=sys.stdout)
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
# Batch file verification
|
# Batch file verification
|
||||||
@@ -101,10 +100,10 @@ def _real_main(argv=None):
|
|||||||
|
|
||||||
if opts.list_extractors:
|
if opts.list_extractors:
|
||||||
for ie in list_extractors(opts.age_limit):
|
for ie in list_extractors(opts.age_limit):
|
||||||
compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))
|
write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '') + '\n', out=sys.stdout)
|
||||||
matchedUrls = [url for url in all_urls if ie.suitable(url)]
|
matchedUrls = [url for url in all_urls if ie.suitable(url)]
|
||||||
for mu in matchedUrls:
|
for mu in matchedUrls:
|
||||||
compat_print(' ' + mu)
|
write_string(' ' + mu + '\n', out=sys.stdout)
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
if opts.list_extractor_descriptions:
|
if opts.list_extractor_descriptions:
|
||||||
for ie in list_extractors(opts.age_limit):
|
for ie in list_extractors(opts.age_limit):
|
||||||
@@ -117,7 +116,7 @@ def _real_main(argv=None):
|
|||||||
_SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow')
|
_SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow')
|
||||||
_COUNTS = ('', '5', '10', 'all')
|
_COUNTS = ('', '5', '10', 'all')
|
||||||
desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
|
desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
|
||||||
compat_print(desc)
|
write_string(desc + '\n', out=sys.stdout)
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
# Conflicting, missing and erroneous options
|
# Conflicting, missing and erroneous options
|
||||||
|
|||||||
2246
youtube_dl/compat.py
2246
youtube_dl/compat.py
File diff suppressed because it is too large
Load Diff
@@ -210,6 +210,7 @@ class FFmpegFD(ExternalFD):
|
|||||||
# args += ['-http_proxy', proxy]
|
# args += ['-http_proxy', proxy]
|
||||||
env = os.environ.copy()
|
env = os.environ.copy()
|
||||||
compat_setenv('HTTP_PROXY', proxy, env=env)
|
compat_setenv('HTTP_PROXY', proxy, env=env)
|
||||||
|
compat_setenv('http_proxy', proxy, env=env)
|
||||||
|
|
||||||
protocol = info_dict.get('protocol')
|
protocol = info_dict.get('protocol')
|
||||||
|
|
||||||
|
|||||||
@@ -23,11 +23,17 @@ class HlsFD(FragmentFD):
|
|||||||
UNSUPPORTED_FEATURES = (
|
UNSUPPORTED_FEATURES = (
|
||||||
r'#EXT-X-KEY:METHOD=(?!NONE)', # encrypted streams [1]
|
r'#EXT-X-KEY:METHOD=(?!NONE)', # encrypted streams [1]
|
||||||
r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
|
r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
|
||||||
|
|
||||||
# Live streams heuristic does not always work (e.g. geo restricted to Germany
|
# Live streams heuristic does not always work (e.g. geo restricted to Germany
|
||||||
# http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0)
|
# http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0)
|
||||||
# r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3]
|
# r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3]
|
||||||
r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of
|
|
||||||
# event media playlists [4]
|
# This heuristic is not correct either, since it is also possible that no segments are appended.
|
||||||
|
# Twitch vods of finished streams have EXT-X-PLAYLIST-TYPE:EVENT despite
|
||||||
|
# the fact that no segments will be appended to the end of the playlist.
|
||||||
|
# r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of
|
||||||
|
# # event media playlists [4]
|
||||||
|
|
||||||
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
|
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
|
||||||
# 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
|
# 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
|
||||||
# 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
|
# 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
|
||||||
|
|||||||
133
youtube_dl/extractor/afreecatv.py
Normal file
133
youtube_dl/extractor/afreecatv.py
Normal file
@@ -0,0 +1,133 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
|
compat_urlparse,
|
||||||
|
)
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
xpath_element,
|
||||||
|
xpath_text,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AfreecaTVIE(InfoExtractor):
|
||||||
|
IE_DESC = 'afreecatv.com'
|
||||||
|
_VALID_URL = r'''(?x)^
|
||||||
|
https?://(?:(live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)?
|
||||||
|
(?:
|
||||||
|
/app/(?:index|read_ucc_bbs)\.cgi|
|
||||||
|
/player/[Pp]layer\.(?:swf|html))
|
||||||
|
\?.*?\bnTitleNo=(?P<id>\d+)'''
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
|
||||||
|
'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '36164052',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '데일리 에이프릴 요정들의 시상식!',
|
||||||
|
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||||
|
'uploader': 'dailyapril',
|
||||||
|
'uploader_id': 'dailyapril',
|
||||||
|
'upload_date': '20160503',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '36153164',
|
||||||
|
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
|
||||||
|
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||||
|
'uploader': 'dailyapril',
|
||||||
|
'uploader_id': 'dailyapril',
|
||||||
|
},
|
||||||
|
'playlist_count': 2,
|
||||||
|
'playlist': [{
|
||||||
|
'md5': 'd8b7c174568da61d774ef0203159bf97',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '36153164_1',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
|
||||||
|
'upload_date': '20160502',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'md5': '58f2ce7f6044e34439ab2d50612ab02b',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '36153164_2',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
|
||||||
|
'upload_date': '20160502',
|
||||||
|
},
|
||||||
|
}],
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def parse_video_key(key):
|
||||||
|
video_key = {}
|
||||||
|
m = re.match(r'^(?P<upload_date>\d{8})_\w+_(?P<part>\d+)$', key)
|
||||||
|
if m:
|
||||||
|
video_key['upload_date'] = m.group('upload_date')
|
||||||
|
video_key['part'] = m.group('part')
|
||||||
|
return video_key
|
||||||
|
|
||||||
|
def _real_extract(self, url):
    """Extract an AfreecaTV video (single file or multi-part) from *url*.

    The query string of the input URL is reused against the
    ``get_video_info.php`` endpoint, whose XML answer lists one ``<file>``
    element per video part.

    Returns an info dict: a plain video when exactly one usable file is
    found, or a ``multi_video`` result when there are several.

    Raises ExtractorError (expected) when the XML has no ``<file>`` element
    at all, or when none of the files is usable.
    """
    video_id = self._match_id(url)

    # Keep scheme/query of the original URL, only swap host and path.
    parsed_url = compat_urllib_parse_urlparse(url)
    info_url = compat_urlparse.urlunparse(parsed_url._replace(
        netloc='afbbs.afreecatv.com:8080',
        path='/api/video/get_video_info.php'))
    video_xml = self._download_xml(info_url, video_id)

    if xpath_element(video_xml, './track/video/file') is None:
        raise ExtractorError(
            'Specified AfreecaTV video does not exist', expected=True)

    title = xpath_text(video_xml, './track/title', 'title')
    uploader = xpath_text(video_xml, './track/nickname', 'uploader')
    uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id')
    duration = int_or_none(
        xpath_text(video_xml, './track/duration', 'duration'))
    thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail')

    entries = []
    for i, video_file in enumerate(video_xml.findall('./track/video/file')):
        video_key = self.parse_video_key(video_file.get('key', ''))
        if not video_key:
            continue
        video_url = video_file.text
        # An empty <file/> element carries no URL; an entry with url=None
        # would only fail later, so treat it like an unusable file.
        if not video_url:
            continue
        entries.append({
            'id': '%s_%s' % (video_id, video_key.get('part', i + 1)),
            'title': title,
            'upload_date': video_key.get('upload_date'),
            'duration': int_or_none(video_file.get('duration')),
            'url': video_url,
        })

    if not entries:
        raise ExtractorError(
            'No files found for the specified AfreecaTV video, either'
            ' the URL is incorrect or the video has been made private.',
            expected=True)

    info = {
        'id': video_id,
        'title': title,
        'uploader': uploader,
        'uploader_id': uploader_id,
        'duration': duration,
        'thumbnail': thumbnail,
    }

    if len(entries) > 1:
        info['_type'] = 'multi_video'
        info['entries'] = entries
    else:
        # Single part: fold the only entry into the top-level result.
        info['url'] = entries[0]['url']
        info['upload_date'] = entries[0].get('upload_date')

    return info
|
||||||
@@ -6,6 +6,7 @@ import time
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .soundcloud import SoundcloudIE
|
from .soundcloud import SoundcloudIE
|
||||||
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
url_basename,
|
url_basename,
|
||||||
@@ -136,7 +137,7 @@ class AudiomackAlbumIE(InfoExtractor):
|
|||||||
result[resultkey] = api_response[apikey]
|
result[resultkey] = api_response[apikey]
|
||||||
song_id = url_basename(api_response['url']).rpartition('.')[0]
|
song_id = url_basename(api_response['url']).rpartition('.')[0]
|
||||||
result['entries'].append({
|
result['entries'].append({
|
||||||
'id': api_response.get('id', song_id),
|
'id': compat_str(api_response.get('id', song_id)),
|
||||||
'uploader': api_response.get('artist'),
|
'uploader': api_response.get('artist'),
|
||||||
'title': api_response.get('title', song_id),
|
'title': api_response.get('title', song_id),
|
||||||
'url': api_response['url'],
|
'url': api_response['url'],
|
||||||
|
|||||||
@@ -102,6 +102,22 @@ class BiliBiliIE(InfoExtractor):
|
|||||||
'uploader_id': '151066',
|
'uploader_id': '151066',
|
||||||
},
|
},
|
||||||
}],
|
}],
|
||||||
|
}, {
|
||||||
|
# Missing upload time
|
||||||
|
'url': 'http://www.bilibili.com/video/av1867637/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2880301',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': '【HDTV】【喜剧】岳父岳母真难当 (2014)【法国票房冠军】',
|
||||||
|
'description': '一个信奉天主教的法国旧式传统资产阶级家庭中有四个女儿。三个女儿却分别找了阿拉伯、犹太、中国丈夫,老夫老妻唯独期盼剩下未嫁的小女儿能找一个信奉天主教的法国白人,结果没想到小女儿找了一位非裔黑人……【这次应该不会跳帧了】',
|
||||||
|
'uploader': '黑夜为猫',
|
||||||
|
'uploader_id': '610729',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# Just to test metadata extraction
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'expected_warnings': ['upload time'],
|
||||||
}]
|
}]
|
||||||
|
|
||||||
# BiliBili blocks keys from time to time. The current key is extracted from
|
# BiliBili blocks keys from time to time. The current key is extracted from
|
||||||
@@ -172,6 +188,7 @@ class BiliBiliIE(InfoExtractor):
|
|||||||
description = self._html_search_meta('description', webpage)
|
description = self._html_search_meta('description', webpage)
|
||||||
datetime_str = self._html_search_regex(
|
datetime_str = self._html_search_regex(
|
||||||
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False)
|
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False)
|
||||||
|
timestamp = None
|
||||||
if datetime_str:
|
if datetime_str:
|
||||||
timestamp = calendar.timegm(datetime.datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M').timetuple())
|
timestamp = calendar.timegm(datetime.datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M').timetuple())
|
||||||
|
|
||||||
|
|||||||
@@ -4,11 +4,11 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_urllib_parse_urlparse
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
HEADRequest,
|
HEADRequest,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
url_basename,
|
|
||||||
qualities,
|
qualities,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
)
|
)
|
||||||
@@ -16,24 +16,38 @@ from ..utils import (
|
|||||||
|
|
||||||
class CanalplusIE(InfoExtractor):
|
class CanalplusIE(InfoExtractor):
|
||||||
IE_DESC = 'canalplus.fr, piwiplus.fr and d8.tv'
|
IE_DESC = 'canalplus.fr, piwiplus.fr and d8.tv'
|
||||||
_VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv|itele\.fr)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))'
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://
|
||||||
|
(?:
|
||||||
|
(?:
|
||||||
|
(?:(?:www|m)\.)?canalplus\.fr|
|
||||||
|
(?:www\.)?piwiplus\.fr|
|
||||||
|
(?:www\.)?d8\.tv|
|
||||||
|
(?:www\.)?d17\.tv|
|
||||||
|
(?:www\.)?itele\.fr
|
||||||
|
)/(?:(?:[^/]+/)*(?P<display_id>[^/?#&]+))?(?:\?.*\bvid=(?P<vid>\d+))?|
|
||||||
|
player\.canalplus\.fr/#/(?P<id>\d+)
|
||||||
|
)
|
||||||
|
|
||||||
|
'''
|
||||||
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s?format=json'
|
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s?format=json'
|
||||||
_SITE_ID_MAP = {
|
_SITE_ID_MAP = {
|
||||||
'canalplus.fr': 'cplus',
|
'canalplus': 'cplus',
|
||||||
'piwiplus.fr': 'teletoon',
|
'piwiplus': 'teletoon',
|
||||||
'd8.tv': 'd8',
|
'd8': 'd8',
|
||||||
'itele.fr': 'itele',
|
'd17': 'd17',
|
||||||
|
'itele': 'itele',
|
||||||
}
|
}
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1263092',
|
'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1192814',
|
||||||
'md5': '12164a6f14ff6df8bd628e8ba9b10b78',
|
'md5': '41f438a4904f7664b91b4ed0dec969dc',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1263092',
|
'id': '1192814',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Le Zapping - 13/05/15',
|
'title': "L'Année du Zapping 2014 - L'Année du Zapping 2014",
|
||||||
'description': 'md5:09738c0d06be4b5d06a0940edb0da73f',
|
'description': "Toute l'année 2014 dans un Zapping exceptionnel !",
|
||||||
'upload_date': '20150513',
|
'upload_date': '20150105',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190',
|
'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190',
|
||||||
@@ -46,35 +60,45 @@ class CanalplusIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'skip': 'Only works from France',
|
'skip': 'Only works from France',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.d8.tv/d8-docs-mags/pid6589-d8-campagne-intime.html',
|
'url': 'http://www.d8.tv/d8-docs-mags/pid5198-d8-en-quete-d-actualite.html?vid=1390231',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '966289',
|
'id': '1390231',
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'Campagne intime - Documentaire exceptionnel',
|
|
||||||
'description': 'md5:d2643b799fb190846ae09c61e59a859f',
|
|
||||||
'upload_date': '20131108',
|
|
||||||
},
|
|
||||||
'skip': 'videos get deleted after a while',
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559',
|
|
||||||
'md5': '38b8f7934def74f0d6f3ba6c036a5f82',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '1213714',
|
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Aubervilliers : un lycée en colère - Le 11/02/2015 à 06h45',
|
'title': "Vacances pas chères : prix discount ou grosses dépenses ? - En quête d'actualité",
|
||||||
'description': 'md5:8216206ec53426ea6321321f3b3c16db',
|
'description': 'md5:edb6cf1cb4a1e807b5dd089e1ac8bfc6',
|
||||||
'upload_date': '20150211',
|
'upload_date': '20160512',
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.itele.fr/chroniques/invite-bruce-toussaint/thierry-solere-nicolas-sarkozy-officialisera-sa-candidature-a-la-primaire-quand-il-le-voudra-167224',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1398334',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': "L'invité de Bruce Toussaint du 07/06/2016 - ",
|
||||||
|
'description': 'md5:40ac7c9ad0feaeb6f605bad986f61324',
|
||||||
|
'upload_date': '20160607',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://m.canalplus.fr/?vid=1398231',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.d17.tv/emissions/pid8303-lolywood.html?vid=1397061',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.groupdict().get('id')
|
video_id = mobj.groupdict().get('id') or mobj.groupdict().get('vid')
|
||||||
|
|
||||||
site_id = self._SITE_ID_MAP[mobj.group('site') or 'canal']
|
site_id = self._SITE_ID_MAP[compat_urllib_parse_urlparse(url).netloc.rsplit('.', 2)[-2]]
|
||||||
|
|
||||||
# Beware, some subclasses do not define an id group
|
# Beware, some subclasses do not define an id group
|
||||||
display_id = url_basename(mobj.group('path'))
|
display_id = mobj.group('display_id') or video_id
|
||||||
|
|
||||||
if video_id is None:
|
if video_id is None:
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|||||||
@@ -20,54 +20,64 @@ class Channel9IE(InfoExtractor):
|
|||||||
'''
|
'''
|
||||||
IE_DESC = 'Channel 9'
|
IE_DESC = 'Channel 9'
|
||||||
IE_NAME = 'channel9'
|
IE_NAME = 'channel9'
|
||||||
_VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
|
_VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [{
|
||||||
{
|
'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
|
||||||
'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
|
'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
|
||||||
'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
|
'info_dict': {
|
||||||
'info_dict': {
|
'id': 'Events/TechEd/Australia/2013/KOS002',
|
||||||
'id': 'Events/TechEd/Australia/2013/KOS002',
|
'ext': 'mp4',
|
||||||
'ext': 'mp4',
|
'title': 'Developer Kick-Off Session: Stuff We Love',
|
||||||
'title': 'Developer Kick-Off Session: Stuff We Love',
|
'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
|
||||||
'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
|
'duration': 4576,
|
||||||
'duration': 4576,
|
'thumbnail': 're:http://.*\.jpg',
|
||||||
'thumbnail': 're:http://.*\.jpg',
|
'session_code': 'KOS002',
|
||||||
'session_code': 'KOS002',
|
'session_day': 'Day 1',
|
||||||
'session_day': 'Day 1',
|
'session_room': 'Arena 1A',
|
||||||
'session_room': 'Arena 1A',
|
'session_speakers': ['Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug',
|
||||||
'session_speakers': ['Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug', 'Mads Kristensen'],
|
'Mads Kristensen'],
|
||||||
},
|
|
||||||
},
|
},
|
||||||
{
|
}, {
|
||||||
'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
|
'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
|
||||||
'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
|
'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing',
|
'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Self-service BI with Power BI - nuclear testing',
|
'title': 'Self-service BI with Power BI - nuclear testing',
|
||||||
'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
|
'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
|
||||||
'duration': 1540,
|
'duration': 1540,
|
||||||
'thumbnail': 're:http://.*\.jpg',
|
'thumbnail': 're:http://.*\.jpg',
|
||||||
'authors': ['Mike Wilmot'],
|
'authors': ['Mike Wilmot'],
|
||||||
},
|
|
||||||
},
|
},
|
||||||
{
|
}, {
|
||||||
# low quality mp4 is best
|
# low quality mp4 is best
|
||||||
'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
|
'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
|
'id': 'Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Ranges for the Standard Library',
|
'title': 'Ranges for the Standard Library',
|
||||||
'description': 'md5:2e6b4917677af3728c5f6d63784c4c5d',
|
'description': 'md5:2e6b4917677af3728c5f6d63784c4c5d',
|
||||||
'duration': 5646,
|
'duration': 5646,
|
||||||
'thumbnail': 're:http://.*\.jpg',
|
'thumbnail': 're:http://.*\.jpg',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
]
|
'url': 'https://channel9.msdn.com/Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b/RSS',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b',
|
||||||
|
'title': 'Channel 9',
|
||||||
|
},
|
||||||
|
'playlist_count': 2,
|
||||||
|
}, {
|
||||||
|
'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://channel9.msdn.com/Events/Speakers/scott-hanselman/RSS?UrlSafeName=scott-hanselman',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
_RSS_URL = 'http://channel9.msdn.com/%s/RSS'
|
_RSS_URL = 'http://channel9.msdn.com/%s/RSS'
|
||||||
|
|
||||||
@@ -254,22 +264,30 @@ class Channel9IE(InfoExtractor):
|
|||||||
|
|
||||||
return self.playlist_result(contents)
|
return self.playlist_result(contents)
|
||||||
|
|
||||||
def _extract_list(self, content_path):
|
def _extract_list(self, video_id, rss_url=None):
|
||||||
rss = self._download_xml(self._RSS_URL % content_path, content_path, 'Downloading RSS')
|
if not rss_url:
|
||||||
|
rss_url = self._RSS_URL % video_id
|
||||||
|
rss = self._download_xml(rss_url, video_id, 'Downloading RSS')
|
||||||
entries = [self.url_result(session_url.text, 'Channel9')
|
entries = [self.url_result(session_url.text, 'Channel9')
|
||||||
for session_url in rss.findall('./channel/item/link')]
|
for session_url in rss.findall('./channel/item/link')]
|
||||||
title_text = rss.find('./channel/title').text
|
title_text = rss.find('./channel/title').text
|
||||||
return self.playlist_result(entries, content_path, title_text)
|
return self.playlist_result(entries, video_id, title_text)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
content_path = mobj.group('contentpath')
|
content_path = mobj.group('contentpath')
|
||||||
|
rss = mobj.group('rss')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, content_path, 'Downloading web page')
|
if rss:
|
||||||
|
return self._extract_list(content_path, url)
|
||||||
|
|
||||||
page_type_m = re.search(r'<meta name="WT.entryid" content="(?P<pagetype>[^:]+)[^"]+"/>', webpage)
|
webpage = self._download_webpage(
|
||||||
if page_type_m is not None:
|
url, content_path, 'Downloading web page')
|
||||||
page_type = page_type_m.group('pagetype')
|
|
||||||
|
page_type = self._search_regex(
|
||||||
|
r'<meta[^>]+name=(["\'])WT\.entryid\1[^>]+content=(["\'])(?P<pagetype>[^:]+).+?\2',
|
||||||
|
webpage, 'page type', default=None, group='pagetype')
|
||||||
|
if page_type:
|
||||||
if page_type == 'Entry': # Any 'item'-like page, may contain downloadable content
|
if page_type == 'Entry': # Any 'item'-like page, may contain downloadable content
|
||||||
return self._extract_entry_item(webpage, content_path)
|
return self._extract_entry_item(webpage, content_path)
|
||||||
elif page_type == 'Session': # Event session page, may contain downloadable content
|
elif page_type == 'Session': # Event session page, may contain downloadable content
|
||||||
@@ -278,6 +296,5 @@ class Channel9IE(InfoExtractor):
|
|||||||
return self._extract_list(content_path)
|
return self._extract_list(content_path)
|
||||||
else:
|
else:
|
||||||
raise ExtractorError('Unexpected WT.entryid %s' % page_type, expected=True)
|
raise ExtractorError('Unexpected WT.entryid %s' % page_type, expected=True)
|
||||||
|
|
||||||
else: # Assuming list
|
else: # Assuming list
|
||||||
return self._extract_list(content_path)
|
return self._extract_list(content_path)
|
||||||
|
|||||||
@@ -45,6 +45,7 @@ from ..utils import (
|
|||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
url_basename,
|
url_basename,
|
||||||
|
xpath_element,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
determine_protocol,
|
determine_protocol,
|
||||||
@@ -1030,7 +1031,7 @@ class InfoExtractor(object):
|
|||||||
if base_url:
|
if base_url:
|
||||||
base_url = base_url.strip()
|
base_url = base_url.strip()
|
||||||
|
|
||||||
bootstrap_info = xpath_text(
|
bootstrap_info = xpath_element(
|
||||||
manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
|
manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
|
||||||
'bootstrap info', default=None)
|
'bootstrap info', default=None)
|
||||||
|
|
||||||
@@ -1085,7 +1086,7 @@ class InfoExtractor(object):
|
|||||||
formats.append({
|
formats.append({
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'url': manifest_url,
|
'url': manifest_url,
|
||||||
'ext': 'flv' if bootstrap_info else None,
|
'ext': 'flv' if bootstrap_info is not None else None,
|
||||||
'tbr': tbr,
|
'tbr': tbr,
|
||||||
'width': width,
|
'width': width,
|
||||||
'height': height,
|
'height': height,
|
||||||
|
|||||||
@@ -35,6 +35,7 @@ class DWIE(InfoExtractor):
|
|||||||
'upload_date': '20160311',
|
'upload_date': '20160311',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
|
# DW documentaries, only last for one or two weeks
|
||||||
'url': 'http://www.dw.com/en/documentaries-welcome-to-the-90s-2016-05-21/e-19220158-9798',
|
'url': 'http://www.dw.com/en/documentaries-welcome-to-the-90s-2016-05-21/e-19220158-9798',
|
||||||
'md5': '56b6214ef463bfb9a3b71aeb886f3cf1',
|
'md5': '56b6214ef463bfb9a3b71aeb886f3cf1',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -44,6 +45,7 @@ class DWIE(InfoExtractor):
|
|||||||
'description': 'Welcome to the 90s - The Golden Decade of Hip Hop',
|
'description': 'Welcome to the 90s - The Golden Decade of Hip Hop',
|
||||||
'upload_date': '20160521',
|
'upload_date': '20160521',
|
||||||
},
|
},
|
||||||
|
'skip': 'Video removed',
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ from .adobetv import (
|
|||||||
)
|
)
|
||||||
from .adultswim import AdultSwimIE
|
from .adultswim import AdultSwimIE
|
||||||
from .aenetworks import AENetworksIE
|
from .aenetworks import AENetworksIE
|
||||||
|
from .afreecatv import AfreecaTVIE
|
||||||
from .aftonbladet import AftonbladetIE
|
from .aftonbladet import AftonbladetIE
|
||||||
from .airmozilla import AirMozillaIE
|
from .airmozilla import AirMozillaIE
|
||||||
from .aljazeera import AlJazeeraIE
|
from .aljazeera import AlJazeeraIE
|
||||||
@@ -291,6 +292,7 @@ from .globo import (
|
|||||||
GloboArticleIE,
|
GloboArticleIE,
|
||||||
)
|
)
|
||||||
from .godtube import GodTubeIE
|
from .godtube import GodTubeIE
|
||||||
|
from .godtv import GodTVIE
|
||||||
from .goldenmoustache import GoldenMoustacheIE
|
from .goldenmoustache import GoldenMoustacheIE
|
||||||
from .golem import GolemIE
|
from .golem import GolemIE
|
||||||
from .googledrive import GoogleDriveIE
|
from .googledrive import GoogleDriveIE
|
||||||
@@ -910,6 +912,7 @@ from .videomore import (
|
|||||||
)
|
)
|
||||||
from .videopremium import VideoPremiumIE
|
from .videopremium import VideoPremiumIE
|
||||||
from .videott import VideoTtIE
|
from .videott import VideoTtIE
|
||||||
|
from .vidio import VidioIE
|
||||||
from .vidme import (
|
from .vidme import (
|
||||||
VidmeIE,
|
VidmeIE,
|
||||||
VidmeUserIE,
|
VidmeUserIE,
|
||||||
@@ -955,7 +958,6 @@ from .vporn import VpornIE
|
|||||||
from .vrt import VRTIE
|
from .vrt import VRTIE
|
||||||
from .vube import VubeIE
|
from .vube import VubeIE
|
||||||
from .vuclip import VuClipIE
|
from .vuclip import VuClipIE
|
||||||
from .vulture import VultureIE
|
|
||||||
from .walla import WallaIE
|
from .walla import WallaIE
|
||||||
from .washingtonpost import (
|
from .washingtonpost import (
|
||||||
WashingtonPostIE,
|
WashingtonPostIE,
|
||||||
@@ -966,13 +968,11 @@ from .watchindianporn import WatchIndianPornIE
|
|||||||
from .wdr import (
|
from .wdr import (
|
||||||
WDRIE,
|
WDRIE,
|
||||||
WDRMobileIE,
|
WDRMobileIE,
|
||||||
WDRMausIE,
|
|
||||||
)
|
)
|
||||||
from .webofstories import (
|
from .webofstories import (
|
||||||
WebOfStoriesIE,
|
WebOfStoriesIE,
|
||||||
WebOfStoriesPlaylistIE,
|
WebOfStoriesPlaylistIE,
|
||||||
)
|
)
|
||||||
from .weibo import WeiboIE
|
|
||||||
from .weiqitv import WeiqiTVIE
|
from .weiqitv import WeiqiTVIE
|
||||||
from .wimp import WimpIE
|
from .wimp import WimpIE
|
||||||
from .wistia import WistiaIE
|
from .wistia import WistiaIE
|
||||||
@@ -1013,7 +1013,10 @@ from .yesjapan import YesJapanIE
|
|||||||
from .yinyuetai import YinYueTaiIE
|
from .yinyuetai import YinYueTaiIE
|
||||||
from .ynet import YnetIE
|
from .ynet import YnetIE
|
||||||
from .youjizz import YouJizzIE
|
from .youjizz import YouJizzIE
|
||||||
from .youku import YoukuIE
|
from .youku import (
|
||||||
|
YoukuIE,
|
||||||
|
YoukuShowIE,
|
||||||
|
)
|
||||||
from .youporn import YouPornIE
|
from .youporn import YouPornIE
|
||||||
from .yourupload import YourUploadIE
|
from .yourupload import YourUploadIE
|
||||||
from .youtube import (
|
from .youtube import (
|
||||||
|
|||||||
@@ -63,6 +63,7 @@ from .instagram import InstagramIE
|
|||||||
from .liveleak import LiveLeakIE
|
from .liveleak import LiveLeakIE
|
||||||
from .threeqsdn import ThreeQSDNIE
|
from .threeqsdn import ThreeQSDNIE
|
||||||
from .theplatform import ThePlatformIE
|
from .theplatform import ThePlatformIE
|
||||||
|
from .vessel import VesselIE
|
||||||
|
|
||||||
|
|
||||||
class GenericIE(InfoExtractor):
|
class GenericIE(InfoExtractor):
|
||||||
@@ -626,13 +627,13 @@ class GenericIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
# MTVServices embed
|
# MTVServices embed
|
||||||
{
|
{
|
||||||
'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
|
'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
|
||||||
'md5': '35727f82f58c76d996fc188f9755b0d5',
|
'md5': 'ca1aef97695ef2c1d6973256a57e5252',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
|
'id': '769f7ec0-0692-4d62-9b45-0d88074bffc1',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Review',
|
'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
|
||||||
'description': 'Mario\'s life in the fast lane has never looked so good.',
|
'description': 'Two valets share their love for movie star Liam Neesons.',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# YouTube embed via <data-embed-url="">
|
# YouTube embed via <data-embed-url="">
|
||||||
@@ -1031,6 +1032,17 @@ class GenericIE(InfoExtractor):
|
|||||||
'timestamp': 1389118457,
|
'timestamp': 1389118457,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
# NBC News embed
|
||||||
|
{
|
||||||
|
'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
|
||||||
|
'md5': '1aa589c675898ae6d37a17913cf68d66',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '701714499682',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'PREVIEW: On Assignment: David Letterman',
|
||||||
|
'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
|
||||||
|
},
|
||||||
|
},
|
||||||
# UDN embed
|
# UDN embed
|
||||||
{
|
{
|
||||||
'url': 'https://video.udn.com/news/300346',
|
'url': 'https://video.udn.com/news/300346',
|
||||||
@@ -1533,6 +1545,11 @@ class GenericIE(InfoExtractor):
|
|||||||
if tp_urls:
|
if tp_urls:
|
||||||
return _playlist_from_matches(tp_urls, ie='ThePlatform')
|
return _playlist_from_matches(tp_urls, ie='ThePlatform')
|
||||||
|
|
||||||
|
# Look for Vessel embeds
|
||||||
|
vessel_urls = VesselIE._extract_urls(webpage)
|
||||||
|
if vessel_urls:
|
||||||
|
return _playlist_from_matches(vessel_urls, ie=VesselIE.ie_key())
|
||||||
|
|
||||||
# Look for embedded rtl.nl player
|
# Look for embedded rtl.nl player
|
||||||
matches = re.findall(
|
matches = re.findall(
|
||||||
r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
|
r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
|
||||||
@@ -1840,14 +1857,6 @@ class GenericIE(InfoExtractor):
|
|||||||
url = unescapeHTML(mobj.group('url'))
|
url = unescapeHTML(mobj.group('url'))
|
||||||
return self.url_result(url)
|
return self.url_result(url)
|
||||||
|
|
||||||
# Look for embedded vulture.com player
|
|
||||||
mobj = re.search(
|
|
||||||
r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
|
|
||||||
webpage)
|
|
||||||
if mobj is not None:
|
|
||||||
url = unescapeHTML(mobj.group('url'))
|
|
||||||
return self.url_result(url, ie='Vulture')
|
|
||||||
|
|
||||||
# Look for embedded mtvservices player
|
# Look for embedded mtvservices player
|
||||||
mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
|
mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
|
||||||
if mtvservices_url:
|
if mtvservices_url:
|
||||||
@@ -1960,6 +1969,12 @@ class GenericIE(InfoExtractor):
|
|||||||
if nbc_sports_url:
|
if nbc_sports_url:
|
||||||
return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
|
return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
|
||||||
|
|
||||||
|
# Look for NBC News embeds
|
||||||
|
nbc_news_embed_url = re.search(
|
||||||
|
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1', webpage)
|
||||||
|
if nbc_news_embed_url:
|
||||||
|
return self.url_result(nbc_news_embed_url.group('url'), 'NBCNews')
|
||||||
|
|
||||||
# Look for Google Drive embeds
|
# Look for Google Drive embeds
|
||||||
google_drive_url = GoogleDriveIE._extract_url(webpage)
|
google_drive_url = GoogleDriveIE._extract_url(webpage)
|
||||||
if google_drive_url:
|
if google_drive_url:
|
||||||
|
|||||||
66
youtube_dl/extractor/godtv.py
Normal file
66
youtube_dl/extractor/godtv.py
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from .ooyala import OoyalaIE
|
||||||
|
from ..utils import js_to_json
|
||||||
|
|
||||||
|
|
||||||
|
class GodTVIE(InfoExtractor):
    """Extractor for god.tv pages, which embed Ooyala videos.

    The Ooyala content id(s) are read from the ``Drupal.settings`` blob on
    the page; playlist pages yield a playlist of Ooyala results, everything
    else a single Ooyala result.
    """
    _VALID_URL = r'https?://(?:www\.)?god\.tv(?:/[^/]+)*/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://god.tv/jesus-image/video/jesus-conference-2016/randy-needham',
        'info_dict': {
            'id': 'lpd3g2MzE6D1g8zFAKz8AGpxWcpu6o_3',
            'ext': 'mp4',
            'title': 'Randy Needham',
            'duration': 3615.08,
        },
        'params': {
            'skip_download': True,
        }
    }, {
        'url': 'http://god.tv/playlist/bible-study',
        'info_dict': {
            'id': 'bible-study',
        },
        'playlist_mincount': 37,
    }, {
        'url': 'http://god.tv/node/15097',
        'only_matching': True,
    }, {
        'url': 'http://god.tv/live/africa',
        'only_matching': True,
    }, {
        'url': 'http://god.tv/liveevents',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return an Ooyala url result, or a playlist of them, for *url*."""
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        # Non-fatal: when the settings blob is absent or unparsable we
        # still get a chance with the plain regex fallback below.
        settings = self._parse_json(
            self._search_regex(
                r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
                webpage, 'settings', default='{}'),
            display_id, transform_source=js_to_json, fatal=False)

        ooyala_id = None

        if isinstance(settings, dict):
            playlist = settings.get('playlist')
            if playlist and isinstance(playlist, list):
                # Skip malformed items instead of crashing on .get().
                entries = [
                    OoyalaIE._build_url_result(video['content_id'])
                    for video in playlist
                    if isinstance(video, dict) and video.get('content_id')]
                if entries:
                    return self.playlist_result(entries, display_id)
            # 'ooyala' may be present but null / not an object; only
            # dereference it when it really is a dict.
            ooyala = settings.get('ooyala')
            if isinstance(ooyala, dict):
                ooyala_id = ooyala.get('content_id')

        if not ooyala_id:
            ooyala_id = self._search_regex(
                r'["\']content_id["\']\s*:\s*(["\'])(?P<id>[\w-]+)\1',
                webpage, 'ooyala id', group='id')

        return OoyalaIE._build_url_result(ooyala_id)
|
||||||
@@ -60,7 +60,8 @@ class IndavideoEmbedIE(InfoExtractor):
|
|||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'height': self._search_regex(r'\.(\d{3,4})\.mp4$', video_url, 'height', default=None),
|
'height': int_or_none(self._search_regex(
|
||||||
|
r'\.(\d{3,4})\.mp4(?:\?|$)', video_url, 'height', default=None)),
|
||||||
} for video_url in video_urls]
|
} for video_url in video_urls]
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
|||||||
@@ -1,20 +1,24 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
parse_filesize,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class LibraryOfCongressIE(InfoExtractor):
|
class LibraryOfCongressIE(InfoExtractor):
|
||||||
IE_NAME = 'loc'
|
IE_NAME = 'loc'
|
||||||
IE_DESC = 'Library of Congress'
|
IE_DESC = 'Library of Congress'
|
||||||
_VALID_URL = r'https?://(?:www\.)?loc\.gov/item/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?loc\.gov/(?:item/|today/cyberlc/feature_wdesc\.php\?.*\brec=)(?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
|
# embedded via <div class="media-player"
|
||||||
'url': 'http://loc.gov/item/90716351/',
|
'url': 'http://loc.gov/item/90716351/',
|
||||||
'md5': '353917ff7f0255aa6d4b80a034833de8',
|
'md5': '353917ff7f0255aa6d4b80a034833de8',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -25,7 +29,35 @@ class LibraryOfCongressIE(InfoExtractor):
|
|||||||
'duration': 0,
|
'duration': 0,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
# webcast embedded via mediaObjectId
|
||||||
|
'url': 'https://www.loc.gov/today/cyberlc/feature_wdesc.php?rec=5578',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5578',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Help! Preservation Training Needs Here, There & Everywhere',
|
||||||
|
'duration': 3765,
|
||||||
|
'view_count': int,
|
||||||
|
'subtitles': 'mincount:1',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# with direct download links
|
||||||
|
'url': 'https://www.loc.gov/item/78710669/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '78710669',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'La vie et la passion de Jesus-Christ',
|
||||||
|
'duration': 0,
|
||||||
|
'view_count': int,
|
||||||
|
'formats': 'mincount:4',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
@@ -34,18 +66,20 @@ class LibraryOfCongressIE(InfoExtractor):
|
|||||||
media_id = self._search_regex(
|
media_id = self._search_regex(
|
||||||
(r'id=(["\'])media-player-(?P<id>.+?)\1',
|
(r'id=(["\'])media-player-(?P<id>.+?)\1',
|
||||||
r'<video[^>]+id=(["\'])uuid-(?P<id>.+?)\1',
|
r'<video[^>]+id=(["\'])uuid-(?P<id>.+?)\1',
|
||||||
r'<video[^>]+data-uuid=(["\'])(?P<id>.+?)\1'),
|
r'<video[^>]+data-uuid=(["\'])(?P<id>.+?)\1',
|
||||||
|
r'mediaObjectId\s*:\s*(["\'])(?P<id>.+?)\1'),
|
||||||
webpage, 'media id', group='id')
|
webpage, 'media id', group='id')
|
||||||
|
|
||||||
data = self._parse_json(
|
data = self._download_json(
|
||||||
self._download_webpage(
|
'https://media.loc.gov/services/v1/media?id=%s&context=json' % media_id,
|
||||||
'https://media.loc.gov/services/v1/media?id=%s&context=json' % media_id,
|
|
||||||
video_id),
|
|
||||||
video_id)['mediaObject']
|
video_id)['mediaObject']
|
||||||
|
|
||||||
derivative = data['derivatives'][0]
|
derivative = data['derivatives'][0]
|
||||||
media_url = derivative['derivativeUrl']
|
media_url = derivative['derivativeUrl']
|
||||||
|
|
||||||
|
title = derivative.get('shortName') or data.get('shortName') or self._og_search_title(
|
||||||
|
webpage)
|
||||||
|
|
||||||
# Following algorithm was extracted from setAVSource js function
|
# Following algorithm was extracted from setAVSource js function
|
||||||
# found in webpage
|
# found in webpage
|
||||||
media_url = media_url.replace('rtmp', 'https')
|
media_url = media_url.replace('rtmp', 'https')
|
||||||
@@ -61,6 +95,7 @@ class LibraryOfCongressIE(InfoExtractor):
|
|||||||
'format_id': 'hls',
|
'format_id': 'hls',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'protocol': 'm3u8_native',
|
'protocol': 'm3u8_native',
|
||||||
|
'quality': 1,
|
||||||
}]
|
}]
|
||||||
elif 'vod/mp3:' in media_url:
|
elif 'vod/mp3:' in media_url:
|
||||||
formats = [{
|
formats = [{
|
||||||
@@ -68,17 +103,41 @@ class LibraryOfCongressIE(InfoExtractor):
|
|||||||
'vcodec': 'none',
|
'vcodec': 'none',
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
download_urls = set()
|
||||||
|
for m in re.finditer(
|
||||||
|
r'<option[^>]+value=(["\'])(?P<url>.+?)\1[^>]+data-file-download=[^>]+>\s*(?P<id>.+?)(?:(?: |\s+)\((?P<size>.+?)\))?\s*<', webpage):
|
||||||
|
format_id = m.group('id').lower()
|
||||||
|
if format_id == 'gif':
|
||||||
|
continue
|
||||||
|
download_url = m.group('url')
|
||||||
|
if download_url in download_urls:
|
||||||
|
continue
|
||||||
|
download_urls.add(download_url)
|
||||||
|
formats.append({
|
||||||
|
'url': download_url,
|
||||||
|
'format_id': format_id,
|
||||||
|
'filesize_approx': parse_filesize(m.group('size')),
|
||||||
|
})
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
title = derivative.get('shortName') or data.get('shortName') or self._og_search_title(webpage)
|
|
||||||
duration = float_or_none(data.get('duration'))
|
duration = float_or_none(data.get('duration'))
|
||||||
view_count = int_or_none(data.get('viewCount'))
|
view_count = int_or_none(data.get('viewCount'))
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
cc_url = data.get('ccUrl')
|
||||||
|
if cc_url:
|
||||||
|
subtitles.setdefault('en', []).append({
|
||||||
|
'url': cc_url,
|
||||||
|
'ext': 'ttml',
|
||||||
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -98,13 +98,19 @@ class LimelightBaseIE(InfoExtractor):
|
|||||||
} for thumbnail in properties.get('thumbnails', []) if thumbnail.get('url')]
|
} for thumbnail in properties.get('thumbnails', []) if thumbnail.get('url')]
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
for caption in properties.get('captions', {}):
|
for caption in properties.get('captions', []):
|
||||||
lang = caption.get('language_code')
|
lang = caption.get('language_code')
|
||||||
subtitles_url = caption.get('url')
|
subtitles_url = caption.get('url')
|
||||||
if lang and subtitles_url:
|
if lang and subtitles_url:
|
||||||
subtitles[lang] = [{
|
subtitles.setdefault(lang, []).append({
|
||||||
'url': subtitles_url,
|
'url': subtitles_url,
|
||||||
}]
|
})
|
||||||
|
closed_captions_url = properties.get('closed_captions_url')
|
||||||
|
if closed_captions_url:
|
||||||
|
subtitles.setdefault('en', []).append({
|
||||||
|
'url': closed_captions_url,
|
||||||
|
'ext': 'ttml',
|
||||||
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
@@ -123,7 +129,18 @@ class LimelightBaseIE(InfoExtractor):
|
|||||||
|
|
||||||
class LimelightMediaIE(LimelightBaseIE):
|
class LimelightMediaIE(LimelightBaseIE):
|
||||||
IE_NAME = 'limelight'
|
IE_NAME = 'limelight'
|
||||||
_VALID_URL = r'(?:limelight:media:|https?://link\.videoplatform\.limelight\.com/media/\??\bmediaId=)(?P<id>[a-z0-9]{32})'
|
_VALID_URL = r'''(?x)
|
||||||
|
(?:
|
||||||
|
limelight:media:|
|
||||||
|
https?://
|
||||||
|
(?:
|
||||||
|
link\.videoplatform\.limelight\.com/media/|
|
||||||
|
assets\.delvenetworks\.com/player/loader\.swf
|
||||||
|
)
|
||||||
|
\?.*?\bmediaId=
|
||||||
|
)
|
||||||
|
(?P<id>[a-z0-9]{32})
|
||||||
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86',
|
'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -158,6 +175,9 @@ class LimelightMediaIE(LimelightBaseIE):
|
|||||||
# rtmp download
|
# rtmp download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://assets.delvenetworks.com/player/loader.swf?mediaId=8018a574f08d416e95ceaccae4ba0452',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
_PLAYLIST_SERVICE_PATH = 'media'
|
_PLAYLIST_SERVICE_PATH = 'media'
|
||||||
_API_PATH = 'media'
|
_API_PATH = 'media'
|
||||||
@@ -176,15 +196,29 @@ class LimelightMediaIE(LimelightBaseIE):
|
|||||||
|
|
||||||
class LimelightChannelIE(LimelightBaseIE):
|
class LimelightChannelIE(LimelightBaseIE):
|
||||||
IE_NAME = 'limelight:channel'
|
IE_NAME = 'limelight:channel'
|
||||||
_VALID_URL = r'(?:limelight:channel:|https?://link\.videoplatform\.limelight\.com/media/\??\bchannelId=)(?P<id>[a-z0-9]{32})'
|
_VALID_URL = r'''(?x)
|
||||||
_TEST = {
|
(?:
|
||||||
|
limelight:channel:|
|
||||||
|
https?://
|
||||||
|
(?:
|
||||||
|
link\.videoplatform\.limelight\.com/media/|
|
||||||
|
assets\.delvenetworks\.com/player/loader\.swf
|
||||||
|
)
|
||||||
|
\?.*?\bchannelId=
|
||||||
|
)
|
||||||
|
(?P<id>[a-z0-9]{32})
|
||||||
|
'''
|
||||||
|
_TESTS = [{
|
||||||
'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082',
|
'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'ab6a524c379342f9b23642917020c082',
|
'id': 'ab6a524c379342f9b23642917020c082',
|
||||||
'title': 'Javascript Sample Code',
|
'title': 'Javascript Sample Code',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 3,
|
'playlist_mincount': 3,
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://assets.delvenetworks.com/player/loader.swf?channelId=ab6a524c379342f9b23642917020c082',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
_PLAYLIST_SERVICE_PATH = 'channel'
|
_PLAYLIST_SERVICE_PATH = 'channel'
|
||||||
_API_PATH = 'channels'
|
_API_PATH = 'channels'
|
||||||
|
|
||||||
@@ -207,15 +241,29 @@ class LimelightChannelIE(LimelightBaseIE):
|
|||||||
|
|
||||||
class LimelightChannelListIE(LimelightBaseIE):
|
class LimelightChannelListIE(LimelightBaseIE):
|
||||||
IE_NAME = 'limelight:channel_list'
|
IE_NAME = 'limelight:channel_list'
|
||||||
_VALID_URL = r'(?:limelight:channel_list:|https?://link\.videoplatform\.limelight\.com/media/\?.*?\bchannelListId=)(?P<id>[a-z0-9]{32})'
|
_VALID_URL = r'''(?x)
|
||||||
_TEST = {
|
(?:
|
||||||
|
limelight:channel_list:|
|
||||||
|
https?://
|
||||||
|
(?:
|
||||||
|
link\.videoplatform\.limelight\.com/media/|
|
||||||
|
assets\.delvenetworks\.com/player/loader\.swf
|
||||||
|
)
|
||||||
|
\?.*?\bchannelListId=
|
||||||
|
)
|
||||||
|
(?P<id>[a-z0-9]{32})
|
||||||
|
'''
|
||||||
|
_TESTS = [{
|
||||||
'url': 'http://link.videoplatform.limelight.com/media/?channelListId=301b117890c4465c8179ede21fd92e2b',
|
'url': 'http://link.videoplatform.limelight.com/media/?channelListId=301b117890c4465c8179ede21fd92e2b',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '301b117890c4465c8179ede21fd92e2b',
|
'id': '301b117890c4465c8179ede21fd92e2b',
|
||||||
'title': 'Website - Hero Player',
|
'title': 'Website - Hero Player',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 2,
|
'playlist_mincount': 2,
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://assets.delvenetworks.com/player/loader.swf?channelListId=301b117890c4465c8179ede21fd92e2b',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
_PLAYLIST_SERVICE_PATH = 'channel_list'
|
_PLAYLIST_SERVICE_PATH = 'channel_list'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|||||||
@@ -203,9 +203,10 @@ class LivestreamIE(InfoExtractor):
|
|||||||
if not videos_info:
|
if not videos_info:
|
||||||
break
|
break
|
||||||
for v in videos_info:
|
for v in videos_info:
|
||||||
|
v_id = compat_str(v['id'])
|
||||||
entries.append(self.url_result(
|
entries.append(self.url_result(
|
||||||
'http://livestream.com/accounts/%s/events/%s/videos/%s' % (account_id, event_id, v['id']),
|
'http://livestream.com/accounts/%s/events/%s/videos/%s' % (account_id, event_id, v_id),
|
||||||
'Livestream', v['id'], v['caption']))
|
'Livestream', v_id, v.get('caption')))
|
||||||
last_video = videos_info[-1]['id']
|
last_video = videos_info[-1]['id']
|
||||||
return self.playlist_result(entries, event_id, event_data['full_name'])
|
return self.playlist_result(entries, event_id, event_data['full_name'])
|
||||||
|
|
||||||
|
|||||||
@@ -1,93 +1,94 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
from ..compat import (
|
||||||
|
compat_HTTPError,
|
||||||
|
compat_str,
|
||||||
|
compat_urlparse,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
clean_html,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
sanitized_Request,
|
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class LyndaBaseIE(InfoExtractor):
|
class LyndaBaseIE(InfoExtractor):
|
||||||
_LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
|
_SIGNIN_URL = 'https://www.lynda.com/signin'
|
||||||
|
_PASSWORD_URL = 'https://www.lynda.com/signin/password'
|
||||||
|
_USER_URL = 'https://www.lynda.com/signin/user'
|
||||||
_ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
|
_ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
|
||||||
_NETRC_MACHINE = 'lynda'
|
_NETRC_MACHINE = 'lynda'
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
self._login()
|
self._login()
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _check_error(json_string, key_or_keys):
|
||||||
|
keys = [key_or_keys] if isinstance(key_or_keys, compat_str) else key_or_keys
|
||||||
|
for key in keys:
|
||||||
|
error = json_string.get(key)
|
||||||
|
if error:
|
||||||
|
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||||
|
|
||||||
|
def _login_step(self, form_html, fallback_action_url, extra_form_data, note, referrer_url):
|
||||||
|
action_url = self._search_regex(
|
||||||
|
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_html,
|
||||||
|
'post url', default=fallback_action_url, group='url')
|
||||||
|
|
||||||
|
if not action_url.startswith('http'):
|
||||||
|
action_url = compat_urlparse.urljoin(self._SIGNIN_URL, action_url)
|
||||||
|
|
||||||
|
form_data = self._hidden_inputs(form_html)
|
||||||
|
form_data.update(extra_form_data)
|
||||||
|
|
||||||
|
try:
|
||||||
|
response = self._download_json(
|
||||||
|
action_url, None, note,
|
||||||
|
data=urlencode_postdata(form_data),
|
||||||
|
headers={
|
||||||
|
'Referer': referrer_url,
|
||||||
|
'X-Requested-With': 'XMLHttpRequest',
|
||||||
|
})
|
||||||
|
except ExtractorError as e:
|
||||||
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
|
||||||
|
response = self._parse_json(e.cause.read().decode('utf-8'), None)
|
||||||
|
self._check_error(response, ('email', 'password'))
|
||||||
|
raise
|
||||||
|
|
||||||
|
self._check_error(response, 'ErrorMessage')
|
||||||
|
|
||||||
|
return response, action_url
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
username, password = self._get_login_info()
|
username, password = self._get_login_info()
|
||||||
if username is None:
|
if username is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
login_form = {
|
# Step 1: download signin page
|
||||||
'username': username,
|
signin_page = self._download_webpage(
|
||||||
'password': password,
|
self._SIGNIN_URL, None, 'Downloading signin page')
|
||||||
'remember': 'false',
|
|
||||||
'stayPut': 'false'
|
|
||||||
}
|
|
||||||
request = sanitized_Request(
|
|
||||||
self._LOGIN_URL, urlencode_postdata(login_form))
|
|
||||||
login_page = self._download_webpage(
|
|
||||||
request, None, 'Logging in as %s' % username)
|
|
||||||
|
|
||||||
# Not (yet) logged in
|
# Already logged in
|
||||||
m = re.search(r'loginResultJson\s*=\s*\'(?P<json>[^\']+)\';', login_page)
|
if any(re.search(p, signin_page) for p in (
|
||||||
if m is not None:
|
'isLoggedIn\s*:\s*true', r'logout\.aspx', r'>Log out<')):
|
||||||
response = m.group('json')
|
|
||||||
response_json = json.loads(response)
|
|
||||||
state = response_json['state']
|
|
||||||
|
|
||||||
if state == 'notlogged':
|
|
||||||
raise ExtractorError(
|
|
||||||
'Unable to login, incorrect username and/or password',
|
|
||||||
expected=True)
|
|
||||||
|
|
||||||
# This is when we get popup:
|
|
||||||
# > You're already logged in to lynda.com on two devices.
|
|
||||||
# > If you log in here, we'll log you out of another device.
|
|
||||||
# So, we need to confirm this.
|
|
||||||
if state == 'conflicted':
|
|
||||||
confirm_form = {
|
|
||||||
'username': '',
|
|
||||||
'password': '',
|
|
||||||
'resolve': 'true',
|
|
||||||
'remember': 'false',
|
|
||||||
'stayPut': 'false',
|
|
||||||
}
|
|
||||||
request = sanitized_Request(
|
|
||||||
self._LOGIN_URL, urlencode_postdata(confirm_form))
|
|
||||||
login_page = self._download_webpage(
|
|
||||||
request, None,
|
|
||||||
'Confirming log in and log out from another device')
|
|
||||||
|
|
||||||
if all(not re.search(p, login_page) for p in ('isLoggedIn\s*:\s*true', r'logout\.aspx', r'>Log out<')):
|
|
||||||
if 'login error' in login_page:
|
|
||||||
mobj = re.search(
|
|
||||||
r'(?s)<h1[^>]+class="topmost">(?P<title>[^<]+)</h1>\s*<div>(?P<description>.+?)</div>',
|
|
||||||
login_page)
|
|
||||||
if mobj:
|
|
||||||
raise ExtractorError(
|
|
||||||
'lynda returned error: %s - %s'
|
|
||||||
% (mobj.group('title'), clean_html(mobj.group('description'))),
|
|
||||||
expected=True)
|
|
||||||
raise ExtractorError('Unable to log in')
|
|
||||||
|
|
||||||
def _logout(self):
|
|
||||||
username, _ = self._get_login_info()
|
|
||||||
if username is None:
|
|
||||||
return
|
return
|
||||||
|
|
||||||
self._download_webpage(
|
# Step 2: submit email
|
||||||
'http://www.lynda.com/ajax/logout.aspx', None,
|
signin_form = self._search_regex(
|
||||||
'Logging out', 'Unable to log out', fatal=False)
|
r'(?s)(<form[^>]+data-form-name=["\']signin["\'][^>]*>.+?</form>)',
|
||||||
|
signin_page, 'signin form')
|
||||||
|
signin_page, signin_url = self._login_step(
|
||||||
|
signin_form, self._PASSWORD_URL, {'email': username},
|
||||||
|
'Submitting email', self._SIGNIN_URL)
|
||||||
|
|
||||||
|
# Step 3: submit password
|
||||||
|
password_form = signin_page['body']
|
||||||
|
self._login_step(
|
||||||
|
password_form, self._USER_URL, {'email': username, 'password': password},
|
||||||
|
'Submitting password', signin_url)
|
||||||
|
|
||||||
|
|
||||||
class LyndaIE(LyndaBaseIE):
|
class LyndaIE(LyndaBaseIE):
|
||||||
@@ -212,8 +213,6 @@ class LyndaCourseIE(LyndaBaseIE):
|
|||||||
'http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
|
'http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
|
||||||
course_id, 'Downloading course JSON')
|
course_id, 'Downloading course JSON')
|
||||||
|
|
||||||
self._logout()
|
|
||||||
|
|
||||||
if course.get('Status') == 'NotFound':
|
if course.get('Status') == 'NotFound':
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'Course %s does not exist' % course_id, expected=True)
|
'Course %s does not exist' % course_id, expected=True)
|
||||||
@@ -246,5 +245,6 @@ class LyndaCourseIE(LyndaBaseIE):
|
|||||||
% unaccessible_videos + self._ACCOUNT_CREDENTIALS_HINT)
|
% unaccessible_videos + self._ACCOUNT_CREDENTIALS_HINT)
|
||||||
|
|
||||||
course_title = course.get('Title')
|
course_title = course.get('Title')
|
||||||
|
course_description = course.get('Description')
|
||||||
|
|
||||||
return self.playlist_result(entries, course_id, course_title)
|
return self.playlist_result(entries, course_id, course_title, course_description)
|
||||||
|
|||||||
@@ -67,6 +67,23 @@ class NBCIE(InfoExtractor):
|
|||||||
# This video has expired but with an escaped embedURL
|
# This video has expired but with an escaped embedURL
|
||||||
'url': 'http://www.nbc.com/parenthood/episode-guide/season-5/just-like-at-home/515',
|
'url': 'http://www.nbc.com/parenthood/episode-guide/season-5/just-like-at-home/515',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# HLS streams requires the 'hdnea3' cookie
|
||||||
|
'url': 'http://www.nbc.com/Kings/video/goliath/n1806',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'n1806',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Goliath',
|
||||||
|
'description': 'When an unknown soldier saves the life of the King\'s son in battle, he\'s thrust into the limelight and politics of the kingdom.',
|
||||||
|
'timestamp': 1237100400,
|
||||||
|
'upload_date': '20090315',
|
||||||
|
'uploader': 'NBCU-COM',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'skip': 'Only works from US',
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -249,6 +266,11 @@ class NBCNewsIE(ThePlatformIE):
|
|||||||
'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952',
|
'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# From http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html
|
||||||
|
'url': 'http://www.nbcnews.com/widget/video-embed/701714499682',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -272,18 +294,17 @@ class NBCNewsIE(ThePlatformIE):
|
|||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
info = None
|
info = None
|
||||||
bootstrap_json = self._search_regex(
|
bootstrap_json = self._search_regex(
|
||||||
r'(?m)var\s+(?:bootstrapJson|playlistData)\s*=\s*({.+});?\s*$',
|
[r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$',
|
||||||
|
r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"'],
|
||||||
webpage, 'bootstrap json', default=None)
|
webpage, 'bootstrap json', default=None)
|
||||||
if bootstrap_json:
|
bootstrap = self._parse_json(
|
||||||
bootstrap = self._parse_json(bootstrap_json, display_id)
|
bootstrap_json, display_id, transform_source=unescapeHTML)
|
||||||
|
if 'results' in bootstrap:
|
||||||
info = bootstrap['results'][0]['video']
|
info = bootstrap['results'][0]['video']
|
||||||
|
elif 'video' in bootstrap:
|
||||||
|
info = bootstrap['video']
|
||||||
else:
|
else:
|
||||||
player_instance_json = self._search_regex(
|
info = bootstrap
|
||||||
r'videoObj\s*:\s*({.+})', webpage, 'player instance', default=None)
|
|
||||||
if not player_instance_json:
|
|
||||||
player_instance_json = self._html_search_regex(
|
|
||||||
r'data-video="([^"]+)"', webpage, 'video json')
|
|
||||||
info = self._parse_json(player_instance_json, display_id)
|
|
||||||
video_id = info['mpxId']
|
video_id = info['mpxId']
|
||||||
title = info['title']
|
title = info['title']
|
||||||
|
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class OpenloadIE(InfoExtractor):
|
class OpenloadIE(InfoExtractor):
|
||||||
_VALID_URL = r'https://openload.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9-]+)'
|
_VALID_URL = r'https://openload.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://openload.co/f/kUEfGclsU9o',
|
'url': 'https://openload.co/f/kUEfGclsU9o',
|
||||||
@@ -31,6 +31,9 @@ class OpenloadIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://openload.io/f/ZAn6oz-VZGE/',
|
'url': 'https://openload.io/f/ZAn6oz-VZGE/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://openload.co/f/_-ztPaZtMhM/',
|
||||||
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# unavailable via https://openload.co/f/Sxz5sADo82g/, different layout
|
# unavailable via https://openload.co/f/Sxz5sADo82g/, different layout
|
||||||
# for title and ext
|
# for title and ext
|
||||||
@@ -100,7 +103,7 @@ class OpenloadIE(InfoExtractor):
|
|||||||
raise ExtractorError('File not found', expected=True)
|
raise ExtractorError('File not found', expected=True)
|
||||||
|
|
||||||
code = self._search_regex(
|
code = self._search_regex(
|
||||||
r'</video>\s*</div>\s*<script[^>]+>([^<]+)</script>',
|
r'</video>\s*</div>\s*<script[^>]+>[^>]+</script>\s*<script[^>]+>([^<]+)</script>',
|
||||||
webpage, 'JS code')
|
webpage, 'JS code')
|
||||||
|
|
||||||
decoded = self.openload_decode(code)
|
decoded = self.openload_decode(code)
|
||||||
|
|||||||
@@ -14,11 +14,13 @@ from ..compat import (
|
|||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
|
update_url_query,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
@@ -48,6 +50,12 @@ class ThePlatformBaseIE(OnceIE):
|
|||||||
if OnceIE.suitable(_format['url']):
|
if OnceIE.suitable(_format['url']):
|
||||||
formats.extend(self._extract_once_formats(_format['url']))
|
formats.extend(self._extract_once_formats(_format['url']))
|
||||||
else:
|
else:
|
||||||
|
media_url = _format['url']
|
||||||
|
if determine_ext(media_url) == 'm3u8':
|
||||||
|
hdnea2 = self._get_cookies(media_url).get('hdnea2')
|
||||||
|
if hdnea2:
|
||||||
|
_format['url'] = update_url_query(media_url, {'hdnea3': hdnea2.value})
|
||||||
|
|
||||||
formats.append(_format)
|
formats.append(_format)
|
||||||
|
|
||||||
subtitles = self._parse_smil_subtitles(meta, default_ns)
|
subtitles = self._parse_smil_subtitles(meta, default_ns)
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ class TheSixtyOneIE(InfoExtractor):
|
|||||||
s|
|
s|
|
||||||
song/comments/list|
|
song/comments/list|
|
||||||
song
|
song
|
||||||
)/(?P<id>[A-Za-z0-9]+)/?$'''
|
)/(?:[^/]+/)?(?P<id>[A-Za-z0-9]+)/?$'''
|
||||||
_SONG_URL_TEMPLATE = 'http://thesixtyone.com/s/{0:}'
|
_SONG_URL_TEMPLATE = 'http://thesixtyone.com/s/{0:}'
|
||||||
_SONG_FILE_URL_TEMPLATE = 'http://{audio_server:}/thesixtyone_production/audio/{0:}_stream'
|
_SONG_FILE_URL_TEMPLATE = 'http://{audio_server:}/thesixtyone_production/audio/{0:}_stream'
|
||||||
_THUMBNAIL_URL_TEMPLATE = '{photo_base_url:}_desktop'
|
_THUMBNAIL_URL_TEMPLATE = '{photo_base_url:}_desktop'
|
||||||
@@ -45,6 +45,10 @@ class TheSixtyOneIE(InfoExtractor):
|
|||||||
'url': 'http://www.thesixtyone.com/song/SrE3zD7s1jt/',
|
'url': 'http://www.thesixtyone.com/song/SrE3zD7s1jt/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.thesixtyone.com/maryatmidnight/song/StrawberriesandCream/yvWtLp0c4GQ/',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
_DECODE_MAP = {
|
_DECODE_MAP = {
|
||||||
|
|||||||
@@ -260,7 +260,7 @@ class TwitchVodIE(TwitchItemBaseIE):
|
|||||||
'nauth': access_token['token'],
|
'nauth': access_token['token'],
|
||||||
'nauthsig': access_token['sig'],
|
'nauthsig': access_token['sig'],
|
||||||
})),
|
})),
|
||||||
item_id, 'mp4')
|
item_id, 'mp4', entry_protocol='m3u8_native')
|
||||||
|
|
||||||
self._prefer_source(formats)
|
self._prefer_source(formats)
|
||||||
info['formats'] = formats
|
info['formats'] = formats
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
remove_end,
|
remove_end,
|
||||||
@@ -52,7 +53,7 @@ class TwitterCardIE(TwitterBaseIE):
|
|||||||
'id': 'dq4Oj5quskI',
|
'id': 'dq4Oj5quskI',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Ubuntu 11.10 Overview',
|
'title': 'Ubuntu 11.10 Overview',
|
||||||
'description': 'Take a quick peek at what\'s new and improved in Ubuntu 11.10.\n\nOnce installed take a look at 10 Things to Do After Installing: http://www.omgubuntu.co.uk/2011/10/10-things-to-do-after-installing-ubuntu-11-10/',
|
'description': 'Take a quick peek at what\'s new and improved in Ubuntu 11.10.\n\nOnce installed take a look at 10 Things to Do After Installing: http://www.omgubuntu.co.uk/2011/10/10...',
|
||||||
'upload_date': '20111013',
|
'upload_date': '20111013',
|
||||||
'uploader': 'OMG! Ubuntu!',
|
'uploader': 'OMG! Ubuntu!',
|
||||||
'uploader_id': 'omgubuntu',
|
'uploader_id': 'omgubuntu',
|
||||||
@@ -116,13 +117,16 @@ class TwitterCardIE(TwitterBaseIE):
|
|||||||
video_url = config.get('video_url') or config.get('playlist', [{}])[0].get('source')
|
video_url = config.get('video_url') or config.get('playlist', [{}])[0].get('source')
|
||||||
|
|
||||||
if video_url:
|
if video_url:
|
||||||
f = {
|
if determine_ext(video_url) == 'm3u8':
|
||||||
'url': video_url,
|
formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', m3u8_id='hls'))
|
||||||
}
|
else:
|
||||||
|
f = {
|
||||||
|
'url': video_url,
|
||||||
|
}
|
||||||
|
|
||||||
_search_dimensions_in_video_url(f, video_url)
|
_search_dimensions_in_video_url(f, video_url)
|
||||||
|
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
|
|
||||||
vmap_url = config.get('vmapUrl') or config.get('vmap_url')
|
vmap_url = config.get('vmapUrl') or config.get('vmap_url')
|
||||||
if vmap_url:
|
if vmap_url:
|
||||||
@@ -207,6 +211,7 @@ class TwitterIE(InfoExtractor):
|
|||||||
'uploader_id': 'giphz',
|
'uploader_id': 'giphz',
|
||||||
},
|
},
|
||||||
'expected_warnings': ['height', 'width'],
|
'expected_warnings': ['height', 'width'],
|
||||||
|
'skip': 'Account suspended',
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://twitter.com/starwars/status/665052190608723968',
|
'url': 'https://twitter.com/starwars/status/665052190608723968',
|
||||||
'md5': '39b7199856dee6cd4432e72c74bc69d4',
|
'md5': '39b7199856dee6cd4432e72c74bc69d4',
|
||||||
@@ -239,10 +244,10 @@ class TwitterIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '700207533655363584',
|
'id': '700207533655363584',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'jay - BEAT PROD: @suhmeduh #Damndaniel',
|
'title': 'Donte The Dumbass - BEAT PROD: @suhmeduh #Damndaniel',
|
||||||
'description': 'jay on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"',
|
'description': 'Donte The Dumbass on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"',
|
||||||
'thumbnail': 're:^https?://.*\.jpg',
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
'uploader': 'jay',
|
'uploader': 'Donte The Dumbass',
|
||||||
'uploader_id': 'jaydingeer',
|
'uploader_id': 'jaydingeer',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
@@ -262,7 +267,6 @@ class TwitterIE(InfoExtractor):
|
|||||||
'add_ie': ['Vine'],
|
'add_ie': ['Vine'],
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://twitter.com/captainamerica/status/719944021058060289',
|
'url': 'https://twitter.com/captainamerica/status/719944021058060289',
|
||||||
# md5 constantly changes
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '719944021058060289',
|
'id': '719944021058060289',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@@ -271,6 +275,9 @@ class TwitterIE(InfoExtractor):
|
|||||||
'uploader_id': 'captainamerica',
|
'uploader_id': 'captainamerica',
|
||||||
'uploader': 'Captain America',
|
'uploader': 'Captain America',
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True, # requires ffmpeg
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -278,7 +285,11 @@ class TwitterIE(InfoExtractor):
|
|||||||
user_id = mobj.group('user_id')
|
user_id = mobj.group('user_id')
|
||||||
twid = mobj.group('id')
|
twid = mobj.group('id')
|
||||||
|
|
||||||
webpage = self._download_webpage(self._TEMPLATE_URL % (user_id, twid), twid)
|
webpage, urlh = self._download_webpage_handle(
|
||||||
|
self._TEMPLATE_URL % (user_id, twid), twid)
|
||||||
|
|
||||||
|
if 'twitter.com/account/suspended' in urlh.geturl():
|
||||||
|
raise ExtractorError('Account suspended by Twitter.', expected=True)
|
||||||
|
|
||||||
username = remove_end(self._og_search_title(webpage), ' on Twitter')
|
username = remove_end(self._og_search_title(webpage), ' on Twitter')
|
||||||
|
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -12,11 +13,11 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class VesselIE(InfoExtractor):
|
class VesselIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?vessel\.com/videos/(?P<id>[0-9a-zA-Z]+)'
|
_VALID_URL = r'https?://(?:www\.)?vessel\.com/(?:videos|embed)/(?P<id>[0-9a-zA-Z]+)'
|
||||||
_API_URL_TEMPLATE = 'https://www.vessel.com/api/view/items/%s'
|
_API_URL_TEMPLATE = 'https://www.vessel.com/api/view/items/%s'
|
||||||
_LOGIN_URL = 'https://www.vessel.com/api/account/login'
|
_LOGIN_URL = 'https://www.vessel.com/api/account/login'
|
||||||
_NETRC_MACHINE = 'vessel'
|
_NETRC_MACHINE = 'vessel'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'https://www.vessel.com/videos/HDN7G5UMs',
|
'url': 'https://www.vessel.com/videos/HDN7G5UMs',
|
||||||
'md5': '455cdf8beb71c6dd797fd2f3818d05c4',
|
'md5': '455cdf8beb71c6dd797fd2f3818d05c4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -28,7 +29,16 @@ class VesselIE(InfoExtractor):
|
|||||||
'description': 'Did Nvidia pull out all the stops on the Titan X, or does its performance leave something to be desired?',
|
'description': 'Did Nvidia pull out all the stops on the Titan X, or does its performance leave something to be desired?',
|
||||||
'timestamp': int,
|
'timestamp': int,
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://www.vessel.com/embed/G4U7gUJ6a?w=615&h=346',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_urls(webpage):
|
||||||
|
return [url for _, url in re.findall(
|
||||||
|
r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?vessel\.com/embed/[0-9a-zA-Z]+.*?)\1',
|
||||||
|
webpage)]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def make_json_request(url, data):
|
def make_json_request(url, data):
|
||||||
@@ -98,16 +108,24 @@ class VesselIE(InfoExtractor):
|
|||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for f in video_asset.get('sources', []):
|
for f in video_asset.get('sources', []):
|
||||||
if f['name'] == 'hls-index':
|
location = f.get('location')
|
||||||
|
if not location:
|
||||||
|
continue
|
||||||
|
name = f.get('name')
|
||||||
|
if name == 'hls-index':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
f['location'], video_id, ext='mp4', m3u8_id='m3u8'))
|
location, video_id, ext='mp4',
|
||||||
|
entry_protocol='m3u8_native', m3u8_id='m3u8', fatal=False))
|
||||||
|
elif name == 'dash-index':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
location, video_id, mpd_id='dash', fatal=False))
|
||||||
else:
|
else:
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': f['name'],
|
'format_id': name,
|
||||||
'tbr': f.get('bitrate'),
|
'tbr': f.get('bitrate'),
|
||||||
'height': f.get('height'),
|
'height': f.get('height'),
|
||||||
'width': f.get('width'),
|
'width': f.get('width'),
|
||||||
'url': f['location'],
|
'url': location,
|
||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
|||||||
73
youtube_dl/extractor/vidio.py
Normal file
73
youtube_dl/extractor/vidio.py
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
|
class VidioIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?vidio\.com/watch/(?P<id>\d+)-(?P<display_id>[^/?#&]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015',
|
||||||
|
'md5': 'cd2801394afc164e9775db6a140b91fe',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '165683',
|
||||||
|
'display_id': 'dj_ambred-booyah-live-2015',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'DJ_AMBRED - Booyah (Live 2015)',
|
||||||
|
'description': 'md5:27dc15f819b6a78a626490881adbadf8',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'duration': 149,
|
||||||
|
'like_count': int,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.vidio.com/watch/77949-south-korea-test-fires-missile-that-can-strike-all-of-the-north',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id, display_id = mobj.group('id', 'display_id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
title = self._og_search_title(webpage)
|
||||||
|
|
||||||
|
m3u8_url, duration, thumbnail = [None] * 3
|
||||||
|
|
||||||
|
clips = self._parse_json(
|
||||||
|
self._html_search_regex(
|
||||||
|
r'data-json-clips\s*=\s*(["\'])(?P<data>\[.+?\])\1',
|
||||||
|
webpage, 'video data', default='[]', group='data'),
|
||||||
|
display_id, fatal=False)
|
||||||
|
if clips:
|
||||||
|
clip = clips[0]
|
||||||
|
m3u8_url = clip.get('sources', [{}])[0].get('file')
|
||||||
|
duration = clip.get('clip_duration')
|
||||||
|
thumbnail = clip.get('image')
|
||||||
|
|
||||||
|
m3u8_url = m3u8_url or self._search_regex(
|
||||||
|
r'data(?:-vjs)?-clip-hls-url=(["\'])(?P<url>.+?)\1', webpage, 'hls url')
|
||||||
|
formats = self._extract_m3u8_formats(m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native')
|
||||||
|
|
||||||
|
duration = int_or_none(duration or self._search_regex(
|
||||||
|
r'data-video-duration=(["\'])(?P<duartion>\d+)\1', webpage, 'duration'))
|
||||||
|
thumbnail = thumbnail or self._og_search_thumbnail(webpage)
|
||||||
|
|
||||||
|
like_count = int_or_none(self._search_regex(
|
||||||
|
(r'<span[^>]+data-comment-vote-count=["\'](\d+)',
|
||||||
|
r'<span[^>]+class=["\'].*?\blike(?:__|-)count\b.*?["\'][^>]*>\s*(\d+)'),
|
||||||
|
webpage, 'like count', fatal=False))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
'title': title,
|
||||||
|
'description': self._og_search_description(webpage),
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'duration': duration,
|
||||||
|
'like_count': like_count,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
@@ -9,6 +9,7 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
remove_start,
|
||||||
)
|
)
|
||||||
from ..compat import compat_urllib_parse_urlencode
|
from ..compat import compat_urllib_parse_urlencode
|
||||||
|
|
||||||
@@ -39,6 +40,7 @@ class VLiveIE(InfoExtractor):
|
|||||||
webpage, 'video params')
|
webpage, 'video params')
|
||||||
status, _, _, live_params, long_video_id, key = re.split(
|
status, _, _, live_params, long_video_id, key = re.split(
|
||||||
r'"\s*,\s*"', video_params)[2:8]
|
r'"\s*,\s*"', video_params)[2:8]
|
||||||
|
status = remove_start(status, 'PRODUCT_')
|
||||||
|
|
||||||
if status == 'LIVE_ON_AIR' or status == 'BIG_EVENT_ON_AIR':
|
if status == 'LIVE_ON_AIR' or status == 'BIG_EVENT_ON_AIR':
|
||||||
live_params = self._parse_json('"%s"' % live_params, video_id)
|
live_params = self._parse_json('"%s"' % live_params, video_id)
|
||||||
|
|||||||
@@ -3,7 +3,10 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_urlparse
|
from ..compat import (
|
||||||
|
compat_str,
|
||||||
|
compat_urlparse,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
@@ -16,13 +19,13 @@ class VoiceRepublicIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://voicerepublic\.com/(?:talks|embed)/(?P<id>[0-9a-z-]+)'
|
_VALID_URL = r'https?://voicerepublic\.com/(?:talks|embed)/(?P<id>[0-9a-z-]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state',
|
'url': 'http://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state',
|
||||||
'md5': '0554a24d1657915aa8e8f84e15dc9353',
|
'md5': 'b9174d651323f17783000876347116e3',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2296',
|
'id': '2296',
|
||||||
'display_id': 'watching-the-watchers-building-a-sousveillance-state',
|
'display_id': 'watching-the-watchers-building-a-sousveillance-state',
|
||||||
'ext': 'm4a',
|
'ext': 'm4a',
|
||||||
'title': 'Watching the Watchers: Building a Sousveillance State',
|
'title': 'Watching the Watchers: Building a Sousveillance State',
|
||||||
'description': 'md5:715ba964958afa2398df615809cfecb1',
|
'description': 'Secret surveillance programs have metadata too. The people and companies that operate secret surveillance programs can be surveilled.',
|
||||||
'thumbnail': 're:^https?://.*\.(?:png|jpg)$',
|
'thumbnail': 're:^https?://.*\.(?:png|jpg)$',
|
||||||
'duration': 1800,
|
'duration': 1800,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
@@ -52,7 +55,7 @@ class VoiceRepublicIE(InfoExtractor):
|
|||||||
if data:
|
if data:
|
||||||
title = data['title']
|
title = data['title']
|
||||||
description = data.get('teaser')
|
description = data.get('teaser')
|
||||||
talk_id = data.get('talk_id') or display_id
|
talk_id = compat_str(data.get('talk_id') or display_id)
|
||||||
talk = data['talk']
|
talk = data['talk']
|
||||||
duration = int_or_none(talk.get('duration'))
|
duration = int_or_none(talk.get('duration'))
|
||||||
formats = [{
|
formats = [{
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
)
|
)
|
||||||
@@ -27,7 +28,8 @@ class VpornIE(InfoExtractor):
|
|||||||
'duration': 393,
|
'duration': 393,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
}
|
},
|
||||||
|
'skip': 'video removed',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.vporn.com/female/hana-shower/523564/',
|
'url': 'http://www.vporn.com/female/hana-shower/523564/',
|
||||||
@@ -40,7 +42,7 @@ class VpornIE(InfoExtractor):
|
|||||||
'description': 'Hana showers at the bathroom.',
|
'description': 'Hana showers at the bathroom.',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
'uploader': 'Hmmmmm',
|
'uploader': 'Hmmmmm',
|
||||||
'categories': ['Big Boobs', 'Erotic', 'Teen', 'Female'],
|
'categories': ['Big Boobs', 'Erotic', 'Teen', 'Female', '720p'],
|
||||||
'duration': 588,
|
'duration': 588,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
@@ -55,6 +57,10 @@ class VpornIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
errmsg = 'This video has been deleted due to Copyright Infringement or by the account owner!'
|
||||||
|
if errmsg in webpage:
|
||||||
|
raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True)
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'videoname\s*=\s*\'([^\']+)\'', webpage, 'title').strip()
|
r'videoname\s*=\s*\'([^\']+)\'', webpage, 'title').strip()
|
||||||
description = self._html_search_regex(
|
description = self._html_search_regex(
|
||||||
|
|||||||
@@ -1,69 +0,0 @@
|
|||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import json
|
|
||||||
import os.path
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
int_or_none,
|
|
||||||
parse_iso8601,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class VultureIE(InfoExtractor):
|
|
||||||
IE_NAME = 'vulture.com'
|
|
||||||
_VALID_URL = r'https?://video\.vulture\.com/video/(?P<display_id>[^/]+)/'
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://video.vulture.com/video/Mindy-Kaling-s-Harvard-Speech/player?layout=compact&read_more=1',
|
|
||||||
'md5': '8d997845642a2b5152820f7257871bc8',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '6GHRQL3RV7MSD1H4',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'kaling-speech-2-MAGNIFY STANDARD CONTAINER REVISED',
|
|
||||||
'uploader_id': 'Sarah',
|
|
||||||
'thumbnail': 're:^http://.*\.jpg$',
|
|
||||||
'timestamp': 1401288564,
|
|
||||||
'upload_date': '20140528',
|
|
||||||
'description': 'Uplifting and witty, as predicted.',
|
|
||||||
'duration': 1015,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
display_id = mobj.group('display_id')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
query_string = self._search_regex(
|
|
||||||
r"queryString\s*=\s*'([^']+)'", webpage, 'query string')
|
|
||||||
video_id = self._search_regex(
|
|
||||||
r'content=([^&]+)', query_string, 'video ID')
|
|
||||||
query_url = 'http://video.vulture.com/embed/player/container/1000/1000/?%s' % query_string
|
|
||||||
|
|
||||||
query_webpage = self._download_webpage(
|
|
||||||
query_url, display_id, note='Downloading query page')
|
|
||||||
params_json = self._search_regex(
|
|
||||||
r'(?sm)new MagnifyEmbeddablePlayer\({.*?contentItem:\s*(\{.*?\})\n?,\n',
|
|
||||||
query_webpage,
|
|
||||||
'player params')
|
|
||||||
params = json.loads(params_json)
|
|
||||||
|
|
||||||
upload_timestamp = parse_iso8601(params['posted'].replace(' ', 'T'))
|
|
||||||
uploader_id = params.get('user', {}).get('handle')
|
|
||||||
|
|
||||||
media_item = params['media_item']
|
|
||||||
title = os.path.splitext(media_item['title'])[0]
|
|
||||||
duration = int_or_none(media_item.get('duration_seconds'))
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'display_id': display_id,
|
|
||||||
'url': media_item['pipeline_xid'],
|
|
||||||
'title': title,
|
|
||||||
'timestamp': upload_timestamp,
|
|
||||||
'thumbnail': params.get('thumbnail_url'),
|
|
||||||
'uploader_id': uploader_id,
|
|
||||||
'description': params.get('description'),
|
|
||||||
'duration': duration,
|
|
||||||
}
|
|
||||||
@@ -1,214 +1,224 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import itertools
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
|
||||||
compat_parse_qs,
|
|
||||||
compat_urlparse,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
ExtractorError,
|
||||||
|
js_to_json,
|
||||||
|
strip_jsonp,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
qualities,
|
update_url_query,
|
||||||
|
urlhandle_detect_ext,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class WDRIE(InfoExtractor):
|
class WDRIE(InfoExtractor):
|
||||||
_PLAYER_REGEX = '-(?:video|audio)player(?:_size-[LMS])?'
|
_CURRENT_MAUS_URL = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/){1,2}[^/?#]+\.php5'
|
||||||
_VALID_URL = r'(?P<url>https?://www\d?\.(?:wdr\d?|funkhauseuropa)\.de/)(?P<id>.+?)(?P<player>%s)?\.html' % _PLAYER_REGEX
|
_PAGE_REGEX = r'/(?:mediathek/)?[^/]+/(?P<type>[^/]+)/(?P<display_id>.+)\.html'
|
||||||
|
_VALID_URL = r'(?P<page_url>https?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www1.wdr.de/mediathek/video/sendungen/servicezeit/videoservicezeit560-videoplayer_size-L.html',
|
'url': 'http://www1.wdr.de/mediathek/video/sendungen/doku-am-freitag/video-geheimnis-aachener-dom-100.html',
|
||||||
|
# HDS download, MD5 is unstable
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'mdb-362427',
|
'id': 'mdb-1058683',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'Servicezeit',
|
'display_id': 'doku-am-freitag/video-geheimnis-aachener-dom-100',
|
||||||
'description': 'md5:c8f43e5e815eeb54d0b96df2fba906cb',
|
'title': 'Geheimnis Aachener Dom',
|
||||||
'upload_date': '20140310',
|
'alt_title': 'Doku am Freitag',
|
||||||
'is_live': False
|
'upload_date': '20160304',
|
||||||
|
'description': 'md5:87be8ff14d8dfd7a7ee46f0299b52318',
|
||||||
|
'is_live': False,
|
||||||
|
'subtitles': {'de': [{
|
||||||
|
'url': 'http://ondemand-ww.wdr.de/medp/fsk0/105/1058683/1058683_12220974.xml',
|
||||||
|
'ext': 'ttml',
|
||||||
|
}]},
|
||||||
},
|
},
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
'skip': 'Page Not Found',
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www1.wdr.de/themen/av/videomargaspiegelisttot101-videoplayer.html',
|
'url': 'http://www1.wdr.de/mediathek/audio/wdr3/wdr3-gespraech-am-samstag/audio-schriftstellerin-juli-zeh-100.html',
|
||||||
|
'md5': 'f4c1f96d01cf285240f53ea4309663d8',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'mdb-363194',
|
'id': 'mdb-1072000',
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'Marga Spiegel ist tot',
|
|
||||||
'description': 'md5:2309992a6716c347891c045be50992e4',
|
|
||||||
'upload_date': '20140311',
|
|
||||||
'is_live': False
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
'skip': 'Page Not Found',
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'url': 'http://www1.wdr.de/themen/kultur/audioerlebtegeschichtenmargaspiegel100-audioplayer.html',
|
|
||||||
'md5': '83e9e8fefad36f357278759870805898',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'mdb-194332',
|
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'Erlebte Geschichten: Marga Spiegel (29.11.2009)',
|
'display_id': 'wdr3-gespraech-am-samstag/audio-schriftstellerin-juli-zeh-100',
|
||||||
'description': 'md5:2309992a6716c347891c045be50992e4',
|
'title': 'Schriftstellerin Juli Zeh',
|
||||||
'upload_date': '20091129',
|
'alt_title': 'WDR 3 Gespräch am Samstag',
|
||||||
'is_live': False
|
'upload_date': '20160312',
|
||||||
|
'description': 'md5:e127d320bc2b1f149be697ce044a3dd7',
|
||||||
|
'is_live': False,
|
||||||
|
'subtitles': {}
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.funkhauseuropa.de/av/audioflaviacoelhoamaramar100-audioplayer.html',
|
'url': 'http://www1.wdr.de/mediathek/video/live/index.html',
|
||||||
'md5': '99a1443ff29af19f6c52cf6f4dc1f4aa',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'mdb-478135',
|
|
||||||
'ext': 'mp3',
|
|
||||||
'title': 'Flavia Coelho: Amar é Amar',
|
|
||||||
'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a',
|
|
||||||
'upload_date': '20140717',
|
|
||||||
'is_live': False
|
|
||||||
},
|
|
||||||
'skip': 'Page Not Found',
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'url': 'http://www1.wdr.de/mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100.html',
|
|
||||||
'playlist_mincount': 146,
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100',
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'url': 'http://www1.wdr.de/mediathek/video/livestream/index.html',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'mdb-103364',
|
'id': 'mdb-103364',
|
||||||
'title': 're:^WDR Fernsehen Live [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
'ext': 'mp4',
|
||||||
|
'display_id': 'index',
|
||||||
|
'title': r're:^WDR Fernsehen im Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
|
'alt_title': 'WDR Fernsehen Live',
|
||||||
|
'upload_date': None,
|
||||||
'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9',
|
'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9',
|
||||||
'ext': 'flv',
|
'is_live': True,
|
||||||
'upload_date': '20150101',
|
'subtitles': {}
|
||||||
'is_live': True
|
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True, # m3u8 download
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html',
|
||||||
|
'playlist_mincount': 8,
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'aktuelle-stunde/aktuelle-stunde-120',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'mdb-1096487',
|
||||||
|
'ext': 'flv',
|
||||||
|
'upload_date': 're:^[0-9]{8}$',
|
||||||
|
'title': 're:^Die Sendung mit der Maus vom [0-9.]{10}$',
|
||||||
|
'description': '- Die Sendung mit der Maus -',
|
||||||
|
},
|
||||||
|
'skip': 'The id changes from week to week because of the new episode'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/achterbahn.php5',
|
||||||
|
# HDS download, MD5 is unstable
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'mdb-186083',
|
||||||
|
'ext': 'flv',
|
||||||
|
'upload_date': '20130919',
|
||||||
|
'title': 'Sachgeschichte - Achterbahn ',
|
||||||
|
'description': '- Die Sendung mit der Maus -',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www1.wdr.de/radio/player/radioplayer116~_layout-popupVersion.html',
|
||||||
|
# Live stream, MD5 unstable
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'mdb-869971',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Funkhaus Europa Livestream',
|
||||||
|
'description': 'md5:2309992a6716c347891c045be50992e4',
|
||||||
|
'upload_date': '20160101',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
page_url = mobj.group('url')
|
url_type = mobj.group('type')
|
||||||
page_id = mobj.group('id')
|
page_url = mobj.group('page_url')
|
||||||
|
display_id = mobj.group('display_id')
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, page_id)
|
# for wdr.de the data-extension is in a tag with the class "mediaLink"
|
||||||
|
# for wdr.de radio players, in a tag with the class "wdrrPlayerPlayBtn"
|
||||||
|
# for wdrmaus its in a link to the page in a multiline "videoLink"-tag
|
||||||
|
json_metadata = self._html_search_regex(
|
||||||
|
r'class=(?:"(?:mediaLink|wdrrPlayerPlayBtn)\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"',
|
||||||
|
webpage, 'media link', default=None, flags=re.MULTILINE)
|
||||||
|
|
||||||
if mobj.group('player') is None:
|
if not json_metadata:
|
||||||
entries = [
|
entries = [
|
||||||
self.url_result(page_url + href, 'WDR')
|
self.url_result(page_url + href[0], 'WDR')
|
||||||
for href in re.findall(
|
for href in re.findall(
|
||||||
r'<a href="/?(.+?%s\.html)" rel="nofollow"' % self._PLAYER_REGEX,
|
r'<a href="(%s)"[^>]+data-extension=' % self._PAGE_REGEX,
|
||||||
webpage)
|
webpage)
|
||||||
]
|
]
|
||||||
|
|
||||||
if entries: # Playlist page
|
if entries: # Playlist page
|
||||||
return self.playlist_result(entries, page_id)
|
return self.playlist_result(entries, playlist_id=display_id)
|
||||||
|
|
||||||
# Overview page
|
raise ExtractorError('No downloadable streams found', expected=True)
|
||||||
entries = []
|
|
||||||
for page_num in itertools.count(2):
|
|
||||||
hrefs = re.findall(
|
|
||||||
r'<li class="mediathekvideo"\s*>\s*<img[^>]*>\s*<a href="(/mediathek/video/[^"]+)"',
|
|
||||||
webpage)
|
|
||||||
entries.extend(
|
|
||||||
self.url_result(page_url + href, 'WDR')
|
|
||||||
for href in hrefs)
|
|
||||||
next_url_m = re.search(
|
|
||||||
r'<li class="nextToLast">\s*<a href="([^"]+)"', webpage)
|
|
||||||
if not next_url_m:
|
|
||||||
break
|
|
||||||
next_url = page_url + next_url_m.group(1)
|
|
||||||
webpage = self._download_webpage(
|
|
||||||
next_url, page_id,
|
|
||||||
note='Downloading playlist page %d' % page_num)
|
|
||||||
return self.playlist_result(entries, page_id)
|
|
||||||
|
|
||||||
flashvars = compat_parse_qs(self._html_search_regex(
|
media_link_obj = self._parse_json(json_metadata, display_id,
|
||||||
r'<param name="flashvars" value="([^"]+)"', webpage, 'flashvars'))
|
transform_source=js_to_json)
|
||||||
|
jsonp_url = media_link_obj['mediaObj']['url']
|
||||||
|
|
||||||
page_id = flashvars['trackerClipId'][0]
|
metadata = self._download_json(
|
||||||
video_url = flashvars['dslSrc'][0]
|
jsonp_url, 'metadata', transform_source=strip_jsonp)
|
||||||
title = flashvars['trackerClipTitle'][0]
|
|
||||||
thumbnail = flashvars['startPicture'][0] if 'startPicture' in flashvars else None
|
metadata_tracker_data = metadata['trackerData']
|
||||||
is_live = flashvars.get('isLive', ['0'])[0] == '1'
|
metadata_media_resource = metadata['mediaResource']
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
|
||||||
|
# check if the metadata contains a direct URL to a file
|
||||||
|
for kind, media_resource in metadata_media_resource.items():
|
||||||
|
if kind not in ('dflt', 'alt'):
|
||||||
|
continue
|
||||||
|
|
||||||
|
for tag_name, medium_url in media_resource.items():
|
||||||
|
if tag_name not in ('videoURL', 'audioURL'):
|
||||||
|
continue
|
||||||
|
|
||||||
|
ext = determine_ext(medium_url)
|
||||||
|
if ext == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
medium_url, display_id, 'mp4', 'm3u8_native',
|
||||||
|
m3u8_id='hls'))
|
||||||
|
elif ext == 'f4m':
|
||||||
|
manifest_url = update_url_query(
|
||||||
|
medium_url, {'hdcore': '3.2.0', 'plugin': 'aasp-3.2.0.77.18'})
|
||||||
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
manifest_url, display_id, f4m_id='hds', fatal=False))
|
||||||
|
elif ext == 'smil':
|
||||||
|
formats.extend(self._extract_smil_formats(
|
||||||
|
medium_url, 'stream', fatal=False))
|
||||||
|
else:
|
||||||
|
a_format = {
|
||||||
|
'url': medium_url
|
||||||
|
}
|
||||||
|
if ext == 'unknown_video':
|
||||||
|
urlh = self._request_webpage(
|
||||||
|
medium_url, display_id, note='Determining extension')
|
||||||
|
ext = urlhandle_detect_ext(urlh)
|
||||||
|
a_format['ext'] = ext
|
||||||
|
formats.append(a_format)
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
caption_url = metadata_media_resource.get('captionURL')
|
||||||
|
if caption_url:
|
||||||
|
subtitles['de'] = [{
|
||||||
|
'url': caption_url,
|
||||||
|
'ext': 'ttml',
|
||||||
|
}]
|
||||||
|
|
||||||
|
title = metadata_tracker_data.get('trackerClipTitle')
|
||||||
|
is_live = url_type == 'live'
|
||||||
|
|
||||||
if is_live:
|
if is_live:
|
||||||
title = self._live_title(title)
|
title = self._live_title(title)
|
||||||
|
upload_date = None
|
||||||
if 'trackerClipAirTime' in flashvars:
|
elif 'trackerClipAirTime' in metadata_tracker_data:
|
||||||
upload_date = flashvars['trackerClipAirTime'][0]
|
upload_date = metadata_tracker_data['trackerClipAirTime']
|
||||||
else:
|
else:
|
||||||
upload_date = self._html_search_meta(
|
upload_date = self._html_search_meta('DC.Date', webpage, 'upload date')
|
||||||
'DC.Date', webpage, 'upload date')
|
|
||||||
|
|
||||||
if upload_date:
|
if upload_date:
|
||||||
upload_date = unified_strdate(upload_date)
|
upload_date = unified_strdate(upload_date)
|
||||||
|
|
||||||
formats = []
|
|
||||||
preference = qualities(['S', 'M', 'L', 'XL'])
|
|
||||||
|
|
||||||
if video_url.endswith('.f4m'):
|
|
||||||
formats.extend(self._extract_f4m_formats(
|
|
||||||
video_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18', page_id,
|
|
||||||
f4m_id='hds', fatal=False))
|
|
||||||
elif video_url.endswith('.smil'):
|
|
||||||
formats.extend(self._extract_smil_formats(
|
|
||||||
video_url, page_id, False, {
|
|
||||||
'hdcore': '3.3.0',
|
|
||||||
'plugin': 'aasp-3.3.0.99.43',
|
|
||||||
}))
|
|
||||||
else:
|
|
||||||
formats.append({
|
|
||||||
'url': video_url,
|
|
||||||
'http_headers': {
|
|
||||||
'User-Agent': 'mobile',
|
|
||||||
},
|
|
||||||
})
|
|
||||||
|
|
||||||
m3u8_url = self._search_regex(
|
|
||||||
r'rel="adaptiv"[^>]+href="([^"]+)"',
|
|
||||||
webpage, 'm3u8 url', default=None)
|
|
||||||
if m3u8_url:
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
m3u8_url, page_id, 'mp4', 'm3u8_native',
|
|
||||||
m3u8_id='hls', fatal=False))
|
|
||||||
|
|
||||||
direct_urls = re.findall(
|
|
||||||
r'rel="web(S|M|L|XL)"[^>]+href="([^"]+)"', webpage)
|
|
||||||
if direct_urls:
|
|
||||||
for quality, video_url in direct_urls:
|
|
||||||
formats.append({
|
|
||||||
'url': video_url,
|
|
||||||
'preference': preference(quality),
|
|
||||||
'http_headers': {
|
|
||||||
'User-Agent': 'mobile',
|
|
||||||
},
|
|
||||||
})
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
description = self._html_search_meta('Description', webpage, 'description')
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': page_id,
|
'id': metadata_tracker_data.get('trackerClipId', display_id),
|
||||||
'formats': formats,
|
'display_id': display_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'alt_title': metadata_tracker_data.get('trackerClipSubcategory'),
|
||||||
'thumbnail': thumbnail,
|
'formats': formats,
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
'is_live': is_live
|
'description': self._html_search_meta('Description', webpage),
|
||||||
|
'is_live': is_live,
|
||||||
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -241,81 +251,3 @@ class WDRMobileIE(InfoExtractor):
|
|||||||
'User-Agent': 'mobile',
|
'User-Agent': 'mobile',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class WDRMausIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P<id>[^/?#]+)(?:/index\.php5|(?<!index)\.php5|/(?:$|[?#]))'
|
|
||||||
IE_DESC = 'Sendung mit der Maus'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'aktuelle-sendung',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'thumbnail': 're:^http://.+\.jpg',
|
|
||||||
'upload_date': 're:^[0-9]{8}$',
|
|
||||||
'title': 're:^[0-9.]{10} - Aktuelle Sendung$',
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/40_jahre_maus.php5',
|
|
||||||
'md5': '3b1227ca3ed28d73ec5737c65743b2a3',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '40_jahre_maus',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'thumbnail': 're:^http://.+\.jpg',
|
|
||||||
'upload_date': '20131007',
|
|
||||||
'title': '12.03.2011 - 40 Jahre Maus',
|
|
||||||
}
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
param_code = self._html_search_regex(
|
|
||||||
r'<a href="\?startVideo=1&([^"]+)"', webpage, 'parameters')
|
|
||||||
|
|
||||||
title_date = self._search_regex(
|
|
||||||
r'<div class="sendedatum"><p>Sendedatum:\s*([0-9\.]+)</p>',
|
|
||||||
webpage, 'air date')
|
|
||||||
title_str = self._html_search_regex(
|
|
||||||
r'<h1>(.*?)</h1>', webpage, 'title')
|
|
||||||
title = '%s - %s' % (title_date, title_str)
|
|
||||||
upload_date = unified_strdate(
|
|
||||||
self._html_search_meta('dc.date', webpage))
|
|
||||||
|
|
||||||
fields = compat_parse_qs(param_code)
|
|
||||||
video_url = fields['firstVideo'][0]
|
|
||||||
thumbnail = compat_urlparse.urljoin(url, fields['startPicture'][0])
|
|
||||||
|
|
||||||
formats = [{
|
|
||||||
'format_id': 'rtmp',
|
|
||||||
'url': video_url,
|
|
||||||
}]
|
|
||||||
|
|
||||||
jscode = self._download_webpage(
|
|
||||||
'http://www.wdrmaus.de/codebase/js/extended-medien.min.js',
|
|
||||||
video_id, fatal=False,
|
|
||||||
note='Downloading URL translation table',
|
|
||||||
errnote='Could not download URL translation table')
|
|
||||||
if jscode:
|
|
||||||
for m in re.finditer(
|
|
||||||
r"stream:\s*'dslSrc=(?P<stream>[^']+)',\s*download:\s*'(?P<dl>[^']+)'\s*\}",
|
|
||||||
jscode):
|
|
||||||
if video_url.startswith(m.group('stream')):
|
|
||||||
http_url = video_url.replace(
|
|
||||||
m.group('stream'), m.group('dl'))
|
|
||||||
formats.append({
|
|
||||||
'format_id': 'http',
|
|
||||||
'url': http_url,
|
|
||||||
})
|
|
||||||
break
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'formats': formats,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'upload_date': upload_date,
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -1,49 +0,0 @@
|
|||||||
# coding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
|
|
||||||
|
|
||||||
class WeiboIE(InfoExtractor):
|
|
||||||
"""
|
|
||||||
The videos in Weibo come from different sites, this IE just finds the link
|
|
||||||
to the external video and returns it.
|
|
||||||
"""
|
|
||||||
_VALID_URL = r'https?://video\.weibo\.com/v/weishipin/t_(?P<id>.+?)\.htm'
|
|
||||||
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '98322879',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': '魔声耳机最新广告“All Eyes On Us”',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
'add_ie': ['Sina'],
|
|
||||||
}
|
|
||||||
|
|
||||||
# Additional example videos from different sites
|
|
||||||
# Youku: http://video.weibo.com/v/weishipin/t_zQGDWQ8.htm
|
|
||||||
# 56.com: http://video.weibo.com/v/weishipin/t_zQ44HxN.htm
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
|
|
||||||
video_id = mobj.group('id')
|
|
||||||
info_url = 'http://video.weibo.com/?s=v&a=play_list&format=json&mix_video_id=t_%s' % video_id
|
|
||||||
info = self._download_json(info_url, video_id)
|
|
||||||
|
|
||||||
videos_urls = map(lambda v: v['play_page_url'], info['result']['data'])
|
|
||||||
# Prefer sina video since they have thumbnails
|
|
||||||
videos_urls = sorted(videos_urls, key=lambda u: 'video.sina.com' in u)
|
|
||||||
player_url = videos_urls[-1]
|
|
||||||
m_sina = re.match(r'https?://video\.sina\.com\.cn/v/b/(\d+)-\d+\.html',
|
|
||||||
player_url)
|
|
||||||
if m_sina is not None:
|
|
||||||
self.to_screen('Sina video detected')
|
|
||||||
sina_id = m_sina.group(1)
|
|
||||||
player_url = 'http://you.video.sina.com.cn/swf/quotePlayer.swf?vid=%s' % sina_id
|
|
||||||
return self.url_result(player_url)
|
|
||||||
@@ -62,7 +62,8 @@ class XFileShareIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'youtube-dl test video \'äBaW_jenozKc.mp4.mp4',
|
'title': 'youtube-dl test video \'äBaW_jenozKc.mp4.mp4',
|
||||||
'thumbnail': 're:http://.*\.jpg',
|
'thumbnail': 're:http://.*\.jpg',
|
||||||
}
|
},
|
||||||
|
'skip': 'Video removed',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://vidto.me/ku5glz52nqe1.html',
|
'url': 'http://vidto.me/ku5glz52nqe1.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
|||||||
@@ -66,6 +66,7 @@ class XuiteIE(InfoExtractor):
|
|||||||
'uploader_id': '242127761',
|
'uploader_id': '242127761',
|
||||||
'categories': ['電玩動漫'],
|
'categories': ['電玩動漫'],
|
||||||
},
|
},
|
||||||
|
'skip': 'Video removed',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://vlog.xuite.net/play/S1dDUjdyLTMyOTc3NjcuZmx2/%E5%AD%AB%E7%87%95%E5%A7%BF-%E7%9C%BC%E6%B7%9A%E6%88%90%E8%A9%A9',
|
'url': 'http://vlog.xuite.net/play/S1dDUjdyLTMyOTc3NjcuZmx2/%E5%AD%AB%E7%87%95%E5%A7%BF-%E7%9C%BC%E6%B7%9A%E6%88%90%E8%A9%A9',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
|||||||
@@ -343,7 +343,7 @@ class YahooIE(InfoExtractor):
|
|||||||
webpage, 'region', fatal=False, default='US')
|
webpage, 'region', fatal=False, default='US')
|
||||||
data = compat_urllib_parse_urlencode({
|
data = compat_urllib_parse_urlencode({
|
||||||
'protocol': 'http',
|
'protocol': 'http',
|
||||||
'region': region,
|
'region': region.upper(),
|
||||||
})
|
})
|
||||||
query_url = (
|
query_url = (
|
||||||
'https://video.media.yql.yahoo.com/v1/video/sapi/streams/'
|
'https://video.media.yql.yahoo.com/v1/video/sapi/streams/'
|
||||||
|
|||||||
@@ -2,7 +2,9 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
|
import itertools
|
||||||
import random
|
import random
|
||||||
|
import re
|
||||||
import string
|
import string
|
||||||
import time
|
import time
|
||||||
|
|
||||||
@@ -13,6 +15,7 @@ from ..compat import (
|
|||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
get_element_by_attribute,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -285,3 +288,52 @@ class YoukuIE(InfoExtractor):
|
|||||||
'title': title,
|
'title': title,
|
||||||
'entries': entries,
|
'entries': entries,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class YoukuShowIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?youku\.com/show_page/id_(?P<id>[0-9a-z]+)\.html'
|
||||||
|
IE_NAME = 'youku:show'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.youku.com/show_page/id_zc7c670be07ff11e48b3f.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'zc7c670be07ff11e48b3f',
|
||||||
|
'title': '花千骨 未删减版',
|
||||||
|
'description': 'md5:578d4f2145ae3f9128d9d4d863312910',
|
||||||
|
},
|
||||||
|
'playlist_count': 50,
|
||||||
|
}
|
||||||
|
|
||||||
|
_PAGE_SIZE = 40
|
||||||
|
|
||||||
|
def _find_videos_in_page(self, webpage):
|
||||||
|
videos = re.findall(
|
||||||
|
r'<li><a[^>]+href="(?P<url>https?://v\.youku\.com/[^"]+)"[^>]+title="(?P<title>[^"]+)"', webpage)
|
||||||
|
return [
|
||||||
|
self.url_result(video_url, YoukuIE.ie_key(), title)
|
||||||
|
for video_url, title in videos]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
show_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, show_id)
|
||||||
|
|
||||||
|
entries = self._find_videos_in_page(webpage)
|
||||||
|
|
||||||
|
playlist_title = self._html_search_regex(
|
||||||
|
r'<span[^>]+class="name">([^<]+)</span>', webpage, 'playlist title', fatal=False)
|
||||||
|
detail_div = get_element_by_attribute('class', 'detail', webpage) or ''
|
||||||
|
playlist_description = self._html_search_regex(
|
||||||
|
r'<span[^>]+style="display:none"[^>]*>([^<]+)</span>',
|
||||||
|
detail_div, 'playlist description', fatal=False)
|
||||||
|
|
||||||
|
for idx in itertools.count(1):
|
||||||
|
episodes_page = self._download_webpage(
|
||||||
|
'http://www.youku.com/show_episode/id_%s.html' % show_id,
|
||||||
|
show_id, query={'divid': 'reload_%d' % (idx * self._PAGE_SIZE + 1)},
|
||||||
|
note='Downloading episodes page %d' % idx)
|
||||||
|
new_entries = self._find_videos_in_page(episodes_page)
|
||||||
|
entries.extend(new_entries)
|
||||||
|
if len(new_entries) < self._PAGE_SIZE:
|
||||||
|
break
|
||||||
|
|
||||||
|
return self.playlist_result(entries, show_id, playlist_title, playlist_description)
|
||||||
|
|||||||
@@ -344,6 +344,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
|
'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
|
||||||
'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
|
'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
|
||||||
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
|
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
|
||||||
|
'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
|
||||||
|
'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
|
||||||
|
|
||||||
# Dash webm
|
# Dash webm
|
||||||
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||||
@@ -1986,7 +1988,7 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
|
|||||||
|
|
||||||
class YoutubeUserIE(YoutubeChannelIE):
|
class YoutubeUserIE(YoutubeChannelIE):
|
||||||
IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
|
IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
|
||||||
_VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
|
_VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:user/|c/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
|
||||||
_TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'
|
_TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'
|
||||||
IE_NAME = 'youtube:user'
|
IE_NAME = 'youtube:user'
|
||||||
|
|
||||||
@@ -1999,6 +2001,9 @@ class YoutubeUserIE(YoutubeChannelIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'ytuser:phihag',
|
'url': 'ytuser:phihag',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.youtube.com/c/gametrailers',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|||||||
@@ -668,7 +668,7 @@ def parseOpts(overrideArguments=None):
|
|||||||
action='store_true', dest='writeannotations', default=False,
|
action='store_true', dest='writeannotations', default=False,
|
||||||
help='Write video annotations to a .annotations.xml file')
|
help='Write video annotations to a .annotations.xml file')
|
||||||
filesystem.add_option(
|
filesystem.add_option(
|
||||||
'--load-info',
|
'--load-info-json', '--load-info',
|
||||||
dest='load_info_filename', metavar='FILE',
|
dest='load_info_filename', metavar='FILE',
|
||||||
help='JSON file containing the video information (created with the "--write-info-json" option)')
|
help='JSON file containing the video information (created with the "--write-info-json" option)')
|
||||||
filesystem.add_option(
|
filesystem.add_option(
|
||||||
|
|||||||
@@ -39,6 +39,7 @@ from .compat import (
|
|||||||
compat_chr,
|
compat_chr,
|
||||||
compat_etree_fromstring,
|
compat_etree_fromstring,
|
||||||
compat_html_entities,
|
compat_html_entities,
|
||||||
|
compat_html_entities_html5,
|
||||||
compat_http_client,
|
compat_http_client,
|
||||||
compat_kwargs,
|
compat_kwargs,
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
@@ -456,12 +457,19 @@ def orderedSet(iterable):
|
|||||||
return res
|
return res
|
||||||
|
|
||||||
|
|
||||||
def _htmlentity_transform(entity):
|
def _htmlentity_transform(entity_with_semicolon):
|
||||||
"""Transforms an HTML entity to a character."""
|
"""Transforms an HTML entity to a character."""
|
||||||
|
entity = entity_with_semicolon[:-1]
|
||||||
|
|
||||||
# Known non-numeric HTML entity
|
# Known non-numeric HTML entity
|
||||||
if entity in compat_html_entities.name2codepoint:
|
if entity in compat_html_entities.name2codepoint:
|
||||||
return compat_chr(compat_html_entities.name2codepoint[entity])
|
return compat_chr(compat_html_entities.name2codepoint[entity])
|
||||||
|
|
||||||
|
# TODO: HTML5 allows entities without a semicolon. For example,
|
||||||
|
# '&Eacuteric' should be decoded as 'Éric'.
|
||||||
|
if entity_with_semicolon in compat_html_entities_html5:
|
||||||
|
return compat_html_entities_html5[entity_with_semicolon]
|
||||||
|
|
||||||
mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
|
mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
numstr = mobj.group(1)
|
numstr = mobj.group(1)
|
||||||
@@ -486,7 +494,7 @@ def unescapeHTML(s):
|
|||||||
assert type(s) == compat_str
|
assert type(s) == compat_str
|
||||||
|
|
||||||
return re.sub(
|
return re.sub(
|
||||||
r'&([^;]+);', lambda m: _htmlentity_transform(m.group(1)), s)
|
r'&([^;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
|
||||||
|
|
||||||
|
|
||||||
def get_subprocess_encoding():
|
def get_subprocess_encoding():
|
||||||
@@ -2020,6 +2028,9 @@ def mimetype2ext(mt):
|
|||||||
|
|
||||||
ext = {
|
ext = {
|
||||||
'audio/mp4': 'm4a',
|
'audio/mp4': 'm4a',
|
||||||
|
# Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
|
||||||
|
# it's the most popular one
|
||||||
|
'audio/mpeg': 'mp3',
|
||||||
}.get(mt)
|
}.get(mt)
|
||||||
if ext is not None:
|
if ext is not None:
|
||||||
return ext
|
return ext
|
||||||
|
|||||||
@@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2016.06.03'
|
__version__ = '2016.06.11.2'
|
||||||
|
|||||||
Reference in New Issue
Block a user