Просмотр исходного кода

v2.0.0
- add get_playlist, get_search
- add docs
- bug fixes

Cheskel Twersky 4 лет назад
Родитель
Сommit
14ec0a26f7
10 измененных файлов с 246 добавлено и 25 удалено
  1. 3 0
      .gitignore
  2. 33 1
      README.md
  3. 20 0
      docs/Makefile
  4. 24 0
      docs/conf.py
  5. 23 0
      docs/index.rst
  6. 35 0
      docs/make.bat
  7. 2 0
      docs/requirements.txt
  8. 2 2
      list_youtube_channel/__init__.py
  9. 103 21
      list_youtube_channel/get_channel.py
  10. 1 1
      tests/test.py

+ 3 - 0
.gitignore

@@ -2,3 +2,6 @@
 build/
 dist/
 *.egg-info/
+.vscode
+docs/_build/
+docs/source/_build

+ 33 - 1
README.md

@@ -1,3 +1,13 @@
+# List Youtube channel
+This module was originally made to get a list of all the videos from a Youtube channel, but was then extended to include some other functionality.
+
+With this module you can:
+
+
+* Get all videos from a Youtube channel.
+* Get all videos from a playlist.
+* Search youtube.
+
 # Installation
 
 ```bash
@@ -5,11 +15,33 @@ pip3 install list_youtube_channel
 ```
 
 # Usage
+Here are a few short code examples.
+
+## Get all videos for a channel
+```python
+import list_youtube_channel
+
+videos = list_youtube_channel.get_channel("UCCezIgC97PvUuR4_gbFUs5g")
+
+for video in videos:
+    print(video['videoId'])
+```
+
+## Get all videos for a playlist
+```python
+import list_youtube_channel
+
+videos = list_youtube_channel.get_playlist("PL-osiE80TeTt2d9bfVyTiXJA-UTHn6WwU")
+
+for video in videos:
+    print(video['videoId'])
+```
 
+## Make a search
 ```python
 import list_youtube_channel
 
-videos = list_youtube_channel.get_channel("UC9-y-6csu5WGm29I7JiwpnA")
+videos = list_youtube_channel.get_search("python")
 
 for video in videos:
     print(video['videoId'])

+ 20 - 0
docs/Makefile

@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = .
+BUILDDIR      = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

+ 24 - 0
docs/conf.py

@@ -0,0 +1,24 @@
+
+import os
+import sys
+sys.path.insert(0, os.path.abspath('../'))
+
+
+
+project = 'List youtube channel'
+copyright = '2021, Cheskel Twersky'
+author = 'Cheskel Twersky'
+
+
+
+extensions = ['sphinx.ext.autodoc', 'sphinx.ext.coverage', 'sphinx.ext.napoleon']
+
+templates_path = ['_templates']
+
+
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+
+
+
+html_theme = "sphinx_rtd_theme"
+

+ 23 - 0
docs/index.rst

@@ -0,0 +1,23 @@
+
+Welcome to List youtube channel's documentation!
+================================================
+This module was originally made to get a list of all the videos from a Youtube channel, but was then extended to include some other functionality.
+
+With this module you can:
+
+
+* Get all videos from a Youtube channel.
+* Get all videos from a playlist.
+* Search youtube.
+
+
+Reference
+=========
+.. currentmodule:: list_youtube_channel
+
+.. autofunction:: get_channel
+
+.. autofunction:: get_search
+
+.. autofunction:: get_playlist
+

+ 35 - 0
docs/make.bat

@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=.
+set BUILDDIR=_build
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.http://sphinx-doc.org/
+	exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd

+ 2 - 0
docs/requirements.txt

@@ -0,0 +1,2 @@
+sphinx
+sphinx_rtd_theme

+ 2 - 2
list_youtube_channel/__init__.py

@@ -1,3 +1,3 @@
-from .get_channel import get_channel, SORT_BY_NEWEST, SORT_BY_OLDEST, SORT_BY_POPULAR
+from .get_channel import get_channel, get_search, get_playlist
 
-__version__ = '1.1.0'
+__version__ = '2.0.0'

+ 103 - 21
list_youtube_channel/get_channel.py

@@ -4,18 +4,108 @@ import json
 import time
 
 
-SORT_BY_NEWEST = 'newest'
-SORT_BY_OLDEST = 'oldest'
-SORT_BY_POPULAR = 'popular'
+def get_channel(channel_id: str, limit: int = None, sleep: int = 1, sort_by: str = 'newest') -> Generator:
+    """Get videos for a channel.
+
+    Parameters:
+        channel_id (``str``):
+            The channel id from the channel you want to get the videos for.
+        limit (``int``, *optional*):
+            Limit to number of videos you want to get.
+        sleep (``int``, *optional*):
+            Time to sleep between api calls to youtube in order to prevent getting blocked. Defaults to `1`.
+        sort_by (``str``, *optional*):
+            In what order to retrieve the videos. Pass one of the following values.
+            ``"newest"``: Get the new videos first.
+            ``"oldest"``: Get the old videos first.
+            ``"popular"``: Get the popular videos first.
+            Defaults to ``"newest"``.
+    """
 
-def get_channel(channel_id: str, limit: int = None, sleep: int = 1, sort_by: str = SORT_BY_NEWEST) -> Generator:
+    sort_by_map = {
+        'newest': 'dd',
+        'oldest': 'da',
+        'popular': 'p'
+    }
+    url = f'https://www.youtube.com/channel/{channel_id}/videos?view=0&sort={sort_by_map[sort_by]}&flow=grid'
+    api_endpoint = 'https://www.youtube.com/youtubei/v1/browse'
+    videos = get_videos(url, api_endpoint, 'gridVideoRenderer', limit, sleep)
+    for video in videos:
+        yield video
+
+
+def get_playlist(playlist_id: str, limit: int = None, sleep: int = 1):
+    """Get videos for a playlist.
+
+    Parameters:
+        playlist_id (``str``):
+            The playlist id from the playlist you want to get the videos for.
+        limit (``int``, *optional*):
+            Limit to number of videos you want to get.
+        sleep (``int``, *optional*):
+            Time to sleep between api calls to youtube in order to prevent getting blocked. Defaults to `1`.
+    """
+
+    url = f'https://www.youtube.com/playlist?list={playlist_id}'
+    api_endpoint = 'https://www.youtube.com/youtubei/v1/browse'
+    videos = get_videos(url, api_endpoint, 'playlistVideoRenderer', limit, sleep)
+    for video in videos:
+        yield video
+
+
+def get_search(query: str, limit: int = None, sleep: int = 1, sort_by: str = 'relevance', results_type: str = 'video') -> Generator:
+    """Search youtube and get videos.
+
+    Parameters:
+        query (``str``):
+            The term you want to search for.
+        limit (``int``, *optional*):
+            Limit to number of videos you want to get.
+        sleep (``int``, *optional*):
+            Time to sleep between api calls to youtube in order to prevent getting blocked. Defaults to `1`.
+        sort_by (``str``, *optional*):
+            In what order to retrieve the videos. Pass one of the following values.
+            ``"relevance"``: Get the videos in order of relevance.
+            ``"upload_date"``: Get the new videos first.
+            ``"view_count"``: Get the popular videos first.
+            ``"rating"``: Get videos with more likes first.
+            Defaults to ``"relevance"``.
+        results_type (``str``, *optional*):
+            What type you want to search for. Pass one of the following values.
+            ``"video"``|``"channel"``|``"playlist"``|``"movie"``
+            Defaults to `"video"`.
+    """
+
+    sort_by_map = {
+        'relevance': 'A',
+        'upload_date': 'I',
+        'view_count': 'M',
+        'rating': 'E'
+    }
+
+    results_type_map = {
+        'video': ['B', 'videoRenderer'],
+        'channel': ['C', 'channelRenderer'],
+        'playlist': ['D', 'playlistRenderer'],
+        'movie': ['E', 'videoRenderer']
+    }
+
+    param_string = f'CA{sort_by_map[sort_by]}SAhA{results_type_map[results_type][0]}'
+    url = f'https://www.youtube.com/results?search_query={query}&sp={param_string}'
+    api_endpoint = 'https://www.youtube.com/youtubei/v1/search'
+    videos = get_videos(url, api_endpoint, results_type_map[results_type][1], limit, sleep)
+    for video in videos:
+        yield video
+
+
+def get_videos(url: str, api_endpoint: str, selector: str, limit: int, sleep: int) -> Generator:
     session = requests.Session()
     session.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.101 Safari/537.36'
     is_first = True
     count = 0
     while True:
         if is_first:
-            html = get_initial_data(session, channel_id, sort_by)
+            html = get_initial_data(session, url)
             client = json.loads(get_json_from_html(
                 html, 'INNERTUBE_CONTEXT', 2, '"}},') + '"}}')['client']
             api_key = get_json_from_html(html, 'innertubeApiKey', 3)
@@ -26,10 +116,9 @@ def get_channel(channel_id: str, limit: int = None, sleep: int = 1, sort_by: str
             next_data = get_next_data(data)
             is_first = False
         else:
-            data = get_ajax_data(session, api_key, next_data, client)
+            data = get_ajax_data(session, api_endpoint, api_key, next_data, client)
             next_data = get_next_data(data)
-
-        for result in get_videos(data):
+        for result in get_videos_items(data, selector):
             count += 1
             yield result
             if count == limit:
@@ -41,13 +130,7 @@ def get_channel(channel_id: str, limit: int = None, sleep: int = 1, sort_by: str
         time.sleep(sleep)
 
 
-def get_initial_data(session: requests.Session, channel_id: str, sort_by: str) -> str:
-    sort_by_map = {
-        SORT_BY_NEWEST: 'dd',
-        SORT_BY_OLDEST: 'da',
-        SORT_BY_POPULAR: 'p'
-    }
-    url = f'https://www.youtube.com/channel/{channel_id}/videos?view=0&sort={sort_by_map[sort_by]}&flow=grid'
+def get_initial_data(session: requests.Session, url: str) -> str:
     response = session.get(url)
     if 'uxe=' in response.request.url:
         session.cookies.set('CONSENT', 'YES+cb', domain='.youtube.com')
@@ -57,7 +140,7 @@ def get_initial_data(session: requests.Session, channel_id: str, sort_by: str) -
     return html
 
 
-def get_ajax_data(session: requests.Session, api_key: str, next_data: dict, client: dict) -> dict:
+def get_ajax_data(session: requests.Session, api_endpoint: str, api_key: str, next_data: dict, client: dict) -> dict:
     data = {
         "context": {
             'clickTracking': next_data['click_params'],
@@ -65,8 +148,7 @@ def get_ajax_data(session: requests.Session, api_key: str, next_data: dict, clie
         },
         'continuation': next_data['token']
     }
-    response = session.post(
-        'https://www.youtube.com/youtubei/v1/browse', params={'key': api_key}, json=data)
+    response = session.post(api_endpoint, params={'key': api_key}, json=data)
     return response.json()
 
 
@@ -89,7 +171,7 @@ def get_next_data(data: dict) -> dict:
 def search_dict(partial: dict, search_key: str) -> Generator:
     stack = [partial]
     while stack:
-        current_item = stack.pop()
+        current_item = stack.pop(0)
         if isinstance(current_item, dict):
             for key, value in current_item.items():
                 if key == search_key:
@@ -101,5 +183,5 @@ def search_dict(partial: dict, search_key: str) -> Generator:
                 stack.append(value)
 
 
-def get_videos(data: dict) -> Generator:
-    return search_dict(data, 'gridVideoRenderer')
+def get_videos_items(data: dict, selector: str) -> Generator:
+    return search_dict(data, selector)

+ 1 - 1
tests/test.py

@@ -12,7 +12,7 @@ sys.path.insert(0, '/'.join(os.path.dirname(os.path.realpath(__file__)).split(se
 import list_youtube_channel
 
 
-videos = list_youtube_channel.get_channel("UC9-y-6csu5WGm29I7JiwpnA", sort_by=list_youtube_channel.SORT_BY_POPULAR)
+videos = list_youtube_channel.get_channel("UC9-y-6csu5WGm29I7JiwpnA", sort_by='popular')
 
 for video in videos:
     print(video['videoId'])