浏览代码

Merge pull request #64 from Klesh-/master

Fixed get_playlist being limited to 100 videos
Cheskel Twersky 1 年之前
父节点
当前提交
8fd5b4ff50
共有 2 个文件被更改，包括 7 次插入和 5 次删除
  1. .gitignore (+1 -0)
  2. scrapetube/scrapetube.py (+6 -5)

+ 1 - 0
.gitignore

@@ -7,3 +7,4 @@ docs/_build/
 docs/source/_build
 .tox
 .env
+.DS_store

+ 6 - 5
scrapetube/scrapetube.py

@@ -76,7 +76,7 @@ def get_channel(
         content_type=content_type,
     )
     api_endpoint = "https://www.youtube.com/youtubei/v1/browse"
-    videos = get_videos(url, api_endpoint, type_property_map[content_type], limit, sleep, proxies, sort_by)
+    videos = get_videos(url, api_endpoint, "contents", type_property_map[content_type], limit, sleep, proxies, sort_by)
     for video in videos:
         yield video
 
@@ -105,7 +105,7 @@ def get_playlist(
 
     url = f"https://www.youtube.com/playlist?list={playlist_id}"
     api_endpoint = "https://www.youtube.com/youtubei/v1/browse"
-    videos = get_videos(url, api_endpoint, "playlistVideoRenderer", limit, sleep, proxies)
+    videos = get_videos(url, api_endpoint, "playlistVideoListRenderer", "playlistVideoRenderer", limit, sleep, proxies)
     for video in videos:
         yield video
 
@@ -168,7 +168,7 @@ def get_search(
     url = f"https://www.youtube.com/results?search_query={query}&sp={param_string}"
     api_endpoint = "https://www.youtube.com/youtubei/v1/search"
     videos = get_videos(
-        url, api_endpoint, results_type_map[results_type][1], limit, sleep, proxies
+        url, api_endpoint, "contents", results_type_map[results_type][1], limit, sleep, proxies
     )
     for video in videos:
         yield video
@@ -202,7 +202,7 @@ def get_video(
 
 
 def get_videos(
-    url: str, api_endpoint: str, selector: str, limit: int, sleep: float, proxies: dict = None, sort_by: str = None
+    url: str, api_endpoint: str, selector_list: str, selector_item: str, limit: int, sleep: float, proxies: dict = None, sort_by: str = None
 ) -> Generator[dict, None, None]:
     session = get_session(proxies)
     is_first = True
@@ -220,6 +220,7 @@ def get_videos(
             data = json.loads(
                 get_json_from_html(html, "var ytInitialData = ", 0, "};") + "}"
             )
+            data = next(search_dict(data, selector_list), None)
             next_data = get_next_data(data, sort_by)
             is_first = False
             if sort_by and sort_by != "newest": 
@@ -227,7 +228,7 @@ def get_videos(
         else:
             data = get_ajax_data(session, api_endpoint, api_key, next_data, client)
             next_data = get_next_data(data)
-        for result in get_videos_items(data, selector):
+        for result in get_videos_items(data, selector_item):
             try:
                 count += 1
                 yield result