Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
Syng
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Christoph Stahl
Syng
Commits
705169a1
Commit
705169a1
authored
Jul 9, 2024
by
Christoph Stahl
Browse files
Options
Downloads
Patches
Plain Diff
missing metadata for youtube
parent
b46d5175
No related branches found
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
syng/sources/youtube.py
+56
-17
56 additions, 17 deletions
syng/sources/youtube.py
with
56 additions
and
17 deletions
syng/sources/youtube.py
+
56
−
17
View file @
705169a1
...
@@ -16,6 +16,7 @@ from urllib.parse import urlencode
...
@@ -16,6 +16,7 @@ from urllib.parse import urlencode
from
typing
import
Any
,
Optional
,
Tuple
from
typing
import
Any
,
Optional
,
Tuple
from
yt_dlp
import
YoutubeDL
from
yt_dlp
import
YoutubeDL
from
yt_dlp.utils
import
DownloadError
from
..entry
import
Entry
from
..entry
import
Entry
from
..result
import
Result
from
..result
import
Result
...
@@ -27,29 +28,46 @@ class YouTube:
...
@@ -27,29 +28,46 @@ class YouTube:
A minimal compatibility layer for the YouTube object of pytube, implemented via yt-dlp
A minimal compatibility layer for the YouTube object of pytube, implemented via yt-dlp
"""
"""
__cache__
:
dict
[
__cache__
:
dict
[
str
,
Any
]
=
(
str
,
Any
{}
]
=
{}
# TODO: this may grow fast... but atm it fixed youtubes anti bot measures
)
# TODO: this may grow fast... but atm it fixed youtubes anti bot measures
def
__init__
(
self
,
url
:
Optional
[
str
]
=
None
):
def
__init__
(
self
,
url
:
Optional
[
str
]
=
None
):
if
url
is
not
None
:
if
url
is
not
None
:
if
url
in
YouTube
.
__cache__
:
if
url
in
YouTube
.
__cache__
:
self
.
_infos
=
YouTube
.
__cache__
[
url
]
self
.
_infos
=
YouTube
.
__cache__
[
url
]
else
:
else
:
self
.
_infos
=
YoutubeDL
({
"
quiet
"
:
True
}).
extract_info
(
url
,
download
=
False
)
try
:
self
.
_infos
=
YoutubeDL
({
"
quiet
"
:
True
}).
extract_info
(
url
,
download
=
False
)
except
DownloadError
:
self
.
length
=
300
self
.
_title
=
None
self
.
_author
=
None
self
.
watch_url
=
url
return
if
self
.
_infos
is
None
:
if
self
.
_infos
is
None
:
raise
RuntimeError
(
f
'
Extraction not possible for
"
{
url
}
"'
)
raise
RuntimeError
(
f
'
Extraction not possible for
"
{
url
}
"'
)
self
.
length
=
self
.
_infos
[
"
duration
"
]
self
.
length
=
self
.
_infos
[
"
duration
"
]
self
.
title
=
self
.
_infos
[
"
title
"
]
self
.
_
title
=
self
.
_infos
[
"
title
"
]
self
.
author
=
self
.
_infos
[
"
channel
"
]
self
.
_
author
=
self
.
_infos
[
"
channel
"
]
self
.
watch_url
=
url
self
.
watch_url
=
url
else
:
else
:
self
.
length
=
0
self
.
length
=
0
self
.
title
=
""
self
.
_
title
=
""
self
.
channel
=
""
self
.
channel
=
""
self
.
author
=
""
self
.
_
author
=
""
self
.
watch_url
=
""
self
.
watch_url
=
""
@property
def
title
(
self
)
->
str
:
return
""
if
self
.
_title
is
None
else
self
.
_title
@property
def
author
(
self
)
->
str
:
return
""
if
self
.
_author
is
None
else
self
.
_author
@classmethod
@classmethod
def
from_result
(
cls
,
search_result
:
dict
[
str
,
Any
])
->
YouTube
:
def
from_result
(
cls
,
search_result
:
dict
[
str
,
Any
])
->
YouTube
:
"""
"""
...
@@ -77,9 +95,7 @@ class Search:
...
@@ -77,9 +95,7 @@ class Search:
else
:
else
:
if
channel
[
0
]
==
"
/
"
:
if
channel
[
0
]
==
"
/
"
:
channel
=
channel
[
1
:]
channel
=
channel
[
1
:]
query_url
=
(
query_url
=
f
"
https://www.youtube.com/
{
channel
}
/search?
{
urlencode
(
{
'
query
'
:
query
,
'
sp
'
:
sp
}
)
}
"
f
"
https://www.youtube.com/
{
channel
}
/search?
{
urlencode
(
{
'
query
'
:
query
,
'
sp
'
:
sp
}
)
}
"
)
results
=
YoutubeDL
(
results
=
YoutubeDL
(
{
{
...
@@ -93,7 +109,9 @@ class Search:
...
@@ -93,7 +109,9 @@ class Search:
)
)
self
.
results
=
[]
self
.
results
=
[]
if
results
is
not
None
:
if
results
is
not
None
:
filtered_entries
=
filter
(
lambda
entry
:
"
short
"
not
in
entry
[
"
url
"
],
results
[
"
entries
"
])
filtered_entries
=
filter
(
lambda
entry
:
"
short
"
not
in
entry
[
"
url
"
],
results
[
"
entries
"
]
)
for
r
in
filtered_entries
:
for
r
in
filtered_entries
:
try
:
try
:
...
@@ -142,7 +160,8 @@ class YoutubeSource(Source):
...
@@ -142,7 +160,8 @@ class YoutubeSource(Source):
config
[
"
start_streaming
"
]
if
"
start_streaming
"
in
config
else
False
config
[
"
start_streaming
"
]
if
"
start_streaming
"
in
config
else
False
)
)
self
.
formatstring
=
(
self
.
formatstring
=
(
f
"
bestvideo[height<=
{
self
.
max_res
}
]+
"
f
"
bestaudio/best[height<=
{
self
.
max_res
}
]
"
f
"
bestvideo[height<=
{
self
.
max_res
}
]+
"
f
"
bestaudio/best[height<=
{
self
.
max_res
}
]
"
)
)
self
.
_yt_dlp
=
YoutubeDL
(
self
.
_yt_dlp
=
YoutubeDL
(
params
=
{
params
=
{
...
@@ -212,8 +231,8 @@ class YoutubeSource(Source):
...
@@ -212,8 +231,8 @@ class YoutubeSource(Source):
source
=
"
youtube
"
,
source
=
"
youtube
"
,
album
=
"
YouTube
"
,
album
=
"
YouTube
"
,
duration
=
length
,
duration
=
length
,
title
=
yt_song
.
title
,
title
=
yt_song
.
_
title
,
artist
=
yt_song
.
author
,
artist
=
yt_song
.
_
author
,
performer
=
performer
,
performer
=
performer
,
)
)
...
@@ -250,10 +269,15 @@ class YoutubeSource(Source):
...
@@ -250,10 +269,15 @@ class YoutubeSource(Source):
results
:
list
[
YouTube
]
=
[]
results
:
list
[
YouTube
]
=
[]
results_lists
:
list
[
list
[
YouTube
]]
=
await
asyncio
.
gather
(
results_lists
:
list
[
list
[
YouTube
]]
=
await
asyncio
.
gather
(
*
[
asyncio
.
to_thread
(
self
.
_channel_search
,
query
,
channel
)
for
channel
in
self
.
channels
],
*
[
asyncio
.
to_thread
(
self
.
_channel_search
,
query
,
channel
)
for
channel
in
self
.
channels
],
asyncio
.
to_thread
(
self
.
_yt_search
,
query
),
asyncio
.
to_thread
(
self
.
_yt_search
,
query
),
)
)
results
=
[
search_result
for
yt_result
in
results_lists
for
search_result
in
yt_result
]
results
=
[
search_result
for
yt_result
in
results_lists
for
search_result
in
yt_result
]
results
.
sort
(
key
=
partial
(
_contains_index
,
query
))
results
.
sort
(
key
=
partial
(
_contains_index
,
query
))
...
@@ -283,6 +307,21 @@ class YoutubeSource(Source):
...
@@ -283,6 +307,21 @@ class YoutubeSource(Source):
"""
"""
return
Search
(
f
"
{
query
}
karaoke
"
,
channel
).
results
return
Search
(
f
"
{
query
}
karaoke
"
,
channel
).
results
async
def
get_missing_metadata
(
self
,
entry
:
Entry
)
->
dict
[
str
,
Any
]:
"""
Video metadata should be read on the client to avoid banning
the server.
"""
if
entry
.
title
is
None
or
entry
.
artist
is
None
:
print
(
f
"
Looking up
{
entry
.
ident
}
"
)
youtube_video
:
YouTube
=
await
asyncio
.
to_thread
(
YouTube
,
entry
.
ident
)
return
{
"
duration
"
:
youtube_video
.
length
,
"
artist
"
:
youtube_video
.
author
,
"
title
"
:
youtube_video
.
title
,
}
return
{}
async
def
do_buffer
(
self
,
entry
:
Entry
)
->
Tuple
[
str
,
Optional
[
str
]]:
async
def
do_buffer
(
self
,
entry
:
Entry
)
->
Tuple
[
str
,
Optional
[
str
]]:
"""
"""
Download the video.
Download the video.
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment