mirror of
https://github.com/Rafostar/clapper.git
synced 2025-08-29 23:32:04 +02:00
YT: decipher videos with signatures
Increase the number of playable YouTube videos by deciphering the ones that require it. Many thanks to the "node-ytdl-core" developers for the JS regular expressions needed to parse the YouTube player.
This commit is contained in:
151
src/assets/node-ytdl-core/sig.js
Normal file
151
src/assets/node-ytdl-core/sig.js
Normal file
@@ -0,0 +1,151 @@
|
||||
/* Copyright (C) 2012-present by fent
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* Building blocks (regular expression sources) used to locate the
 * signature transformation code inside the YouTube player script. */

/* A JavaScript identifier as accepted by these patterns */
const jsVarStr = '[a-zA-Z_\\$][a-zA-Z_0-9]*';
/* Quoted string literals. The inner group consumes backslash escapes
 * and is intentionally NON-capturing, so that it does not add extra
 * capture groups to every expression assembled from these pieces. */
const jsSingleQuoteStr = `'[^'\\\\]*(?:\\\\[\\s\\S][^'\\\\]*)*'`;
const jsDoubleQuoteStr = `"[^"\\\\]*(?:\\\\[\\s\\S][^"\\\\]*)*"`;
const jsQuoteStr = `(?:${jsSingleQuoteStr}|${jsDoubleQuoteStr})`;
/* Object key: either a bare identifier or a quoted string */
const jsKeyStr = `(?:${jsVarStr}|${jsQuoteStr})`;
/* Property access: either `.name` or `['name']` / `["name"]` */
const jsPropStr = `(?:\\.${jsVarStr}|\\[${jsQuoteStr}\\])`;
const jsEmptyStr = `(?:''|"")`;

/* Bodies of the four helper functions, each following its object key */
const reverseStr = ':function\\(a\\)\\{' +
    '(?:return )?a\\.reverse\\(\\)' +
'\\}';
const sliceStr = ':function\\(a,b\\)\\{' +
    'return a\\.slice\\(b\\)' +
'\\}';
const spliceStr = ':function\\(a,b\\)\\{' +
    'a\\.splice\\(0,b\\)' +
'\\}';
const swapStr = ':function\\(a,b\\)\\{' +
    'var c=a\\[0\\];a\\[0\\]=a\\[b(?:%a\\.length)?\\];a\\[b(?:%a\\.length)?\\]=c(?:;return a)?' +
'\\}';

/* Matches the object holding the helper functions.
 * Group 1: object name, group 2: object body. */
const actionsObjRegexp = new RegExp(
    `var (${jsVarStr})=\\{((?:(?:${
        jsKeyStr}${reverseStr}|${
        jsKeyStr}${sliceStr}|${
        jsKeyStr}${spliceStr}|${
        jsKeyStr}${swapStr
    }),?\\r?\\n?)+)\\};`
);

/* Matches the function that splits the signature, runs a sequence of
 * helper calls on it and joins it back.
 * Group 1: the sequence of helper calls. */
const actionsFuncRegexp = new RegExp(
    `function(?: ${jsVarStr})?\\(a\\)\\{` +
    `a=a\\.split\\(${jsEmptyStr}\\);\\s*` +
    `((?:(?:a=)?${jsVarStr}${jsPropStr}\\(a,\\d+\\);)+)` +
    `return a\\.join\\(${jsEmptyStr}\\)` +
    `\\}`
);

/* Each of these finds the key under which one helper is stored inside
 * the object body. Group 1: the key (still quoted if it was quoted). */
const reverseRegexp = new RegExp(`(?:^|,)(${jsKeyStr})${reverseStr}`, 'm');
const sliceRegexp = new RegExp(`(?:^|,)(${jsKeyStr})${sliceStr}`, 'm');
const spliceRegexp = new RegExp(`(?:^|,)(${jsKeyStr})${spliceStr}`, 'm');
const swapRegexp = new RegExp(`(?:^|,)(${jsKeyStr})${swapStr}`, 'm');
|
||||
|
||||
/* Swaps the first element of `arr` with the element at `position`,
 * in place, and returns the same array.
 *
 * The position wraps around the array length for BOTH the read and
 * the write, so an out-of-range position still performs a real swap
 * and never grows the array. An empty array is returned untouched. */
const swapHeadAndPosition = (arr, position) => {
    if(!arr.length)
        return arr;

    const target = position % arr.length;
    const first = arr[0];

    arr[0] = arr[target];
    arr[target] = first;

    return arr;
};
|
||||
|
||||
/* Applies a comma separated list of action tokens to a signature
 * and returns the transformed signature string.
 *
 * Token format is a one letter action optionally followed by a number:
 *   r  - reverse the characters
 *   wN - swap the first character with the one at position N
 *   sN - keep the characters from position N onwards
 *   pN - remove the first N characters
 * Tokens starting with any other character are ignored. */
function decipher(sig, tokens) {
    let chars = sig.split('');

    for(const token of tokens.split(',')) {
        const action = token[0];
        /* Double bitwise NOT turns the numeric suffix into an integer
         * (zero when the suffix is missing or not a number) */
        const amount = ~~token.slice(1);

        if(action === 'r')
            chars = chars.reverse();
        else if(action === 'w')
            chars = swapHeadAndPosition(chars, amount);
        else if(action === 's')
            chars = chars.slice(amount);
        else if(action === 'p')
            chars.splice(0, amount);
    }

    return chars.join('');
}
|
||||
|
||||
/* Parses the player script `body` and builds the list of decipher
 * action tokens out of it.
 *
 * Returns a comma separated token string in the format consumed by
 * decipher() (possibly empty when no helper call was recognized), or
 * null when either the helpers object or the decipher function could
 * not be found in `body`. */
function extractActions(body) {
    const objMatch = actionsObjRegexp.exec(body);
    const funcMatch = actionsFuncRegexp.exec(body);

    if(!(objMatch && funcMatch))
        return null;

    /* Dollar signs are escaped as the texts below end up inside
     * (or are matched by) a dynamically built regular expression */
    const escapeDollars = (text) => text.replace(/\$/g, '\\$');

    const objName = escapeDollars(objMatch[1]);
    const objBody = escapeDollars(objMatch[2]);
    const funcBody = escapeDollars(funcMatch[1]);

    /* Finds the key of a single helper in the object body and strips
     * dollar signs and surrounding quotes from it. Gives null when the
     * helper is not present in the object. */
    const findKey = (regexp) => {
        const found = regexp.exec(objBody);

        if(!found)
            return found;

        return escapeDollars(found[1]).replace(/\$|^'|^"|'$|"$/g, '');
    };

    const reverseKey = findKey(reverseRegexp);
    const sliceKey = findKey(sliceRegexp);
    const spliceKey = findKey(spliceRegexp);
    const swapKey = findKey(swapRegexp);

    const keys = `(${[reverseKey, sliceKey, spliceKey, swapKey].join('|')})`;
    const callPattern =
        `(?:a=)?${objName}(?:\\.${keys}|\\['${keys}'\\]|\\["${keys}"\\])` +
        `\\(a,(\\d+)\\)`;
    const tokenizeRegexp = new RegExp(callPattern, 'g');
    const tokens = [];

    for(
        let call = tokenizeRegexp.exec(funcBody);
        call !== null;
        call = tokenizeRegexp.exec(funcBody)
    ) {
        /* Only one of the three key groups is set, depending
         * on the property access style used by the call */
        const key = call[1] || call[2] || call[3];
        const pos = call[4];

        if(key === swapKey)
            tokens.push(`w${pos}`);
        else if(key === reverseKey)
            tokens.push('r');
        else if(key === sliceKey)
            tokens.push(`s${pos}`);
        else if(key === spliceKey)
            tokens.push(`p${pos}`);
    }

    return tokens.join(',');
}
|
269
src/youtube.js
269
src/youtube.js
@@ -1,6 +1,7 @@
|
||||
const { GLib, GObject, Gst, Soup } = imports.gi;
|
||||
const ByteArray = imports.byteArray;
|
||||
const Debug = imports.src.debug;
|
||||
const YTDL = imports.src.assets['node-ytdl-core'];
|
||||
|
||||
const { debug } = Debug;
|
||||
|
||||
@@ -21,7 +22,6 @@ var YouTubeClient = GObject.registerClass({
|
||||
/* videoID of current active download */
|
||||
this.downloadingVideoId = null;
|
||||
|
||||
this.downloadAborted = false;
|
||||
this.lastInfo = null;
|
||||
}
|
||||
|
||||
@@ -47,9 +47,9 @@ var YouTubeClient = GObject.registerClass({
|
||||
debug(`obtaining YouTube video info: ${videoId}`);
|
||||
this.downloadingVideoId = videoId;
|
||||
|
||||
const info = await this._getInfoPromise(videoId).catch(debug);
|
||||
const [info, isAborted] = await this._getInfoPromise(videoId).catch(debug);
|
||||
if(!info) {
|
||||
if(this.downloadAborted)
|
||||
if(isAborted)
|
||||
return reject(new Error('download aborted'));
|
||||
|
||||
debug(`failed, remaining tries: ${tries}`);
|
||||
@@ -62,15 +62,71 @@ var YouTubeClient = GObject.registerClass({
|
||||
)
|
||||
? 'video is not playable'
|
||||
: (!info.streamingData)
|
||||
? 'video response data is missing URIs'
|
||||
? 'video response data is missing streaming data'
|
||||
: null;
|
||||
|
||||
if(invalidInfoMsg) {
|
||||
this.lastInfo = null;
|
||||
this.emit('info-resolved', false);
|
||||
this.downloadingVideoId = null;
|
||||
|
||||
return reject(new Error(invalidInfoMsg));
|
||||
debug(new Error(invalidInfoMsg));
|
||||
break;
|
||||
}
|
||||
|
||||
/* Make sure we have all formats arrays,
|
||||
* so we will not have to keep checking */
|
||||
if(!info.streamingData.formats)
|
||||
info.streamingData.formats = [];
|
||||
if(!info.streamingData.adaptiveFormats)
|
||||
info.streamingData.adaptiveFormats = [];
|
||||
|
||||
const isCipher = this._getIsCipher(info.streamingData);
|
||||
if(isCipher) {
|
||||
debug('video requires deciphering');
|
||||
|
||||
const embedUri = `https://www.youtube.com/embed/${videoId}`;
|
||||
const [body, isAbortedBody] =
|
||||
await this._downloadDataPromise(embedUri).catch(debug);
|
||||
|
||||
if(isAbortedBody)
|
||||
break;
|
||||
|
||||
/* We need matching info, so start from beginning */
|
||||
if(!body)
|
||||
continue;
|
||||
|
||||
const ytPath = body.match(/(?<=jsUrl\":\").*?(?=\")/gs)[0];
|
||||
if(!ytPath) {
|
||||
debug(new Error('could not find YouTube player URI'));
|
||||
break;
|
||||
}
|
||||
const ytUri = `https://www.youtube.com${ytPath}`;
|
||||
debug(`found player URI: ${ytUri}`);
|
||||
|
||||
/* TODO: cache */
|
||||
let actions;
|
||||
|
||||
if(!actions) {
|
||||
const [pBody, isAbortedPlayer] =
|
||||
await this._downloadDataPromise(ytUri).catch(debug);
|
||||
if(!pBody || isAbortedPlayer) {
|
||||
debug(new Error('could not download player body'));
|
||||
break;
|
||||
}
|
||||
actions = YTDL.sig.extractActions(pBody);
|
||||
}
|
||||
|
||||
if(!actions || !actions.length) {
|
||||
debug(new Error('could not extract decipher actions'));
|
||||
break;
|
||||
}
|
||||
debug('successfully obtained decipher actions');
|
||||
const isDeciphered = this._decipherStreamingData(
|
||||
info.streamingData, actions
|
||||
);
|
||||
if(!isDeciphered) {
|
||||
debug('streaming data could not be deciphered');
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
this.lastInfo = info;
|
||||
@@ -80,7 +136,7 @@ var YouTubeClient = GObject.registerClass({
|
||||
return resolve(info);
|
||||
}
|
||||
|
||||
/* Do not clear video info here, as we still have
|
||||
/* Do not clear video info here, as we might still have
|
||||
* valid info from last video that can be reused */
|
||||
this.emit('info-resolved', false);
|
||||
this.downloadingVideoId = null;
|
||||
@@ -91,10 +147,7 @@ var YouTubeClient = GObject.registerClass({
|
||||
|
||||
getBestCombinedUri(info)
|
||||
{
|
||||
if(
|
||||
!info.streamingData.formats
|
||||
|| !info.streamingData.formats.length
|
||||
)
|
||||
if(!info.streamingData.formats.length)
|
||||
return null;
|
||||
|
||||
const combinedStream = info.streamingData.formats[
|
||||
@@ -123,6 +176,41 @@ var YouTubeClient = GObject.registerClass({
|
||||
return true;
|
||||
}
|
||||
|
||||
_downloadDataPromise(url)
|
||||
{
|
||||
return new Promise((resolve, reject) => {
|
||||
const message = Soup.Message.new('GET', url);
|
||||
let data = '';
|
||||
|
||||
const chunkSignal = message.connect('got-chunk', (msg, chunk) => {
|
||||
debug(`got chunk of data, length: ${chunk.length}`);
|
||||
|
||||
const chunkData = chunk.get_data();
|
||||
data += (chunkData instanceof Uint8Array)
|
||||
? ByteArray.toString(chunkData)
|
||||
: chunkData;
|
||||
});
|
||||
|
||||
this.queue_message(message, (session, msg) => {
|
||||
msg.disconnect(chunkSignal);
|
||||
|
||||
debug('got message response');
|
||||
const statusCode = msg.status_code;
|
||||
|
||||
/* Internal Soup codes mean download aborted
|
||||
* or some other error that cannot be handled
|
||||
* and we do not want to retry in such case */
|
||||
if(statusCode < 10)
|
||||
return resolve([null, true]);
|
||||
|
||||
if(statusCode !== 200)
|
||||
return reject(new Error(`response code: ${statusCode}`));
|
||||
|
||||
resolve([data, false]);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
_getCurrentDownloadPromise()
|
||||
{
|
||||
debug('resolving after current download finishes');
|
||||
@@ -145,36 +233,20 @@ var YouTubeClient = GObject.registerClass({
|
||||
_getInfoPromise(videoId)
|
||||
{
|
||||
return new Promise((resolve, reject) => {
|
||||
const url = `https://www.youtube.com/get_video_info?video_id=${videoId}&el=embedded`;
|
||||
const message = Soup.Message.new('GET', url);
|
||||
let data = '';
|
||||
const query = [
|
||||
`video_id=${videoId}`,
|
||||
`el=embedded`,
|
||||
`eurl=https://youtube.googleapis.com/v/${videoId}`,
|
||||
].join('&');
|
||||
const url = `https://www.youtube.com/get_video_info?${query}`;
|
||||
|
||||
const chunkSignal = message.connect('got-chunk', (msg, chunk) => {
|
||||
debug(`got chunk of data, length: ${chunk.length}`);
|
||||
|
||||
const chunkData = chunk.get_data();
|
||||
data += (chunkData instanceof Uint8Array)
|
||||
? ByteArray.toString(chunkData)
|
||||
: chunkData;
|
||||
});
|
||||
|
||||
this.queue_message(message, (session, msg) => {
|
||||
msg.disconnect(chunkSignal);
|
||||
|
||||
debug('got message response');
|
||||
|
||||
const statusCode = msg.status_code;
|
||||
|
||||
/* Internal Soup codes mean download abort
|
||||
* or some other error that cannot be handled */
|
||||
this.downloadAborted = (statusCode < 10);
|
||||
|
||||
if(statusCode !== 200)
|
||||
return reject(new Error(`response code: ${statusCode}`));
|
||||
this._downloadDataPromise(url).then(res => {
|
||||
if(res[1])
|
||||
return resolve([null, true]);
|
||||
|
||||
debug('parsing video info JSON');
|
||||
|
||||
const gstUri = Gst.Uri.from_string('?' + data);
|
||||
const gstUri = Gst.Uri.from_string('?' + res[0]);
|
||||
|
||||
if(!gstUri)
|
||||
return reject(new Error('could not convert query to URI'));
|
||||
@@ -193,11 +265,126 @@ var YouTubeClient = GObject.registerClass({
|
||||
return reject(new Error('could not parse video info JSON'));
|
||||
|
||||
debug('successfully parsed video info JSON');
|
||||
|
||||
resolve(info);
|
||||
});
|
||||
resolve([info, false]);
|
||||
})
|
||||
.catch(err => reject(err));
|
||||
});
|
||||
}
|
||||
|
||||
_getIsCipher(data)
|
||||
{
|
||||
/* Check only first best combined,
|
||||
* AFAIK there are no videos without it */
|
||||
if(data.formats[0].url)
|
||||
return false;
|
||||
|
||||
if(
|
||||
data.formats[0].signatureCipher
|
||||
|| data.formats[0].cipher
|
||||
)
|
||||
return true;
|
||||
|
||||
/* FIXME: no URLs and no cipher, what now? */
|
||||
debug(new Error('no url or cipher in streams'));
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
_decipherStreamingData(data, actions)
|
||||
{
|
||||
debug('checking cipher query keys');
|
||||
|
||||
/* Cipher query keys should be the same for all
|
||||
* streams, so parse any stream to get their names */
|
||||
const anyStream = data.formats[0] || data.adaptiveFormats[0];
|
||||
const sigQuery = anyStream.signatureCipher || anyStream.cipher;
|
||||
|
||||
if(!sigQuery)
|
||||
return false;
|
||||
|
||||
const gstUri = Gst.Uri.from_string('?' + sigQuery);
|
||||
const queryKeys = gstUri.get_query_keys();
|
||||
|
||||
const cipherKey = queryKeys.find(key => {
|
||||
const value = gstUri.get_query_value(key);
|
||||
/* A long value that is not URI */
|
||||
return (
|
||||
value.length > 32
|
||||
&& !Gst.Uri.is_valid(value)
|
||||
);
|
||||
});
|
||||
if(!cipherKey) {
|
||||
debug('no stream cipher key name');
|
||||
return false;
|
||||
}
|
||||
|
||||
const sigKey = queryKeys.find(key => {
|
||||
const value = gstUri.get_query_value(key);
|
||||
/* A short value that is not URI */
|
||||
return (
|
||||
value.length < 32
|
||||
&& !Gst.Uri.is_valid(value)
|
||||
);
|
||||
});
|
||||
if(!sigKey) {
|
||||
debug('no stream signature key name');
|
||||
return false;
|
||||
}
|
||||
|
||||
const urlKey = queryKeys.find(key =>
|
||||
Gst.Uri.is_valid(gstUri.get_query_value(key))
|
||||
);
|
||||
if(!urlKey) {
|
||||
debug('no stream URL key name');
|
||||
return false;
|
||||
}
|
||||
|
||||
const cipherKeys = {
|
||||
url: urlKey,
|
||||
sig: sigKey,
|
||||
cipher: cipherKey,
|
||||
};
|
||||
|
||||
debug('deciphering streams');
|
||||
|
||||
for(let format of [data.formats, data.adaptiveFormats]) {
|
||||
for(let stream of format) {
|
||||
const formatUrl = this._getDecipheredUrl(
|
||||
stream, actions, cipherKeys
|
||||
);
|
||||
if(!formatUrl) {
|
||||
debug('undecipherable stream');
|
||||
debug(stream);
|
||||
|
||||
return false;
|
||||
}
|
||||
stream.url = formatUrl;
|
||||
}
|
||||
}
|
||||
debug('all streams deciphered');
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
_getDecipheredUrl(stream, actions, queryKeys)
|
||||
{
|
||||
debug(`deciphering stream id: ${stream.itag}`);
|
||||
|
||||
const sigQuery = stream.signatureCipher || stream.cipher;
|
||||
if(!sigQuery) return null;
|
||||
|
||||
const gstUri = Gst.Uri.from_string('?' + sigQuery);
|
||||
|
||||
const url = gstUri.get_query_value(queryKeys.url);
|
||||
const cipher = gstUri.get_query_value(queryKeys.cipher);
|
||||
const sig = gstUri.get_query_value(queryKeys.sig);
|
||||
|
||||
const key = YTDL.sig.decipher(cipher, actions);
|
||||
|
||||
debug('stream deciphered');
|
||||
|
||||
return `${url}&${sig}=${key}`;
|
||||
}
|
||||
});
|
||||
|
||||
function checkYouTubeUri(uri)
|
||||
|
Reference in New Issue
Block a user