YT: decipher videos with signatures

Increase the number of playable YouTube videos by deciphering the ones that require it.

Many thanks to the "node-ytdl-core" devs for the JS regular expressions needed to parse the YouTube player.
This commit is contained in:
Rafostar
2021-03-14 15:51:19 +01:00
parent 46d24536c0
commit ec18ca989a
2 changed files with 379 additions and 41 deletions

View File

@@ -0,0 +1,151 @@
/* Copyright (C) 2012-present by fent
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/* Building blocks for the regular expressions that locate the signature
 * transform code inside a YouTube player script. Every "*Str" constant is
 * regular expression SOURCE text, meant to be embedded in a larger pattern. */

/* A JS identifier */
const jsVarStr = '[a-zA-Z_\\$][a-zA-Z_0-9]*';
/* Quoted string literals with backslash escapes. The escape group is
 * non-capturing, so embedding these fragments does not add capture groups
 * to the expressions built from them. */
const jsSingleQuoteStr = `'[^'\\\\]*(?:\\\\[\\s\\S][^'\\\\]*)*'`;
const jsDoubleQuoteStr = `"[^"\\\\]*(?:\\\\[\\s\\S][^"\\\\]*)*"`;
const jsQuoteStr = `(?:${jsSingleQuoteStr}|${jsDoubleQuoteStr})`;
/* An object literal key: bare identifier or quoted string */
const jsKeyStr = `(?:${jsVarStr}|${jsQuoteStr})`;
/* A property access: ".name" or "['name']" / '["name"]' */
const jsPropStr = `(?:\\.${jsVarStr}|\\[${jsQuoteStr}\\])`;
const jsEmptyStr = `(?:''|"")`;

/* Bodies of the four transform functions (everything after the key) */
const reverseStr = ':function\\(a\\)\\{' +
    '(?:return )?a\\.reverse\\(\\)' +
'\\}';
const sliceStr = ':function\\(a,b\\)\\{' +
    'return a\\.slice\\(b\\)' +
'\\}';
const spliceStr = ':function\\(a,b\\)\\{' +
    'a\\.splice\\(0,b\\)' +
'\\}';
const swapStr = ':function\\(a,b\\)\\{' +
    'var c=a\\[0\\];a\\[0\\]=a\\[b(?:%a\\.length)?\\];a\\[b(?:%a\\.length)?\\]=c(?:;return a)?' +
'\\}';

/* Matches the object holding the transform functions.
 * Group 1: object name, group 2: object body. */
const actionsObjRegexp = new RegExp(
    `var (${jsVarStr})=\\{((?:(?:${
        [reverseStr, sliceStr, spliceStr, swapStr]
            .map(actionStr => `${jsKeyStr}${actionStr}`)
            .join('|')
    }),?\\r?\\n?)+)\\};`
);

/* Matches the function that splits the signature, calls the transforms
 * and joins the result. Group 1: the sequence of transform calls. */
const actionsFuncRegexp = new RegExp(
    `function(?: ${jsVarStr})?\\(a\\)\\{` +
    `a=a\\.split\\(${jsEmptyStr}\\);\\s*` +
    `((?:(?:a=)?${jsVarStr}${jsPropStr}\\(a,\\d+\\);)+)` +
    `return a\\.join\\(${jsEmptyStr}\\)` +
    `\\}`
);

/* Each matches one transform inside the object body. Group 1: its key. */
const reverseRegexp = new RegExp(`(?:^|,)(${jsKeyStr})${reverseStr}`, 'm');
const sliceRegexp = new RegExp(`(?:^|,)(${jsKeyStr})${sliceStr}`, 'm');
const spliceRegexp = new RegExp(`(?:^|,)(${jsKeyStr})${spliceStr}`, 'm');
const swapRegexp = new RegExp(`(?:^|,)(${jsKeyStr})${swapStr}`, 'm');
/**
 * Swaps the first element of an array with the element at `position`,
 * wrapping `position` around the array length. The array is modified
 * in place.
 *
 * Both the read and the write use the wrapped index, so a `position`
 * past the end never duplicates an element or grows the array.
 *
 * @param {Array} arr - array to modify
 * @param {number} position - index to swap the head with
 * @returns {Array} the same array instance
 */
const swapHeadAndPosition = (arr, position) => {
    /* Nothing to swap, and the modulo below would be NaN */
    if(!arr.length)
        return arr;

    const index = position % arr.length;
    const first = arr[0];

    arr[0] = arr[index];
    arr[index] = first;

    return arr;
};
/**
 * Applies a list of transform tokens to a signature.
 *
 * Each token is a one-letter operation optionally followed by a number:
 * "r" reverses, "w<n>" swaps the head with position n, "s<n>" keeps
 * everything from index n, "p<n>" removes the first n characters.
 * Tokens starting with any other character are ignored.
 *
 * @param {string} sig - signature to transform
 * @param {string} tokens - comma-separated transform tokens
 * @returns {string} the transformed signature
 */
function decipher(sig, tokens) {
    let chars = sig.split('');

    for(const token of tokens.split(',')) {
        const operation = token[0];
        /* Numeric argument after the operation letter, 0 when absent */
        const amount = ~~token.slice(1);

        if(operation === 'r')
            chars = chars.reverse();
        else if(operation === 'w')
            chars = swapHeadAndPosition(chars, amount);
        else if(operation === 's')
            chars = chars.slice(amount);
        else if(operation === 'p')
            chars.splice(0, amount);
    }

    return chars.join('');
}
/**
 * Extracts the signature transform steps from a YouTube player script.
 *
 * Finds the object that holds the transform functions and the function
 * that calls them, then turns every call into a token: "w<n>" (swap),
 * "r" (reverse), "s<n>" (slice) or "p<n>" (splice).
 *
 * @param {string} body - player script source
 * @returns {?string} comma-separated tokens, or null when either the
 *   transforms object or the calling function cannot be found
 */
function extractActions(body) {
    const objMatch = actionsObjRegexp.exec(body);
    const funcMatch = actionsFuncRegexp.exec(body);

    if(!(objMatch && funcMatch))
        return null;

    const escapeDollars = text => text.replace(/\$/g, '\\$');

    const obj = escapeDollars(objMatch[1]);
    const objBody = escapeDollars(objMatch[2]);
    const funcBody = escapeDollars(funcMatch[1]);

    /* Key under which a transform is stored, without surrounding quotes;
     * null when the object has no such transform.
     * NOTE(review): "$" is escaped and then stripped here, which leaves
     * the backslash behind - kept exactly as before, confirm intent. */
    const findKey = regexp => {
        const match = regexp.exec(objBody);
        return match
            && escapeDollars(match[1]).replace(/\$|^'|^"|'$|"$/g, '');
    };

    const reverseKey = findKey(reverseRegexp);
    const sliceKey = findKey(sliceRegexp);
    const spliceKey = findKey(spliceRegexp);
    const swapKey = findKey(swapRegexp);

    const keys = `(${[reverseKey, sliceKey, spliceKey, swapKey].join('|')})`;
    /* Groups 1-3: key (dot, single-quoted or double-quoted access),
     * group 4: numeric argument of the call */
    const tokenizeRegexp = new RegExp(
        `(?:a=)?${obj}(?:\\.${keys}|\\['${keys}'\\]|\\["${keys}"\\])`
            + `\\(a,(\\d+)\\)`,
        'g'
    );

    const tokens = [];
    let call;

    while((call = tokenizeRegexp.exec(funcBody)) !== null) {
        const key = call[1] || call[2] || call[3];
        const pos = call[4];

        if(key === swapKey)
            tokens.push(`w${pos}`);
        else if(key === reverseKey)
            tokens.push('r');
        else if(key === sliceKey)
            tokens.push(`s${pos}`);
        else if(key === spliceKey)
            tokens.push(`p${pos}`);
    }

    return tokens.join(',');
}

View File

@@ -1,6 +1,7 @@
const { GLib, GObject, Gst, Soup } = imports.gi;
const ByteArray = imports.byteArray;
const Debug = imports.src.debug;
const YTDL = imports.src.assets['node-ytdl-core'];
const { debug } = Debug;
@@ -21,7 +22,6 @@ var YouTubeClient = GObject.registerClass({
/* videoID of current active download */
this.downloadingVideoId = null;
this.downloadAborted = false;
this.lastInfo = null;
}
@@ -47,9 +47,9 @@ var YouTubeClient = GObject.registerClass({
debug(`obtaining YouTube video info: ${videoId}`);
this.downloadingVideoId = videoId;
const info = await this._getInfoPromise(videoId).catch(debug);
const [info, isAborted] = await this._getInfoPromise(videoId).catch(debug);
if(!info) {
if(this.downloadAborted)
if(isAborted)
return reject(new Error('download aborted'));
debug(`failed, remaining tries: ${tries}`);
@@ -62,15 +62,71 @@ var YouTubeClient = GObject.registerClass({
)
? 'video is not playable'
: (!info.streamingData)
? 'video response data is missing URIs'
? 'video response data is missing streaming data'
: null;
if(invalidInfoMsg) {
this.lastInfo = null;
this.emit('info-resolved', false);
this.downloadingVideoId = null;
return reject(new Error(invalidInfoMsg));
debug(new Error(invalidInfoMsg));
break;
}
/* Make sure we have all formats arrays,
* so we will not have to keep checking */
if(!info.streamingData.formats)
info.streamingData.formats = [];
if(!info.streamingData.adaptiveFormats)
info.streamingData.adaptiveFormats = [];
const isCipher = this._getIsCipher(info.streamingData);
if(isCipher) {
debug('video requires deciphering');
const embedUri = `https://www.youtube.com/embed/${videoId}`;
const [body, isAbortedBody] =
await this._downloadDataPromise(embedUri).catch(debug);
if(isAbortedBody)
break;
/* We need matching info, so start from beginning */
if(!body)
continue;
const ytPath = body.match(/(?<=jsUrl\":\").*?(?=\")/gs)[0];
if(!ytPath) {
debug(new Error('could not find YouTube player URI'));
break;
}
const ytUri = `https://www.youtube.com${ytPath}`;
debug(`found player URI: ${ytUri}`);
/* TODO: cache */
let actions;
if(!actions) {
const [pBody, isAbortedPlayer] =
await this._downloadDataPromise(ytUri).catch(debug);
if(!pBody || isAbortedPlayer) {
debug(new Error('could not download player body'));
break;
}
actions = YTDL.sig.extractActions(pBody);
}
if(!actions || !actions.length) {
debug(new Error('could not extract decipher actions'));
break;
}
debug('successfully obtained decipher actions');
const isDeciphered = this._decipherStreamingData(
info.streamingData, actions
);
if(!isDeciphered) {
debug('streaming data could not be deciphered');
break;
}
}
this.lastInfo = info;
@@ -80,7 +136,7 @@ var YouTubeClient = GObject.registerClass({
return resolve(info);
}
/* Do not clear video info here, as we still have
/* Do not clear video info here, as we might still have
* valid info from last video that can be reused */
this.emit('info-resolved', false);
this.downloadingVideoId = null;
@@ -91,10 +147,7 @@ var YouTubeClient = GObject.registerClass({
getBestCombinedUri(info)
{
if(
!info.streamingData.formats
|| !info.streamingData.formats.length
)
if(!info.streamingData.formats.length)
return null;
const combinedStream = info.streamingData.formats[
@@ -123,6 +176,41 @@ var YouTubeClient = GObject.registerClass({
return true;
}
_downloadDataPromise(url)
{
return new Promise((resolve, reject) => {
const message = Soup.Message.new('GET', url);
let data = '';
const chunkSignal = message.connect('got-chunk', (msg, chunk) => {
debug(`got chunk of data, length: ${chunk.length}`);
const chunkData = chunk.get_data();
data += (chunkData instanceof Uint8Array)
? ByteArray.toString(chunkData)
: chunkData;
});
this.queue_message(message, (session, msg) => {
msg.disconnect(chunkSignal);
debug('got message response');
const statusCode = msg.status_code;
/* Internal Soup codes mean download aborted
* or some other error that cannot be handled
* and we do not want to retry in such case */
if(statusCode < 10)
return resolve([null, true]);
if(statusCode !== 200)
return reject(new Error(`response code: ${statusCode}`));
resolve([data, false]);
});
});
}
_getCurrentDownloadPromise()
{
debug('resolving after current download finishes');
@@ -145,36 +233,20 @@ var YouTubeClient = GObject.registerClass({
_getInfoPromise(videoId)
{
return new Promise((resolve, reject) => {
const url = `https://www.youtube.com/get_video_info?video_id=${videoId}&el=embedded`;
const message = Soup.Message.new('GET', url);
let data = '';
const query = [
`video_id=${videoId}`,
`el=embedded`,
`eurl=https://youtube.googleapis.com/v/${videoId}`,
].join('&');
const url = `https://www.youtube.com/get_video_info?${query}`;
const chunkSignal = message.connect('got-chunk', (msg, chunk) => {
debug(`got chunk of data, length: ${chunk.length}`);
const chunkData = chunk.get_data();
data += (chunkData instanceof Uint8Array)
? ByteArray.toString(chunkData)
: chunkData;
});
this.queue_message(message, (session, msg) => {
msg.disconnect(chunkSignal);
debug('got message response');
const statusCode = msg.status_code;
/* Internal Soup codes mean download abort
* or some other error that cannot be handled */
this.downloadAborted = (statusCode < 10);
if(statusCode !== 200)
return reject(new Error(`response code: ${statusCode}`));
this._downloadDataPromise(url).then(res => {
if(res[1])
return resolve([null, true]);
debug('parsing video info JSON');
const gstUri = Gst.Uri.from_string('?' + data);
const gstUri = Gst.Uri.from_string('?' + res[0]);
if(!gstUri)
return reject(new Error('could not convert query to URI'));
@@ -193,11 +265,126 @@ var YouTubeClient = GObject.registerClass({
return reject(new Error('could not parse video info JSON'));
debug('successfully parsed video info JSON');
resolve(info);
});
resolve([info, false]);
})
.catch(err => reject(err));
});
}
_getIsCipher(data)
{
/* Check only first best combined,
* AFAIK there are no videos without it */
if(data.formats[0].url)
return false;
if(
data.formats[0].signatureCipher
|| data.formats[0].cipher
)
return true;
/* FIXME: no URLs and no cipher, what now? */
debug(new Error('no url or cipher in streams'));
return false;
}
_decipherStreamingData(data, actions)
{
debug('checking cipher query keys');
/* Cipher query keys should be the same for all
* streams, so parse any stream to get their names */
const anyStream = data.formats[0] || data.adaptiveFormats[0];
const sigQuery = anyStream.signatureCipher || anyStream.cipher;
if(!sigQuery)
return false;
const gstUri = Gst.Uri.from_string('?' + sigQuery);
const queryKeys = gstUri.get_query_keys();
const cipherKey = queryKeys.find(key => {
const value = gstUri.get_query_value(key);
/* A long value that is not URI */
return (
value.length > 32
&& !Gst.Uri.is_valid(value)
);
});
if(!cipherKey) {
debug('no stream cipher key name');
return false;
}
const sigKey = queryKeys.find(key => {
const value = gstUri.get_query_value(key);
/* A short value that is not URI */
return (
value.length < 32
&& !Gst.Uri.is_valid(value)
);
});
if(!sigKey) {
debug('no stream signature key name');
return false;
}
const urlKey = queryKeys.find(key =>
Gst.Uri.is_valid(gstUri.get_query_value(key))
);
if(!urlKey) {
debug('no stream URL key name');
return false;
}
const cipherKeys = {
url: urlKey,
sig: sigKey,
cipher: cipherKey,
};
debug('deciphering streams');
for(let format of [data.formats, data.adaptiveFormats]) {
for(let stream of format) {
const formatUrl = this._getDecipheredUrl(
stream, actions, cipherKeys
);
if(!formatUrl) {
debug('undecipherable stream');
debug(stream);
return false;
}
stream.url = formatUrl;
}
}
debug('all streams deciphered');
return true;
}
_getDecipheredUrl(stream, actions, queryKeys)
{
debug(`deciphering stream id: ${stream.itag}`);
const sigQuery = stream.signatureCipher || stream.cipher;
if(!sigQuery) return null;
const gstUri = Gst.Uri.from_string('?' + sigQuery);
const url = gstUri.get_query_value(queryKeys.url);
const cipher = gstUri.get_query_value(queryKeys.cipher);
const sig = gstUri.get_query_value(queryKeys.sig);
const key = YTDL.sig.decipher(cipher, actions);
debug('stream deciphered');
return `${url}&${sig}=${key}`;
}
});
function checkYouTubeUri(uri)