clapper: playlist-demux: Improved "claps" typefinding

Suggesting a "possible" type does not always work as intended, since a lot
of text content might instead be detected as "text/plain" due to its higher probability.

We should give at least a "likely" typefind result instead of "possible",
but in order to do that, let's also check some more lines of the playlist,
just to be safe when announcing that likelihood.
This commit is contained in:
Rafał Dzięgiel
2025-12-11 22:13:09 +01:00
parent c9561032d3
commit ddce88bce8

View File

@@ -132,6 +132,30 @@ clapper_playlist_type_find (GstTypeFind *tf, ClapperEnhancerProxy *proxy)
CLAPPER_PLAYLIST_MEDIA_TYPE, "enhancer", G_TYPE_STRING, module_name, NULL);
}
/* Checks whether the beginning of @data resembles the start of an
 * absolute file path. At most the first three bytes are inspected and
 * any input shorter than three bytes is rejected.
 *
 * Returns: %TRUE when the data starts like a file path, %FALSE otherwise. */
static gboolean
_looks_like_path (const guint8 *data, gsize len)
{
  if (len < 3)
    return FALSE;

  /* Unix-style path: a slash followed by an alphanumeric character */
  if (data[0] == '/' && g_ascii_isalnum (data[1]))
    return TRUE;

#ifdef G_OS_WIN32
  /* Windows drive letter path ("C:\..." or "D:/...") */
  if (g_ascii_isalpha (data[0]) && data[1] == ':'
      && (data[2] == '\\' || data[2] == '/'))
    return TRUE;

  /* Windows UNC path: two backslashes followed by an alphanumeric character */
  if (data[0] == '\\' && data[1] == '\\' && g_ascii_isalnum (data[2]))
    return TRUE;
#endif

  return FALSE;
}
/* Finds text file of full file paths. A claps file might also use URIs,
 * but in that case let GStreamer's built-in type finders detect it as
 * "text/uri-list" and we will handle it with this element too. */
@@ -140,25 +164,47 @@ clapper_claps_type_find (GstTypeFind *tf, gpointer user_data G_GNUC_UNUSED)
{
const guint8 *data;
if ((data = gst_type_find_peek (tf, 0, 3))) {
gboolean possible;
if (!(data = gst_type_find_peek (tf, 0, 3)))
return;
/* Linux file path */
possible = (data[0] == '/' && g_ascii_isalnum (data[1]));
/* Continue parsing only if start looks like
* file path, otherwise reject data early */
if (_looks_like_path (data, 3)) {
guint probability = GST_TYPE_FIND_POSSIBLE;
guint64 data_size = 1024;
#ifdef G_OS_WIN32
/* Windows file path ("C:\..." or "D:/...") */
if (!possible)
possible = (g_ascii_isalpha (data[0]) && data[1] == ':' && (data[2] == '\\' || data[2] == '/'));
if (!(data = gst_type_find_peek (tf, 0, data_size)))
if ((data_size = gst_type_find_get_length (tf)) > 3)
data = gst_type_find_peek (tf, 0, data_size);
/* Windows UNC Path */
if (!possible)
possible = (data[0] == '\\' && data[1] == '\\' && g_ascii_isalnum (data[2]));
#endif
if (data) {
const guint8 *line_start = data;
const guint8 *end = data + data_size;
guint pathlike = 0, total = 0;
if (possible) {
GST_INFO ("Suggesting possible type: " CLAPPER_CLAPS_MEDIA_TYPE);
gst_type_find_suggest_empty_simple (tf, GST_TYPE_FIND_POSSIBLE, CLAPPER_CLAPS_MEDIA_TYPE);
while (line_start < end) {
const guint8 *newline = memchr (line_start, '\n', end - line_start);
gsize len = newline ? (newline - line_start) : (end - line_start);
if (len > 0) {
total++;
if (_looks_like_path (line_start, len))
pathlike++;
}
if (!newline)
break;
line_start = newline + 1;
}
/* Multiple lines and most of them looks like a file path */
if (total > 1 && pathlike >= total * 0.75)
probability = GST_TYPE_FIND_LIKELY;
GST_INFO ("Suggesting %s type: " CLAPPER_CLAPS_MEDIA_TYPE,
(probability >= GST_TYPE_FIND_LIKELY) ? "likely" : "possible");
gst_type_find_suggest_empty_simple (tf, probability, CLAPPER_CLAPS_MEDIA_TYPE);
}
}
}