musikr.pipeline: shuffle songs to extract

This helps prevent the entire tag parsing flow from getting blocked up by several tracks that are all blocked trying to write a single cover.
2024-12-19 16:13:16 -05:00 · 2024-12-19 16:13:16 -05:00 · 249d2fad67
commit 249d2fad67
parent a77dd3ff7a
2 changed files with 16 additions and 0 deletions
--- a/musikr/src/main/java/org/oxycblt/musikr/pipeline/ExtractStep.kt
+++ b/musikr/src/main/java/org/oxycblt/musikr/pipeline/ExtractStep.kt
@ -89,6 +89,7 @@ private class ExtractStepImpl(
        val fds =
            uncachedSongs
                .shuffle()
                .mapNotNull {
                    wrap(it) { file ->
                        withContext(Dispatchers.IO) {
--- a/musikr/src/main/java/org/oxycblt/musikr/pipeline/FlowUtil.kt
+++ b/musikr/src/main/java/org/oxycblt/musikr/pipeline/FlowUtil.kt
@ -20,9 +20,12 @@ package org.oxycblt.musikr.pipeline
 import kotlinx.coroutines.channels.Channel
 import kotlinx.coroutines.flow.Flow
 import kotlinx.coroutines.flow.asFlow
 import kotlinx.coroutines.flow.emitAll
 import kotlinx.coroutines.flow.flow
 import kotlinx.coroutines.flow.map
 import kotlinx.coroutines.flow.receiveAsFlow
 import kotlinx.coroutines.flow.toList
 import kotlinx.coroutines.flow.withIndex
 internal sealed interface Divert<L, R> {
@ -79,3 +82,15 @@ internal fun <T> Flow<T>.distribute(n: Int): DistributedFlow<T> {
    val hotFlows = posChannels.map { it.receiveAsFlow() }.toTypedArray()
    return DistributedFlow(managerFlow, hotFlows)
 }
 /**
  * Returns a flow that emits every element of this flow exactly once, in a random order.
  *
  * The upstream flow is collected to completion into memory before anything is emitted
  * downstream, so nothing is emitted until the upstream finishes, and an upstream that never
  * completes will never produce output here.
  */
 internal fun <T> Flow<T>.shuffle(): Flow<T> = flow {
    // A lazy shuffle can't give an even (uniform) spread: elements that arrive late
    // would need to be emitted early and early ones held back until late, which is
    // impossible without having seen everything. So drain the upstream first,
    // randomize the collected elements, and only then re-emit them.
    val reordered = this@shuffle.toList().shuffled()
    emitAll(reordered.asFlow())
 }