{"api_version":"1","generated_at":"2026-04-23T04:11:30+00:00","cve":"CVE-2026-34760","urls":{"html":"https://cve.report/CVE-2026-34760","api":"https://cve.report/api/cve/CVE-2026-34760.json","docs":"https://cve.report/api","cve_org":"https://www.cve.org/CVERecord?id=CVE-2026-34760","nvd":"https://nvd.nist.gov/vuln/detail/CVE-2026-34760"},"summary":{"title":"vLLM: Downmix Implementation Differences as Attack Vectors Against Audio AI Models","description":"vLLM is an inference and serving engine for large language models (LLMs). From version 0.5.5 to before version 0.18.0, Librosa defaults to using numpy.mean for mono downmixing (to_mono), while the international standard ITU-R BS.775-4 specifies a weighted downmixing algorithm. This discrepancy results in inconsistency between audio heard by humans (e.g., through headphones/regular speakers) and audio processed by AI models (Which infra via Librosa, such as vllm, transformer). This issue has been patched in version 0.18.0.","state":"PUBLISHED","assigner":"GitHub_M","published_at":"2026-04-02 20:16:25","updated_at":"2026-04-03 16:10:23"},"problem_types":["CWE-20","CWE-20 CWE-20: Improper Input Validation"],"metrics":[{"version":"3.1","source":"security-advisories@github.com","type":"Secondary","score":"5.9","severity":"MEDIUM","vector":"CVSS:3.1/AV:N/AC:H/PR:L/UI:N/S:U/C:N/I:H/A:L","data":{"version":"3.1","vectorString":"CVSS:3.1/AV:N/AC:H/PR:L/UI:N/S:U/C:N/I:H/A:L","baseScore":5.9,"baseSeverity":"MEDIUM","attackVector":"NETWORK","attackComplexity":"HIGH","privilegesRequired":"LOW","userInteraction":"NONE","scope":"UNCHANGED","confidentialityImpact":"NONE","integrityImpact":"HIGH","availabilityImpact":"LOW"}},{"version":"3.1","source":"CNA","type":"DECLARED","score":"5.9","severity":"MEDIUM","vector":"CVSS:3.1/AV:N/AC:H/PR:L/UI:N/S:U/C:N/I:H/A:L","data":{"attackComplexity":"HIGH","attackVector":"NETWORK","availabilityImpact":"LOW","baseScore":5.9,"baseSeverity":"MEDIUM","confidentialityImpact":"NONE","integrityImpact":"HIGH","privilegesRequired":"LOW","scope":"UNCHANGED","userInteraction":"NONE","vectorString":"CVSS:3.1/AV:N/AC:H/PR:L/UI:N/S:U/C:N/I:H/A:L","version":"3.1"}}],"references":[{"url":"https://github.com/vllm-project/vllm/security/advisories/GHSA-6c4r-fmh3-7rh8","name":"https://github.com/vllm-project/vllm/security/advisories/GHSA-6c4r-fmh3-7rh8","refsource":"security-advisories@github.com","tags":[],"title":"","mime":"","httpstatus":"","archivestatus":"0"},{"url":"https://github.com/vllm-project/vllm/pull/37058","name":"https://github.com/vllm-project/vllm/pull/37058","refsource":"security-advisories@github.com","tags":[],"title":"","mime":"","httpstatus":"","archivestatus":"0"},{"url":"https://github.com/vllm-project/vllm/commit/c7f98b4d0a63b32ed939e2b6dfaa8a626e9b46c4","name":"https://github.com/vllm-project/vllm/commit/c7f98b4d0a63b32ed939e2b6dfaa8a626e9b46c4","refsource":"security-advisories@github.com","tags":[],"title":"","mime":"","httpstatus":"","archivestatus":"0"},{"url":"https://github.com/vllm-project/vllm/releases/tag/v0.18.0","name":"https://github.com/vllm-project/vllm/releases/tag/v0.18.0","refsource":"security-advisories@github.com","tags":[],"title":"","mime":"","httpstatus":"","archivestatus":"0"},{"url":"https://www.cve.org/CVERecord?id=CVE-2026-34760","name":"CVE Program record","refsource":"CVE.ORG","tags":["canonical"]},{"url":"https://nvd.nist.gov/vuln/detail/CVE-2026-34760","name":"NVD vulnerability detail","refsource":"NVD","tags":["canonical","analysis"]}],"affected":[{"source":"CNA","vendor":"vllm-project","product":"vllm","version":"affected >= 0.5.5, < 0.18.0","platforms":[]}],"timeline":[],"solutions":[],"workarounds":[],"exploits":[],"credits":[],"nvd_cpes":[],"vendor_comments":[],"enrichments":{"kev":null,"epss":{"cve_year":"2026","cve_id":"34760","cve":"CVE-2026-34760","epss":"0.000570000","percentile":"0.178760000","score_date":"2026-04-07","updated_at":"2026-04-08 00:03:39"},"legacy_qids":[]},"source_records":{"cve_program":{"containers":{"adp":[{"metrics":[{"other":{"content":{"id":"CVE-2026-34760","options":[{"Exploitation":"none"},{"Automatable":"no"},{"Technical Impact":"partial"}],"role":"CISA Coordinator","timestamp":"2026-04-03T14:42:25.211772Z","version":"2.0.3"},"type":"ssvc"}}],"providerMetadata":{"dateUpdated":"2026-04-03T14:42:34.842Z","orgId":"134c704f-9b21-4f2e-91b3-4a467353bcc0","shortName":"CISA-ADP"},"title":"CISA ADP Vulnrichment"}],"cna":{"affected":[{"product":"vllm","vendor":"vllm-project","versions":[{"status":"affected","version":">= 0.5.5, < 0.18.0"}]}],"descriptions":[{"lang":"en","value":"vLLM is an inference and serving engine for large language models (LLMs). From version 0.5.5 to before version 0.18.0, Librosa defaults to using numpy.mean for mono downmixing (to_mono), while the international standard ITU-R BS.775-4 specifies a weighted downmixing algorithm. This discrepancy results in inconsistency between audio heard by humans (e.g., through headphones/regular speakers) and audio processed by AI models (Which infra via Librosa, such as vllm, transformer). This issue has been patched in version 0.18.0."}],"metrics":[{"cvssV3_1":{"attackComplexity":"HIGH","attackVector":"NETWORK","availabilityImpact":"LOW","baseScore":5.9,"baseSeverity":"MEDIUM","confidentialityImpact":"NONE","integrityImpact":"HIGH","privilegesRequired":"LOW","scope":"UNCHANGED","userInteraction":"NONE","vectorString":"CVSS:3.1/AV:N/AC:H/PR:L/UI:N/S:U/C:N/I:H/A:L","version":"3.1"}}],"problemTypes":[{"descriptions":[{"cweId":"CWE-20","description":"CWE-20: Improper Input Validation","lang":"en","type":"CWE"}]}],"providerMetadata":{"dateUpdated":"2026-04-02T18:59:49.638Z","orgId":"a0819718-46f1-4df5-94e2-005712e83aaa","shortName":"GitHub_M"},"references":[{"name":"https://github.com/vllm-project/vllm/security/advisories/GHSA-6c4r-fmh3-7rh8","tags":["x_refsource_CONFIRM"],"url":"https://github.com/vllm-project/vllm/security/advisories/GHSA-6c4r-fmh3-7rh8"},{"name":"https://github.com/vllm-project/vllm/pull/37058","tags":["x_refsource_MISC"],"url":"https://github.com/vllm-project/vllm/pull/37058"},{"name":"https://github.com/vllm-project/vllm/commit/c7f98b4d0a63b32ed939e2b6dfaa8a626e9b46c4","tags":["x_refsource_MISC"],"url":"https://github.com/vllm-project/vllm/commit/c7f98b4d0a63b32ed939e2b6dfaa8a626e9b46c4"},{"name":"https://github.com/vllm-project/vllm/releases/tag/v0.18.0","tags":["x_refsource_MISC"],"url":"https://github.com/vllm-project/vllm/releases/tag/v0.18.0"}],"source":{"advisory":"GHSA-6c4r-fmh3-7rh8","discovery":"UNKNOWN"},"title":"vLLM: Downmix Implementation Differences as Attack Vectors Against Audio AI Models"}},"cveMetadata":{"assignerOrgId":"a0819718-46f1-4df5-94e2-005712e83aaa","assignerShortName":"GitHub_M","cveId":"CVE-2026-34760","datePublished":"2026-04-02T18:59:49.638Z","dateReserved":"2026-03-30T19:17:10.225Z","dateUpdated":"2026-04-03T14:42:34.842Z","state":"PUBLISHED"},"dataType":"CVE_RECORD","dataVersion":"5.2"},"nvd":{"publishedDate":"2026-04-02 20:16:25","lastModifiedDate":"2026-04-03 16:10:23","problem_types":["CWE-20","CWE-20 CWE-20: Improper Input Validation"],"metrics":{"cvssMetricV31":[{"source":"security-advisories@github.com","type":"Secondary","cvssData":{"version":"3.1","vectorString":"CVSS:3.1/AV:N/AC:H/PR:L/UI:N/S:U/C:N/I:H/A:L","baseScore":5.9,"baseSeverity":"MEDIUM","attackVector":"NETWORK","attackComplexity":"HIGH","privilegesRequired":"LOW","userInteraction":"NONE","scope":"UNCHANGED","confidentialityImpact":"NONE","integrityImpact":"HIGH","availabilityImpact":"LOW"},"exploitabilityScore":1.6,"impactScore":4.2}]},"configurations":[]},"legacy_mitre":{"record":{"CveYear":"2026","CveId":"34760","Ordinal":"1","Title":"vLLM: Downmix Implementation Differences as Attack Vectors Again","CVE":"CVE-2026-34760","Year":"2026"},"notes":[{"CveYear":"2026","CveId":"34760","Ordinal":"1","NoteData":"vLLM is an inference and serving engine for large language models (LLMs). From version 0.5.5 to before version 0.18.0, Librosa defaults to using numpy.mean for mono downmixing (to_mono), while the international standard ITU-R BS.775-4 specifies a weighted downmixing algorithm. This discrepancy results in inconsistency between audio heard by humans (e.g., through headphones/regular speakers) and audio processed by AI models (Which infra via Librosa, such as vllm, transformer). This issue has been patched in version 0.18.0.","Type":"Description","Title":"vLLM: Downmix Implementation Differences as Attack Vectors Again"}]}}}