{"result":{"sources":{"https://livebench.ai":{"overall_top_10":[{"rank":1,"model":"GPT-5.5 Thinking xHigh Effort","score":"80.71 Global Avg","provider":"OpenAI"},{"rank":2,"model":"GPT-5.4 Thinking xHigh Effort","score":"80.28 Global Avg","provider":"OpenAI"},{"rank":3,"model":"Gemini 3.1 Pro Preview High","score":"79.93 Global Avg","provider":"Google"},{"rank":4,"model":"Claude 4.7 Opus Thinking xHigh Effort","score":"76.91 Global Avg","provider":"Anthropic"},{"rank":5,"model":"Claude 4.6 Opus Thinking High Effort","score":"76.33 Global Avg","provider":"Anthropic"}],"price_performance_section":[]},"https://llm-stats.com":{"overall_top_10":[{"rank":1,"model":"Claude Mythos Preview","score":"70.3","provider":"Anthropic"},{"rank":2,"model":"GPT-5.5","score":"64.3","provider":"OpenAI"},{"rank":3,"model":"Claude Opus 4.7","score":"61.3","provider":"Anthropic"},{"rank":4,"model":"GPT-5.4","score":"61.3","provider":"OpenAI"},{"rank":5,"model":"GPT-5.2 Pro","score":"61.2","provider":"OpenAI"}],"price_performance_section":[{"rank":1,"model":"Kimi K2.6","metric":"$1.29/M tok"},{"rank":2,"model":"DeepSeek-V4-Pro-Max","metric":"$1.93/M tok"},{"rank":3,"model":"Gemini 3.1 Pro","metric":"$3.89/M tok"},{"rank":4,"model":"GPT-5.4","metric":"$3.89/M tok"},{"rank":5,"model":"GPT-5.5","metric":"$7.78/M tok"}]},"https://beta.lmarena.ai/leaderboard":{"overall_top_10":[],"price_performance_section":[]},"https://aider.chat/docs/leaderboards":{"overall_top_10":[{"rank":1,"model":"gpt-5 (high)","score":"88.0%","provider":"OpenAI"},{"rank":2,"model":"gpt-5 (medium)","score":"86.7%","provider":"OpenAI"},{"rank":3,"model":"o3-pro (high)","score":"84.9%","provider":"o3"},{"rank":4,"model":"gemini-2.5-pro-preview-06-05 (32k think)","score":"83.1%","provider":"Google"},{"rank":5,"model":"gpt-5 (low)","score":"81.3%","provider":"OpenAI"}],"price_performance_section":[{"rank":1,"model":"gpt-5 (low)","metric":"$10.37 per 225 tests"},{"rank":2,"model":"gpt-5 (medium)","metric":"$17.69 per 225 tests"},{"rank":3,"model":"gpt-5 (high)","metric":"$29.08 per 225 tests"},{"rank":4,"model":"gemini-2.5-pro-preview-06-05 (32k think)","metric":"$49.88 per 225 tests"},{"rank":5,"model":"o3-pro (high)","metric":"$146.32 per 225 tests"}]},"https://artificialanalysis.ai/leaderboards/models":{"overall_top_10":[{"rank":1,"model":"GPT-5.5 (xhigh)","score":"60 Intelligence Index","provider":"OpenAI"},{"rank":2,"model":"GPT-5.5 (high)","score":"59 Intelligence Index","provider":"OpenAI"},{"rank":3,"model":"Claude Opus 4.7 (max)","score":"57 Intelligence Index","provider":"Anthropic"},{"rank":4,"model":"Gemini 3.1 Pro Preview","score":"57 Intelligence Index","provider":"Google"},{"rank":5,"model":"GPT-5.5 (medium)","score":"57 Intelligence Index","provider":"OpenAI"}],"price_performance_section":[{"rank":1,"model":"Qwen3.5 0.8B","metric":"$0.02 per 1M tokens"},{"rank":2,"model":"Gemma 3n E4B","metric":"$0.03 per 1M tokens"},{"rank":3,"model":"Qwen3.5 2B","metric":"$0.04 per 1M tokens"},{"rank":4,"model":"Nova Micro","metric":"$0.06 per 1M tokens"},{"rank":5,"model":"Qwen3.5 4B","metric":"$0.06 per 1M tokens"}]}},"_metadata":{"strategy":"scraped","has_errors":false,"model_name":"o4-mini","scraped_urls":["https://aider.chat/docs/leaderboards","https://livebench.ai","https://artificialanalysis.ai/leaderboards/models","https://llm-stats.com","https://beta.lmarena.ai/leaderboard"],"success_rate":1,"timeout_used":90000,"model_provider":"openai","urls_requested":5,"scraping_errors":{},"urls_successful":5,"strategy_version":"1.2.0","collected_formats":{"html":0,"text":0,"links":0,"markdown":5,"metadata":0},"page_options_used":{},"requested_formats":["markdown"],"process_started_at":"2026-05-14T06:01:23.571Z","multi_format_analysis_enabled":true},"_strategy":"scraped","best-llms":{"price-performance":{"provider":"Moonshot AI","arguments":{"de":["Sehr kostengünstig bei moderater Spitzenleistung in offenen Gewichten.","Führt unter den Open-Weights-Modellen im GPQA mit 90.5% und niedrigen Kosten.","Bietet besten Kompromiss zwischen Performance und Nutzbarkeit für verschiedene Workloads."],"en":["Highly cost-effective with moderate frontier performance in open-weights.","Leads open-weights models on GPQA with 90.5% at a low price.","Offers the best performance-to-cost balance for diverse use cases."]},"rationale":{"de":"Kimi K2.6 liefert robuste Leistung zum niedrigsten Preis unter Spitzmodellen.","en":"Kimi K2.6 delivers solid frontier performance at the lowest cost among top models."},"model_name":"Kimi K2.6","supporting_evidence":{"https://llm-stats.com":"Cheapest in top 10 at $1.29/M tok with 59.0 score","https://artificialanalysis.ai/leaderboards/models":"Intelligence Index 54 at $1.71/M tokens"},"price_performance_score":9.2},"total-performance":{"provider":"OpenAI","arguments":{"de":["GPT-5.5 führt in mehreren unabhängigen Benchmarks wie Artificial Analysis und LiveBench.","Zeigt konsistente Spitzenleistung über Intelligence Index, LLM Stats Score und LiveBench Global Average.","Bietet fortschrittliche reasoning-Fähigkeiten mit xHigh Effort Varianten."],"en":["GPT-5.5 tops multiple independent leaderboards including Artificial Analysis and LiveBench.","Demonstrates consistent frontier performance across Intelligence Index, LLM Stats Score, and LiveBench Global Average.","Offers advanced reasoning abilities in xHigh Effort variants."]},"rationale":{"de":"GPT-5.5 kombiniert Spitzenwerte in verschiedenen Benchmarks und zeigt höchste Gesamtleistung.","en":"GPT-5.5 consistently achieves top scores across diverse benchmarks, making it the overall best."},"model_name":"GPT-5.5 (xhigh)","total_score":9.5,"supporting_evidence":{"https://livebench.ai":"Global Average 80.71 on LiveBench","https://llm-stats.com":"Ranked #2 with LLM Stats Score 64.3","https://artificialanalysis.ai/leaderboards/models":"Highest Intelligence Index 60"}}}},"updatedAt":"2026-05-14T06:01:24.063504+00:00","error":null}