<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:content="http://purl.org/rss/1.0/modules/content/">
  <channel>
    <!-- Channel metadata: feed title, URL of the blog's landing page, a short summary, and the content language. -->
    <title>Shkumbin Sherifi — Blog</title>
    <link>https://shkumbins.dev/blog</link>
    <description>Technical writing on AI infrastructure, memory systems, and local-first agent architecture.</description>
    <language>en-us</language>
    <!-- Time of the last feed build, written as an RFC 822 style date with an explicit +0000 offset. -->
    <lastBuildDate>Sat, 06 Jun 2026 00:00:00 +0000</lastBuildDate>
    <!-- Self-reference: the URL this feed is published at. Uses the atom prefix declared on the root element. -->
    <atom:link href="https://shkumbins.dev/feed.xml" rel="self" type="application/rss+xml"/>
    <!-- Post entry: memory-layer article. -->
    <item>
      <title>Building a 5-Layer Memory System for an Autonomous AI Agent</title>
      <link>https://shkumbins.dev/blog/memory-layer</link>
      <!-- The guid repeats the link URL; with no isPermaLink attribute, RSS 2.0 readers treat it as a permalink. -->
      <guid>https://shkumbins.dev/blog/memory-layer</guid>
      <!-- NOTE(review): midnight +0000 looks like a date-only publish time; confirm if the exact time matters. -->
      <pubDate>Thu, 04 Jun 2026 00:00:00 +0000</pubDate>
      <!-- Plain-text summary of the post (no embedded markup). -->
      <description>How I built a proper memory hierarchy with consolidation for a local-first AI agent on Apple Silicon. Not just a vector DB bolted on — a layered system with session store, MCP companion, durable facts, synthesized knowledge, and full-text search.</description>
    </item>
    <!-- Post entry: local-model-benchmarks article. -->
    <item>
      <title>Local Model Benchmarks: Running LLMs on Apple Silicon via MLX</title>
      <link>https://shkumbins.dev/blog/local-model-benchmarks</link>
      <!-- The guid repeats the link URL; with no isPermaLink attribute, RSS 2.0 readers treat it as a permalink. -->
      <guid>https://shkumbins.dev/blog/local-model-benchmarks</guid>
      <!-- NOTE(review): midnight +0000 looks like a date-only publish time; confirm if the exact time matters. -->
      <pubDate>Thu, 04 Jun 2026 00:00:00 +0000</pubDate>
      <!-- Plain-text summary of the post (no embedded markup). -->
      <description>Real benchmark data from running quantized LLMs locally on M4 Pro 48GB. Three models, two benchmark types, production stats, and the memory architecture that makes it work.</description>
    </item>
  </channel>
</rss>
