envoyproxy · missBerg · Dec 12, 2024 · Dec 12, 2024 · Dec 12, 2024 · Dec 17, 2024
diff --git a/site/.DS_Store b/site/.DS_Store
@@ -0,0 +1,5 @@
+This folder stores temp files that Docusaurus' client bundler accesses.
+
+DO NOT hand-modify files in this folder because they will be overwritten in the
+next build. You can clear all build artifacts (including this folder) with the
+`docusaurus clear` command.
@@ -0,0 +1,6 @@
+export default [
+  require("/Users/erica/repos/ai-gateway/site/node_modules/infima/dist/css/default/default.css"),
+  require("/Users/erica/repos/ai-gateway/site/node_modules/@docusaurus/theme-classic/lib/prism-include-languages"),
+  require("/Users/erica/repos/ai-gateway/site/node_modules/@docusaurus/theme-classic/lib/nprogress"),
+  require("/Users/erica/repos/ai-gateway/site/src/css/custom.css"),
+];
@@ -0,0 +1 @@
+{}
@@ -0,0 +1,4 @@
+{
+  "name": "docusaurus-plugin-content-blog",
+  "id": "default"
+}
@@ -0,0 +1,11 @@
+{
+  "title": "Recent posts",
+  "items": [
+    {
+      "title": "Introducing Envoy AI Gateway",
+      "permalink": "/blog/introducing-envoy-ai-gateway",
+      "unlisted": false,
+      "date": "2024-10-18T00:00:00.000Z"
+    }
+  ]
+}
@@ -0,0 +1,5 @@
+{
+  "blogBasePath": "/blog",
+  "blogTitle": "Blog",
+  "authorsListPath": "/blog/authors"
+}
@@ -0,0 +1 @@
+{"archive":{"blogPosts":[{"id":"introducing-envoy-ai-gateway","metadata":{"permalink":"/blog/introducing-envoy-ai-gateway","source":"@site/blog/2024-10-18-introducing-envoy-ai-gw.md","title":"Introducing Envoy AI Gateway","description":"Open collaboration to bring AI Gateway features to the Envoy community","date":"2024-10-18T00:00:00.000Z","tags":[{"inline":true,"label":"news","permalink":"/blog/tags/news"}],"readingTime":2.685,"hasTruncateMarker":true,"authors":[{"name":"Erica Hughberg","title":"Envoy AI Gateway Maintainer","url":"https://github.com/missBerg","page":{"permalink":"/blog/authors/missberg"},"socials":{"github":"https://github.com/missBerg"},"imageURL":"https://github.com/missBerg.png","key":"missberg"}],"frontMatter":{"slug":"introducing-envoy-ai-gateway","title":"Introducing Envoy AI Gateway","authors":["missberg"],"tags":["news"]},"unlisted":false},"content":"Open collaboration to bring AI Gateway features to the Envoy community\n\n<!-- truncate -->\n\nThe industry is embracing Generative AI functionality, and we need to evolve how we handle traffic on an industry-wide scale. Keeping AI traffic handling features exclusive to enterprise licenses is counterproductive to the industry’s needs. This approach limits incentives to a single commercial entity and its customers. Even single-company open-source initiatives do not promote open multi-company collaboration.\n\nA shared challenge like this presents an opportunity for open collaboration to build the necessary features. We believe bringing together different use cases and requirements through open collaboration will lead to better solutions and accelerate innovation. The industry will benefit from diverse expertise and experiences by openly collaborating on software across companies and industries.\n\nThat is why Tetrate and Bloomberg have started an open collaboration to bring critical features for this new era of Gen AI integration. Collaborating openly in the Envoy community, bringing AI traffic handling features to Envoy, via Envoy Gateway and Envoy Proxy.\n\n## Why we need AI traffic handling features\nWhat makes traffic to LLM models different from traditional API traffic?\n\nOn the surface it appears similar. Traffic comes from a client app that is making an API request, and this request has to get to the provider that hosts the LLM model.\n\nHowever, it is different. Managing LLM traffic from multiple apps, to multiple LLM providers, introduces new and different challenges where traditional API Gateway features fall short.\n\nFor example, traditional rate-limiting based on number of requests doesn’t work for controlling usage of LLM providers as they’re computationally complex services. To measure usage LLM providers tokenize the words in the request message and response message, and count the number of tokens used. This count gives a good approximation of the computational complexity and cost of serving the request.\n\nBeyond controlling usage of LLMs there are many more challenges relating to ease of integration and high-availability architectures. It’s no longer enough to just optimize for quality of service alone, adopters must consider costs of usage in real time. As adopters of Gen AI look for Gateway solutions to handle these challenges for their system, they often find the necessary features locked behind enterprise licenses.\n\n## Three key MVP features\nNow, let’s look at how handling AI traffic poses new challenges for Gateways. There are several features we discussed together with our collaborators at Bloomberg, and together we decided on three key features for the MVP:\n\n- **Usage Limiting** – to control LLM usage based on word tokens\n- **Unified API** – to simplify client integration with multiple LLM providers\n- **Upstream Authorization** – to configure Authorization to multiple upstream LLM providers\nWhat other features are you looking for? Get in touch with us to share your use case and define the future of Envoy AI Gateway.\n\nWe are really excited about these features being part of Envoy. They will benefit those integrating with LLM providers and, ultimately, also Gateway users for general API request traffic.\n\nWhen it comes to AI Gateway features, we have chosen to collaborate and build within the CNCF Envoy project because we believe multi-company, open-source projects benefit the entire industry by enabling innovation without creating single vendor risk."}]}}
@@ -0,0 +1 @@
+{"authors":[{"name":"Erica Hughberg","title":"Envoy AI Gateway Maintainer","url":"https://github.com/missBerg","page":{"permalink":"/blog/authors/missberg"},"socials":{"github":"https://github.com/missBerg"},"imageURL":"https://github.com/missBerg.png","key":"missberg","count":1}]}
@@ -0,0 +1 @@
+{"author":{"name":"Erica Hughberg","title":"Envoy AI Gateway Maintainer","url":"https://github.com/missBerg","page":{"permalink":"/blog/authors/missberg"},"socials":{"github":"https://github.com/missBerg"},"imageURL":"https://github.com/missBerg.png","key":"missberg","count":1},"listMetadata":{"permalink":"/blog/authors/missberg","page":1,"postsPerPage":10,"totalPages":1,"totalCount":1,"blogDescription":"Blog","blogTitle":"Blog"}}
@@ -0,0 +1 @@
+{"metadata":{"permalink":"/blog","page":1,"postsPerPage":10,"totalPages":1,"totalCount":1,"blogDescription":"Blog","blogTitle":"Blog"}}
@@ -0,0 +1 @@
+{"tags":[{"label":"news","permalink":"/blog/tags/news","count":1}]}
@@ -0,0 +1 @@
+{"tag":{"label":"news","permalink":"/blog/tags/news","allTagsPath":"/blog/tags","count":1,"unlisted":false},"listMetadata":{"permalink":"/blog/tags/news","page":1,"postsPerPage":10,"totalPages":1,"totalCount":1,"blogDescription":"Blog","blogTitle":"Blog"}}
@@ -0,0 +1,42 @@
+{
+  "permalink": "/blog/introducing-envoy-ai-gateway",
+  "source": "@site/blog/2024-10-18-introducing-envoy-ai-gw.md",
+  "title": "Introducing Envoy AI Gateway",
+  "description": "Open collaboration to bring AI Gateway features to the Envoy community",
+  "date": "2024-10-18T00:00:00.000Z",
+  "tags": [
+    {
+      "inline": true,
+      "label": "news",
+      "permalink": "/blog/tags/news"
+    }
+  ],
+  "readingTime": 2.685,
+  "hasTruncateMarker": true,
+  "authors": [
+    {
+      "name": "Erica Hughberg",
+      "title": "Envoy AI Gateway Maintainer",
+      "url": "https://github.com/missBerg",
+      "page": {
+        "permalink": "/blog/authors/missberg"
+      },
+      "socials": {
+        "github": "https://github.com/missBerg"
+      },
+      "imageURL": "https://github.com/missBerg.png",
+      "key": "missberg"
+    }
+  ],
+  "frontMatter": {
+    "slug": "introducing-envoy-ai-gateway",
+    "title": "Introducing Envoy AI Gateway",
+    "authors": [
+      "missberg"
+    ],
+    "tags": [
+      "news"
+    ]
+  },
+  "unlisted": false
+}
@@ -0,0 +1,4 @@
+{
+  "name": "docusaurus-plugin-content-docs",
+  "id": "default"
+}
@@ -0,0 +1 @@
+{"version":{"pluginId":"default","version":"current","label":"Next","banner":null,"badge":false,"noIndex":false,"className":"docs-version-current","isLast":true,"docsSidebars":{"tutorialSidebar":[{"type":"link","label":"Envoy AI Gateway Docs","href":"/docs/intro","docId":"intro","unlisted":false}]},"docs":{"intro":{"id":"intro","title":"Envoy AI Gateway Docs","description":"Version 0.1.0 is expected end of January 2025.","sidebar":"tutorialSidebar"}}}}
@@ -0,0 +1,18 @@
+{
+  "id": "intro",
+  "title": "Envoy AI Gateway Docs",
+  "description": "Version 0.1.0 is expected end of January 2025.",
+  "source": "@site/docs/intro.md",
+  "sourceDirName": ".",
+  "slug": "/intro",
+  "permalink": "/docs/intro",
+  "draft": false,
+  "unlisted": false,
+  "tags": [],
+  "version": "current",
+  "sidebarPosition": 1,
+  "frontMatter": {
+    "sidebar_position": 1
+  },
+  "sidebar": "tutorialSidebar"
+}
@@ -0,0 +1,4 @@
+{
+  "name": "docusaurus-plugin-content-pages",
+  "id": "default"
+}
@@ -0,0 +1,11 @@
+{
+  "type": "mdx",
+  "permalink": "/markdown-page",
+  "source": "@site/src/pages/markdown-page.md",
+  "title": "Markdown page example",
+  "description": "You don't need React to write simple standalone pages.",
+  "frontMatter": {
+    "title": "Markdown page example"
+  },
+  "unlisted": false
+}
@@ -0,0 +1,4 @@
+{
+  "name": "docusaurus-plugin-debug",
+  "id": "default"
+}
@@ -0,0 +1 @@
+{"allContent":{"docusaurus-plugin-content-docs":{"default":{"loadedVersions":[{"versionName":"current","label":"Next","banner":null,"badge":false,"noIndex":false,"className":"docs-version-current","path":"/docs","tagsPath":"/docs/tags","isLast":true,"routePriority":-1,"sidebarFilePath":"/Users/erica/repos/ai-gateway/site/sidebars.ts","contentPath":"/Users/erica/repos/ai-gateway/site/docs","contentPathLocalized":"/Users/erica/repos/ai-gateway/site/i18n/en/docusaurus-plugin-content-docs/current","docs":[{"id":"intro","title":"Envoy AI Gateway Docs","description":"Version 0.1.0 is expected end of January 2025.","source":"@site/docs/intro.md","sourceDirName":".","slug":"/intro","permalink":"/docs/intro","draft":false,"unlisted":false,"tags":[],"version":"current","sidebarPosition":1,"frontMatter":{"sidebar_position":1},"sidebar":"tutorialSidebar"}],"drafts":[],"sidebars":{"tutorialSidebar":[{"type":"doc","id":"intro"}]}}]}},"docusaurus-plugin-content-blog":{"default":{"blogSidebarTitle":"Recent posts","blogPosts":[{"id":"introducing-envoy-ai-gateway","metadata":{"permalink":"/blog/introducing-envoy-ai-gateway","source":"@site/blog/2024-10-18-introducing-envoy-ai-gw.md","title":"Introducing Envoy AI Gateway","description":"Open collaboration to bring AI Gateway features to the Envoy community","date":"2024-10-18T00:00:00.000Z","tags":[{"inline":true,"label":"news","permalink":"/blog/tags/news"}],"readingTime":2.685,"hasTruncateMarker":true,"authors":[{"name":"Erica Hughberg","title":"Envoy AI Gateway Maintainer","url":"https://github.com/missBerg","page":{"permalink":"/blog/authors/missberg"},"socials":{"github":"https://github.com/missBerg"},"imageURL":"https://github.com/missBerg.png","key":"missberg"}],"frontMatter":{"slug":"introducing-envoy-ai-gateway","title":"Introducing Envoy AI Gateway","authors":["missberg"],"tags":["news"]},"unlisted":false},"content":"Open collaboration to bring AI Gateway features to the Envoy community\n\n<!-- truncate -->\n\nThe industry is embracing Generative AI functionality, and we need to evolve how we handle traffic on an industry-wide scale. Keeping AI traffic handling features exclusive to enterprise licenses is counterproductive to the industry’s needs. This approach limits incentives to a single commercial entity and its customers. Even single-company open-source initiatives do not promote open multi-company collaboration.\n\nA shared challenge like this presents an opportunity for open collaboration to build the necessary features. We believe bringing together different use cases and requirements through open collaboration will lead to better solutions and accelerate innovation. The industry will benefit from diverse expertise and experiences by openly collaborating on software across companies and industries.\n\nThat is why Tetrate and Bloomberg have started an open collaboration to bring critical features for this new era of Gen AI integration. Collaborating openly in the Envoy community, bringing AI traffic handling features to Envoy, via Envoy Gateway and Envoy Proxy.\n\n## Why we need AI traffic handling features\nWhat makes traffic to LLM models different from traditional API traffic?\n\nOn the surface it appears similar. Traffic comes from a client app that is making an API request, and this request has to get to the provider that hosts the LLM model.\n\nHowever, it is different. Managing LLM traffic from multiple apps, to multiple LLM providers, introduces new and different challenges where traditional API Gateway features fall short.\n\nFor example, traditional rate-limiting based on number of requests doesn’t work for controlling usage of LLM providers as they’re computationally complex services. To measure usage LLM providers tokenize the words in the request message and response message, and count the number of tokens used. This count gives a good approximation of the computational complexity and cost of serving the request.\n\nBeyond controlling usage of LLMs there are many more challenges relating to ease of integration and high-availability architectures. It’s no longer enough to just optimize for quality of service alone, adopters must consider costs of usage in real time. As adopters of Gen AI look for Gateway solutions to handle these challenges for their system, they often find the necessary features locked behind enterprise licenses.\n\n## Three key MVP features\nNow, let’s look at how handling AI traffic poses new challenges for Gateways. There are several features we discussed together with our collaborators at Bloomberg, and together we decided on three key features for the MVP:\n\n- **Usage Limiting** – to control LLM usage based on word tokens\n- **Unified API** – to simplify client integration with multiple LLM providers\n- **Upstream Authorization** – to configure Authorization to multiple upstream LLM providers\nWhat other features are you looking for? Get in touch with us to share your use case and define the future of Envoy AI Gateway.\n\nWe are really excited about these features being part of Envoy. They will benefit those integrating with LLM providers and, ultimately, also Gateway users for general API request traffic.\n\nWhen it comes to AI Gateway features, we have chosen to collaborate and build within the CNCF Envoy project because we believe multi-company, open-source projects benefit the entire industry by enabling innovation without creating single vendor risk."}],"blogListPaginated":[{"items":["introducing-envoy-ai-gateway"],"metadata":{"permalink":"/blog","page":1,"postsPerPage":10,"totalPages":1,"totalCount":1,"blogDescription":"Blog","blogTitle":"Blog"}}],"blogTags":{"/blog/tags/news":{"inline":true,"label":"news","permalink":"/blog/tags/news","items":["introducing-envoy-ai-gateway"],"pages":[{"items":["introducing-envoy-ai-gateway"],"metadata":{"permalink":"/blog/tags/news","page":1,"postsPerPage":10,"totalPages":1,"totalCount":1,"blogDescription":"Blog","blogTitle":"Blog"}}],"unlisted":false}},"blogTagsListPath":"/blog/tags","authorsMap":{"missberg":{"name":"Erica Hughberg","title":"Envoy AI Gateway Maintainer","url":"https://github.com/missBerg","page":{"permalink":"/blog/authors/missberg"},"socials":{"github":"https://github.com/missBerg"},"imageURL":"https://github.com/missBerg.png","key":"missberg"}}}},"docusaurus-plugin-content-pages":{"default":[{"type":"jsx","permalink":"/","source":"@site/src/pages/index.tsx"},{"type":"mdx","permalink":"/markdown-page","source":"@site/src/pages/markdown-page.md","title":"Markdown page example","description":"You don't need React to write simple standalone pages.","frontMatter":{"title":"Markdown page example"},"unlisted":false}]},"docusaurus-plugin-debug":{},"docusaurus-theme-classic":{},"docusaurus-bootstrap-plugin":{},"docusaurus-mdx-fallback-plugin":{}}}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"archive":{"blogPosts":[{"id":"introducing-envoy-ai-gateway","metadata":{"permalink":"/blog/introducing-envoy-ai-gateway","source":"@site/blog/2024-10-18-introducing-envoy-ai-gw.md","title":"Introducing Envoy AI Gateway","description":"Open collaboration to bring AI Gateway features to the Envoy community","date":"2024-10-18T00:00:00.000Z","tags":[{"inline":true,"label":"news","permalink":"/blog/tags/news"}],"readingTime":2.685,"hasTruncateMarker":true,"authors":[{"name":"Erica Hughberg","title":"Envoy AI Gateway Maintainer","url":"https://github.com/missBerg","page":{"permalink":"/blog/authors/missberg"},"socials":{"github":"https://github.com/missBerg"},"imageURL":"https://github.com/missBerg.png","key":"missberg"}],"frontMatter":{"slug":"introducing-envoy-ai-gateway","title":"Introducing Envoy AI Gateway","authors":["missberg"],"tags":["news"]},"unlisted":false},"content":"Open collaboration to bring AI Gateway features to the Envoy community\n\n<!-- truncate -->\n\nThe industry is embracing Generative AI functionality, and we need to evolve how we handle traffic on an industry-wide scale. Keeping AI traffic handling features exclusive to enterprise licenses is counterproductive to the industry’s needs. This approach limits incentives to a single commercial entity and its customers. Even single-company open-source initiatives do not promote open multi-company collaboration.\n\nA shared challenge like this presents an opportunity for open collaboration to build the necessary features. We believe bringing together different use cases and requirements through open collaboration will lead to better solutions and accelerate innovation. The industry will benefit from diverse expertise and experiences by openly collaborating on software across companies and industries.\n\nThat is why Tetrate and Bloomberg have started an open collaboration to bring critical features for this new era of Gen AI integration. Collaborating openly in the Envoy community, bringing AI traffic handling features to Envoy, via Envoy Gateway and Envoy Proxy.\n\n## Why we need AI traffic handling features\nWhat makes traffic to LLM models different from traditional API traffic?\n\nOn the surface it appears similar. Traffic comes from a client app that is making an API request, and this request has to get to the provider that hosts the LLM model.\n\nHowever, it is different. Managing LLM traffic from multiple apps, to multiple LLM providers, introduces new and different challenges where traditional API Gateway features fall short.\n\nFor example, traditional rate-limiting based on number of requests doesn’t work for controlling usage of LLM providers as they’re computationally complex services. To measure usage LLM providers tokenize the words in the request message and response message, and count the number of tokens used. This count gives a good approximation of the computational complexity and cost of serving the request.\n\nBeyond controlling usage of LLMs there are many more challenges relating to ease of integration and high-availability architectures. It’s no longer enough to just optimize for quality of service alone, adopters must consider costs of usage in real time. As adopters of Gen AI look for Gateway solutions to handle these challenges for their system, they often find the necessary features locked behind enterprise licenses.\n\n## Three key MVP features\nNow, let’s look at how handling AI traffic poses new challenges for Gateways. There are several features we discussed together with our collaborators at Bloomberg, and together we decided on three key features for the MVP:\n\n- Usage Limiting – to control LLM usage based on word tokens\n- Unified API – to simplify client integration with multiple LLM providers\n- Upstream Authorization – to configure Authorization to multiple upstream LLM providers\nWhat other features are you looking for? Get in touch with us to share your use case and define the future of Envoy AI Gateway.\n\nWe are really excited about these features being part of Envoy. They will benefit those integrating with LLM providers and, ultimately, also Gateway users for general API request traffic.\n\nWhen it comes to AI Gateway features, we have chosen to collaborate and build within the CNCF Envoy project because we believe multi-company, open-source projects benefit the entire industry by enabling innovation without creating single vendor risk."}]}}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"authors":[{"name":"Erica Hughberg","title":"Envoy AI Gateway Maintainer","url":"https://github.com/missBerg","page":{"permalink":"/blog/authors/missberg"},"socials":{"github":"https://github.com/missBerg"},"imageURL":"https://github.com/missBerg.png","key":"missberg","count":1}]}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"author":{"name":"Erica Hughberg","title":"Envoy AI Gateway Maintainer","url":"https://github.com/missBerg","page":{"permalink":"/blog/authors/missberg"},"socials":{"github":"https://github.com/missBerg"},"imageURL":"https://github.com/missBerg.png","key":"missberg","count":1},"listMetadata":{"permalink":"/blog/authors/missberg","page":1,"postsPerPage":10,"totalPages":1,"totalCount":1,"blogDescription":"Blog","blogTitle":"Blog"}}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"metadata":{"permalink":"/blog","page":1,"postsPerPage":10,"totalPages":1,"totalCount":1,"blogDescription":"Blog","blogTitle":"Blog"}}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"tags":[{"label":"news","permalink":"/blog/tags/news","count":1}]}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"tag":{"label":"news","permalink":"/blog/tags/news","allTagsPath":"/blog/tags","count":1,"unlisted":false},"listMetadata":{"permalink":"/blog/tags/news","page":1,"postsPerPage":10,"totalPages":1,"totalCount":1,"blogDescription":"Blog","blogTitle":"Blog"}}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"version":{"pluginId":"default","version":"current","label":"Next","banner":null,"badge":false,"noIndex":false,"className":"docs-version-current","isLast":true,"docsSidebars":{"tutorialSidebar":[{"type":"link","label":"Envoy AI Gateway Docs","href":"/docs/intro","docId":"intro","unlisted":false}]},"docs":{"intro":{"id":"intro","title":"Envoy AI Gateway Docs","description":"Version 0.1.0 is expected end of January 2025.","sidebar":"tutorialSidebar"}}}}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"allContent":{"docusaurus-plugin-content-docs":{"default":{"loadedVersions":[{"versionName":"current","label":"Next","banner":null,"badge":false,"noIndex":false,"className":"docs-version-current","path":"/docs","tagsPath":"/docs/tags","isLast":true,"routePriority":-1,"sidebarFilePath":"/Users/erica/repos/ai-gateway/site/sidebars.ts","contentPath":"/Users/erica/repos/ai-gateway/site/docs","contentPathLocalized":"/Users/erica/repos/ai-gateway/site/i18n/en/docusaurus-plugin-content-docs/current","docs":[{"id":"intro","title":"Envoy AI Gateway Docs","description":"Version 0.1.0 is expected end of January 2025.","source":"@site/docs/intro.md","sourceDirName":".","slug":"/intro","permalink":"/docs/intro","draft":false,"unlisted":false,"tags":[],"version":"current","sidebarPosition":1,"frontMatter":{"sidebar_position":1},"sidebar":"tutorialSidebar"}],"drafts":[],"sidebars":{"tutorialSidebar":[{"type":"doc","id":"intro"}]}}]}},"docusaurus-plugin-content-blog":{"default":{"blogSidebarTitle":"Recent posts","blogPosts":[{"id":"introducing-envoy-ai-gateway","metadata":{"permalink":"/blog/introducing-envoy-ai-gateway","source":"@site/blog/2024-10-18-introducing-envoy-ai-gw.md","title":"Introducing Envoy AI Gateway","description":"Open collaboration to bring AI Gateway features to the Envoy community","date":"2024-10-18T00:00:00.000Z","tags":[{"inline":true,"label":"news","permalink":"/blog/tags/news"}],"readingTime":2.685,"hasTruncateMarker":true,"authors":[{"name":"Erica Hughberg","title":"Envoy AI Gateway Maintainer","url":"https://github.com/missBerg","page":{"permalink":"/blog/authors/missberg"},"socials":{"github":"https://github.com/missBerg"},"imageURL":"https://github.com/missBerg.png","key":"missberg"}],"frontMatter":{"slug":"introducing-envoy-ai-gateway","title":"Introducing Envoy AI Gateway","authors":["missberg"],"tags":["news"]},"unlisted":false},"content":"Open collaboration to bring AI Gateway features to the Envoy community\n\n<!-- truncate -->\n\nThe industry is embracing Generative AI functionality, and we need to evolve how we handle traffic on an industry-wide scale. Keeping AI traffic handling features exclusive to enterprise licenses is counterproductive to the industry’s needs. This approach limits incentives to a single commercial entity and its customers. Even single-company open-source initiatives do not promote open multi-company collaboration.\n\nA shared challenge like this presents an opportunity for open collaboration to build the necessary features. We believe bringing together different use cases and requirements through open collaboration will lead to better solutions and accelerate innovation. The industry will benefit from diverse expertise and experiences by openly collaborating on software across companies and industries.\n\nThat is why Tetrate and Bloomberg have started an open collaboration to bring critical features for this new era of Gen AI integration. Collaborating openly in the Envoy community, bringing AI traffic handling features to Envoy, via Envoy Gateway and Envoy Proxy.\n\n## Why we need AI traffic handling features\nWhat makes traffic to LLM models different from traditional API traffic?\n\nOn the surface it appears similar. Traffic comes from a client app that is making an API request, and this request has to get to the provider that hosts the LLM model.\n\nHowever, it is different. Managing LLM traffic from multiple apps, to multiple LLM providers, introduces new and different challenges where traditional API Gateway features fall short.\n\nFor example, traditional rate-limiting based on number of requests doesn’t work for controlling usage of LLM providers as they’re computationally complex services. To measure usage LLM providers tokenize the words in the request message and response message, and count the number of tokens used. This count gives a good approximation of the computational complexity and cost of serving the request.\n\nBeyond controlling usage of LLMs there are many more challenges relating to ease of integration and high-availability architectures. It’s no longer enough to just optimize for quality of service alone, adopters must consider costs of usage in real time. As adopters of Gen AI look for Gateway solutions to handle these challenges for their system, they often find the necessary features locked behind enterprise licenses.\n\n## Three key MVP features\nNow, let’s look at how handling AI traffic poses new challenges for Gateways. There are several features we discussed together with our collaborators at Bloomberg, and together we decided on three key features for the MVP:\n\n- Usage Limiting – to control LLM usage based on word tokens\n- Unified API – to simplify client integration with multiple LLM providers\n- Upstream Authorization – to configure Authorization to multiple upstream LLM providers\nWhat other features are you looking for? Get in touch with us to share your use case and define the future of Envoy AI Gateway.\n\nWe are really excited about these features being part of Envoy. They will benefit those integrating with LLM providers and, ultimately, also Gateway users for general API request traffic.\n\nWhen it comes to AI Gateway features, we have chosen to collaborate and build within the CNCF Envoy project because we believe multi-company, open-source projects benefit the entire industry by enabling innovation without creating single vendor risk."}],"blogListPaginated":[{"items":["introducing-envoy-ai-gateway"],"metadata":{"permalink":"/blog","page":1,"postsPerPage":10,"totalPages":1,"totalCount":1,"blogDescription":"Blog","blogTitle":"Blog"}}],"blogTags":{"/blog/tags/news":{"inline":true,"label":"news","permalink":"/blog/tags/news","items":["introducing-envoy-ai-gateway"],"pages":[{"items":["introducing-envoy-ai-gateway"],"metadata":{"permalink":"/blog/tags/news","page":1,"postsPerPage":10,"totalPages":1,"totalCount":1,"blogDescription":"Blog","blogTitle":"Blog"}}],"unlisted":false}},"blogTagsListPath":"/blog/tags","authorsMap":{"missberg":{"name":"Erica Hughberg","title":"Envoy AI Gateway Maintainer","url":"https://github.com/missBerg","page":{"permalink":"/blog/authors/missberg"},"socials":{"github":"https://github.com/missBerg"},"imageURL":"https://github.com/missBerg.png","key":"missberg"}}}},"docusaurus-plugin-content-pages":{"default":[{"type":"jsx","permalink":"/","source":"@site/src/pages/index.tsx"},{"type":"mdx","permalink":"/markdown-page","source":"@site/src/pages/markdown-page.md","title":"Markdown page example","description":"You don't need React to write simple standalone pages.","frontMatter":{"title":"Markdown page example"},"unlisted":false}]},"docusaurus-plugin-debug":{},"docusaurus-theme-classic":{},"docusaurus-bootstrap-plugin":{},"docusaurus-mdx-fallback-plugin":{}}}