/* Fonts */
/* Faktum, regular weight (400). Each face lists WOFF2 first and WOFF as the
   fallback source; `font-display: swap` lets fallback text render while the
   font file is still loading. */
@font-face {
  font-family: "Faktum";
  font-style: normal;
  font-weight: 400;
  font-display: swap;
  src:
    url("/assets/fonts/Faktum-Regular.woff2") format("woff2"),
    url("/assets/fonts/Faktum-Regular.woff") format("woff");
}

/* Faktum, medium weight (500). */
@font-face {
  font-family: "Faktum";
  font-style: normal;
  font-weight: 500;
  font-display: swap;
  src:
    url("/assets/fonts/Faktum-Medium.woff2") format("woff2"),
    url("/assets/fonts/Faktum-Medium.woff") format("woff");
}

/* Faktum, semi-bold weight (600). */
@font-face {
  font-family: "Faktum";
  font-style: normal;
  font-weight: 600;
  font-display: swap;
  src:
    url("/assets/fonts/Faktum-SemiBold.woff2") format("woff2"),
    url("/assets/fonts/Faktum-SemiBold.woff") format("woff");
}

/* All heading levels use Faktum, falling back to Inter and then the generic
   sans-serif family. */
h1, h2, h3, h4, h5, h6 {
  font-family: "Faktum", "Inter", sans-serif;
}

/* CTA button: rounds the corners of the <span> directly inside the top-bar CTA
   link. The selector is not scoped to `.dark`, so it applies in both color
   modes. */
#topbar-cta-button > a > span {
  border-radius: 4px;
}

/* Dark mode styling */
/* Box shared by the two overlay layers drawn on the first `span.inset-0` in
   dark mode: each pseudo-element fills its parent, and its background image is
   stretched to full width and 67% of the height, anchored at the top. */
.dark main > div > span.inset-0:first-of-type::before,
.dark main > div > span.inset-0:first-of-type::after {
  content: "";
  display: block;
  position: absolute;
  top: 0;
  left: 0;
  width: 100%;
  height: 100%;
  background-repeat: no-repeat;
  background-position: top center;
  background-size: 100% 67%;
}

/* Layer 1: gradient artwork at half opacity. */
.dark main > div > span.inset-0:first-of-type::before {
  background-image: url("https://mintlify.s3.us-west-1.amazonaws.com/octoai/assets/images/gradient-bg.svg");
  opacity: 0.5;
}

/* Layer 2: noise texture at 40% opacity. */
.dark main > div > span.inset-0:first-of-type::after {
  background-image: url("https://mintlify.s3.us-west-1.amazonaws.com/octoai/assets/images/noise.png");
  opacity: 0.4;
}

/* Fade on the last `span.inset-0`: covers the top 67% of the parent and blends
   from solid #111213 at the bottom edge to transparent at 37.07%.
   Keep this rule after the first-of-type rules: it has the same specificity,
   so it must come later to win if one span is both first and last of type. */
.dark main > div > span.inset-0:last-of-type::before {
  content: "";
  display: block;
  position: absolute;
  top: 0;
  left: 0;
  width: 100%;
  height: 67%;
  background: linear-gradient(0deg, #111213 0%, rgba(17, 18, 19, 0) 37.07%);
}

/* Sets the text color of elements carrying the `dark:text-gray-400` class
   inside a `.dark` ancestor.
   NOTE(review): presumably overrides a utility class emitted by the docs
   theme — confirm. */
:is(.dark .dark\:text-gray-400) {
  color: #e0e0e0;
}

/* Dark mode: faint translucent white background on the navbar. */
.dark #navbar {
  background-color: rgba(255, 255, 255, 0.051);
}

/* Dark mode: horizontal fade on the scrollbar-gutter container. At 270deg the
   gradient runs right-to-left, from 50%-opaque #111213 on the right edge to
   fully transparent on the left.
   Both stops use the comma-separated rgba() form, matching every other color
   in this stylesheet and avoiding the newer space-separated `rgb(r g b / a)`
   syntax that older browsers do not parse. */
.dark .stable-scrollbar-gutter {
  background: linear-gradient(
    270deg,
    rgba(17, 18, 19, 0.5) 0%,
    rgba(17, 18, 19, 0) 100%
  );
}

/* `.sticky` descendants of the gutter get no background of their own
   (presumably so the gradient above stays visible — confirm). */
.dark .stable-scrollbar-gutter .sticky {
  background: transparent;
}

/* Dark mode: default color for every link. More specific link rules in this
   file (e.g. `.dark .related > ul > li > a`) override it. */
.dark a {
  color: #d0d1d2;
}

/* CTA button, dark mode: translucent white fill with a soft white glow and the
   border reset. */
.dark #topbar-cta-button > a > span {
  border: unset;
  background: rgba(255, 255, 255, 0.05);
  box-shadow: 0 0 2px 0 rgba(255, 255, 255, 0.5);
}

/* Near-white color for the span and svg nested in the CTA link's <div>. */
.dark #topbar-cta-button > a > div > svg,
.dark #topbar-cta-button > a > div > span {
  color: #f7f7f7;
}

/* Search */
/* Highlighted item (dark mode): the <li> under #headlessui-portal-root whose
   data-headlessui-state attribute is "active" gets a dark grey fill. */
.dark #headlessui-portal-root li[data-headlessui-state="active"] {
  background: #1f2123;
}

/* <p> and <span> text inside the active item is set to white. */
.dark #headlessui-portal-root li[data-headlessui-state="active"] p,
.dark #headlessui-portal-root li[data-headlessui-state="active"] span {
  color: white;
}

/* Dark mode: links in the related list keep the same light grey color in both
   their resting and hovered states. */
.dark .related > ul > li > a,
.dark .related > ul > li > a:hover {
  color: #e0e0e0;
}

/* Light mode styling */
/* Base (light mode) artwork on the first `span.inset-0`: a pseudo-element that
   fills its parent and paints five stacked gradients across the full width and
   top 67% of the height. Layers are listed top-most first:
     1. vertical wash from #f7f8f8 to transparent
     2. flat 75%-opaque off-white veil
     3. radial vignette, transparent at its centre and fading to #f6f5f4
     4. red-orange (#fe6756) radial glow
     5. orange (#fea756) radial glow */
main > div > span.inset-0:first-of-type::before {
  content: "";
  display: block;
  position: absolute;
  top: 0;
  left: 0;
  width: 100%;
  height: 100%;
  opacity: 1;
  background-repeat: no-repeat;
  background-position: top center;
  background-size: 100% 67%;
  background-image:
    linear-gradient(180deg, #f7f8f8 0%, rgba(245, 245, 244, 0) 46.59%),
    linear-gradient(0deg, rgba(246, 245, 244, 0.75) 0%, rgba(246, 245, 244, 0.75) 100%),
    radial-gradient(85.22% 111.48% at 54.03% 97.36%, rgba(246, 245, 244, 0) 0%, #f6f5f4 86.92%),
    radial-gradient(52.79% 78.26% at 38.74% 71.48%, #fe6756 0%, rgba(254, 103, 86, 0) 100%),
    radial-gradient(88.13% 69.33% at 67.53% 67.77%, #fea756 0%, rgba(254, 167, 86, 0) 100%);
}

/* Fade on the last `span.inset-0`: covers the top 67% of the parent and blends
   from solid #fafaf9 at the bottom edge to transparent at 37.07%.
   Keep this rule after the one above: both have the same specificity, so it
   must come later to win if one span is both first and last of type. */
main > div > span.inset-0:last-of-type::before {
  content: "";
  display: block;
  position: absolute;
  top: 0;
  left: 0;
  width: 100%;
  height: 67%;
  background: linear-gradient(0deg, #fafaf9 0%, rgba(250, 250, 249, 0) 37.07%);
}

/* CTA button: base (light mode) dark fill with the border reset. In dark mode
   the more specific `.dark #topbar-cta-button > a > span` rule earlier in this
   file overrides the background. */
#topbar-cta-button > a > span {
  background: #1c1917;
  border: unset;
}

/* Search */

/* Icon: blue fill for the rounded icon boxes inside the search portal.
   NOTE(review): these selectors depend on the theme's exact utility-class
   markup — re-check them after a theme upgrade. */
#headlessui-portal-root ul > li > .rounded-md,
#headlessui-portal-root div.flex.items-center.space-x-5.text-sm > div.rounded-md.ring-1.shadow-sm {
  background: #0096ff;
}

/* White glyph for any svg nested inside the second kind of icon box above. */
#headlessui-portal-root div.flex.items-center.space-x-5.text-sm > div.rounded-md.ring-1.shadow-sm svg {
  fill: white;
}

/* Highlighted item (base / light mode): the <li> under #headlessui-portal-root
   whose data-headlessui-state attribute is "active" gets a light grey fill.
   The `.dark`-prefixed counterpart earlier in this file overrides it in dark
   mode. */
#headlessui-portal-root li[data-headlessui-state="active"] {
  background: #e6e5e5;
}

/* <p> and <span> text inside the active item is set to black. */
#headlessui-portal-root li[data-headlessui-state="active"] p,
#headlessui-portal-root li[data-headlessui-state="active"] span {
  color: black;
}

/* AI sources: blue box with white text for the <div> inside each link of the
   small-text row within the search portal.
   NOTE(review): selector mirrors the theme's utility classes — confirm it
   still matches the rendered DOM. */
#headlessui-portal-root .mt-5.px-7.text-sm.text-gray-400 > .mt-2.flex.items-center.text-xs > a > div {
  background: #0096ff;
  color: white;
}

/* Solid blue text color for the element the theme styles with transparent,
   gradient-clipped text. */
#headlessui-portal-root .border-0.bg-transparent.pr-6.text-sm.text-transparent.bg-clip-text.bg-gradient-to-r.from-primary.to-primary-dark {
  color: #0096ff;
}

/* Two-column layout: from a viewport width of 600px upward, the <p> directly
   inside `.img-two-col` becomes a grid with two equal columns and a 16px gap
   (presumably to place two images side by side — confirm against usage).
   The media query is written at the top level rather than nested inside the
   rule, so the layout does not depend on native CSS nesting support. */
@media (min-width: 600px) {
  .img-two-col > p {
    display: grid;
    grid-template-columns: 1fr 1fr;
    gap: 16px;
  }
}

/* Related list */

/* List container: small left inset. */
.related > ul {
  padding-left: 8px;
}

/* List items: left padding forced with !important so it wins over the
   theme's own list padding (presumably — confirm what it overrides). */
.related > ul > li {
  padding-left: 20px !important;
}

/* Item marker: resizes and recolors the ::before pseudo-element to a 6x6 grey
   box offset 10px from the top. */
.related > ul > li::before {
  top: 10px;
  width: 6px;
  height: 6px;
  background-color: #7b818a;
}

/* Links: dark grey text with the bottom border reset. */
.related > ul > li > a {
  border-bottom: unset;
  color: #3e4346;
}

/* Links are underlined on hover. */
.related > ul > li > a:hover {
  text-decoration: underline;
}

/* Changelog entry: positioned so its ::after separator can anchor to it. */
.changelog {
  position: relative;
  padding-bottom: 12px;
}

/* Separator: an empty overlay the size of the entry, shifted 10px below it,
   whose only visible part is its bottom border. `pointer-events: none` keeps
   the overlay from intercepting clicks on the entry's content. */
.changelog::after {
  content: "";
  display: block;
  position: absolute;
  left: 0;
  bottom: -10px;
  width: 100%;
  height: 100%;
  border-bottom: 1px solid rgb(208, 208, 208);
  pointer-events: none;
}

/* Dark mode: darker separator line. */
.dark .changelog::after {
  border-bottom: 1px solid rgb(37, 41, 45);
}

/* No separator on a `.changelog` that is the last of its element type among
   its siblings. */
.changelog:last-of-type::after {
  display: none;
}

/* Status row: a left-aligned, vertically centred flex row with 12px between
   items. The -65px bottom margin pulls the following content up over the row
   (presumably to sit the status beside the entry heading — confirm). */
.changelog-status {
  position: relative;
  display: flex;
  align-items: center;
  justify-content: flex-start;
  gap: 12px;
  padding-top: 32px;
  margin-bottom: -65px;
}

/* Status label: small, bold, letter-spaced uppercase text in mid grey. */
.changelog-status span {
  color: #646464;
  font-size: 14px;
  font-weight: 700;
  letter-spacing: 2px;
  text-transform: uppercase;
}

/* Dark mode: lighter grey for the status label. */
.dark .changelog-status span {
  color: #bbbbbb;
}

/* Dark grey background for the `.absolute.top-11.rounded-lg` element that is a
   direct child of a `.group.z-10` inside a `.not-prose.rounded-xl.mt-5.mb-8`
   container.
   NOTE(review): looks like a tooltip/popover inside a theme component —
   confirm against the rendered DOM. */
.not-prose.rounded-xl.mt-5.mb-8 > .group.z-10 > .absolute.top-11.rounded-lg {
  background-color: #3e4346;
}


Support

OctoAI

API Reference

Integrations

Release Notes

Community

Sign up

Login

Inference models

Start using our GenAI Solutions in one minute.

Quickstart

Pricing & billing

All OctoAI text generation models are accessible via REST API. Learn how to implement with easy to follow code examples.

Text Gen REST API

If you've been using GPT-3.5 or GPT-4, switching to OctoAI is easy!

Migrate from OpenAI to OctoAI in 3 lines of code

Image Gen

Image Gen REST API

Video Gen

Video Gen REST API

Background removal takes an existing image you provide and removes the parts of the image considered to be “background.”

Background Removal

Background Removal REST API

In addition to image generation, OctoAI can also upscale images to higher resolutions.

Upscaling

Upscaling REST API

Adetailer takes an existing image you provide, detects faces and hands and fixes them.

Adetailer

Adetailer REST API

Create an API token

How to create an OctoAI API token

The OctoAI Text Gen Solution offers market-leading price and performance for a growing list of open source LLMs including Llama2, CodeLlama, and Mistral (see Supported models section below). We offer a WebUI playground, API endpoints, and Python/TypeScript SDK solution for interacting with these models. All of our endpoints are callable via the chat completions format currently popular in the industry (see API documentation).

Getting started

Getting started with our Text Gen Solution

Use the OctoAI Chat Completion API to easily generate text.

Text Gen Python SDK

The OctoAI Text Gen TypeScript SDK supports both the Chat Completions API and the Completions API.

Text Gen TypeScript SDK

Ensure Text Gen outputs fit into your desired JSON schema.

Using JSON mode with Text Gen endpoints

An LLM to guard your AI applications from misuse.

Using Llama Guard to moderate text

Fast and easy document parsing and embedding using Unstructured.io and OctoAI.

Using Unstructured.io for embedding documents

The OctoAI Media Gen Solution offers access to the fastest and most customizable Stable Diffusion models including Stable Video Diffusion 1.1, Stable Diffusion XL and 1.5 for image-to-video, text-to-image, image-to-image use cases and more. We offer a WebUI playground, API endpoints, and Python/TypeScript SDKs for interacting with these models.

Getting started with our Media Gen Solution

You can tweak your images using various customizations available within the OctoAI Media Gen Solution, including checkpoints, LoRAs, textual inversions, and ControlNets.

Overview

Custom checkpoints are fine-tuned versions of the original model and allow users to refine customizations while creating images or videos.

Checkpoints

LoRAs for image or video AI models are custom weights applied to a base checkpoint. LoRAs are a way to make highly customized AI images or videos.

LoRAs

Customize your images on OctoAI using Textual inversions, which are embeddings that represent custom subjects.

Textual Inversions

OctoAI allows for prompt weighting, the emphasis or de-emphasis of certain words or phrases, in prompts to create customized images.

Prompt Weighting

OctoAI's asset library is pre-populated with the most popular available ControlNets which allow added image input to influence and customize the image generation.

ControlNets

A new technology called SDXL Lightning enables high-quality image generations in less than 1 second.

SDXL Lightning

SDXL Lightning for blazing fast generations

SSD-1B, a distilled version of SDXL, generates images 50% faster than SDXL.

Stable Diffusion SSD

Stable Diffusion Samplers utilize diffusion models to iteratively refine noise, producing high-quality images with remarkable fidelity and coherence.

Samplers

Image Generator Python client

Create custom assets with OctoAI's fine-tuning of Stable Diffusion models.

Fine-tuning Stable Diffusion

How to create a fine-tuned LoRA using OctoAI's TypeScript SDK

TypeScript SDK

TypeScript SDK Fine-tuning

Uploading custom assets

Uploading custom assets to OctoAI's Asset Library

Asset Library Python client

Asset Library in the Python SDK

Asset Library TypeScript client

Asset Library in the TypeScript SDK

Edit and restore an image to fix flaws or remove unwanted objects from it.

Inpainting

Extend beyond the canvas of an existing image.

Outpainting

Automatically fix faces and hands using Adetailer

Adetailer API

Seamlessly integrate a photo's subject into AI-generated output and eliminate the need to create time-consuming custom facial finetunes.

Photo Merge

Remove parts of the image considered to be background.

Getting started with our Compute Service

Create an endpoint from an existing container

Pulling containers from a private registry

Setting up account-wide secrets for your custom endpoints

Create a container and endpoint using the CLI

Advanced: build a container from scratch in Python

Healthcheck path in custom containers

OctoAI's GenAI production stack in your environment.

OctoStack

Private networking with OctoAI's SecureLink.

SecureLink guide

CLI installation

A reference guide for all CLI commands and their behaviors.

CLI reference

Python SDK installation

Python SDK installation & setup

Python SDK inference

Upgrading from octoai-sdk

Upgrading from the octoai-sdk

TypeScript SDK installation

TypeScript SDK installation & setup

TypeScript SDK inference

TypeScript SDK reference

Browse OctoAI's partner integrations to help you build your custom solution.

All OctoAI Integrations

LangChain developers can leverage OctoAI LLM and embedding endpoints to easily access efficient compute across a wide selection of LLMs.

LangChain

LangChain Integration

This integration allows a developer using Canopy to choose from the best LLMs on OctoAI.

Pinecone

Pinecone (Canopy) Integration

The OctoAIEmbeddingEncoder is available, so documents parsed with Unstructured can easily be embedded with the OctoAI embeddings endpoint.

Unstructured.io

Unstructured.io Integration

OpenRouter API users can leverage OctoAI's best-in-class LLM endpoints.

OpenRouter

OpenRouter Integration

A developer building AI apps can now access highly optimized LLMs and Embeddings models on OctoAI.

LlamaIndex

LlamaIndex Integration

Rate limits are restrictions on the rate at which an individual account can submit inference requests.

Rate limits

Privacy & security

There are multiple ways in which customers can build a RAG application on OctoAI.

RAG with OctoAI

How to implement RAG with OctoAI

OctoAI currently runs on AWS and GCP hardware in several regions

Service regions

The default setup is 1 user profile per 1 OctoAI account. We can easily help you set up multiple users within a single account if you have a team or organization with multiple users. This will allow your team to manage endpoints, view logs & metrics, and securely share access to the account.

Multi-user accounts

Below are the open source technologies we make use of and their associated licenses

Organization	Use Cases	Model Name	API Model String	Context Length
Meta	Chat	Llama2-Chat (13B)	llama-2-13b-chat	4,096
Meta	Chat	Llama2-Chat (70B)	llama-2-70b-chat	4,096
Meta	Chat	Llama3-Instruct (8B)	meta-llama-3-8b-instruct	8,192
Meta	Chat	Llama3-Instruct (70B)	meta-llama-3-70b-instruct	8,192
Meta	Coding	Codellama-Instruct (7B)	codellama-7b-instruct	16,384
Meta	Coding	Codellama-Instruct (13B)	codellama-13b-instruct	16,384
Meta	Coding	Codellama-Instruct (34B)	codellama-34b-instruct	16,384
Mistral	Chat, Coding	Mistral Instruct v0.2 (7B)	mistral-7b-instruct	32,768
Nous Research	Chat, Coding	Nous Hermes 2 Pro Mistral (7B)	hermes-2-pro-mistral-7b	32,768
Mistral	Chat, Coding	Mixtral Instruct (8x7B)	mixtral-8x7b-instruct	32,768
Nous Research	Content Moderation	Nous Hermes 2 Mixtral DPO (8x7B)	nous-hermes-2-mixtral-8x7b-dpo	32,768
Mistral	Chat, Coding	Mixtral Instruct (8x22B)	mixtral-8x22b-instruct	65,536
Meta	Content Moderation	Llama Guard	llamaguard-7b	4,096
Alibaba DAMO	Embedding	GTE Large	thenlper/gte-large	n/a

Service	Model	API Model String
Image Gen	Stable Diffusion v1.5	sd
Image Gen	Stable Diffusion XL v1.0	sdxl
Image Gen	Segmind Stable Diffusion	ssd
Image Gen	ControlNet SD v1.5	controlnet-sd15
Image Gen	ControlNet SDXL	controlnet-sdxl
Image Animation	Stable Video Diffusion v1.1	svd
Background Removal	IS-Net	background-removal
Upscaling	REAL-ESRGAN x4 Plus	real-esrgan-x4-plus
Upscaling	REAL-ESRGAN x4 v3	real-esrgan-x4-v3
Upscaling	REAL-ESRGAN x4 v3 WDN	real-esrgan-x4-v3-wdn
Upscaling	REAL-ESRGAN Anime Video v3	real-esrgan-animevideo-v3
Upscaling	REAL-ESRGAN x4 Plus Anime	real-esrgan-x4-plus-anime
Upscaling	REAL-ESRGAN x2 Plus	real-esrgan-x2-plus
Adetailer	Face YOLOv8n	face_yolov8n
Adetailer	Hand YOLOv8n	hand_yolov8n
Adetailer	Face Full MediaPipe	face_full_mediapipe
Adetailer	Face Short MediaPipe	face_short_mediapipe
Adetailer	Face Mesh MediaPipe	face_mesh_mediapipe
Adetailer	Eyes Mesh MediaPipe	eyes_mesh_mediapipe

Quickstart

Text Gen Solution

Media Gen Solution

Compute Service

Private Deployment

CLI

Python SDK

TypeScript SDK

FAQs

Inference models

Serverless Endpoints

Text Gen Models

Media Gen Models

Quickstart

Text Gen Solution

Media Gen Solution

Compute Service

Private Deployment

CLI

Python SDK

TypeScript SDK

FAQs

Serverless Endpoints

Text Gen Models

Media Gen Models

Serverless Endpoints

Text Gen Models

Media Gen Models