Caching

Method: cachedContents.create

Creates CachedContent resource.

Endpoint

post https://generativelanguage.googleapis.com/v1beta/cachedContents

Request body

The request body contains an instance of CachedContent.

Example request

Basic

Python

document = genai.upload_file(path=media / "a11.txt")
model_name = "gemini-1.5-flash-001"
cache = genai.caching.CachedContent.create(
    model=model_name,
    system_instruction="You are an expert analyzing transcripts.",
    contents=[document],
)
print(cache)

model = genai.GenerativeModel.from_cached_content(cache)
response = model.generate_content("Please summarize this transcript")
print(response.text)

Node.js

// Make sure to include these imports:
// import { GoogleAICacheManager, GoogleAIFileManager } from "@google/generative-ai/server";
// import { GoogleGenerativeAI } from "@google/generative-ai";
const cacheManager = new GoogleAICacheManager(process.env.API_KEY);
const fileManager = new GoogleAIFileManager(process.env.API_KEY);

const uploadResult = await fileManager.uploadFile(`${mediaPath}/a11.txt`, {
  mimeType: "text/plain",
});

const cacheResult = await cacheManager.create({
  model: "models/gemini-1.5-flash-001",
  contents: [
    {
      role: "user",
      parts: [
        {
          fileData: {
            fileUri: uploadResult.file.uri,
            mimeType: uploadResult.file.mimeType,
          },
        },
      ],
    },
  ],
});

console.log(cacheResult);

const genAI = new GoogleGenerativeAI(process.env.API_KEY);
const model = genAI.getGenerativeModelFromCachedContent(cacheResult);
const result = await model.generateContent(
  "Please summarize this transcript.",
);
console.log(result.response.text());

From name

Python

document = genai.upload_file(path=media / "a11.txt")
model_name = "gemini-1.5-flash-001"
cache = genai.caching.CachedContent.create(
    model=model_name,
    system_instruction="You are an expert analyzing transcripts.",
    contents=[document],
)
cache_name = cache.name  # Save the name for later

# Later
cache = genai.caching.CachedContent.get(cache_name)
apollo_model = genai.GenerativeModel.from_cached_content(cache)
response = apollo_model.generate_content("Find a lighthearted moment from this transcript")
print(response.text)

Node.js

// Make sure to include these imports:
// import { GoogleAICacheManager, GoogleAIFileManager } from "@google/generative-ai/server";
// import { GoogleGenerativeAI } from "@google/generative-ai";
const cacheManager = new GoogleAICacheManager(process.env.API_KEY);
const fileManager = new GoogleAIFileManager(process.env.API_KEY);

const uploadResult = await fileManager.uploadFile(`${mediaPath}/a11.txt`, {
  mimeType: "text/plain",
});

const cacheResult = await cacheManager.create({
  model: "models/gemini-1.5-flash-001",
  contents: [
    {
      role: "user",
      parts: [
        {
          fileData: {
            fileUri: uploadResult.file.uri,
            mimeType: uploadResult.file.mimeType,
          },
        },
      ],
    },
  ],
});
const cacheName = cacheResult.name; // Save the name for later.

// Later
const getCacheResult = await cacheManager.get(cacheName);
const genAI = new GoogleGenerativeAI(process.env.API_KEY);
const model = genAI.getGenerativeModelFromCachedContent(getCacheResult);
model.generateContent("Please summarize this transcript.");

From chat

Python

model_name = "gemini-1.5-flash-001"
system_instruction = "You are an expert analyzing transcripts."

model = genai.GenerativeModel(model_name=model_name, system_instruction=system_instruction)
chat = model.start_chat()
document = genai.upload_file(path=media / "a11.txt")
response = chat.send_message(["Hi, could you summarize this transcript?", document])
print("\n\nmodel:  ", response.text)
response = chat.send_message(
    ["Okay, could you tell me more about the trans-lunar injection"]
)
print("\n\nmodel:  ", response.text)

# To cache the conversation so far, pass the chat history as the list of "contents".
cache = genai.caching.CachedContent.create(
    model=model_name,
    system_instruction=system_instruction,
    contents=chat.history,
)
model = genai.GenerativeModel.from_cached_content(cached_content=cache)

# Continue the chat where you left off.
chat = model.start_chat()
response = chat.send_message(
    "I didn't understand that last part, could you explain it in simpler language?"
)
print("\n\nmodel:  ", response.text)

Node.js

// Make sure to include these imports:
// import { GoogleGenerativeAI } from "@google/generative-ai";
// import { GoogleAICacheManager, GoogleAIFileManager } from "@google/generative-ai/server";
const genAI = new GoogleGenerativeAI(process.env.API_KEY);
const cacheManager = new GoogleAICacheManager(process.env.API_KEY);
const fileManager = new GoogleAIFileManager(process.env.API_KEY);

const model = genAI.getGenerativeModel({ model: "gemini-1.5-flash-001" });
const chat = model.startChat();

const uploadResult = await fileManager.uploadFile(`${mediaPath}/a11.txt`, {
  mimeType: "text/plain",
});

let result = await chat.sendMessage([
  "Hi, could you summarize this transcript?",
  {
    fileData: {
      fileUri: uploadResult.file.uri,
      mimeType: uploadResult.file.mimeType,
    },
  },
]);
console.log(`\n\nmodel: ${result.response.text()}`);
result = await chat.sendMessage(
  "Okay, could you tell me more about the trans-lunar injection",
);
console.log(`\n\nmodel: ${result.response.text()}`);

const cacheResult = await cacheManager.create({
  model: "models/gemini-1.5-flash-001",
  contents: await chat.getHistory(),
});

const newModel = genAI.getGenerativeModelFromCachedContent(cacheResult);

const newChat = newModel.startChat();
result = await newChat.sendMessage(
  "I didn't understand that last part, could you explain it in simpler language?",
);
console.log(`\n\nmodel: ${result.response.text()}`);

Response body

If successful, the response body contains a newly created instance of CachedContent.

Method: cachedContents.list

Lists CachedContents.

Endpoint

get https://generativelanguage.googleapis.com/v1beta/cachedContents

Query parameters

pageSize integer

Optional. The maximum number of cached contents to return. The service may return fewer than this value. If unspecified, some default (under maximum) number of items will be returned. The maximum value is 1000; values above 1000 will be coerced to 1000.

pageToken string

Optional. A page token, received from a previous cachedContents.list call. Provide this to retrieve the subsequent page.

When paginating, all other parameters provided to cachedContents.list must match the call that provided the page token.

Request body

The request body must be empty.

Response body

Response with CachedContents list.

If successful, the response body contains data with the following structure:

Fields
cachedContents[] object (CachedContent)

List of cached contents.

nextPageToken string

A token, which can be sent as pageToken to retrieve the next page. If this field is omitted, there are no subsequent pages.

JSON representation
{
  "cachedContents": [
    {
      object (CachedContent)
    }
  ],
  "nextPageToken": string
}

Method: cachedContents.get

Reads CachedContent resource.

Endpoint

get https://generativelanguage.googleapis.com/v1beta/{name=cachedContents/*}

Path parameters

name string

Required. The resource name referring to the content cache entry. Format: cachedContents/{id} It takes the form cachedContents/{cachedcontent}.

Request body

The request body must be empty.

Example request

Python

document = genai.upload_file(path=media / "a11.txt")
model_name = "gemini-1.5-flash-001"
cache = genai.caching.CachedContent.create(
    model=model_name,
    system_instruction="You are an expert analyzing transcripts.",
    contents=[document],
)
print(genai.caching.CachedContent.get(name=cache.name))

Node.js

// Make sure to include these imports:
// import { GoogleAICacheManager, GoogleAIFileManager } from "@google/generative-ai/server";
const cacheManager = new GoogleAICacheManager(process.env.API_KEY);
const fileManager = new GoogleAIFileManager(process.env.API_KEY);

const uploadResult = await fileManager.uploadFile(`${mediaPath}/a11.txt`, {
  mimeType: "text/plain",
});

const cacheResult = await cacheManager.create({
  model: "models/gemini-1.5-flash-001",
  contents: [
    {
      role: "user",
      parts: [
        {
          fileData: {
            fileUri: uploadResult.file.uri,
            mimeType: uploadResult.file.mimeType,
          },
        },
      ],
    },
  ],
});
const cacheGetResult = await cacheManager.get(cacheResult.name);
console.log(cacheGetResult);

Response body

If successful, the response body contains an instance of CachedContent.

Method: cachedContents.patch

Updates CachedContent resource (only expiration is updatable).

Endpoint

patch https://generativelanguage.googleapis.com/v1beta/{cachedContent.name=cachedContents/*}

PATCH https://generativelanguage.googleapis.com/v1beta/{cachedContent.name=cachedContents/*}

Path parameters

cachedContent.name string

Optional. Identifier. The resource name referring to the cached content. Format: cachedContents/{id} It takes the form cachedContents/{cachedcontent}.

Query parameters

updateMask string (FieldMask format)

The list of fields to update.

This is a comma-separated list of fully qualified names of fields. Example: "user.displayName,photo".

Request body

The request body contains an instance of CachedContent.

Example request

Python

import datetime

document = genai.upload_file(path=media / "a11.txt")
model_name = "gemini-1.5-flash-001"
cache = genai.caching.CachedContent.create(
    model=model_name,
    system_instruction="You are an expert analyzing transcripts.",
    contents=[document],
)

# You can update the ttl
cache.update(ttl=datetime.timedelta(hours=2))
print(f"After update:\n {cache}")

# Or you can update the expire_time
cache.update(expire_time=datetime.datetime.now() + datetime.timedelta(minutes=15))

Node.js

// Make sure to include these imports:
// import { GoogleAICacheManager, GoogleAIFileManager } from "@google/generative-ai/server";
const cacheManager = new GoogleAICacheManager(process.env.API_KEY);
const fileManager = new GoogleAIFileManager(process.env.API_KEY);

const uploadResult = await fileManager.uploadFile(`${mediaPath}/a11.txt`, {
  mimeType: "text/plain",
});

const cacheResult = await cacheManager.create({
  model: "models/gemini-1.5-flash-001",
  contents: [
    {
      role: "user",
      parts: [
        {
          fileData: {
            fileUri: uploadResult.file.uri,
            mimeType: uploadResult.file.mimeType,
          },
        },
      ],
    },
  ],
});
console.log("initial cache data:", cacheResult);
const cacheUpdateResult = await cacheManager.update(cacheResult.name, {
  cachedContent: {
    // 2 hours
    ttlSeconds: 60 * 60 * 2,
  },
});
console.log("updated cache data:", cacheUpdateResult);

Response body

If successful, the response body contains an instance of CachedContent.

Method: cachedContents.delete

Deletes CachedContent resource.

Endpoint

delete https://generativelanguage.googleapis.com/v1beta/{name=cachedContents/*}

Path parameters

name string

Required. The resource name referring to the content cache entry Format: cachedContents/{id} It takes the form cachedContents/{cachedcontent}.

Request body

The request body must be empty.

Example request

Python

document = genai.upload_file(path=media / "a11.txt")
model_name = "gemini-1.5-flash-001"
cache = genai.caching.CachedContent.create(
    model=model_name,
    system_instruction="You are an expert analyzing transcripts.",
    contents=[document],
)
cache.delete()

Node.js

// Make sure to include these imports:
// import { GoogleAICacheManager, GoogleAIFileManager } from "@google/generative-ai/server";
const cacheManager = new GoogleAICacheManager(process.env.API_KEY);
const fileManager = new GoogleAIFileManager(process.env.API_KEY);

const uploadResult = await fileManager.uploadFile(`${mediaPath}/a11.txt`, {
  mimeType: "text/plain",
});

const cacheResult = await cacheManager.create({
  model: "models/gemini-1.5-flash-001",
  contents: [
    {
      role: "user",
      parts: [
        {
          fileData: {
            fileUri: uploadResult.file.uri,
            mimeType: uploadResult.file.mimeType,
          },
        },
      ],
    },
  ],
});
await cacheManager.delete(cacheResult.name);

Response body

If successful, the response body is empty.

REST Resource: cachedContents

Resource: CachedContent

Content that has been preprocessed and can be used in subsequent request to GenerativeService.

Cached content can be only used with model it was created for.

JSON representation
{
  "contents": [
    {
      object (Content)
    }
  ],
  "tools": [
    {
      object (Tool)
    }
  ],
  "createTime": string,
  "updateTime": string,
  "usageMetadata": {
    object (UsageMetadata)
  },

  // Union field expiration can be only one of the following:
  "expireTime": string,
  "ttl": string
  // End of list of possible types for union field expiration.
  "name": string,
  "displayName": string,
  "model": string,
  "systemInstruction": {
    object (Content)
  },
  "toolConfig": {
    object (ToolConfig)
  }
}
Fields
contents[] object (Content)

Optional. Input only. Immutable. The content to cache.

tools[] object (Tool)

Optional. Input only. Immutable. A list of Tools the model may use to generate the next response

createTime string (Timestamp format)

Output only. Creation time of the cache entry.

A timestamp in RFC3339 UTC "Zulu" format, with nanosecond resolution and up to nine fractional digits. Examples: "2014-10-02T15:01:23Z" and "2014-10-02T15:01:23.045123456Z".

updateTime string (Timestamp format)

Output only. When the cache entry was last updated in UTC time.

A timestamp in RFC3339 UTC "Zulu" format, with nanosecond resolution and up to nine fractional digits. Examples: "2014-10-02T15:01:23Z" and "2014-10-02T15:01:23.045123456Z".

usageMetadata object (UsageMetadata)

Output only. Metadata on the usage of the cached content.

Union field expiration. Specifies when this resource will expire. expiration can be only one of the following:
expireTime string (Timestamp format)

Timestamp in UTC of when this resource is considered expired. This is always provided on output, regardless of what was sent on input.

A timestamp in RFC3339 UTC "Zulu" format, with nanosecond resolution and up to nine fractional digits. Examples: "2014-10-02T15:01:23Z" and "2014-10-02T15:01:23.045123456Z".

ttl string (Duration format)

Input only. New TTL for this resource, input only.

A duration in seconds with up to nine fractional digits, ending with 's'. Example: "3.5s".

name string

Optional. Identifier. The resource name referring to the cached content. Format: cachedContents/{id}

displayName string

Optional. Immutable. The user-generated meaningful display name of the cached content. Maximum 128 Unicode characters.

model string

Required. Immutable. The name of the Model to use for cached content Format: models/{model}

systemInstruction object (Content)

Optional. Input only. Immutable. Developer set system instruction. Currently text only.

toolConfig object (ToolConfig)

Optional. Input only. Immutable. Tool config. This config is shared for all tools.

Content

The base structured datatype containing multi-part content of a message.

A Content includes a role field designating the producer of the Content and a parts field containing multi-part data that contains the content of the message turn.

JSON representation
{
  "parts": [
    {
      object (Part)
    }
  ],
  "role": string
}
Fields
parts[] object (Part)

Ordered Parts that constitute a single message. Parts may have different MIME types.

role string

Optional. The producer of the content. Must be either 'user' or 'model'.

Useful to set for multi-turn conversations, otherwise can be left blank or unset.

Part

A datatype containing media that is part of a multi-part Content message.

A Part consists of data which has an associated datatype. A Part can only contain one of the accepted types in Part.data.

A Part must have a fixed IANA MIME type identifying the type and subtype of the media if the inlineData field is filled with raw bytes.

JSON representation
{

  // Union field data can be only one of the following:
  "text": string,
  "inlineData": {
    object (Blob)
  },
  "functionCall": {
    object (FunctionCall)
  },
  "functionResponse": {
    object (FunctionResponse)
  },
  "fileData": {
    object (FileData)
  },
  "executableCode": {
    object (ExecutableCode)
  },
  "codeExecutionResult": {
    object (CodeExecutionResult)
  }
  // End of list of possible types for union field data.
}
Fields

Union field data.

data can be only one of the following:

text string

Inline text.

inlineData object (Blob)

Inline media bytes.

functionCall object (FunctionCall)

A predicted FunctionCall returned from the model that contains a string representing the FunctionDeclaration.name with the arguments and their values.

functionResponse object (FunctionResponse)

The result output of a FunctionCall that contains a string representing the FunctionDeclaration.name and a structured JSON object containing any output from the function is used as context to the model.

fileData object (FileData)

URI based data.

executableCode object (ExecutableCode)

Code generated by the model that is meant to be executed.

codeExecutionResult object (CodeExecutionResult)

Result of executing the ExecutableCode.

Blob

Raw media bytes.

Text should not be sent as raw bytes, use the 'text' field.

JSON representation
{
  "mimeType": string,
  "data": string
}
Fields
mimeType string

The IANA standard MIME type of the source data. Examples: - image/png - image/jpeg If an unsupported MIME type is provided, an error will be returned. For a complete list of supported types, see Supported file formats.

data string (bytes format)

Raw bytes for media formats.

A base64-encoded string.

FunctionCall

A predicted FunctionCall returned from the model that contains a string representing the FunctionDeclaration.name with the arguments and their values.

JSON representation
{
  "name": string,
  "args": {
    object
  }
}
Fields
name string

Required. The name of the function to call. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 63.

args object (Struct format)

Optional. The function parameters and values in JSON object format.

FunctionResponse

The result output from a FunctionCall that contains a string representing the FunctionDeclaration.name and a structured JSON object containing any output from the function is used as context to the model. This should contain the result of aFunctionCall made based on model prediction.

JSON representation
{
  "name": string,
  "response": {
    object
  }
}
Fields
name string

Required. The name of the function to call. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 63.

response object (Struct format)

Required. The function response in JSON object format.

FileData

URI based data.

JSON representation
{
  "mimeType": string,
  "fileUri": string
}
Fields
mimeType string

Optional. The IANA standard MIME type of the source data.

fileUri string

Required. URI.

ExecutableCode

Code generated by the model that is meant to be executed, and the result returned to the model.

Only generated when using the CodeExecution tool, in which the code will be automatically executed, and a corresponding CodeExecutionResult will also be generated.

JSON representation
{
  "language": enum (Language),
  "code": string
}
Fields
language enum (Language)

Required. Programming language of the code.

code string

Required. The code to be executed.

Language

Supported programming languages for the generated code.

Enums
LANGUAGE_UNSPECIFIED Unspecified language. This value should not be used.
PYTHON Python >= 3.10, with numpy and simpy available.

CodeExecutionResult

Result of executing the ExecutableCode.

Only generated when using the CodeExecution, and always follows a part containing the ExecutableCode.

JSON representation
{
  "outcome": enum (Outcome),
  "output": string
}
Fields
outcome enum (Outcome)

Required. Outcome of the code execution.

output string

Optional. Contains stdout when code execution is successful, stderr or other description otherwise.

Outcome

Enumeration of possible outcomes of the code execution.

Enums
OUTCOME_UNSPECIFIED Unspecified status. This value should not be used.
OUTCOME_OK Code execution completed successfully.
OUTCOME_FAILED Code execution finished but with a failure. stderr should contain the reason.
OUTCOME_DEADLINE_EXCEEDED Code execution ran for too long, and was cancelled. There may or may not be a partial output present.

Tool

Tool details that the model may use to generate response.

A Tool is a piece of code that enables the system to interact with external systems to perform an action, or set of actions, outside of knowledge and scope of the model.

JSON representation
{
  "functionDeclarations": [
    {
      object (FunctionDeclaration)
    }
  ],
  "codeExecution": {
    object (CodeExecution)
  }
}
Fields
functionDeclarations[] object (FunctionDeclaration)

Optional. A list of FunctionDeclarations available to the model that can be used for function calling.

The model or system does not execute the function. Instead the defined function may be returned as a [FunctionCall][content.part.function_call] with arguments to the client side for execution. The model may decide to call a subset of these functions by populating [FunctionCall][content.part.function_call] in the response. The next conversation turn may contain a [FunctionResponse][content.part.function_response] with the [content.role] "function" generation context for the next model turn.

codeExecution object (CodeExecution)

Optional. Enables the model to execute code as part of generation.

FunctionDeclaration

Structured representation of a function declaration as defined by the OpenAPI 3.03 specification. Included in this declaration are the function name and parameters. This FunctionDeclaration is a representation of a block of code that can be used as a Tool by the model and executed by the client.

JSON representation
{
  "name": string,
  "description": string,
  "parameters": {
    object (Schema)
  }
}
Fields
name string

Required. The name of the function. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 63.

description string

Required. A brief description of the function.

parameters object (Schema)

Optional. Describes the parameters to this function. Reflects the Open API 3.03 Parameter Object string Key: the name of the parameter. Parameter names are case sensitive. Schema Value: the Schema defining the type used for the parameter.

Schema

The Schema object allows the definition of input and output data types. These types can be objects, but also primitives and arrays. Represents a select subset of an OpenAPI 3.0 schema object.

JSON representation
{
  "type": enum (Type),
  "format": string,
  "description": string,
  "nullable": boolean,
  "enum": [
    string
  ],
  "properties": {
    string: {
      object (Schema)
    },
    ...
  },
  "required": [
    string
  ],
  "items": {
    object (Schema)
  }
}
Fields
type enum (Type)

Required. Data type.

format string

Optional. The format of the data. This is used only for primitive datatypes. Supported formats: for NUMBER type: float, double for INTEGER type: int32, int64 for STRING type: enum

description string

Optional. A brief description of the parameter. This could contain examples of use. Parameter description may be formatted as Markdown.

nullable boolean

Optional. Indicates if the value may be null.

enum[] string

Optional. Possible values of the element of Type.STRING with enum format. For example we can define an Enum Direction as : {type:STRING, format:enum, enum:["EAST", NORTH", "SOUTH", "WEST"]}

properties map (key: string, value: object (Schema))

Optional. Properties of Type.OBJECT.

An object containing a list of "key": value pairs. Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }.

required[] string

Optional. Required properties of Type.OBJECT.

items object (Schema)

Optional. Schema of the elements of Type.ARRAY.

Type

Type contains the list of OpenAPI data types as defined by https://spec.openapis.org/oas/v3.0.3#data-types

Enums
TYPE_UNSPECIFIED Not specified, should not be used.
STRING String type.
NUMBER Number type.
INTEGER Integer type.
BOOLEAN Boolean type.
ARRAY Array type.
OBJECT Object type.

CodeExecution

This type has no fields.

Tool that executes code generated by the model, and automatically returns the result to the model.

See also ExecutableCode and CodeExecutionResult which are only generated when using this tool.

ToolConfig

The Tool configuration containing parameters for specifying Tool use in the request.

JSON representation
{
  "functionCallingConfig": {
    object (FunctionCallingConfig)
  }
}
Fields
functionCallingConfig object (FunctionCallingConfig)

Optional. Function calling config.

FunctionCallingConfig

Configuration for specifying function calling behavior.

JSON representation
{
  "mode": enum (Mode),
  "allowedFunctionNames": [
    string
  ]
}
Fields
mode enum (Mode)

Optional. Specifies the mode in which function calling should execute. If unspecified, the default value will be set to AUTO.

allowedFunctionNames[] string

Optional. A set of function names that, when provided, limits the functions the model will call.

This should only be set when the Mode is ANY. Function names should match [FunctionDeclaration.name]. With mode set to ANY, model will predict a function call from the set of function names provided.

Mode

Defines the execution behavior for function calling by defining the execution mode.

Enums
MODE_UNSPECIFIED Unspecified function calling mode. This value should not be used.
AUTO Default model behavior, model decides to predict either a function call or a natural language response.
ANY Model is constrained to always predicting a function call only. If "allowedFunctionNames" are set, the predicted function call will be limited to any one of "allowedFunctionNames", else the predicted function call will be any one of the provided "functionDeclarations".
NONE Model will not predict any function call. Model behavior is same as when not passing any function declarations.

UsageMetadata

Metadata on the usage of the cached content.

JSON representation
{
  "totalTokenCount": integer
}
Fields
totalTokenCount integer

Total number of tokens that the cached content consumes.