Counting tokens

הסבר מפורט על ספירת אסימונים באמצעות Gemini API, כולל האופן שבו נספרים תמונות, אודיו וסרטונים, זמין במדריך לספירת אסימונים ובמתכון הנלווה אליו ב-Cookbook.

שיטה: models.countTokens

הפונקציה מפעילה את ה-tokenizer של המודל על הקלט Content ומחזירה את מספר האסימונים. מידע נוסף על אסימונים זמין במדריך בנושא אסימונים.

נקודת קצה


פרמטרים של נתיב

model string

חובה. שם המשאב של המודל. זהו מזהה לשימוש במודל.

השם הזה צריך להיות תואם לשם המודל שהוחזר על ידי השיטה models.list.

פורמט: models/{model}

גוף הבקשה

גוף הבקשה מכיל נתונים במבנה הבא:

contents[] object (Content)

אופציונלי. הקלט שסופק למודל כהנחיה. המערכת מתעלמת מהשדה הזה כאשר השדה generateContentRequest מוגדר.

generateContentRequest object (GenerateContentRequest)

אופציונלי. הקלט הכולל שניתן ל-Model. הוא כולל את ההנחיה וגם מידע נוסף להכוונת המודל, כמו הוראות מערכת ו/או הצהרות על פונקציות לצורך קריאה לפונקציות. Model/Content ו-generateContentRequest מוציאים זה את זה: אפשר לשלוח Model + Content או generateContentRequest, אבל לא את שניהם.

בקשה לדוגמה



model = genai.GenerativeModel("models/gemini-1.5-flash")

prompt = "The quick brown fox jumps over the lazy dog."

# Call `count_tokens` to get the input token count (`total_tokens`).
print("total_tokens: ", model.count_tokens(prompt))
# ( total_tokens: 10 )

response = model.generate_content(prompt)

# On the response for `generate_content`, use `usage_metadata`
# to get separate input and output token counts
# (`prompt_token_count` and `candidates_token_count`, respectively),
# as well as the combined token count (`total_token_count`).
# ( prompt_token_count: 11, candidates_token_count: 73, total_token_count: 84 )


// Make sure to include these imports:
// import { GoogleGenerativeAI } from "@google/generative-ai";
const genAI = new GoogleGenerativeAI(process.env.API_KEY);
const model = genAI.getGenerativeModel({
  model: "gemini-1.5-flash",

// Count tokens in a prompt without calling text generation.
const countResult = await model.countTokens(
  "The quick brown fox jumps over the lazy dog.",

console.log(countResult.totalTokens); // 11

const generateResult = await model.generateContent(
  "The quick brown fox jumps over the lazy dog.",

// On the response for `generateContent`, use `usageMetadata`
// to get separate input and output token counts
// (`promptTokenCount` and `candidatesTokenCount`, respectively),
// as well as the combined token count (`totalTokenCount`).
// candidatesTokenCount and totalTokenCount depend on response, may vary
// { promptTokenCount: 11, candidatesTokenCount: 124, totalTokenCount: 135 }


model := client.GenerativeModel("gemini-1.5-flash")
prompt := "The quick brown fox jumps over the lazy dog"

// Call CountTokens to get the input token count (`total tokens`).
tokResp, err := model.CountTokens(ctx, genai.Text(prompt))
if err != nil {

fmt.Println("total_tokens:", tokResp.TotalTokens)
// ( total_tokens: 10 )

resp, err := model.GenerateContent(ctx, genai.Text(prompt))
if err != nil {

// On the response for GenerateContent, use UsageMetadata to get
// separate input and output token counts (PromptTokenCount and
// CandidatesTokenCount, respectively), as well as the combined
// token count (TotalTokenCount).
fmt.Println("prompt_token_count:", resp.UsageMetadata.PromptTokenCount)
fmt.Println("candidates_token_count:", resp.UsageMetadata.CandidatesTokenCount)
fmt.Println("total_token_count:", resp.UsageMetadata.TotalTokenCount)
// ( prompt_token_count: 10, candidates_token_count: 38, total_token_count: 48 )


    -H 'Content-Type: application/json' \
    -X POST \
    -d '{
      "contents": [{
          "text": "The quick brown fox jumps over the lazy dog."


val generativeModel =
        // Specify a Gemini model appropriate for your use case
        modelName = "gemini-1.5-flash",
        // Access your API key as a Build Configuration variable (see "Set up your API key" above)
        apiKey = BuildConfig.apiKey)

// For text-only input
val (totalTokens) = generativeModel.countTokens("Write a story about a magic backpack.")


let generativeModel =
    // Specify a Gemini model appropriate for your use case
    name: "gemini-1.5-flash",
    // Access your API key from your on-demand resource .plist file (see "Set up your API key"
    // above)
    apiKey: APIKey.default

let prompt = "Write a story about a magic backpack."

let response = try await generativeModel.countTokens(prompt)

print("Total Tokens: \(response.totalTokens)")


// Make sure to include this import:
// import 'package:google_generative_ai/google_generative_ai.dart';
final model = GenerativeModel(
  model: 'gemini-1.5-flash',
  apiKey: apiKey,
final prompt = 'The quick brown fox jumps over the lazy dog.';
final tokenCount = await model.countTokens([Content.text(prompt)]);
print('Total tokens: ${tokenCount.totalTokens}');


// Specify a Gemini model appropriate for your use case
GenerativeModel gm =
    new GenerativeModel(
        /* modelName */ "gemini-1.5-flash",
        // Access your API key as a Build Configuration variable (see "Set up your API key"
        // above)
        /* apiKey */ BuildConfig.apiKey);
GenerativeModelFutures model = GenerativeModelFutures.from(gm);

Content inputContent =
    new Content.Builder().addText("Write a story about a magic backpack.").build();

// For illustrative purposes only. You should use an executor that fits your needs.
Executor executor = Executors.newSingleThreadExecutor();

// For text-only input
ListenableFuture<CountTokensResponse> countTokensResponse = model.countTokens(inputContent);

    new FutureCallback<CountTokensResponse>() {
      public void onSuccess(CountTokensResponse result) {
        int totalTokens = result.getTotalTokens();
        System.out.println("TotalTokens = " + totalTokens);

      public void onFailure(Throwable t) {



model = genai.GenerativeModel("models/gemini-1.5-flash")

chat = model.start_chat(
        {"role": "user", "parts": "Hi my name is Bob"},
        {"role": "model", "parts": "Hi Bob!"},
# Call `count_tokens` to get the input token count (`total_tokens`).
# ( total_tokens: 10 )

response = chat.send_message(
    "In one sentence, explain how a computer works to a young child."

# On the response for `send_message`, use `usage_metadata`
# to get separate input and output token counts
# (`prompt_token_count` and `candidates_token_count`, respectively),
# as well as the combined token count (`total_token_count`).
# ( prompt_token_count: 25, candidates_token_count: 21, total_token_count: 46 )

from google.generativeai.types.content_types import to_contents

# You can call `count_tokens` on the combined history and content of the next turn.
print(model.count_tokens(chat.history + to_contents("What is the meaning of life?")))
# ( total_tokens: 56 )


// Make sure to include these imports:
// import { GoogleGenerativeAI } from "@google/generative-ai";
const genAI = new GoogleGenerativeAI(process.env.API_KEY);
const model = genAI.getGenerativeModel({
  model: "gemini-1.5-flash",

const chat = model.startChat({
  history: [
      role: "user",
      parts: [{ text: "Hi my name is Bob" }],
      role: "model",
      parts: [{ text: "Hi Bob!" }],

const countResult = await model.countTokens({
  generateContentRequest: { contents: await chat.getHistory() },
console.log(countResult.totalTokens); // 10

const chatResult = await chat.sendMessage(
  "In one sentence, explain how a computer works to a young child.",

// On the response for `sendMessage`, use `usageMetadata`
// to get separate input and output token counts
// (`promptTokenCount` and `candidatesTokenCount`, respectively),
// as well as the combined token count (`totalTokenCount`).
// candidatesTokenCount and totalTokenCount depend on response, may vary
// { promptTokenCount: 25, candidatesTokenCount: 25, totalTokenCount: 50 }


model := client.GenerativeModel("gemini-1.5-flash")
cs := model.StartChat()

cs.History = []*genai.Content{
		Parts: []genai.Part{
			genai.Text("Hi my name is Bob"),
		Role: "user",
		Parts: []genai.Part{
			genai.Text("Hi Bob!"),
		Role: "model",

prompt := "Explain how a computer works to a young child."
resp, err := cs.SendMessage(ctx, genai.Text(prompt))
if err != nil {

// On the response for SendMessage, use `UsageMetadata` to get
// separate input and output token counts
// (`prompt_token_count` and `candidates_token_count`, respectively),
// as well as the combined token count (`total_token_count`).
fmt.Println("prompt_token_count:", resp.UsageMetadata.PromptTokenCount)
fmt.Println("candidates_token_count:", resp.UsageMetadata.CandidatesTokenCount)
fmt.Println("total_token_count:", resp.UsageMetadata.TotalTokenCount)
// ( prompt_token_count: 25, candidates_token_count: 21, total_token_count: 46 )


    -H 'Content-Type: application/json' \
    -X POST \
    -d '{
      "contents": [
        {"role": "user",
        "parts": [{"text": "Hi, my name is Bob."}],
        {"role": "model",
         "parts":[{"text": "Hi Bob"}],


val generativeModel =
        // Specify a Gemini model appropriate for your use case
        modelName = "gemini-1.5-flash",
        // Access your API key as a Build Configuration variable (see "Set up your API key" above)
        apiKey = BuildConfig.apiKey)

val chat =
        history =
                content(role = "user") { text("Hello, I have 2 dogs in my house.") },
                content(role = "model") {
                  text("Great to meet you. What would you like to know?")

val history = chat.history
val messageContent = content { text("This is the message I intend to send") }
val (totalTokens) = generativeModel.countTokens(*history.toTypedArray(), messageContent)


let generativeModel =
    // Specify a Gemini model appropriate for your use case
    name: "gemini-1.5-flash",
    // Access your API key from your on-demand resource .plist file (see "Set up your API key"
    // above)
    apiKey: APIKey.default

// Optionally specify existing chat history
let history = [
  ModelContent(role: "user", parts: "Hello, I have 2 dogs in my house."),
  ModelContent(role: "model", parts: "Great to meet you. What would you like to know?"),

// Initialize the chat with optional chat history
let chat = generativeModel.startChat(history: history)

let response = try await generativeModel.countTokens(chat.history + [
  ModelContent(role: "user", parts: "This is the message I intend to send"),
print("Total Tokens: \(response.totalTokens)")


// Make sure to include this import:
// import 'package:google_generative_ai/google_generative_ai.dart';
final model = GenerativeModel(
  model: 'gemini-1.5-flash',
  apiKey: apiKey,
final chat = model.startChat(history: [
  Content.text('Hi my name is Bob'),
  Content.model([TextPart('Hi Bob!')])
var tokenCount = await model.countTokens(chat.history);
print('Total tokens: ${tokenCount.totalTokens}');

final response = await chat.sendMessage(Content.text(
    'In one sentence, explain how a computer works to a young child.'));
if (response.usageMetadata case final usage?) {
  print('Prompt: ${usage.promptTokenCount}, '
      'Candidates: ${usage.candidatesTokenCount}, '
      'Total: ${usage.totalTokenCount}');

tokenCount = await model.countTokens(
    [, Content.text('What is the meaning of life?')]);
print('Total tokens: ${tokenCount.totalTokens}');


// Specify a Gemini model appropriate for your use case
GenerativeModel gm =
    new GenerativeModel(
        /* modelName */ "gemini-1.5-flash",
        // Access your API key as a Build Configuration variable (see "Set up your API key"
        // above)
        /* apiKey */ BuildConfig.apiKey);
GenerativeModelFutures model = GenerativeModelFutures.from(gm);

// (optional) Create previous chat history for context
Content.Builder userContentBuilder = new Content.Builder();
userContentBuilder.addText("Hello, I have 2 dogs in my house.");
Content userContent =;

Content.Builder modelContentBuilder = new Content.Builder();
modelContentBuilder.addText("Great to meet you. What would you like to know?");
Content modelContent =;

List<Content> history = Arrays.asList(userContent, modelContent);

// Initialize the chat
ChatFutures chat = model.startChat(history);

Content messageContent =
    new Content.Builder().addText("This is the message I intend to send").build();

Collections.addAll(history, messageContent);

// For illustrative purposes only. You should use an executor that fits your needs.
Executor executor = Executors.newSingleThreadExecutor();

ListenableFuture<CountTokensResponse> countTokensResponse =
    model.countTokens(history.toArray(new Content[0]));
    new FutureCallback<CountTokensResponse>() {
      public void onSuccess(CountTokensResponse result) {

      public void onFailure(Throwable t) {

מדיה מוטבעת


import PIL.Image

model = genai.GenerativeModel("models/gemini-1.5-flash")

prompt = "Tell me about this image"
your_image_file = / "organ.jpg")

# Call `count_tokens` to get the input token count
# of the combined text and file (`total_tokens`).
# An image's display or file size does not affect its token count.
# Optionally, you can call `count_tokens` for the text and file separately.
print(model.count_tokens([prompt, your_image_file]))
# ( total_tokens: 263 )

response = model.generate_content([prompt, your_image_file])

# On the response for `generate_content`, use `usage_metadata`
# to get separate input and output token counts
# (`prompt_token_count` and `candidates_token_count`, respectively),
# as well as the combined token count (`total_token_count`).
# ( prompt_token_count: 264, candidates_token_count: 80, total_token_count: 345 )


// Make sure to include these imports:
// import { GoogleGenerativeAI } from "@google/generative-ai";
const genAI = new GoogleGenerativeAI(process.env.API_KEY);
const model = genAI.getGenerativeModel({
  model: "gemini-1.5-flash",

function fileToGenerativePart(path, mimeType) {
  return {
    inlineData: {
      data: Buffer.from(fs.readFileSync(path)).toString("base64"),

const imagePart = fileToGenerativePart(

const prompt = "Tell me about this image.";

// Call `countTokens` to get the input token count
// of the combined text and file (`totalTokens`).
// An image's display or file size does not affect its token count.
// Optionally, you can call `countTokens` for the text and file separately.
const countResult = await model.countTokens([prompt, imagePart]);
console.log(countResult.totalTokens); // 265

const generateResult = await model.generateContent([prompt, imagePart]);

// On the response for `generateContent`, use `usageMetadata`
// to get separate input and output token counts
// (`promptTokenCount` and `candidatesTokenCount`, respectively),
// as well as the combined token count (`totalTokenCount`).
// candidatesTokenCount and totalTokenCount depend on response, may vary
// { promptTokenCount: 265, candidatesTokenCount: 157, totalTokenCount: 422 }


model := client.GenerativeModel("gemini-1.5-flash")
prompt := "Tell me about this image"
imageFile, err := os.ReadFile(filepath.Join(testDataDir, "personWorkingOnComputer.jpg"))
if err != nil {
// Call `CountTokens` to get the input token count
// of the combined text and file (`total_tokens`).
// An image's display or file size does not affect its token count.
// Optionally, you can call `count_tokens` for the text and file separately.
tokResp, err := model.CountTokens(ctx, genai.Text(prompt), genai.ImageData("jpeg", imageFile))
if err != nil {
fmt.Println("total_tokens:", tokResp.TotalTokens)
// ( total_tokens: 264 )

resp, err := model.GenerateContent(ctx, genai.Text(prompt), genai.ImageData("jpeg", imageFile))
if err != nil {

fmt.Println("prompt_token_count:", resp.UsageMetadata.PromptTokenCount)
fmt.Println("candidates_token_count:", resp.UsageMetadata.CandidatesTokenCount)
fmt.Println("total_token_count:", resp.UsageMetadata.TotalTokenCount)
// ( prompt_token_count: 264, candidates_token_count: 100, total_token_count: 364 )


curl "$GOOGLE_API_KEY" \
    -H 'Content-Type: application/json' \
    -X POST \
    -d '{
      "contents": [{
            {"text": "Tell me about this instrument"},
              "inline_data": {
                "data": "'$(base64 $B64FLAGS $IMG_PATH)'"
       }' 2> /dev/null


val generativeModel =
        // Specify a Gemini model appropriate for your use case
        modelName = "gemini-1.5-flash",
        // Access your API key as a Build Configuration variable (see "Set up your API key" above)
        apiKey = BuildConfig.apiKey)

val image1: Bitmap = BitmapFactory.decodeResource(context.resources, R.drawable.image1)
val image2: Bitmap = BitmapFactory.decodeResource(context.resources, R.drawable.image2)

val multiModalContent = content {
  text("What's the difference between these pictures?")

val (totalTokens) = generativeModel.countTokens(multiModalContent)


let generativeModel =
    // Specify a Gemini model appropriate for your use case
    name: "gemini-1.5-flash",
    // Access your API key from your on-demand resource .plist file (see "Set up your API key"
    // above)
    apiKey: APIKey.default

guard let image1 = UIImage(systemName: "cloud.sun") else { fatalError() }
guard let image2 = UIImage(systemName: "cloud.heavyrain") else { fatalError() }

let prompt = "What's the difference between these pictures?"

let response = try await generativeModel.countTokens(image1, image2, prompt)
print("Total Tokens: \(response.totalTokens)")


// Make sure to include this import:
// import 'package:google_generative_ai/google_generative_ai.dart';
final model = GenerativeModel(
  model: 'gemini-1.5-flash',
  apiKey: apiKey,

Future<DataPart> fileToPart(String mimeType, String path) async {
  return DataPart(mimeType, await File(path).readAsBytes());

final prompt = 'Tell me about this image';
final image = await fileToPart('image/jpeg', 'resources/organ.jpg');
final content = Content.multi([TextPart(prompt), image]);

// An image's display size does not affect its token count.
// Optionally, you can call `countTokens` for the prompt and file separately.
final tokenCount = await model.countTokens([content]);
print('Total tokens: ${tokenCount.totalTokens}');

final response = await model.generateContent([content]);
if (response.usageMetadata case final usage?) {
  print('Prompt: ${usage.promptTokenCount}, '
      'Candidates: ${usage.candidatesTokenCount}, '
      'Total: ${usage.totalTokenCount}');


// Specify a Gemini model appropriate for your use case
GenerativeModel gm =
    new GenerativeModel(
        /* modelName */ "gemini-1.5-flash",
        // Access your API key as a Build Configuration variable (see "Set up your API key"
        // above)
        /* apiKey */ BuildConfig.apiKey);
GenerativeModelFutures model = GenerativeModelFutures.from(gm);
Content text = new Content.Builder().addText("Write a story about a magic backpack.").build();

// For illustrative purposes only. You should use an executor that fits your needs.
Executor executor = Executors.newSingleThreadExecutor();

// For text-and-image input
Bitmap image1 = BitmapFactory.decodeResource(context.getResources(), R.drawable.image1);
Bitmap image2 = BitmapFactory.decodeResource(context.getResources(), R.drawable.image2);

Content multiModalContent =
    new Content.Builder()
        .addText("What's different between these pictures?")

ListenableFuture<CountTokensResponse> countTokensResponse =

    new FutureCallback<CountTokensResponse>() {
      public void onSuccess(CountTokensResponse result) {
        int totalTokens = result.getTotalTokens();
        System.out.println("TotalTokens = " + totalTokens);

      public void onFailure(Throwable t) {



import time

model = genai.GenerativeModel("models/gemini-1.5-flash")

prompt = "Tell me about this video"
your_file = genai.upload_file(path=media / "Big_Buck_Bunny.mp4")

# Videos need to be processed before you can use them.
while == "PROCESSING":
    print("processing video...")
    your_file = genai.get_file(

# Call `count_tokens` to get the input token count
# of the combined text and video/audio file (`total_tokens`).
# A video or audio file is converted to tokens at a fixed rate of tokens per second.
# Optionally, you can call `count_tokens` for the text and file separately.
print(model.count_tokens([prompt, your_file]))
# ( total_tokens: 300 )

response = model.generate_content([prompt, your_file])

# On the response for `generate_content`, use `usage_metadata`
# to get separate input and output token counts
# (`prompt_token_count` and `candidates_token_count`, respectively),
# as well as the combined token count (`total_token_count`).
# ( prompt_token_count: 301, candidates_token_count: 60, total_token_count: 361 )


// Make sure to include these imports:
// import { GoogleAIFileManager, FileState } from "@google/generative-ai/server";
// import { GoogleGenerativeAI } from "@google/generative-ai";
const fileManager = new GoogleAIFileManager(process.env.API_KEY);

const uploadVideoResult = await fileManager.uploadFile(
  { mimeType: "video/mp4" },

let file = await fileManager.getFile(;
process.stdout.write("processing video");
while (file.state === FileState.PROCESSING) {
  // Sleep for 10 seconds
  await new Promise((resolve) => setTimeout(resolve, 10_000));
  // Fetch the file from the API again
  file = await fileManager.getFile(;

if (file.state === FileState.FAILED) {
  throw new Error("Video processing failed.");
} else {

const videoPart = {
  fileData: {
    fileUri: uploadVideoResult.file.uri,
    mimeType: uploadVideoResult.file.mimeType,

const genAI = new GoogleGenerativeAI(process.env.API_KEY);
const model = genAI.getGenerativeModel({
  model: "gemini-1.5-flash",

const prompt = "Tell me about this video.";

// Call `countTokens` to get the input token count
// of the combined text and file (`totalTokens`).
// A video or audio file is converted to tokens at a fixed rate of tokens
// per second.
// Optionally, you can call `countTokens` for the text and file separately.
const countResult = await model.countTokens([prompt, videoPart]);

console.log(countResult.totalTokens); // 302

const generateResult = await model.generateContent([prompt, videoPart]);

// On the response for `generateContent`, use `usageMetadata`
// to get separate input and output token counts
// (`promptTokenCount` and `candidatesTokenCount`, respectively),
// as well as the combined token count (`totalTokenCount`).
// candidatesTokenCount and totalTokenCount depend on response, may vary
// { promptTokenCount: 302, candidatesTokenCount: 46, totalTokenCount: 348 }


model := client.GenerativeModel("gemini-1.5-flash")
prompt := "Tell me about this video"
file, err := client.UploadFileFromPath(ctx, filepath.Join(testDataDir, "earth.mp4"), nil)
if err != nil {
defer client.DeleteFile(ctx, file.Name)

fd := genai.FileData{URI: file.URI}
// Call `CountTokens` to get the input token count
// of the combined text and file (`total_tokens`).
// A video or audio file is converted to tokens at a fixed rate of tokens per
// second.
// Optionally, you can call `count_tokens` for the text and file separately.
tokResp, err := model.CountTokens(ctx, genai.Text(prompt), fd)
if err != nil {
fmt.Println("total_tokens:", tokResp.TotalTokens)
// ( total_tokens: 1481 )

resp, err := model.GenerateContent(ctx, genai.Text(prompt), fd)
if err != nil {

fmt.Println("prompt_token_count:", resp.UsageMetadata.PromptTokenCount)
fmt.Println("candidates_token_count:", resp.UsageMetadata.CandidatesTokenCount)
fmt.Println("total_token_count:", resp.UsageMetadata.TotalTokenCount)
// ( prompt_token_count: 1481, candidates_token_count: 43, total_token_count: 1524 )


MIME_TYPE=$(file -b --mime-type "${VIDEO_PATH}")
NUM_BYTES=$(wc -c < "${VIDEO_PATH}")

# Initial resumable request defining metadata.
# The upload URL is in the response headers; dump them to a file.
curl "${BASE_URL}/upload/v1beta/files?key=${GOOGLE_API_KEY}" \
  -D upload-header.tmp \
  -H "X-Goog-Upload-Protocol: resumable" \
  -H "X-Goog-Upload-Command: start" \
  -H "X-Goog-Upload-Header-Content-Length: ${NUM_BYTES}" \
  -H "X-Goog-Upload-Header-Content-Type: ${MIME_TYPE}" \
  -H "Content-Type: application/json" \
  -d "{'file': {'display_name': '${DISPLAY_NAME}'}}" 2> /dev/null

upload_url=$(grep -i "x-goog-upload-url: " "${tmp_header_file}" | cut -d" " -f2 | tr -d "\r")
rm "${tmp_header_file}"

# Upload the actual bytes.
curl "${upload_url}" \
  -H "Content-Length: ${NUM_BYTES}" \
  -H "X-Goog-Upload-Offset: 0" \
  -H "X-Goog-Upload-Command: upload, finalize" \
  --data-binary "@${VIDEO_PATH}" 2> /dev/null > file_info.json

file_uri=$(jq ".file.uri" file_info.json)

state=$(jq ".file.state" file_info.json)

name=$(jq "" file_info.json)

while [[ "($state)" = *"PROCESSING"* ]];
  echo "Processing video..."
  sleep 5
  # Get the file of interest to check state
  curl$name > file_info.json
  state=$(jq ".file.state" file_info.json)

curl "$GOOGLE_API_KEY" \
    -H 'Content-Type: application/json' \
    -X POST \
    -d '{
      "contents": [{
          {"text": "Describe this video clip"},
          {"file_data":{"mime_type": "video/mp4", "file_uri": '$file_uri'}}]



model = genai.GenerativeModel("gemini-1.5-flash")
sample_pdf = genai.upload_file(media / "test.pdf")
token_count = model.count_tokens(["Give me a summary of this document.", sample_pdf])

response = model.generate_content(["Give me a summary of this document.", sample_pdf])



import time

model = genai.GenerativeModel("models/gemini-1.5-flash")

your_file = genai.upload_file(path=media / "a11.txt")

cache = genai.caching.CachedContent.create(
    # You can set the system_instruction and tools
    contents=["Here the Apollo 11 transcript:", your_file],

model = genai.GenerativeModel.from_cached_content(cache)

prompt = "Please give a short summary of this file."

# Call `count_tokens` to get input token count
# of the combined text and file (`total_tokens`).
# A video or audio file is converted to tokens at a fixed rate of tokens per second.
# Optionally, you can call `count_tokens` for the text and file separately.
# ( total_tokens: 9 )

response = model.generate_content(prompt)

# On the response for `generate_content`, use `usage_metadata`
# to get separate input and output token counts
# (`prompt_token_count` and `candidates_token_count`, respectively),
# as well as the cached content token count and the combined total token count.
# ( prompt_token_count: 323393, cached_content_token_count: 323383, candidates_token_count: 64)
# ( total_token_count: 323457 )



// Make sure to include these imports:
// import { GoogleAIFileManager, GoogleAICacheManager } from "@google/generative-ai/server";
// import { GoogleGenerativeAI } from "@google/generative-ai";

// Upload large text file.
const fileManager = new GoogleAIFileManager(process.env.API_KEY);
const uploadResult = await fileManager.uploadFile(`${mediaPath}/a11.txt`, {
  mimeType: "text/plain",

// Create a cache that uses the uploaded file.
const cacheManager = new GoogleAICacheManager(process.env.API_KEY);
const cacheResult = await cacheManager.create({
  ttlSeconds: 600,
  model: "models/gemini-1.5-flash-001",
  contents: [
      role: "user",
      parts: [{ text: "Here's the Apollo 11 transcript:" }],
      role: "user",
      parts: [
          fileData: {
            fileUri: uploadResult.file.uri,
            mimeType: uploadResult.file.mimeType,

const genAI = new GoogleGenerativeAI(process.env.API_KEY);
const model = genAI.getGenerativeModelFromCachedContent(cacheResult);

const prompt = "Please give a short summary of this file.";

// Call `countTokens` to get the input token count
// of the combined text and file (`totalTokens`).
const result = await model.countTokens(prompt);

console.log(result.totalTokens); // 10

const generateResult = await model.generateContent(prompt);

// On the response for `generateContent`, use `usageMetadata`
// to get separate input and output token counts
// (`promptTokenCount` and `candidatesTokenCount`, respectively),
// as well as the cached content token count and the combined total
// token count.
// {
//   promptTokenCount: 323396,
//   candidatesTokenCount: 113, (depends on response, may vary)
//   totalTokenCount: 323509,
//   cachedContentTokenCount: 323386
// }

await cacheManager.delete(;


txt := strings.Repeat("George Washington was the first president of the United States. ", 3000)
argcc := &genai.CachedContent{
	Model:    "gemini-1.5-flash-001",
	Contents: []*genai.Content{genai.NewUserContent(genai.Text(txt))},
cc, err := client.CreateCachedContent(ctx, argcc)
if err != nil {
defer client.DeleteCachedContent(ctx, cc.Name)

modelWithCache := client.GenerativeModelFromCachedContent(cc)
prompt := "Summarize this statement"
tokResp, err := modelWithCache.CountTokens(ctx, genai.Text(prompt))
if err != nil {
fmt.Println("total_tokens:", tokResp.TotalTokens)
// ( total_tokens: 5 )

resp, err := modelWithCache.GenerateContent(ctx, genai.Text(prompt))
if err != nil {

fmt.Println("prompt_token_count:", resp.UsageMetadata.PromptTokenCount)
fmt.Println("candidates_token_count:", resp.UsageMetadata.CandidatesTokenCount)
fmt.Println("cached_content_token_count:", resp.UsageMetadata.CachedContentTokenCount)
fmt.Println("total_token_count:", resp.UsageMetadata.TotalTokenCount)
// ( prompt_token_count: 33007,  candidates_token_count: 39, cached_content_token_count: 33002, total_token_count: 33046 )

הוראות למערכת


model = genai.GenerativeModel(model_name="gemini-1.5-flash")

prompt = "The quick brown fox jumps over the lazy dog."

# total_tokens: 10

model = genai.GenerativeModel(
    model_name="gemini-1.5-flash", system_instruction="You are a cat. Your name is Neko."

# The total token count includes everything sent to the `generate_content` request.
# When you use system instructions, the total token count increases.
# ( total_tokens: 21 )


// Make sure to include these imports:
// import { GoogleGenerativeAI } from "@google/generative-ai";
const genAI = new GoogleGenerativeAI(process.env.API_KEY);
const prompt = "The quick brown fox jumps over the lazy dog.";
const modelNoInstructions = genAI.getGenerativeModel({
  model: "models/gemini-1.5-flash",

const resultNoInstructions = await modelNoInstructions.countTokens(prompt);

// { totalTokens: 11 }

const modelWithInstructions = genAI.getGenerativeModel({
  model: "models/gemini-1.5-flash",
  systemInstruction: "You are a cat. Your name is Neko.",

const resultWithInstructions =
  await modelWithInstructions.countTokens(prompt);

// The total token count includes everything sent to the
// generateContent() request. When you use system instructions, the
// total token count increases.
// { totalTokens: 23 }


model := client.GenerativeModel("gemini-1.5-flash")
prompt := "The quick brown fox jumps over the lazy dog"

respNoInstruction, err := model.CountTokens(ctx, genai.Text(prompt))
if err != nil {
fmt.Println("total_tokens:", respNoInstruction.TotalTokens)
// ( total_tokens: 10 )

// The total token count includes everything sent to the GenerateContent
// request. When you use system instructions, the total token
// count increases.
model.SystemInstruction = genai.NewUserContent(genai.Text("You are a cat. Your name is Neko."))
respWithInstruction, err := model.CountTokens(ctx, genai.Text(prompt))
if err != nil {
fmt.Println("total_tokens:", respWithInstruction.TotalTokens)
// ( total_tokens: 21 )


val generativeModel =
        // Specify a Gemini model appropriate for your use case
        modelName = "gemini-1.5-flash",
        // Access your API key as a Build Configuration variable (see "Set up your API key" above)
        apiKey = BuildConfig.apiKey,
        systemInstruction = content(role = "system") { text("You are a cat. Your name is Neko.")}

// For text-only input
val (totalTokens) = generativeModel.countTokens("What is your name?")


// Build a model that carries a system instruction, so the instruction is
// included in the token count below.
let generativeModel =
  GenerativeModel(
    // Specify a model that supports system instructions, like a Gemini 1.5 model
    name: "gemini-1.5-flash",
    // Access your API key from your on-demand resource .plist file (see "Set up your API key"
    // above)
    apiKey: APIKey.default,
    systemInstruction: ModelContent(role: "system", parts: "You are a cat. Your name is Neko.")
  )

let prompt = "What is your name?"

// Must run in an async, throwing context.
let response = try await generativeModel.countTokens(prompt)
print("Total Tokens: \(response.totalTokens)")


// Make sure to include this import:
// import 'package:google_generative_ai/google_generative_ai.dart';

// Baseline model without a system instruction.
var model = GenerativeModel(
  model: 'gemini-1.5-flash',
  apiKey: apiKey,
);
final prompt = 'The quick brown fox jumps over the lazy dog.';

// The total token count includes everything sent in the `generateContent`
// request.
var tokenCount = await model.countTokens([Content.text(prompt)]);
print('Total tokens: ${tokenCount.totalTokens}');

// Re-create the model with a system instruction and count the same prompt
// again to compare the totals.
model = GenerativeModel(
  model: 'gemini-1.5-flash',
  apiKey: apiKey,
  systemInstruction: Content.system('You are a cat. Your name is Neko.'),
);
tokenCount = await model.countTokens([Content.text(prompt)]);
print('Total tokens: ${tokenCount.totalTokens}');


// Create your system instructions
Content systemInstruction =
    new Content.Builder().addText("You are a cat. Your name is Neko.").build();

// Specify a Gemini model appropriate for your use case
GenerativeModel gm =
    new GenerativeModel(
        /* modelName */ "gemini-1.5-flash",
        // Access your API key as a Build Configuration variable (see "Set up your API key"
        // above)
        /* apiKey */ BuildConfig.apiKey,
        /* generationConfig (optional) */ null,
        /* safetySettings (optional) */ null,
        /* requestOptions (optional) */ new RequestOptions(),
        /* tools (optional) */ null,
        /* toolsConfig (optional) */ null,
        /* systemInstruction (optional) */ systemInstruction);
GenerativeModelFutures model = GenerativeModelFutures.from(gm);

Content inputContent = new Content.Builder().addText("What's your name?.").build();

// For illustrative purposes only. You should use an executor that fits your needs.
Executor executor = Executors.newSingleThreadExecutor();

// For text-only input
ListenableFuture<CountTokensResponse> countTokensResponse = model.countTokens(inputContent);

// Register a callback that runs on `executor` once the count is available.
Futures.addCallback(
    countTokensResponse,
    new FutureCallback<CountTokensResponse>() {
      @Override
      public void onSuccess(CountTokensResponse result) {
        int totalTokens = result.getTotalTokens();
        System.out.println("TotalTokens = " + totalTokens);
      }

      @Override
      public void onFailure(Throwable t) {
        t.printStackTrace();
      }
    },
    executor);


model = genai.GenerativeModel(model_name="gemini-1.5-flash")

prompt = "I have 57 cats, each owns 44 mittens, how many mittens is that in total?"

# Baseline: count the prompt's tokens with no tools attached.
print("total_tokens: ", model.count_tokens(prompt))
# ( total_tokens: 22 )

# The functions below are passed to the model as tools. Their docstrings are
# left exactly as written because they are part of what gets sent (and counted).
def add(a: float, b: float):
    """returns a + b."""
    return a + b

def subtract(a: float, b: float):
    """returns a - b."""
    return a - b

def multiply(a: float, b: float):
    """returns a * b."""
    return a * b

def divide(a: float, b: float):
    """returns a / b."""
    return a / b

model = genai.GenerativeModel(
    "models/gemini-1.5-flash-001", tools=[add, subtract, multiply, divide]
)

# The total token count includes everything sent to the `generate_content` request.
# When you use tools (like function calling), the total token count increases.
print("total_tokens: ", model.count_tokens(prompt))
# ( total_tokens: 206 )


// Make sure to include these imports:
// import { GoogleGenerativeAI } from "@google/generative-ai";
const genAI = new GoogleGenerativeAI(process.env.API_KEY);
const prompt =
  "I have 57 cats, each owns 44 mittens, how many mittens is that in total?";

// Baseline: count the prompt's tokens with no tools attached.
const modelNoTools = genAI.getGenerativeModel({
  model: "models/gemini-1.5-flash",
});

const resultNoTools = await modelNoTools.countTokens(prompt);

console.log(resultNoTools);
// { totalTokens: 23 }

// Minimal function declarations to attach to the model as tools.
const functionDeclarations = [
  { name: "add" },
  { name: "subtract" },
  { name: "multiply" },
  { name: "divide" },
];

const modelWithTools = genAI.getGenerativeModel({
  model: "models/gemini-1.5-flash",
  tools: [{ functionDeclarations }],
});

const resultWithTools = await modelWithTools.countTokens(prompt);

// The total token count includes everything sent to the
// generateContent() request. When you use tools (like function calling),
// the total token count increases.
console.log(resultWithTools);
// { totalTokens: 99 }


// Declare the function the model may call; the declaration is sent with the
// request and therefore contributes to the token count.
val multiplyDefinition = defineFunction(
    name = "multiply",
    description = "returns the product of the provided numbers.",
    parameters = listOf(
        Schema.double("a", "First number"),
        Schema.double("b", "Second number")
    )
)
val usableFunctions = listOf(multiplyDefinition)

val generativeModel =
    GenerativeModel(
        // Specify a Gemini model appropriate for your use case
        modelName = "gemini-1.5-flash",
        // Access your API key as a Build Configuration variable (see "Set up your API key" above)
        apiKey = BuildConfig.apiKey,
        tools = listOf(Tool(usableFunctions))
    )

// For text-only input
val (totalTokens) = generativeModel.countTokens("What's the product of 9 and 358?")
print(totalTokens)


// Build a model with a function declaration attached as a tool, so the
// declaration is included in the token count below.
let generativeModel =
  GenerativeModel(
    // Specify a model that supports function calling, like a Gemini 1.5 model
    name: "gemini-1.5-flash",
    // Access your API key from your on-demand resource .plist file (see "Set up your API key"
    // above)
    apiKey: APIKey.default,
    tools: [Tool(functionDeclarations: [
      FunctionDeclaration(
        name: "controlLight",
        description: "Set the brightness and color temperature of a room light.",
        parameters: [
          "brightness": Schema(
            type: .number,
            format: "double",
            description: "Light level from 0 to 100. Zero is off and 100 is full brightness."
          ),
          "colorTemperature": Schema(
            type: .string,
            format: "enum",
            description: "Color temperature of the light fixture.",
            enumValues: ["daylight", "cool", "warm"]
          ),
        ],
        requiredParameters: ["brightness", "colorTemperature"]
      ),
    ])]
  )

let prompt = "Dim the lights so the room feels cozy and warm."

// Must run in an async, throwing context.
let response = try await generativeModel.countTokens(prompt)
print("Total Tokens: \(response.totalTokens)")


// Make sure to include this import:
// import 'package:google_generative_ai/google_generative_ai.dart';

// Baseline model without tools.
var model = GenerativeModel(
  model: 'gemini-1.5-flash',
  apiKey: apiKey,
);
final prompt = 'I have 57 cats, each owns 44 mittens, '
    'how many mittens is that in total?';

// The total token count includes everything sent in the `generateContent`
// request.
var tokenCount = await model.countTokens([Content.text(prompt)]);
print('Total tokens: ${tokenCount.totalTokens}');

// Parameter schema shared by all four arithmetic function declarations.
final binaryFunction = Schema.object(
  properties: {
    'a': Schema.number(nullable: false),
    'b': Schema.number(nullable: false)
  },
  requiredProperties: ['a', 'b'],
);

// Re-create the model with tools attached and count the same prompt again
// to compare the totals.
model = GenerativeModel(
  model: 'gemini-1.5-flash',
  apiKey: apiKey,
  tools: [
    Tool(functionDeclarations: [
      FunctionDeclaration('add', 'returns a + b', binaryFunction),
      FunctionDeclaration('subtract', 'returns a - b', binaryFunction),
      FunctionDeclaration('multiply', 'returns a * b', binaryFunction),
      FunctionDeclaration('divide', 'returns a / b', binaryFunction)
    ])
  ],
);
tokenCount = await model.countTokens([Content.text(prompt)]);
print('Total tokens: ${tokenCount.totalTokens}');


// Declare the function the model may call; the declaration is sent with the
// request and therefore contributes to the token count.
FunctionDeclaration multiplyDefinition =
    defineFunction(
        /* name  */ "multiply",
        /* description */ "returns a * b.",
        /* parameters */ Arrays.asList(
            Schema.numDouble("a", "First parameter"),
            Schema.numDouble("b", "Second parameter")),
        /* required */ Arrays.asList("a", "b"));

Tool tool = new Tool(Arrays.asList(multiplyDefinition), null);

// Specify a Gemini model appropriate for your use case
GenerativeModel gm =
    new GenerativeModel(
        /* modelName */ "gemini-1.5-flash",
        // Access your API key as a Build Configuration variable (see "Set up your API key"
        // above)
        /* apiKey */ BuildConfig.apiKey,
        /* generationConfig (optional) */ null,
        /* safetySettings (optional) */ null,
        /* requestOptions (optional) */ new RequestOptions(),
        /* tools (optional) */ Arrays.asList(tool));
GenerativeModelFutures model = GenerativeModelFutures.from(gm);

Content inputContent = new Content.Builder().addText("What's your name?.").build();

// For illustrative purposes only. You should use an executor that fits your needs.
Executor executor = Executors.newSingleThreadExecutor();

// For text-only input
ListenableFuture<CountTokensResponse> countTokensResponse = model.countTokens(inputContent);

// Register a callback that runs on `executor` once the count is available.
Futures.addCallback(
    countTokensResponse,
    new FutureCallback<CountTokensResponse>() {
      @Override
      public void onSuccess(CountTokensResponse result) {
        int totalTokens = result.getTotalTokens();
        System.out.println("TotalTokens = " + totalTokens);
      }

      @Override
      public void onFailure(Throwable t) {
        t.printStackTrace();
      }
    },
    executor);

גוף התשובה

תשובה מ-models.countTokens.

התשובה מחזירה את ערך ה-tokenCount של המודל עבור ה-prompt.

אם הפעולה מצליחה, גוף התגובה מכיל נתונים במבנה הבא:

totalTokens integer

מספר האסימונים שאליהם ה-Model מפרק את ה-prompt. תמיד מספר אי-שלילי.

cachedContentTokenCount integer

מספר האסימונים בקטע שנשמר במטמון של ההנחיה (התוכן שנשמר במטמון).

ייצוג ב-JSON
{
  "totalTokens": integer,
  "cachedContentTokenCount": integer
}


GenerateContentRequest

בקשה ליצירת השלמה מהמודל.

model string

חובה. השם של Model שישמש ליצירת ההשלמה.

פורמט: name=models/{model}

contents[] object (Content)

חובה. תוכן השיחה הנוכחית עם המודל.

בשאילתות עם תור אחד, זהו מופע יחיד. בשאילתות מרובות תורות, כמו צ'אט, זהו שדה חוזר שמכיל את היסטוריית השיחה ואת הבקשה האחרונה.

tools[] object (Tool)

אופציונלי. רשימה של Tools שה-Model עשוי להשתמש בהם כדי ליצור את התשובה הבאה.

Tool הוא קטע קוד שמאפשר למערכת לקיים אינטראקציה עם מערכות חיצוניות כדי לבצע פעולה או קבוצת פעולות מחוץ לידע ולהיקף של ה-Model. סוגי ה-Tool הנתמכים הם Function ו-codeExecution. מידע נוסף זמין במדריכים בנושא קריאה לפונקציות והרצת קוד.

toolConfig object (ToolConfig)

אופציונלי. הגדרות הכלים עבור כל Tool שצוין בבקשה. דוגמה לשימוש מופיעה במדריך בנושא קריאה לפונקציות.

safetySettings[] object (SafetySetting)

אופציונלי. רשימה של מופעים ייחודיים של SafetySetting לחסימת תוכן לא בטוח.

ההגדרות האלה נאכפות על GenerateContentRequest.contents ועל GenerateContentResponse.candidates. לכל סוג של SafetyCategory יכולה להיות הגדרה אחת לכל היותר. ה-API יחסום כל תוכן וכל תשובה שלא עומדים בערכי הסף שנקבעו בהגדרות האלה. הרשימה הזו מבטלת את הגדרות ברירת המחדל של כל SafetyCategory שצוין ב-safetySettings. אם לא צוין SafetySetting עבור SafetyCategory מסוים ברשימה, ה-API ישתמש בהגדרת הבטיחות שמוגדרת כברירת מחדל לקטגוריה הזו. קטגוריות הפגיעה הנתמכות הן HARM_CATEGORY_HATE_SPEECH, HARM_CATEGORY_SEXUALLY_EXPLICIT, HARM_CATEGORY_DANGEROUS_CONTENT ו-HARM_CATEGORY_HARASSMENT. מידע מפורט על הגדרות הבטיחות הזמינות מופיע במדריך. מומלץ גם לעיין בהנחיות הבטיחות כדי ללמוד איך לשלב שיקולי בטיחות באפליקציות ה-AI שלכם.

systemInstruction object (Content)

אופציונלי. הוראות מערכת שהוגדרו על ידי המפתח. נכון לעכשיו, טקסט בלבד.

generationConfig object (GenerationConfig)

אופציונלי. אפשרויות תצורה ליצירת התוכן על ידי המודל ולפלטים שלו.

cachedContent string

אופציונלי. השם של התוכן שנשמר במטמון ומשמש כהקשר להפקת התחזית. פורמט: cachedContents/{cachedContent}

ייצוג JSON
{
  "model": string,
  "contents": [
    {
      object (Content)
    }
  ],
  "tools": [
    {
      object (Tool)
    }
  ],
  "toolConfig": {
    object (ToolConfig)
  },
  "safetySettings": [
    {
      object (SafetySetting)
    }
  ],
  "systemInstruction": {
    object (Content)
  },
  "generationConfig": {
    object (GenerationConfig)
  },
  "cachedContent": string
}