# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Evals Playbook: Prepare your environment¶
This notebook shows you how to prepare the environment to run the notebooks in the Gemini Evals Playbook. The notebook performs the following steps:
- Enable required APIs in your Google Cloud project
- Set up the required permissions and roles
- Install required libraries using Poetry
- Configure the necessary Google Cloud resources, such as BigQuery
🎬 Getting Started¶
The following steps are necessary to run this notebook, no matter what notebook environment you're using.
If you're entirely new to Google Cloud, get started here.
Google Cloud Project Setup¶
- Select or create a Google Cloud project. When you first create an account, you get a $300 free credit towards your compute/storage costs
- Make sure that billing is enabled for your project
- Enable the Service Usage API
- Enable the Vertex AI API
- Enable the Cloud Storage API
- Enable the BigQuery API
- Enable the Cloud Resource Manager API
Google Cloud Permissions¶
To run the complete notebook, you need the Owner role on your project. At minimum, you need the following roles (a gcloud sketch for granting one of them follows this list):
- roles/serviceusage.serviceUsageAdmin to enable APIs
- roles/iam.serviceAccountAdmin to modify service agent permissions
- roles/aiplatform.user to use AI Platform components
- roles/storage.objectAdmin to modify and delete GCS buckets
- roles/bigquery.user and roles/bigquery.dataViewer to query BigQuery tables
- roles/bigquery.jobUser to run BigQuery jobs
- roles/secretmanager.secretAccessor to access secret versions in Cloud Secret Manager
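If you need to grant one of these roles yourself, here is a minimal sketch using the gcloud CLI. The project ID and email below are placeholders, and it assumes you have permission to modify the project's IAM policy:
# Hypothetical example: grant one of the minimum roles to a user account
! gcloud projects add-iam-policy-binding [your-project-id] \
    --member="user:you@example.com" \
    --role="roles/aiplatform.user"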
Install dependencies¶
# Install poetry
! pip uninstall poetry -y
! pip install poetry --quiet
# Run the poetry commands below to set up the environment
! poetry lock  # resolves dependencies (and auto-creates the Poetry venv if it does not exist)
! poetry install --quiet  # installs dependencies
! poetry env info  # displays the env just created and the path to it
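As an optional sanity check (a sketch, assuming the packages below are in the project's lock file), you can confirm that key dependencies import inside the Poetry environment:
# Verify key dependencies import inside the Poetry-managed environment
! poetry run python -c "import google.cloud.bigquery, google.cloud.storage; print('dependencies OK')"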
Restart Runtime¶
To use the newly installed packages in this Jupyter runtime, you must restart the runtime. You can do this by running the cell below, which restarts the current kernel.
You may see the restart reported as a crash, but it is working as intended -- you are merely restarting the runtime.
The restart might take a minute or longer. After it's restarted, continue to the next step.
import IPython
app = IPython.Application.instance()
app.kernel.do_shutdown(True)
Authenticate¶
If you're using Colab, run the code in the next cell. Follow the popups and authenticate with an account that has access to your Google Cloud project.
If you're running this notebook somewhere besides Colab, make sure your environment has the right Google Cloud access. If that's a new concept to you, consider looking into Application Default Credentials for your local environment and initializing the Google Cloud CLI. In many cases, running gcloud auth application-default login in a shell on the machine running the notebook kernel is sufficient.
More authentication options are discussed here.
# Colab authentication.
import sys
if "google.colab" in sys.modules:
from google.colab import auth
auth.authenticate_user()
print("Authenticated")
Set Google Cloud project information¶
To get started using Vertex AI, you must have an existing Google Cloud project and enable the Vertex AI API.
Learn more about setting up a project and a development environment.
Make sure to change PROJECT_ID in the next cell. You can leave the value of LOCATION as-is unless you have a specific reason to change it.
# Define variables
PROJECT_ID = "[your-project-id]" # @param {type:"string"}
LOCATION = "us-central1" # @param {type:"string"}
Create Google Cloud storage bucket¶
Create or set the Cloud Storage bucket name used for Vertex AI staging and any other files related to evals.
STAGING_BUCKET = "[your-bucket-name]" # @param {type:"string"}
STAGING_BUCKET_URI = f"gs://{STAGING_BUCKET}"
from google.cloud import storage
storage_client = storage.Client(project=PROJECT_ID)
# Check if the bucket exists; create it if not
if not storage_client.bucket(STAGING_BUCKET).exists():
    storage_client.create_bucket(STAGING_BUCKET)
    print(f"Bucket {STAGING_BUCKET} created!")
else:
    print(f"Bucket {STAGING_BUCKET} already exists")
bucket = storage_client.get_bucket(STAGING_BUCKET)
# Print the project the storage client is bound to
print(f"Bucket is in the project {bucket.client.project}")
Enable required Google Cloud APIs¶
# Enable required APIs
! gcloud services enable \
iam.googleapis.com \
storage-component.googleapis.com \
compute.googleapis.com \
aiplatform.googleapis.com \
bigquery.googleapis.com \
cloudresourcemanager.googleapis.com \
--project $PROJECT_ID
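To confirm the APIs are now enabled, you can list them (a sketch; shown here for the Vertex AI API only):
# List the enabled services, filtered to the Vertex AI API
! gcloud services list --enabled --project $PROJECT_ID --filter="config.name=aiplatform.googleapis.com"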
Initialize Vertex AI SDK¶
import os
import sys

# Add the repository root to sys.path so local modules (e.g., utils) and SQL assets resolve
module_path = os.path.abspath(os.path.join(".."))
sys.path.append(module_path)
import vertexai
vertexai.init(project=PROJECT_ID, location=LOCATION, staging_bucket=STAGING_BUCKET_URI)
print("Vertex AI SDK initialized.")
print(f"Vertex AI SDK version = {vertexai.__version__}")
⛁ Create Data Schema¶
Configure dataset name and table names to define data schema in BigQuery.
# BigQuery datasets
BQ_DATASET_ID = "gemini_evals_playbook" # @param {type:"string"}
BQ_LOCATION = "US"
# DO NOT CHANGE
BQ_TABLES_SQL_PATH = os.path.join(module_path, "bigquery_sqls", "evals_bigquery.sql")
BQ_PREFIX = "eval"
BQ_T_EVAL_TASKS = f"{BQ_PREFIX}_tasks"
BQ_T_EXPERIMENTS = f"{BQ_PREFIX}_experiments"
BQ_T_PROMPTS = f"{BQ_PREFIX}_prompts"
BQ_T_DATASETS = f"{BQ_PREFIX}_datasets"
BQ_T_EVAL_RUN_DETAILS = f"{BQ_PREFIX}_run_details"
BQ_T_EVAL_RUNS = f"{BQ_PREFIX}_runs"
def setup_bigquery(bq_project_id, dataset_name, dataset_region, dry_run=False):
    from google.cloud import bigquery

    dataset_ref = f"{bq_project_id}.{dataset_name}"
    job_config = bigquery.QueryJobConfig(dry_run=dry_run, default_dataset=dataset_ref)
    client = bigquery.Client(
        project=bq_project_id,
        location=dataset_region,
        default_query_job_config=job_config,
    )

    # Create the schema/dataset
    try:
        ddl = f"""
        CREATE SCHEMA IF NOT EXISTS {dataset_name}
        OPTIONS(
            description="dataset for configuring Gemini evaluation tasks and storing evaluation results",
            {f"location='{dataset_region}'," if dataset_region else ""}
            labels=[("tool", "vertexai-gemini-evals")]
        )
        """
        print(f"Creating dataset {dataset_name} in project {bq_project_id}, if it does not exist")
        print(ddl)
        job = client.query(ddl)
        results = job.result()
        for result in results:
            print(result)
    except Exception as e:
        print(f"Failed to create dataset {dataset_name} in project {bq_project_id}\n{e}")
        raise e

    # Create the tables
    try:
        print(f"Creating tables in project {bq_project_id}, if they do not exist.")
        with open(BQ_TABLES_SQL_PATH) as sql_file:
            ddl = sql_file.read()
        print(ddl)
        job = client.query(ddl)
        results = job.result()
        for result in results:
            print(result)
    except Exception as e:
        print(f"Failed to create tables in project {bq_project_id}\n{e}")
        raise e
setup_bigquery(
bq_project_id=PROJECT_ID,
dataset_name=BQ_DATASET_ID,
dataset_region=BQ_LOCATION,
dry_run=False,
)
print(
    "Done! Created BigQuery dataset and tables to configure experiments and store eval results"
)
print("You are ready to run the evaluations!")
💾 Save Configuration to File¶
Save the configurations set in this notebook to config.ini. The parameters from this file are used in subsequent notebooks.
from utils.config import save_config

save_config(
    PROJECT_ID,
    LOCATION,
    STAGING_BUCKET,
    STAGING_BUCKET_URI,
    BQ_DATASET_ID,
    BQ_LOCATION,
    BQ_TABLES_SQL_PATH,
    BQ_PREFIX,
    BQ_T_EVAL_TASKS,
    BQ_T_EXPERIMENTS,
    BQ_T_PROMPTS,
    BQ_T_DATASETS,
    BQ_T_EVAL_RUN_DETAILS,
    BQ_T_EVAL_RUNS,
)
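As an optional check (a sketch, assuming save_config writes an INI file named config.ini in the working directory), you can read the file back with the standard library:
# Parse config.ini and show which sections were written
import configparser

cfg = configparser.ConfigParser()
parsed = cfg.read("config.ini")
print(f"Parsed files: {parsed}; sections: {cfg.sections()}")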
🥁 If all the above steps executed successfully, the following should be set up¶
- GCP project and APIs to run the eval pipeline
- All the required IAM permissions
- Environment to run the notebooks
- BigQuery datasets and tables to track evaluation results
You can now proceed to run the rest of the notebooks in the Evals Playbook. Start with 1_gemini_evals_playbook_evaluate to design experiments, assess model performance on your generative AI tasks, and analyze evaluation results, including side-by-side comparisons across different experiments and runs.