-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathgemini_api.py
124 lines (98 loc) · 4.42 KB
/
gemini_api.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import backoff
import ratelimit
import os
import vertexai
from vertexai.generative_models import GenerationConfig, GenerativeModel
from google.api_core import exceptions
# Name of the generative model that every request in this module is sent to.
MODEL_VARIANT = 'gemini-1.5-flash'

# Time spans, expressed in seconds.
ONE_MINUTE = 60
FIVE_MINUTE = ONE_MINUTE * 5

# Maximum number of model calls allowed within one rate-limit period.
# Quota reference: https://cloud.google.com/vertex-ai/docs/quotas#request_quotas
CALL_LIMIT = 50
# Prompt for the first pass over the data: asks the model to answer a question
# from one JSON payload. The `{question}` and `{data}` placeholders are filled
# with str.format() by model_with_limit_and_backoff(), once per chunk of rows.
initial_prompt_template = '''
I am an analyst using a business intelligence tool to prompt AI to derive insights on my data.
I will create queries to ask different questions about my first-party data.
This may include sales data, customer data, marketing data, retention data, internal HR data, etc.
I will provide you the results of these queries in the form of a JSON payload.
Responses should be comprehensive with different metrics, insights and inferences made about the data.
Please include insights that would be difficult to capture by the naked eye reading a chart or data table.
Answer my question below in following text based on the JSON payload delimited by triple backquotes:
Question:
```{question}```
JSON payload:
```{data}```
Answer:
'''
# Prompt for the second pass: asks the model for a bullet-point summary of a
# text. The `{text}` placeholder is filled with str.format() by reduce().
final_prompt_template = '''
Write a concise summary of the following text delimited by triple backquotes.
Return your response in bullet points which covers the key points of the text.
```{text}```
BULLET POINT SUMMARY:
'''
def backoff_hdlr(details):
    """Print a notice that a call is about to be retried.

    Args:
        details: Mapping describing the retry; its 'wait' entry (seconds
            until the next attempt) and 'tries' entry (attempts made so
            far) are read and echoed to stdout.
    """
    wait_seconds = details['wait']
    attempts = details['tries']
    print(f'Backing off {wait_seconds} seconds after {attempts} tries')
@backoff.on_exception(
    backoff.expo,  # exponential wait between attempts
    (exceptions.ResourceExhausted, ratelimit.RateLimitException),  # retried errors
    max_time=FIVE_MINUTE,  # stop retrying once this many seconds have elapsed
    on_backoff=backoff_hdlr,  # invoked before each retry
)
@ratelimit.limits(calls=CALL_LIMIT, period=ONE_MINUTE)  # per-period call cap
def model_prediction(model: GenerativeModel,
                     content: str,
                     temperature: float,
                     max_output_tokens: int,
                     top_k: int,
                     top_p: float,
                     ):
    """Send one prompt to the model and return its response object.

    The call is capped at CALL_LIMIT invocations per ONE_MINUTE period and
    is retried with exponential backoff, for up to FIVE_MINUTE seconds, when
    the rate limiter or the API reports that the quota is exhausted.

    Args:
        model: Generative model to query.
        content: Full prompt text.
        temperature: Sampling temperature passed to the generation config.
        max_output_tokens: Upper bound on the length of the generated answer.
        top_k: Top-k sampling parameter.
        top_p: Top-p sampling parameter.

    Returns:
        The response returned by ``model.generate_content``.
    """
    response = model.generate_content(
        content,
        generation_config=GenerationConfig(
            max_output_tokens=max_output_tokens,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
        ),
    )
    print(f'Response from model: {response}')
    return response
def model_with_limit_and_backoff(all_data: list,
                                 question: str,
                                 row_chunks: int,
                                 temperature: float,
                                 max_output_tokens: int,
                                 top_k: int,
                                 top_p: float
                                 ) -> list:
    """Answer ``question`` for each chunk of ``all_data`` and collect the answers.

    The rows are sliced into consecutive chunks of at most ``row_chunks``
    rows; each chunk is formatted into ``initial_prompt_template`` and sent
    through ``model_prediction``, which applies the rate limit and the retry
    policy.

    Args:
        all_data: Sliceable sequence of result rows (it is sliced, so a plain
            dict is not accepted despite the historical annotation).
        question: Question to ask about every chunk.
        row_chunks: Maximum number of rows per model call; must be >= 1.
        temperature: Sampling temperature for generation.
        max_output_tokens: Upper bound on the length of each answer.
        top_k: Top-k sampling parameter.
        top_p: Top-p sampling parameter.

    Returns:
        One answer text per chunk, in row order; empty when ``all_data`` is
        empty.

    Raises:
        ValueError: If ``row_chunks`` is smaller than 1.
    """
    # A zero step makes range() fail with an unrelated message and a negative
    # step would silently skip every row, so reject both up front.
    if row_chunks < 1:
        raise ValueError(
            'row_chunks must be a positive integer, got {}'.format(row_chunks))

    vertexai.init(project=os.environ.get('PROJECT'),
                  location=os.environ.get('REGION'))
    model = GenerativeModel(MODEL_VARIANT)

    initial_summary = []
    list_size = len(all_data)
    # The rows are sent in slices rather than as one payload so that each
    # prompt stays within the model's input limit.
    for start in range(0, list_size, row_chunks):
        # Clamp so the last (possibly shorter) chunk is reported accurately.
        end = min(start + row_chunks, list_size)
        chunk = all_data[start:end]
        print('Processing rows {} to {}.'.format(start, end))
        content = initial_prompt_template.format(question=question, data=chunk)
        summary = model_prediction(
            model, content, temperature, max_output_tokens, top_k, top_p).text
        initial_summary.append(summary)
    return initial_summary
def reduce(initial_summary: object,
           temperature: float,
           max_output_tokens: int,
           top_k: int,
           top_p: float
           ) -> str:
    """Create a single bullet-point summary of the per-chunk summaries.

    Args:
        initial_summary: The summaries to condense. A list or tuple is joined
            into one text with blank lines between the items; any other value
            is formatted into the prompt as-is.
        temperature: Sampling temperature for generation.
        max_output_tokens: Upper bound on the length of the summary.
        top_k: Top-k sampling parameter.
        top_p: Top-p sampling parameter.

    Returns:
        The text of the model's response to ``final_prompt_template``.
    """
    vertexai.init(project=os.environ.get('PROJECT'),
                  location=os.environ.get('REGION'))
    model = GenerativeModel(MODEL_VARIANT)

    # Formatting a list directly would embed its Python repr (brackets,
    # quotes, escaped newlines) in the prompt; join the items so the model
    # sees the summaries as plain text.
    if isinstance(initial_summary, (list, tuple)):
        text = '\n\n'.join(str(part) for part in initial_summary)
    else:
        text = initial_summary
    content = final_prompt_template.format(text=text)

    # Generate a summary using the model and the prompt
    summary = model_prediction(
        model, content, temperature, max_output_tokens, top_k, top_p).text
    return summary