下面是我使用 Llama3 70B 构建的可视化基线。






这里我们使用layoff.fyi 的数据来进行分析。


 # Optional pre-processing: load the CSV and normalise column types.
import pandas as pd
import numpy as np


def _parse_worker_count(raw):
    """Turn one raw 'Number of Workers' cell into an int, or NaN if missing.

    The cell is stringified first; the literal text 'nan' marks a missing
    value, anything else has its thousands separators (commas) removed and
    is parsed with ``int``.
    """
    text = str(raw)
    if text == 'nan':
        return np.nan
    return int(text.replace(',', ''))


df = pd.read_csv('WARN Notices California_Omer Arain - Sheet1.csv')

# Convert the date-like columns cell by cell into datetime values.
for date_column in ('Received Date', 'Effective Date'):
    df[date_column] = list(map(pd.to_datetime, df[date_column]))

# Worker counts arrive as strings such as "1,234"; store them as numbers.
df['Number of Workers'] = [
    _parse_worker_count(cell) for cell in df['Number of Workers']
]

# Replace every remaining missing value with 0.
df = df.replace(np.nan, 0)


 # Summarise every dataframe column and index the summary with llama-index.
import datetime
import json
import numbers

import numpy as np
from llama_index.core.readers.json import JSONReader
from llama_index.core import VectorStoreIndex


def return_vals(df, c):
    """Return a JSON-friendly summary of column ``c`` of ``df``.

    The kind of summary is chosen from the type of the column's first value:

    * numeric  -> ``[max, min, mean]``
    * datetime -> ``[max, min, mean]`` rendered as strings
    * other    -> up to ten ``[item, count]`` pairs, most frequent first

    Args:
        df: The dataframe holding the column.
        c: Name of the column to summarise.

    Returns:
        A list as described above.
    """
    column = df[c]
    first_value = column.iloc[0]

    # numbers.Number also matches NumPy scalars such as np.int64, which a
    # plain isinstance(..., (int, float, complex)) check would miss.
    if isinstance(first_value, (numbers.Number, np.number)):
        return [max(column), min(column), np.mean(column)]

    # Datetime values are not JSON serialisable, so store them as strings.
    if isinstance(first_value, datetime.datetime):
        return [str(max(column)), str(min(column)), str(np.mean(column))]

    # Categorical data: keep the ten most frequent items WITH their
    # frequencies, so the reader of the summary knows which value each
    # count belongs to.
    top_counts = column.value_counts().head(10)
    return [[str(item), int(count)] for item, count in top_counts.items()]


# Collect, per column: its name, the Python type of its first value, and the
# summary produced by return_vals.
dict_ = {}
for c in df.columns:
    dict_[c] = {
        'column_name': c,
        'type': str(type(df[c].iloc[0])),
        'variable_information': return_vals(df, c),
    }

# Persist the description as JSON so it can be loaded as llama-index documents.
with open("dataframe.json", "w") as fp:
    json.dump(dict_, fp)

reader = JSONReader()
# Load data from the JSON file
documents = reader.load_data(input_file='dataframe.json')
# Create the index over the dataframe description
dataframe_index = VectorStoreIndex.from_documents(documents)




 # Styling guidance documents, one per chart type, indexed for retrieval.
from llama_index.core import Document
from llama_index.core import VectorStoreIndex

# Each document holds the styling rules for one kind of chart. Individual
# instructions are separated by newlines so that they do not run into each
# other when handed to the language model; the wording itself is unchanged.
styling_instructions = [
    Document(
        text=(
            "Dont ignore any of these instructions.\n"
            "For a line chart always use plotly_white template, reduce x axes & y axes line to 0.2 & x & y grid width to 1. "
            "Always give a title and make bold using html tag axis label and try to use multiple colors if more than one line\n"
            "Annotate the min and max of the line\n"
            "Display numbers in thousand(K) or Million(M) if larger than 1000/100000 "
            "Show percentages in 2 decimal points with '%' sign"
        )
    ),
    Document(
        text=(
            "Dont ignore any of these instructions.\n"
            "For a bar chart always use plotly_white template, reduce x axes & y axes line to 0.2 & x & y grid width to 1. "
            "Always give a title and make bold using html tag axis label and try to use multiple colors if more than one line\n"
            "Always display numbers in thousand(K) or Million(M) if larger than 1000/100000. Add annotations x values\n"
            "Annotate the values on the y variable\n"
            "If variable is a percentage show in 2 decimal points with '%' sign."
        )
    ),
    # You should fill in instructions for other charts and play around with
    # these instructions
    Document(
        text=(
            " General chart instructions\n"
            "Do not ignore any of these instructions\n"
            "always use plotly_white template, reduce x & y axes line to 0.2 & x & y grid width to 1. "
            "Always give a title and make bold using html tag axis label "
            "Always display numbers in thousand(K) or Million(M) if larger than 1000/100000. Add annotations x values\n"
            "If variable is a percentage show in 2 decimal points with '%'"
        )
    ),
]

# Create the index over the styling documents
style_index = VectorStoreIndex.from_documents(styling_instructions)




下面就可以使用 llama-index 从索引构建查询引擎，并将其用作代理工具。

 # All imports for this section
import os

from llama_index.core.agent import ReActAgent
from llama_index.core.tools import QueryEngineTool
from llama_index.core.tools import ToolMetadata
from llama_index.llms.groq import Groq

# Build query engines over the indexes.
# It makes sense to retrieve only one document per query. However, you may
# play around with this if you need multiple charts, or have two or more
# dataframes with similar column names.
dataframe_engine = dataframe_index.as_query_engine(similarity_top_k=1)
styling_engine = style_index.as_query_engine(similarity_top_k=1)

# Build the tools. The description is what helps the agent decide which tool
# to use, so play around with it to see if it leads to better results.
query_engine_tools = [
    QueryEngineTool(
        query_engine=dataframe_engine,
        metadata=ToolMetadata(
            name="dataframe_index",
            description="Provides information about the data in the data frame. Only use column names in this tool",
        ),
    ),
    QueryEngineTool(
        query_engine=styling_engine,
        metadata=ToolMetadata(
            name="Styling",
            # The two sentences need an explicit separator: adjacent string
            # literals are concatenated with nothing in between.
            description=(
                "Provides instructions on how to style your Plotly plots. "
                "Use a detailed plain text question as input to the tool."
            ),
        ),
    ),
]

# I used open-source models via Groq but you can use OpenAI/Google/Mistral
# models as well. The key is taken from the GROQ_API_KEY environment variable
# when it is set, so that it does not have to be pasted into the source; the
# placeholder is kept as the fallback value.
llm = Groq(
    model="llama3-70b-8192",
    api_key=os.environ.get("GROQ_API_KEY", "<your_api_key>"),
)

# Initialize the ReAct agent
agent = ReActAgent.from_tools(query_engine_tools, llm=llm, verbose=True)




 # Custom ReAct system prompt specialised for building Plotly visualisations.
from llama_index.core import PromptTemplate

# {tool_desc} and {tool_names} are template fields; doubled braces are
# literal braces in the rendered prompt. The JSON example uses plain double
# quotes, because typographic quotes would not be valid JSON.
new_prompt_txt = """You are designed to help with building data visualizations in Plotly. You may do all sorts of analyses and actions using Python

## Tools

You have access to a wide variety of tools. You are responsible for using the tools in any sequence you deem appropriate to complete the task at hand.
This may require breaking the task into subtasks and using different tools to complete each subtask.

You have access to the following tools, use these tools to find information about the data and styling:
{tool_desc}

## Output Format

Please answer in the same language as the question and use the following format:

Thought: The current language of the user is: (user’s language). I need to use a tool to help me answer the question.
Action: tool name (one of {tool_names}) if using a tool.
Action Input: the input to the tool, in a JSON format representing the kwargs (e.g. {{"input": "hello world", "num_beams": 5}})

Please ALWAYS start with a Thought.

Please use a valid JSON format for the Action Input. Do NOT do this {{'input': 'hello world', 'num_beams': 5}}.

If this format is used, the user will respond in the following format:

Observation: tool response

You should keep repeating the above format till you have enough information to answer the question without using any more tools. At that point, you MUST respond in the one of the following two formats:

Thought: I can answer without using any more tools. I’ll use the user’s language to answer
Answer: [your answer here (In the same language as the user’s question)]

Thought: I cannot answer the question with the provided tools.
Answer: [your answer here (In the same language as the user’s question)]

## Current Conversation

Below is the current conversation consisting of interleaving human and assistant messages."""

# Wrap the prompt text in a PromptTemplate object
new_prompt = PromptTemplate(new_prompt_txt)

# Update the prompt: without this call the agent keeps its default system
# prompt and the template above is never used.
agent.update_prompts({"agent_worker:system_prompt": new_prompt})



 # Ask the agent for Plotly code for a styled, titled line chart.
chart_request = (
    "Give Plotly code for a line chart for Number of Workers get information "
    "from the dataframe about the correct column names and make sure to "
    "style the plot properly and also give a title"
)
response = agent.chat(chart_request)









作者:Arslan Shahid


