more notes
parent
fae914028f
commit
45e225c5d8
|
@ -4,3 +4,8 @@ __pycache__/
|
|||
.env
|
||||
.pylintrc
|
||||
*.egg-info/
|
||||
notebooks/.ipynb_checkpoints/
|
||||
notebooks/__pycache__/
|
||||
notebooks/state_of_the_union.txt
|
||||
notebooks/chroma_logs.log
|
||||
notebooks/.chroma/
|
|
@ -46,27 +46,15 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"True"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import openai\n",
|
||||
"import os\n",
|
||||
"import IPython\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"load_dotenv()"
|
||||
"from dotenv import load_dotenv"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -79,7 +67,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -87,7 +75,9 @@
|
|||
"openai.api_key = os.getenv(\"OPENAI_KEY\")\n",
|
||||
"\n",
|
||||
"# for LangChain\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = os.getenv(\"OPENAI_KEY\")"
|
||||
"load_dotenv()\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = os.getenv(\"OPENAI_KEY\")\n",
|
||||
"os.environ[\"SERPAPI_API_KEY\"] = os.getenv(\"SERPAPI_API_KEY\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -516,14 +506,6 @@
|
|||
"Exercise: Improve the prompt to have a better structure and output format."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
|
@ -537,23 +519,12 @@
|
|||
"- Review more advanced applications"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 3. Tools and Applications\n",
|
||||
"\n",
|
||||
"Objective:\n",
|
||||
"\n",
|
||||
"- Demonstrate how to use LangChain to develop a simple application leveraging the PAL prompting technique"
|
||||
"### 2.X PAL - Code as Reasoning"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -566,14 +537,6 @@
|
|||
"Specifically, the application takes in some data and answers a question about the data input. The prompt includes a few exemplars which are adopted from [here](https://github.com/reasoning-machines/pal/blob/main/pal/prompt/penguin_prompt.py). "
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 3.1 PAL"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
|
@ -735,11 +698,6 @@
|
|||
"That's the correct answer! Vincent is the oldest penguin. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
|
@ -753,7 +711,293 @@
|
|||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 3.2"
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 3. Tools and Applications\n",
|
||||
"\n",
|
||||
"Objective:\n",
|
||||
"\n",
|
||||
"- Demonstrate how to use LangChain to demonstrate simple applications using prompting techniques and LLMs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 3.1 LLMs & External Tools\n",
|
||||
"\n",
|
||||
"Example adopted from the [LangChain documentation](https://langchain.readthedocs.io/en/latest/modules/agents/getting_started.html)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import load_tools\n",
|
||||
"from langchain.agents import initialize_agent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"\n",
|
||||
"tools = load_tools([\"serpapi\", \"llm-math\"], llm=llm)\n",
|
||||
"agent = initialize_agent(tools, llm, agent=\"zero-shot-react-description\", verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m I need to find out who Olivia Wilde's boyfriend is and then calculate his age raised to the 0.23 power.\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: \"Olivia Wilde boyfriend\"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mJason Sudeikis\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to find out Jason Sudeikis' age\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: \"Jason Sudeikis age\"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m47 years\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to calculate 47 raised to the 0.23 power\n",
|
||||
"Action: Calculator\n",
|
||||
"Action Input: 47^0.23\u001b[0m\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3mAnswer: 2.4242784855673896\n",
|
||||
"\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: Jason Sudeikis, Olivia Wilde's boyfriend, is 47 years old and his age raised to the 0.23 power is 2.4242784855673896.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"Jason Sudeikis, Olivia Wilde's boyfriend, is 47 years old and his age raised to the 0.23 power is 2.4242784855673896.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 29,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# run the agent\n",
|
||||
"agent.run(\"Who is Olivia Wilde's boyfriend? What is his current age raised to the 0.23 power?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 3.2 Data-Augmented Generation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First, we need to download the data we want to use as source to augment generation.\n",
|
||||
"\n",
|
||||
"Code example adopted from [LangChain Documentation](https://langchain.readthedocs.io/en/latest/modules/chains/combine_docs_examples/qa_with_sources.html). We are only using the examples for educational purposes."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--2023-02-18 18:42:47-- https://raw.githubusercontent.com/hwchase17/langchain/a83ba44efacb9f27648e770927fd93eb60d7cf3f/docs/modules/state_of_the_union.txt\n",
|
||||
"Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.111.133, 185.199.108.133, ...\n",
|
||||
"Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.\n",
|
||||
"HTTP request sent, awaiting response... 200 OK\n",
|
||||
"Length: 39027 (38K) [text/plain]\n",
|
||||
"Saving to: ‘state_of_the_union.txt’\n",
|
||||
"\n",
|
||||
"state_of_the_union. 100%[===================>] 38.11K --.-KB/s in 0.02s \n",
|
||||
"\n",
|
||||
"2023-02-18 18:42:47 (2.29 MB/s) - ‘state_of_the_union.txt’ saved [39027/39027]\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!wget https://raw.githubusercontent.com/hwchase17/langchain/a83ba44efacb9f27648e770927fd93eb60d7cf3f/docs/modules/state_of_the_union.txt"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Prepare the data first:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.embeddings.cohere import CohereEmbeddings\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.vectorstores.elastic_vector_search import ElasticVectorSearch\n",
|
||||
"from langchain.vectorstores import Chroma\n",
|
||||
"from langchain.docstore.document import Document\n",
|
||||
"from langchain.prompts import PromptTemplate"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with open('./state_of_the_union.txt') as f:\n",
|
||||
" state_of_the_union = f.read()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"texts = text_splitter.split_text(state_of_the_union)\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Running Chroma using direct local API.\n",
|
||||
"Using DuckDB in-memory for database. Data will be transient.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docsearch = Chroma.from_texts(texts, embeddings, metadatas=[{\"source\": str(i)} for i in range(len(texts))])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Justice Breyer\"\n",
|
||||
"docs = docsearch.similarity_search(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's quickly test it:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains.qa_with_sources import load_qa_with_sources_chain\n",
|
||||
"from langchain.llms import OpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'output_text': ' The president thanked Justice Breyer for his service.\\nSOURCES: 30-pl'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain = load_qa_with_sources_chain(OpenAI(temperature=0), chain_type=\"stuff\")\n",
|
||||
"query = \"What did the president say about Justice Breyer\"\n",
|
||||
"chain({\"input_documents\": docs, \"question\": query}, return_only_outputs=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's try a question with a custom prompt:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'output_text': '\\nEl Presidente no dijo nada acerca de la Justicia Breyer.\\n\\nFUENTES: 30, 31, 33'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"template = \"\"\"Given the following extracted parts of a long document and a question, create a final answer with references (\"SOURCES\"). \n",
|
||||
"If you don't know the answer, just say that you don't know. Don't try to make up an answer.\n",
|
||||
"ALWAYS return a \"SOURCES\" part in your answer.\n",
|
||||
"Respond in Spanish.\n",
|
||||
"\n",
|
||||
"QUESTION: {question}\n",
|
||||
"=========\n",
|
||||
"{summaries}\n",
|
||||
"=========\n",
|
||||
"FINAL ANSWER IN SPANISH:\"\"\"\n",
|
||||
"\n",
|
||||
"# create a prompt template\n",
|
||||
"PROMPT = PromptTemplate(template=template, input_variables=[\"summaries\", \"question\"])\n",
|
||||
"\n",
|
||||
"# query \n",
|
||||
"chain = load_qa_with_sources_chain(OpenAI(temperature=0), chain_type=\"stuff\", prompt=PROMPT)\n",
|
||||
"query = \"What did the president say about Justice Breyer?\"\n",
|
||||
"chain({\"input_documents\": docs, \"question\": query}, return_only_outputs=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue