6️⃣ Hugging Face Hub

LangChain: Hugging Face

import os
from langchain import HuggingFaceHub
os.environ["HUGGINGFACEHUB_API_TOKEN"]="<Huggingface_API_Key>" # sign up at Hugging Face, issue an API key, and put it here in place of the placeholder

1. Model I/O

LLM

# open-source LLM from Hugging Face
# Create the wrapper for the google/flan-t5-large repository and call it
# directly with a plain-string prompt. `llm` is reused by the later cells.
llm=HuggingFaceHub(repo_id="google/flan-t5-large")
llm_out=llm("Which is most expensive city in the world?")

print(llm_out)
/home/kubwa/anaconda3/envs/langchain/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm
/home/kubwa/anaconda3/envs/langchain/lib/python3.9/site-packages/huggingface_hub/utils/_deprecation.py:127: FutureWarning: '__init__' (from 'huggingface_hub.inference_api') is deprecated and will be removed from version '0.19.0'. `InferenceApi` client is deprecated in favor of the more feature-complete `InferenceClient`. Check out this guide to learn how to convert your script to use it: https://huggingface.co/docs/huggingface_hub/guides/inference#legacy-inferenceapi-client.
  warnings.warn(warning_message, FutureWarning)


london

Prompts

from langchain import PromptTemplate

# Question template with a single placeholder, {input}
template = "Which is most {input} city in the world?"

# Wrap the template so the placeholder can be filled in by name
prompt = PromptTemplate(
    input_variables=["input"],
    template=template,
)

# Substitute the placeholder to obtain the final prompt string
_input = prompt.format(input="expensive")

# Send the rendered prompt to the model
output = llm(_input)

# Show the model's answer
print(output)
london

Output Parsers

from langchain.output_parsers import CommaSeparatedListOutputParser
from langchain.prompts import PromptTemplate

# Parser that turns a comma-separated reply into a Python list
output_parser = CommaSeparatedListOutputParser()

# Text telling the model which output format the parser expects
format_instructions = output_parser.get_format_instructions()

# Prompt asking for a list; the format instructions are filled in up front
prompt = PromptTemplate(
    input_variables=["subject"],
    partial_variables={"format_instructions": format_instructions},
    template="List three {subject}.\n{format_instructions}",
)

# Subject to substitute into the prompt
query = "Planets in the Universe"

# Render the prompt and send it to the model
output = llm(prompt.format(subject=query))

# Split the raw reply into individual items
parsed_result = output_parser.parse(output)

# parsed_result is a list of strings
print(parsed_result)
['uranus', 'neptune', 'and jupiter']

2. Retrieval

Document Loaders

from langchain.document_loaders import PyPDFLoader

loader = PyPDFLoader("dataset/kb_23849_1_1.pdf")
# Load the PDF without splitting it here: chunking is done once, by the
# RecursiveCharacterTextSplitter that is applied to `transcript` afterwards.
# load_and_split() would first chunk the text with the loader's default
# splitter, so the text would be split twice with two different chunk sizes.
transcript = loader.load()

Document Transformers

from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded documents into chunks of at most 1500 characters, with
# 150 characters of overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
docs = text_splitter.split_documents(transcript)

Text Embedding Models

from langchain.embeddings import HuggingFaceEmbeddings
# Sentence-embedding model; the same object is reused when the FAISS index is built.
embeddings = HuggingFaceEmbeddings(model_name = "sentence-transformers/all-MiniLM-L6-v2")

# Embed a single query string; the printed result is a flat list of floats.
text_embeddings = embeddings.embed_query("Which is most expensive city in the world?")
print(text_embeddings)
[0.12059028446674347, 0.052785277366638184, -0.05982799828052521, 0.08542031794786453, -0.0047978428192436695, -0.0894688069820404, 0.01297465618699789, 0.030412208288908005, -0.02683505415916443, 0.05475056543946266, 0.014216876588761806, -0.13152025640010834, -0.003897641086950898, 0.05708581954240799, -0.032678600400686264, -0.06539638340473175, 0.06220779940485954, -0.06350390613079071, 0.04352043569087982, -0.04667651653289795, -0.018967855721712112, -0.06042526662349701, 0.05154570937156677, 0.013220753520727158, 0.07343099266290665, 0.0058999210596084595, -0.007350783329457045, 0.009249470196664333, 0.01441187784075737, -0.020577138289809227, -0.060433536767959595, 0.0007745643961243331, 0.042855314910411835, 0.02764868550002575, 0.050729844719171524, -0.004928782116621733, 0.014492431655526161, -0.016814816743135452, 0.029464788734912872, 0.029767075553536415, 0.013785254210233688, -0.02622278779745102, -0.013363903388381004, -0.07667090743780136, -0.016218122094869614, -0.027539115399122238, 0.02798015996813774, 0.09559766203165054, 0.0021996002178639174, 0.028464792296290398, -0.013678706251084805, 0.04807870090007782, -0.06867171823978424, -0.021837906911969185, -0.007933593355119228, 0.01384018175303936, 0.003574154805392027, -0.027108414098620415, 0.02275482565164566, 0.05444975942373276, -0.05712449550628662, 0.027558425441384315, -0.04068039357662201, 0.007833953015506268, 0.0742175281047821, -0.016470953822135925, 0.04229069501161575, 0.030570480972528458, -0.05390646681189537, -0.003073311410844326, 0.07933726161718369, -0.054329290986061096, 0.0012276616180315614, -0.012950502336025238, 0.015291228890419006, -0.0037043618503957987, 0.0938057005405426, 0.015454933047294617, -0.06264204531908035, 0.053342219442129135, 0.05414989963173866, -0.06116132810711861, -0.048171259462833405, 0.03701522573828697, 0.034475911408662796, 0.03373529762029648, 0.002232589293271303, 0.021282650530338287, 0.01742977276444435, -0.014908908866345882, 
0.028167326003313065, 0.014866268262267113, -0.07398663461208344, 0.023061536252498627, -0.03994656726717949, 0.081743985414505, -0.029204150661826134, 0.03422081097960472, -0.03927065432071686, 0.0027342475950717926, 0.0008989247144199908, 0.014586939476430416, 0.10636808723211288, 0.05065007135272026, 0.00024338062212336808, -0.03450760990381241, 0.014143653213977814, -0.004870363045483828, -0.05268403887748718, 0.023807428777217865, -0.07248372584581375, -0.04757285118103027, -0.05437948927283287, -0.033191096037626266, -0.043658237904310226, 0.019301215186715126, 0.0679992139339447, -0.03758327662944794, -0.006510628387331963, 0.009244668297469616, -0.0047137197107076645, -0.09159809350967407, -0.046985480934381485, 0.03177395835518837, -0.07341766357421875, -0.07267805933952332, -0.04942988231778145, -4.7267880482731776e-33, -0.05542539805173874, -0.07102491706609726, 0.05785880982875824, -0.013004408217966557, -0.0929630845785141, -0.032662928104400635, -0.013681226409971714, -0.006839026231318712, -0.021326186135411263, -0.03499830886721611, 0.020210660994052887, -0.09699935466051102, -0.0156557634472847, -0.02384665422141552, 0.13282303512096405, 0.03129126876592636, -5.6023789511527866e-05, 0.0040978663600981236, -0.10561016947031021, 0.007235232274979353, 0.0043182955123484135, 0.07007357478141785, 0.0555403046309948, -0.032838404178619385, -0.0392155684530735, -0.027978284284472466, 0.03897017240524292, -0.041917718946933746, 0.1020178571343422, -0.008563436567783356, 0.023717964068055153, 0.02938506193459034, 0.06254404783248901, -0.008927094750106335, -0.014742884784936905, 0.06980566680431366, -0.020347096025943756, -0.009751750156283379, -0.057852812111377716, 0.039585188031196594, -0.04205895960330963, -0.04065248370170593, -0.002668399130925536, 0.030295060947537422, 0.07291954755783081, 0.04895645007491112, -0.04016372188925743, -0.033699508756399155, -0.01804323121905327, -0.08209193497896194, -0.017847267910838127, 0.018320253118872643, 
-0.1389736533164978, 0.013002057559788227, 0.033305823802948, -0.020391345024108887, 0.01805689185857773, -0.028267398476600647, 0.017083747312426567, 0.0914691761136055, -0.12384072691202164, 0.016529466956853867, 0.029454782605171204, 0.0501738078892231, 0.05839250609278679, 0.012779800221323967, 0.01694088615477085, 0.006962170358747244, -0.04679562523961067, 0.04710007086396217, 0.009917798452079296, 0.03656293451786041, 0.10243450105190277, 0.047741953283548355, 0.016817055642604828, 0.05922606959939003, 0.03337971866130829, 0.04632338136434555, 0.02813338115811348, 0.044609084725379944, -0.053863368928432465, 0.045677293092012405, -0.03774689882993698, 0.06831374764442444, 0.07599101215600967, 0.004773995839059353, -0.02215559035539627, -0.06525178253650665, 0.015318457037210464, -0.004245837219059467, -0.057813651859760284, -0.0034768949262797832, -0.009965226985514164, -0.0871831476688385, -0.02286692149937153, 2.6329903474198187e-33, 0.008660168386995792, -0.02675318904221058, 0.070536307990551, 0.0454910509288311, -0.02008768543601036, -0.016858331859111786, 0.0020033938344568014, 0.017924733459949493, 0.029350105673074722, 0.09537993371486664, -0.08086299151182175, -0.011546129360795021, 0.11374151706695557, -0.012229708023369312, 0.06533791124820709, 0.040898397564888, 0.109329953789711, -0.04347386956214905, -0.05652811378240585, -0.03537741303443909, -0.050420936197042465, 0.03913848474621773, 0.00011952301429118961, 0.025006208568811417, -0.09272227436304092, 0.013636242598295212, -0.12986862659454346, -0.03770656883716583, -0.03696915879845619, -0.028358839452266693, -0.061301618814468384, 0.05012812465429306, -0.06688229739665985, 0.04296669736504555, -0.04059772938489914, 0.10735879838466644, -0.002130336593836546, 0.016694961115717888, 0.002216171473264694, 0.08668150007724762, -0.01593322865664959, -0.0038958610966801643, 0.0027010836638510227, 0.07459613680839539, 0.05211967974901199, -0.05443103611469269, -0.12676849961280823, 
-0.03229113668203354, 0.09315548092126846, -0.059978581964969635, 0.0431625172495842, 0.05843678489327431, -0.08262697607278824, 0.011867527849972248, -0.00628291629254818, 0.04558709263801575, -0.042136773467063904, 0.11623110622167587, -0.011010662652552128, -0.051584359258413315, 0.053252916783094406, 0.04677816107869148, -0.028557410463690758, 0.10840843617916107, -0.045069366693496704, 0.0338531956076622, 0.053518399596214294, 0.02360231801867485, -0.006163671147078276, -0.07035623490810394, 0.011547554284334183, 0.05606567859649658, 0.0004216328961774707, 0.002742093987762928, -0.06695059686899185, 0.047924142330884933, 0.062197208404541016, 0.0743488222360611, 0.12096421420574188, 0.01472033467143774, 0.07833712548017502, 0.03204336017370224, -0.016300853341817856, -0.06067029759287834, -0.03177114576101303, 0.0015053892275318503, 0.04636557772755623, -0.055508729070425034, -0.022859210148453712, -0.00708851870149374, -0.06700815260410309, 0.006875192280858755, 0.0032466554548591375, -0.12760920822620392, -0.01893959566950798, -1.616386846592377e-08, 0.021821768954396248, -0.015182079747319221, -0.023900898173451424, 0.03550928086042404, 0.048616304993629456, -0.06636763364076614, 0.05737857148051262, 0.127315491437912, -0.010500498116016388, 0.10938044637441635, 0.026321163401007652, 0.0363653190433979, 0.003084104275330901, 0.031870145350694656, -0.13267368078231812, -0.03406882286071777, -0.033247269690036774, -0.0017028865404427052, 0.02051669731736183, -0.07118113338947296, -0.0019890835974365473, 0.031718455255031586, -0.013175683096051216, -0.01538345217704773, -0.022799678146839142, -0.045648571103811264, -0.018053371459245682, 0.03467676788568497, 0.0017156669637188315, 0.05857444554567337, 0.04053273797035217, -0.08142174780368805, -0.018373411148786545, -0.04224172979593277, 0.018263278529047966, -0.03692886233329773, -0.011968037113547325, -0.017345938831567764, -0.053661469370126724, -0.08901085704565048, -0.0007313553360290825, 
-0.0040868669748306274, -0.029899582266807556, 0.022176522761583328, 0.08037397265434265, -0.05580749735236168, -0.0375262089073658, -0.04341369867324829, 0.035723526030778885, -0.06513353437185287, -0.1096712201833725, 0.03498729318380356, 0.04571588337421417, -0.034230317920446396, -0.017642589285969734, -0.07003467530012131, -0.030622225254774094, -0.005048822611570358, -0.04937375336885452, 0.052541546523571014, 0.08874338865280151, -0.12483590096235275, 0.03191329166293144, 0.06401468813419342]

Vector Stores

from langchain.vectorstores import FAISS

# Build a FAISS vector store from the document chunks and the embedding model.
db = FAISS.from_documents(docs, embeddings)
query = "Why Use Machine Learning?"
# NOTE: `docs` is rebound here from the chunk list to the search hits, so
# re-running this cell on its own would index the hits instead of the chunks.
docs = db.similarity_search(query)

# Text of the first returned hit
print(docs[0].page_content)
특별약관
제5장 배상책임 관련 특별약관

3. Retrievers

RetrievalQA

from langchain.chains import RetrievalQA

# Expose the vector store through the retriever interface
retriever = db.as_retriever()

# Question-answering chain of type "stuff" on top of the retriever.
# Optional flags that can be switched on: return_source_documents=True, verbose=True
qa = RetrievalQA.from_chain_type(
    retriever=retriever,
    chain_type="stuff",
    llm=llm,
)

query = "What is Machine Learning?"
result = qa({"query": query})
print(result["result"])
not enough information

WebResearchRetriever

from langchain.retrievers.web_research import WebResearchRetriever
from langchain.utilities import GoogleSearchAPIWrapper
from langchain.chains import RetrievalQAWithSourcesChain

# Credentials read by the Google search wrapper; "xxx" are placeholders to replace.
os.environ["GOOGLE_CSE_ID"] = "xxx"
os.environ["GOOGLE_API_KEY"] = "xxx"
search = GoogleSearchAPIWrapper()

# Retriever built from the existing vector store, the LLM and the search wrapper.
web_research_retriever = WebResearchRetriever.from_llm(vectorstore=db, llm=llm, search=search)
qa_chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=web_research_retriever,
    #return_source_documents=True,
    #verbose=True
)
# `query` is the question string assigned in the RetrievalQA cell.
result = qa_chain({"question": query})
print(result)

Indexing

from langchain.document_loaders import UnstructuredPDFLoader
from langchain.indexes import VectorstoreIndexCreator

# One loader per PDF. Building this list from a directory listing while always
# passing the same path would create a duplicate loader for every directory
# entry, and the same document would then be parsed and indexed many times.
loader1 = [UnstructuredPDFLoader("MLInterview.pdf")]
llm=HuggingFaceHub(repo_id="google/flan-t5-large")

from langchain.text_splitter import CharacterTextSplitter
# Index the PDF: split it with chunk_size=1000 / chunk_overlap=100, embed the
# chunks with a default-configured HuggingFaceEmbeddings, and build the index.
index = VectorstoreIndexCreator(
    embedding=HuggingFaceEmbeddings(),
    text_splitter=CharacterTextSplitter(chunk_size=1000, chunk_overlap=100),
).from_loaders(loader1)

# Ask a question against the index, answered by the LLM.
results = index.query("What is Machine Learning?", llm=llm)
print(results)

#results = index.query("\n When to use ensemble learning?", llm=llm)
#print(results)
Machine learning is a branch of computer science which deals with system programming in order to automatically learn

4. Agents

from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.agents import AgentType

# Give the agent a single tool: Wikipedia lookup.
tools = load_tools(["wikipedia"])
agent = initialize_agent(
    tools,
    llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    max_iterations=1,  # stop after a single agent step
    handle_parsing_errors=True,  # feed malformed LLM output back as an observation instead of raising
)


# The name is spelled "Barack" so that a Wikipedia lookup can match the article.
agent.run("Who is Barack Obama?")
> Entering new AgentExecutor chain...
Action: president of the united states
Observation: Invalid Format: Missing 'Action Input:' after 'Action:'
Thought:

> Finished chain.





'Agent stopped due to iteration limit or time limit.'
from langchain.tools import YouTubeSearchTool
# Run a YouTube search for the phrase and print whatever the tool returns
tool = YouTubeSearchTool()
ans = tool.run("How to choose a Niche")
print(ans)
#%pip install duckduckgo-search
from langchain.tools import DuckDuckGoSearchRun

# DuckDuckGo search tool. This rebinds `search`, which previously held the
# GoogleSearchAPIWrapper instance.
search = DuckDuckGoSearchRun()
search.run("Tell me about Abraham Lincoln")
"Abraham Lincoln ( / ˈlɪŋkən / LINK-ən; February 12, 1809 - April 15, 1865) was an American lawyer, politician, and statesman who served as the 16th president of the United States from 1861 until his assassination in 1865. 1809-1865 Who Was Abraham Lincoln? Abraham Lincoln was the 16 th president of the United States, serving from 1861 to 1865, and is regarded as one of America's greatest heroes due to his... ADVERTISEMENT Abraham Lincoln was 6 feet 4 inches tall, making him the tallest U.S. president in history. Who was Abraham Lincoln's wife? We Are Teachers; Wikimedia Commons In 1842, Abraham Lincoln married Mary Todd, the daughter of a prominent Kentucky slave-owning family. They lived in Springfield, Illinois, and had four sons. Lincoln Is One of the Four Assassinated US Presidents in History Abraham Lincoln facts reveal that not only was he one of the four US presidents (Lincoln, Garfield, McKinley, and Kennedy) who were assassinated, but that he was actually the first assassinated US president. Feb. 12, 1809, was the day Abraham Lincoln was born. Review his legacy and life during one of the most trying times in American history."

5. Chains

from langchain.prompts import ChatPromptTemplate
from langchain.schema import StrOutputParser

# Chat-style prompt: a fixed system message plus the user's question.
prompt = ChatPromptTemplate.from_messages([
    ("system", "You're a finance person who likes to roam around the world, and very well versed about the world economy."),
    ("human", "{question}")
])
# Pipe the prompt into the LLM to form a single runnable.
runnable = prompt | llm 

# Stream the answer, printing each chunk as it arrives without a newline.
for chunk in runnable.stream({"question": "Which is most expensive city in the world?"}):
    print(chunk, end="", flush=True)
New York
from langchain import LLMChain

# Same chat prompt and model, combined through LLMChain instead of the pipe operator.
chain = LLMChain(llm = llm, prompt = prompt)
print(chain.run(question="Which is most expensive city in the world?"))
New York
from langchain.chains import SimpleSequentialChain

template1 = "Which is most {input} city in the world?"
# SimpleSequentialChain passes the first chain's output (a city name) to the
# second chain as its only input, so the second template has to ask about
# that city rather than treat the value as an adjective.
template2 = "Which country is the city of {input} located in?"

prompt1 = PromptTemplate(template=template1, input_variables=["input"])
prompt2 = PromptTemplate(template=template2, input_variables=["input"])

chain1 = LLMChain(llm=llm, prompt=prompt1)
chain2 = LLMChain(llm=llm, prompt=prompt2)

# verbose=True prints each intermediate output as the chains run in order
all_chains = SimpleSequentialChain(chains=[chain1, chain2], verbose=True)

print(all_chains.run("expensive"))
> Entering new SimpleSequentialChain chain...
london
United Kingdom

> Finished chain.
United Kingdom

7. Memory

from langchain.chains import ConversationChain

# Conversation chain without an explicit memory argument; verbose=True prints
# the full prompt, including the accumulated history, on every turn.
conversation = ConversationChain(llm=llm, verbose=True)

# Three consecutive turns; each prompt replays the earlier exchanges.
conversation.predict(input="I have a cat.")

conversation.predict(input="My mom gave me a cat and a dog.")

conversation.predict(input="Now how many pets I have?")
> Entering new ConversationChain chain...
Prompt after formatting:
The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:

Human: I have a cat.
AI:

> Finished chain.


> Entering new ConversationChain chain...
Prompt after formatting:
The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
Human: I have a cat.
AI: I have a cat.
Human: My mom gave me a cat and a dog.
AI:

> Finished chain.


> Entering new ConversationChain chain...
Prompt after formatting:
The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
Human: I have a cat.
AI: I have a cat.
Human: My mom gave me a cat and a dog.
AI: I have a dog.
Human: Now how many pets I have?
AI:

> Finished chain.





'I have three pets.'
from langchain.memory import ConversationBufferMemory

# New conversation chain, this time with a ConversationBufferMemory passed explicitly.
conversation = ConversationChain(
    llm=llm, 
    verbose=True, 
    memory=ConversationBufferMemory()
)
# First turn: the history section of the prompt is still empty.
conversation.predict(input="Hi There!")
> Entering new ConversationChain chain...
Prompt after formatting:
The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:

Human: Hi There!
AI:

> Finished chain.





'Hello!'
# Second turn: the prompt now includes the first exchange as history.
conversation.predict(input="I would like to know more about the world")
> Entering new ConversationChain chain...
Prompt after formatting:
The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
Human: Hi There!
AI: Hello!
Human: I would like to know more about the world
AI:

> Finished chain.





'Human: What would you like to know?'
# Third turn: both earlier exchanges are replayed in the prompt.
conversation.predict(input="which is the most populated country in the world?")
> Entering new ConversationChain chain...
Prompt after formatting:
The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
Human: Hi There!
AI: Hello!
Human: I would like to know more about the world
AI: Human: What would you like to know?
Human: which is the most populated country in the world?
AI:

> Finished chain.





'Human: The most populated country in the world is China.'
# Inspect the memory object; it holds the Human/AI messages exchanged so far.
conversation.memory
ConversationBufferMemory(chat_memory=ChatMessageHistory(messages=[HumanMessage(content='Hi There!'), AIMessage(content='Hello!'), HumanMessage(content='I would like to know more about the world'), AIMessage(content='Human: What would you like to know?'), HumanMessage(content='which is the most populated country in the world?'), AIMessage(content='Human: The most populated country in the world is China.')]))

8. Callbacks

Logging to File

from langchain.callbacks import FileCallbackHandler
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from loguru import logger

# The same file path is given to both the loguru sink and the LangChain file callback.
logfile = "output.log"

logger.add(logfile, colorize=True, enqueue=True)
handler = FileCallbackHandler(logfile)

prompt = PromptTemplate.from_template("Conversation:: + {message} = ")

# Attach the file handler to the chain through its callbacks list.
chain = LLMChain(llm=llm, prompt=prompt, callbacks=[handler], verbose=True)
msg = chain.run(message="What is the capital of United States?")
# Log the final answer through loguru.
logger.info("Answer:: " + msg)
> Entering new LLMChain chain...
Prompt after formatting:
Conversation:: + What is the capital of United States? = 


2024-02-20 17:57:32.528 | INFO     | __main__:<module>:15 - Answer:: United States Capitol, Washington, D.C.



> Finished chain.

Last updated