[8기 랭체인] 대화형 텍스트로 챗봇


대화형 텍스트 자료를 이용한 챗봇 소스를 공유합니다.

import tool

import os

import platform

import openai

import chromadb

import langchain

import tiktoken

from langchain.embeddings.openai import OpenAIEmbeddings

from langchain.vectorstores import Chroma

from langchain.text_splitter import RecursiveCharacterTextSplitter

from langchain.chat_models import ChatOpenAI

from langchain.chains import ChatVectorDBChain

from langchain.document_loaders import GutenbergLoader

from langchain.document_loaders import DirectoryLoader, TextLoader

# Publish the OpenAI API key through the environment. The value comes from
# `key()` on the `tool` module imported at the top of the file.
os.environ["OPENAI_API_KEY"] = tool.key()

# Load the conversation transcript; the loader returns a sequence of documents.
loader = TextLoader('./chatbot.txt', encoding='utf-8')
data = loader.load()
print(f'{len(data)}개의 문서')

# len() of the page content is a character count, so the label says
# "characters" (문자) instead of the previous, incorrect "words" (단어).
print(f'문서에 {len(data[0].page_content)}개의 문자를 가지고 있음')

# Print the first document's token count under the cl100k_base encoding.
encoding = tiktoken.get_encoding('cl100k_base')
num_tokens = len(encoding.encode(data[0].page_content))
print(num_tokens)

# Split the loaded documents into chunks (chunk_size=2048, no overlap).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2048,
    chunk_overlap=0,
)
docs = text_splitter.split_documents(data)
print(f'{len(docs)}개의 문서 존재')

# Embed the chunks into a Chroma store kept under ./vectordb and persist it.
# NOTE(review): this runs on every execution of the script; confirm whether
# re-adding the same chunks to an existing ./vectordb is intended.
embeddings = OpenAIEmbeddings()
persist_directory = "./vectordb"
vectordb = Chroma.from_documents(
    docs,
    embeddings,
    persist_directory=persist_directory,
)
vectordb.persist()

# Chat model used for answering; 'gpt-3.5-turbo' was noted as an alternative.
model = ChatOpenAI(
    model_name='gpt-4-1106-preview',
    temperature=0,
    max_tokens=500,
)

# Question-answering chain over the vector store; results also carry the
# source documents because return_source_documents=True.
chain = ChatVectorDBChain.from_llm(
    model,
    vectordb,
    return_source_documents=True,
)

# Scripted demo conversation, asked in this order.
questions = [
    "안녕하세요.",
    "드라이브할 곳 추천좀 해줘",
    "강화도 생각중야",
    "강화도가 좋아.",
    "갈릴리 카페 메뉴좀 알 수 있어?",
    "갈릴리 홈페이지 주소 알려줄래?",
]

# (question, answer) pairs from the turns already asked. The original passed
# an empty list on every call, so follow-up questions such as
# "강화도 생각중야" reached the chain without the earlier turns.
chat_history = []

for question in questions:
    result = chain({'question': question, 'chat_history': chat_history})
    print(result['answer'])
    # Record this turn so the next question is answered in context.
    chat_history.append((question, result['answer']))

5
3개의 답글

👉 이 게시글도 읽어보세요