대화형 텍스트 자료를 이용한 챗봇 소스를 공유합니다.
import tool
import os
import platform
import openai
import chromadb
import langchain
import tiktoken
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chat_models import ChatOpenAI
from langchain.chains import ChatVectorDBChain
from langchain.document_loaders import GutenbergLoader
from langchain.document_loaders import DirectoryLoader, TextLoader
# Expose the OpenAI API key returned by tool.key() through the environment.
os.environ["OPENAI_API_KEY"] = tool.key()

# Load the chat transcript file as a list of documents.
loader = TextLoader('./chatbot.txt', encoding='utf-8')
data = loader.load()
print(f'{len(data)}개의 문서')
# len() of a str counts characters, not words, so the label says "글자"
# (characters) rather than "단어" (words).
print(f'문서에 {len(data[0].page_content)}개의 글자를 가지고 있음')

# Report how many tokens the first document occupies under cl100k_base.
encoding = tiktoken.get_encoding('cl100k_base')
num_tokens = len(encoding.encode(data[0].page_content))
print(num_tokens)

# Split the loaded documents into chunks (chunk_size=2048, no overlap).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2048, chunk_overlap=0)
docs = text_splitter.split_documents(data)
print(f'{len(docs)}개의 문서 존재')
persist_directory = "./vectordb"
embeddings = OpenAIEmbeddings()
# Chroma.from_documents adds `docs` to whatever collection is already stored
# in persist_directory, so calling it on every run re-embeds the text and
# leaves duplicate chunks in the index. Reuse an existing store when present
# and only build (and persist) a new one on the first run.
if os.path.isdir(persist_directory) and os.listdir(persist_directory):
    vectordb = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
else:
    vectordb = Chroma.from_documents(docs, embeddings, persist_directory=persist_directory)
    vectordb.persist()
# NOTE: delete the persist directory to force a rebuild after the source
# text changes.
# Chat model used to answer questions; 'gpt-3.5-turbo' is an alternative
# model name noted by the original author.
model = ChatOpenAI(
    model_name='gpt-4-1106-preview',
    temperature=0,
    max_tokens=500,
)
# Question-answering chain over the vector store; the result also carries
# the source documents because return_source_documents=True.
chain = ChatVectorDBChain.from_llm(
    model,
    vectordb,
    return_source_documents=True,
)
# Questions asked in order; the later ones are follow-ups that only make
# sense in the context of the earlier turns.
questions = [
    "안녕하세요.",
    "드라이브할 곳 추천좀 해줘",
    "강화도 생각중야",
    "강화도가 좋아.",
    "갈릴리 카페 메뉴좀 알 수 있어?",
    "갈릴리 홈페이지 주소 알려줄래?",
]

# (question, answer) pairs from previous turns. The original passed an empty
# history on every call, so the chain never saw the earlier conversation;
# accumulating it here lets follow-up questions be resolved in context.
chat_history = []
for question in questions:
    result = chain({'question': question, 'chat_history': chat_history})
    print(result['answer'])
    chat_history.append((question, result['answer']))