基于知识库的chatbot或者FAQ

人工智能
2025-08-15 05:45:01

背景

最近突然想做一个基于自己的知识库（knowledge base）的 chatbot 或者 FAQ 项目。未来如果可以在公司使用 ChatGPT 或者 GPT-3.5 之后的模型，还可以利用 GPT 强大的语言理解能力，结合检索出来的与用户问题相关的业务文档，来回答用户在业务中遇到的问题。

Chatbot UI

FAQ UI

后端代码实现 1. 建立一个基于 Excel 的简单知识库。

2.利用knowlege_base_service.py文件来获取上面知识库中所有的问题。 import pandas as pd knowledge_base = pd.read_excel("./data/knowledge_base.xlsx") require_to_reload = False def get_all_questions(): knowledge_base = get_knowlege_base() return knowledge_base["Question"].tolist(); pass def get_knowlege_base(): global require_to_reload, knowledge_base # knowledge_base_dict = knowledge_base.to_dict(orient="records") if require_to_reload == True: knowledge_base = pd.read_excel("./data/knowledge_base.xlsx") require_to_reload = False return knowledge_base 3. 创建一个句子相似度比较的模型，用来比较用户输入的问题和我们知识库中问题的相似度。

base 类

class BaseSentenceSimilarityModel:
    """Placeholder interface for the sentence-similarity models.

    Both methods only print a notice and return an empty value; concrete
    models override them.
    """

    def calculate_sentence_similarity(self, source_sentence, sentences_to_compare):
        """Print the placeholder notice and return an empty list."""
        print("padding to be overided by subclass")
        return []

    def find_most_similar_question(self, source_sentence, sentences_to_compare):
        """Print the placeholder notice and return an empty string."""
        print("padding to be overided by subclass")
        return ''

模型一，TF-IDF

class TFIDFModel(BaseSentenceSimilarityModel):
    """Sentence similarity from TF-IDF vectors compared by cosine similarity."""

    # A candidate must score strictly above this value to count as a match.
    SIMILARITY_THRESHOLD = 0.7

    def calculate_sentence_similarity(self, source_sentence, sentences_to_compare):
        """Score every candidate sentence against ``source_sentence``.

        A fresh ``TfidfVectorizer`` is fitted on the source sentence plus all
        candidates on every call.

        Returns:
            A list of ``{'sentence': ..., 'score': ...}`` dicts in the same
            order as ``sentences_to_compare``; scores are plain floats rounded
            to 4 decimals. An empty candidate list yields ``[]``.
        """
        candidates = list(sentences_to_compare)
        if not candidates:
            # Nothing to compare against; the vectoriser / cosine_similarity
            # would raise on an empty comparison set.
            return []

        # Row 0 of the matrix is the source sentence, rows 1.. are the candidates.
        tfidf_matrix = TfidfVectorizer().fit_transform([source_sentence] + candidates)
        scores = cosine_similarity(tfidf_matrix[0], tfidf_matrix[1:]).flatten()

        results = [
            {'sentence': sentence, 'score': round(float(score), 4)}
            for sentence, score in zip(candidates, scores)
        ]
        print(results)
        return results

    def find_most_similar_question(self, source_sentence, sentences_to_compare):
        """Return the best-scoring candidate, or ``''`` when none is close enough.

        The first candidate with the highest score wins, and only if that
        score is strictly greater than ``SIMILARITY_THRESHOLD``.
        """
        results = self.calculate_sentence_similarity(source_sentence, sentences_to_compare)
        if not results:
            return ''
        # max() keeps the first of several equal maxima, like the strict ">" loop did.
        best = max(results, key=lambda result: result['score'])
        return best['sentence'] if best['score'] > self.SIMILARITY_THRESHOLD else ''

模型二，基于 GloVe 词向量的模型

class Word2VectorModel(BaseSentenceSimilarityModel):
    """Sentence similarity from averaged ``GloveHelper`` vectors."""

    # A candidate must score strictly above this value to count as a match.
    SIMILARITY_THRESHOLD = 0.7

    def calculate_sentence_similarity(self, source_sentence, sentences_to_compare):
        """Score every candidate sentence against ``source_sentence``.

        Each sentence is reduced to one vector by averaging what
        ``GloveHelper.getVector`` returns along axis 0, then compared with
        cosine similarity.
        NOTE(review): assumes ``getVector`` returns one row per token - confirm.

        Returns:
            A list of ``{'sentence': ..., 'score': ...}`` dicts in the same
            order as ``sentences_to_compare``; scores are plain floats rounded
            to 4 decimals. An empty candidate list yields ``[]``.
        """
        candidates = list(sentences_to_compare)
        if not candidates:
            # cosine_similarity rejects an empty comparison set.
            return []

        # NOTE(review): a new GloveHelper is built per call; if its constructor
        # loads the embedding file this is costly - consider sharing one instance.
        glove_helper = GloveHelper()
        # Index 0 is the source sentence, 1.. are the candidates.
        mean_vectors = [
            np.mean(glove_helper.getVector(sentence), axis=0)
            for sentence in [source_sentence] + candidates
        ]
        scores = cosine_similarity([mean_vectors[0]], np.array(mean_vectors[1:])).flatten()

        results = [
            {'sentence': sentence, 'score': round(float(score), 4)}
            for sentence, score in zip(candidates, scores)
        ]
        print(results)
        return results

    def find_most_similar_question(self, source_sentence, sentences_to_compare):
        """Return the best-scoring candidate, or ``''`` when none is close enough.

        The first candidate with the highest score wins, and only if that
        score is strictly greater than ``SIMILARITY_THRESHOLD``.
        """
        results = self.calculate_sentence_similarity(source_sentence, sentences_to_compare)
        if not results:
            return ''
        # max() keeps the first of several equal maxima, like the strict ">" loop did.
        best = max(results, key=lambda result: result['score'])
        return best['sentence'] if best['score'] > self.SIMILARITY_THRESHOLD else ''

模型三，TensorFlow Hub 里的模型 universal-sentence-encoder_4

import tensorflow_hub as hub enable_universal_sentence_encoder_Model = True if enable_universal_sentence_encoder_Model: print('loading universal-sentence-encoder_4 model...') embed = hub.load("C:/apps/ml_model/universal-sentence-encoder_4") class UniversalSentenceEncoderModel(BaseSentenceSimilarityModel): def calculate_sentence_similarity(self, source_sentence, sentences_to_compare): # Parse the sentences using spaCy sentences = [source_sentence] + sentences_to_compare sentences_vectors = embed(sentences) sentences_vectors = sentences_vectors.numpy() print(sentences_vectors) # sentences_vector_mean = np.mean(sentences_vectors, axis=1) # for sentences_vector in sentences_vectors: # sentences_vector_mean.append(np.mean(sentences_vector, axis=0)) # Calculate cosine similarity between the source_sentence and sentences_to_compare print(np.array(sentences_vectors[0]).shape) print(np.array(sentences_vectors[1:]).shape) similarity_scores = cosine_similarity([sentences_vectors[0]], np.array(sentences_vectors[1:])) scores = similarity_scores.flatten(); results = [] for idx, score in enumerate(scores): # print('sentence:', sentences_to_compare[idx], f", score: {score:.4f}") results.append({'sentence': sentences_to_compare[idx], 'score': round(float(score), 4)}) print(results) return results def find_most_similar_question(self, source_sentence, sentences_to_compare): print("universal sentence encoder model....") results = self.calculate_sentence_similarity(source_sentence, sentences_to_compare) most_similar_question = '' score = 0 for result in results: # print('sentence:', sentences_to_compare[idx], f", score: {score:.4f}") if result['score'] > score and result['score']>0.6: score = result['score'] most_similar_question = result['sentence'] return most_similar_question 4. 利用flask 创建一个rest api app = Flask(__name__) CORS(app) @app.route('/') def index(): return 'welcome to my webpage!' 
@app.route('/api/chat', methods=['POST','GET']) def send_message(): user_message = request.json.get('user_message') # Find the most similar question in the knowledge base answer = find_most_similar_question(user_message) return jsonify({'bot_response': answer}) def find_most_similar_question(user_question , model = 'tf_idf_model'): knowledge_base = get_knowlege_base() print('model name :', model) if model == 'tf_idf_model': sentenceSimilarityModel = TFIDFModel() pass elif model == 'word2vector_model': sentenceSimilarityModel = Word2VectorModel() elif model == 'UniversalSentenceEncoder_Model': from nlp.sentence_similarity.universal_sentence_encoder_model import UniversalSentenceEncoderModel sentenceSimilarityModel = UniversalSentenceEncoderModel() else: sentenceSimilarityModel = TFIDFModel() most_similar_question = sentenceSimilarityModel.find_most_similar_question(user_question, knowledge_base["Question"].tolist()) filtered_df = knowledge_base[knowledge_base["Question"] == most_similar_question] # Check if any matching rows were found if not filtered_df.empty: found_answer = filtered_df.iloc[0]["Answer"] print("Answer:", found_answer) return found_answer else: print("No answer found for the question:", user_question) return 'No answer found for the question'; def get_top_faq(): # Count the frequency of each question top_question = knowledge_base.head(3).to_dict(orient="records") print(top_question) return top_question if __name__=="__main__": app.run(port=2020,host="127.0.0.1",debug=True) 前端Angular UI

chat.component.css

/* ---------- Chat layout ---------- */

.chat-container {
  max-width: 60%;
  margin: 0 auto;
  background-color: #f7f7f7;
  box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
}

/* Scrollable message list */
.chat-area {
  max-height: 550px;
  overflow-y: auto;
  padding: 20px;
  background-color: #f7f7f7;
  border-radius: 10px;
}

.chat-header {
  color: black;
  background-color: #ececf1;
  text-align: center;
  padding: 10px;
  border-bottom: 1px solid #ccc;
  font-size: 35px;
}

.chat-foot {
  padding: 10px 15px;
  margin: 10px;
}

/* ---------- Message bubbles ---------- */

.user-bubble {
  --tw-border-opacity: 1;
  background-color: white; /* User message background color */
  border-color: rgba(255, 255, 255, var(--tw-border-opacity));
  border-radius: 10px;
  padding: 10px 10px;
  margin: 10px 0;
  align-self: flex-end;
}

.chat-message {
  display: flex;
}

.bot-bubble {
  --tw-border-opacity: 1;
  background-color: #ececf1; /* Chatbot message background color */
  /* Was "border-collapse: rgba(...)", an invalid declaration; border-color
     matches the sibling .user-bubble rule. */
  border-color: rgba(255, 255, 255, var(--tw-border-opacity));
  border-radius: 10px;
  padding: 10px 10px;
  margin: 10px 0;
  align-self: flex-start;
  justify-content: right;
}

/* ---------- Input row ---------- */

.form-container {
  display: flex;
  align-items: center;
}

.user-input {
  flex-grow: 1;
  padding: 10px;
  border: 1px solid #ccc;
  border-radius: 5px;
  outline: none;
  font-size: 16px;
  margin-right: 10px;
}

/* Wrapper around the user / bot avatar */
.indented-div {
  margin-right: 10px;
  padding: 15px 1px 10px 10px;
}

/* Send button */
.send-button {
  width: 100px;
  background-color: #3f51b5;
  color: #fff;
  border: none;
  border-radius: 5px;
  padding: 10px 20px;
  font-size: 16px;
  cursor: pointer;
  transition: background-color 0.3s;
}

.send-button:hover {
  background-color: #303f9f;
}

/* ---------- Message row columns ---------- */

.chat_left {
  display: flex;
  padding: 0px 0px 0px 10px;
  margin: 1px 0;
}

/* Column holding the thumbs-up / thumbs-down actions */
.chat_right {
  width: 50px;
  margin: 20px 20px 20px 2px;
}

.chat_right i {
  color: #000;
  transition: color 0.3s;
  cursor: pointer;
}

.chat_right i:hover {
  color: darkorange;
}

/* ---------- text-suggestion.component.css ---------- */

.suggestion-container {
  position: relative;
  width: calc(100% - 110px);
}

/* Absolutely positioned list; its "top" offset is set from the template. */
.suggestion-container ul {
  list-style: none;
  padding: 0;
  margin: 0;
  width: 100%;
  position: absolute;
  background-color: #fff;
  border-radius: 5px;
  box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2); /* card-like effect */
}

.selected {
  background-color: #f0f0f0; /* Highlight color */
}

.suggestion-container li {
  padding: 10px;
  cursor: pointer;
}

.suggestion-container li:hover {
  background-color: #f2f2f2; /* Hover effect */
}

/* ---------- Category buttons ---------- */

.category-button {
  background-color: #fff;
  color: #333;
  border: 1px solid #ccc;
  padding: 5px 10px;
  margin: 5px;
  border-radius: 5px;
  cursor: pointer;
  font-size: 15px;
  transition: background-color 0.3s, border-color 0.3s;
}

.category {
  margin-bottom: 10px;
}

.category-button.selected {
  color: #007bff;
  border: 1px solid #007bff;
}

.category-button:hover {
  color: #007bff;
  border: 1px solid #007bff;
}

chat.component.html

<!-- Chat widget: header, message list, then a footer with category buttons and the input form. -->
<div class="chat-container">
  <div class="chat-header"> ChatBot </div>

  <!-- Message list; #chatArea is the template reference for this element. -->
  <div #chatArea class="chat-area">
    <!-- One row per entry in chatMessages: avatar + bubble on the left, feedback icons on the right. -->
    <div *ngFor="let message of chatMessages" style = "display: flex; justify-content: space-between">
      <div class="chat_left" style="flex-grow: 1; flex:8">
        <!-- Avatar shown for user messages. -->
        <ng-container *ngIf="message.type === 'user'" >
          <div class = "indented-div"> <img style = "height: 25px" src = "assets/user.svg" alt="User:"/></div>
        </ng-container>
        <!-- Avatar shown for bot messages. -->
        <ng-container *ngIf="message.type === 'bot'" style = "word-break:break-word">
          <div class = "indented-div"> <img style = "height: 25px" src = "assets/bot.svg" alt="Bot:"/></div>
        </ng-container>
        <!-- Bubble class depends on the sender; message.text is bound as HTML. -->
        <div [ngClass]="{'user-bubble': message.type === 'user', 'bot-bubble': message.type === 'bot'}" style="flex-grow: 1" >
          <div [innerHTML]="message.text" style = "word-break:break-word"> </div>
        </div>
      </div>
      <!-- Thumbs up / down, rendered for bot messages only. -->
      <div class="chat_right" style = "">
        <div *ngIf="message.type === 'bot'" >
          <i (click)="onThumbsUpClick(message)"> <img style = "height:20px" src = "assets/thumb-up.svg" alt="ThumbsUp"/></i>
          <i (click)="onThumbsDownClick(message)"> <img style = "height:20px" src = "assets/thumb-down.svg" alt="ThumbsDown"/> </i>
        </div>
      </div>
    </div>
  </div>

  <div class = "chat-foot" >
    <!-- Category selector; the button matching selectedCategory gets the "selected" class. -->
    <div class="category">
      <button class="category-button" [class.selected]="selectedCategory === 'general'" (click)="selectCategory('general')"> General </button>
      <button class="category-button" [class.selected]="selectedCategory === 'ecs'" (click)="selectCategory('ecs')"> ecs </button>
      <button class="category-button" [class.selected]="selectedCategory === 'jdk17'" (click)="selectCategory('jdk17')"> jdk17 </button>
      <button class="category-button" [class.selected]="selectedCategory === 'kafka'" (click)="selectCategory('kafka')"> kafka </button>
      <button class="category-button" [class.selected]="selectedCategory === 'Permission'" (click)="selectCategory('Permission')"> Permission </button>
    </div>
    <div >
      <!-- Submitting the form calls sendMessage(). -->
      <form (submit)="sendMessage()">
        <div class="suggestion-container">
          <!-- Suggestion list; its top offset is -41.3px per suggestion. -->
          <ul *ngIf="showSuggestions"
              [style.top.px] = "-suggestions.length*41.3">
            <li *ngFor="let suggestion of suggestions; let i = index" [class.selected]="i === selectedSuggestionIndex" (click)="onSuggestionClick(suggestion)"> {{ suggestion }} </li>
          </ul>
        </div>
        <div style = "display: flex">
          <!-- Text box bound to userMessage; input events refresh suggestions, keydown is passed to onKeyDown. -->
          <input class="user-input" name="userMessage" placeholder="Type your message..." [(ngModel)]="userMessage" (input)="onQueryChange()" (keydown)="onKeyDown($event)" autocomplete="off"/>
          <button class="send-button" >Send</button>
        </div>
      </form>
    </div>
  </div>
</div>

chat.component.ts

import { Component, ElementRef, ViewChild, AfterViewChecked, OnInit } from '@angular/core';
import { HttpClient } from '@angular/common/http';
import { DomSanitizer } from '@angular/platform-browser';
import {host} from "../app-config";

/**
 * Chat widget: keeps the message list, sends user messages to the backend
 * and offers type-ahead suggestions taken from the FAQ question list.
 */
@Component({
  selector: 'app-chat',
  templateUrl: './chat.component.html',
  styleUrls: ['./chat.component.css']
})
export class ChatComponent implements AfterViewChecked, OnInit {
  @ViewChild('chatArea') private chatArea!: ElementRef;

  userMessage: string = '';
  chatMessages: any[] = [];
  suggestions: string[] = [];
  allSuggestions: string[] = [];
  showSuggestions = false;
  selectedSuggestionIndex: number = -1; // -1 means no suggestion is highlighted
  selectedCategory: string = 'general'; // Default category

  constructor(
    private http: HttpClient,
    private sanitizer: DomSanitizer
  ) {
    // Load the full suggestion list once; filtering happens client-side.
    this.http.get<string[]>(host + '/faq/all-suggestions')
      .subscribe(data => {
        this.allSuggestions = data;
      });
  }

  ngOnInit() {
    // chatMessages is still empty here, so this call has nothing to sanitize.
    this.sanitizeMessages();
    this.chatMessages.push({ text: 'Hello! How can I assist you?', type: 'bot' });
  }

  /** Remember the chosen category. */
  selectCategory(category: string) {
    this.selectedCategory = category;
    // Implement category-specific logic or fetching here
  }

  ngAfterViewChecked() {
    this.scrollToBottom();
  }

  /** Move the highlighted suggestion with ArrowDown / ArrowUp, wrapping around. */
  onKeyDown(event: KeyboardEvent) {
    if (event.key !== 'ArrowDown' && event.key !== 'ArrowUp') {
      return;
    }
    event.preventDefault();

    const count = this.suggestions.length;
    if (count === 0) {
      // Without this guard "% 0" gives NaN and userMessage becomes undefined.
      return;
    }

    if (event.key === 'ArrowDown') {
      this.selectedSuggestionIndex = (this.selectedSuggestionIndex + 1) % count;
    } else {
      // Treat "nothing selected" (-1) as index 0 so ArrowUp wraps to the last item.
      const current = this.selectedSuggestionIndex < 0 ? 0 : this.selectedSuggestionIndex;
      this.selectedSuggestionIndex = (current - 1 + count) % count;
    }
    this.userMessage = this.suggestions[this.selectedSuggestionIndex];
  }

  /** Copy the clicked suggestion into the input and hide the list. */
  onSuggestionClick(suggestion: string) {
    this.userMessage = suggestion;
    this.showSuggestions = false;
  }

  /** Append the user's message, post it to the backend and append the reply. */
  sendMessage() {
    if (this.userMessage === undefined || this.userMessage.trim() === '') {
      return;
    }
    this.showSuggestions = false;
    this.chatMessages.push({ text: this.userMessage, type: 'user' });
    this.http.post<any>(host + '/api/chat', { user_message: this.userMessage })
      .subscribe(response => {
        this.chatMessages.push({ text: response.bot_response, type: 'bot' });
        // The input is cleared only once the reply has arrived.
        this.userMessage = '';
      });
  }

  /** Scroll the message list to its bottom; errors are ignored. */
  scrollToBottom() {
    try {
      this.chatArea.nativeElement.scrollTop = this.chatArea.nativeElement.scrollHeight;
    } catch (err) {}
  }

  onThumbsUpClick(message: any) {
    console.log('Thumbs up clicked for the bot message: ', message.text);
  }

  onThumbsDownClick(message: any) {
    console.log('Thumbs down clicked for the bot message: ', message.text);
  }

  // Sanitize messages with HTML content
  // NOTE(review): bypassSecurityTrustHtml marks the text as trusted rather than
  // cleaning it - only safe if bot text never contains untrusted input.
  sanitizeMessages() {
    for (let message of this.chatMessages) {
      if (message.type === 'bot') {
        message.text = this.sanitizer.bypassSecurityTrustHtml(message.text);
      }
    }
  }

  /** Recompute the suggestion list for the current input and show it. */
  onQueryChange() {
    this.showSuggestions = true;
    this.suggestions = this.getTop5SimilarSuggestions(this.allSuggestions, this.userMessage);
  }

  /**
   * Keep suggestions that contain the query (case-insensitive), order them by
   * ascending edit distance to the query and return at most five.
   */
  getTop5SimilarSuggestions(suggestions: string[], query: string): string[] {
    const needle = query.toLowerCase();
    return suggestions
      .filter(suggestion => suggestion.toLowerCase().includes(needle))
      .sort((a, b) => this.calculateSimilarity(a, query) - this.calculateSimilarity(b, query))
      .slice(0, 5);
  }

  /**
   * Levenshtein edit distance between suggestion and query
   * (0 means identical; lower means more similar).
   */
  calculateSimilarity(suggestion: string, query: string): number {
    if (suggestion === query) return 0;

    const len1 = suggestion.length;
    const len2 = query.length;
    // matrix[i][j] = distance between the first i chars of query and the
    // first j chars of suggestion.
    const matrix: number[][] = [];

    for (let i = 0; i <= len2; i++) {
      matrix[i] = [i];
    }
    for (let j = 0; j <= len1; j++) {
      matrix[0][j] = j;
    }

    for (let i = 1; i <= len2; i++) {
      for (let j = 1; j <= len1; j++) {
        const cost = suggestion[j - 1] === query[i - 1] ? 0 : 1;
        matrix[i][j] = Math.min(
          matrix[i - 1][j] + 1,        // deletion
          matrix[i][j - 1] + 1,        // insertion
          matrix[i - 1][j - 1] + cost  // substitution
        );
      }
    }
    return matrix[len2][len1];
  }
}

标签：

基于知识库的chatbot或者FAQ由讯客互联人工智能栏目发布，感谢您对讯客互联的认可，以及对我们原创作品以及文章的青睐，非常欢迎各位朋友分享到个人网站或者朋友圈，但转载请说明文章出处“基于知识库的chatbot或者FAQ”

上一篇
如何确认目标期刊被SCI或EI收录？

下一篇
AD教程（四）排针类元件模型的创建