{"id":3880,"date":"2025-03-28T12:22:05","date_gmt":"2025-03-28T04:22:05","guid":{"rendered":"http:\/\/benjenq.ddns.net\/blog\/%e3%80%90%e7%a2%bc%e8%be%b2%e3%80%91rag-%e6%87%89%e7%94%a8%ef%bc%9a%e8%ae%93-ai-%e6%9b%b4%e8%81%bd%e8%a9%b1\/"},"modified":"2025-03-28T12:22:05","modified_gmt":"2025-03-28T04:22:05","slug":"%e3%80%90%e7%a2%bc%e8%be%b2%e3%80%91rag-%e6%87%89%e7%94%a8%ef%bc%9a%e8%ae%93-ai-%e6%9b%b4%e8%81%bd%e8%a9%b1","status":"publish","type":"post","link":"http:\/\/benjenq.ddns.net\/blog\/%e3%80%90%e7%a2%bc%e8%be%b2%e3%80%91rag-%e6%87%89%e7%94%a8%ef%bc%9a%e8%ae%93-ai-%e6%9b%b4%e8%81%bd%e8%a9%b1\/","title":{"rendered":"\u3010\u78bc\u8fb2\u3011RAG \u61c9\u7528\uff1a\u8b93 AI \u66f4\u807d\u8a71"},"content":{"rendered":"<p><img loading=\"lazy\" decoding=\"async\" alt=\"\u3010\u78bc\u8fb2\u3011RAG \u61c9\u7528\uff1a\u8b93 AI \u66f4\u807d\u8a71\" height=\"405\" src=\"https:\/\/pic.pimg.tw\/benjenq\/1743271911-3763047691-g_l.png\" title=\"\u3010\u78bc\u8fb2\u3011RAG \u61c9\u7528\uff1a\u8b93 AI \u66f4\u807d\u8a71\" width=\"720\"><\/p>\n<p>\u5728<a href=\"https:\/\/benjenq.pixnet.net\/blog\/post\/49551868\" target=\"_blank\">\u4e0a\u4e00\u7bc7\u6587\u7ae0<\/a>\u4e2d\u4ecb\u7d39\u4e86\u5982\u4f55\u5229\u7528 Telegram \u548c Ollama \u6253\u9020\u4e00\u500b\u4f4e\u6210\u672c\u7684 AI \u804a\u5929\u6a5f\u5668\u4eba\u3002\u96d6\u7136\u4f7f\u7528 Ollama \u9a45\u52d5\u7684 LLM\uff08\u5927\u578b\u8a9e\u8a00\u6a21\u578b\uff09\u4f86\u9032\u884c\u8f15\u9b06\u5c0d\u8a71\u6216\u5275\u610f\u767c\u60f3\u6548\u679c\u76f8\u7576\u4e0d\u932f\uff0c\u4f46\u5b83\u5728\u56de\u7b54\u8f03\u65b0\u7684\u4e8b\u4ef6\u6216\u662f\u7279\u5b9a\u9818\u57df\u7684\u554f\u984c\u6642\uff0c\u8868\u73fe\u4ecd\u4e0d\u5920\u7cbe\u78ba\u3002\u9019\u4e5f\u4fc3\u4f7f\u4e86\u4e00\u9805\u540d\u70ba RAG\uff08Retrieval-Augmented Generation\uff0c\u6aa2\u7d22\u589e\u5f37\u751f\u6210\uff09\u7684\u5275\u65b0\u6280\u8853\uff0c\u9010\u6f38\u5728 AI \u9818\u57df\u4e2d\u5d84\u9732\u982d\u89d2\u3002 \u6839\u64da ChatGPT 
\u7684\u8aaa\u660e\uff0cRAG\u7684\u904b\u4f5c\u539f\u7406\u5982\u4e0b\uff1a<\/p>\n<ul>\n<li>\u6aa2\u7d22\u968e\u6bb5\uff1a\u7576\u4f60\u5411 RAG \u63d0\u554f\u6642\uff0c\u5b83\u9996\u5148\u6703\u5206\u6790\u4f60\u7684\u554f\u984c\uff0c\u7136\u5f8c\u641c\u5c0b\u8cc7\u6599\u5eab\u88e1\u6700\u76f8\u95dc\u7684\u5167\u5bb9\u3002\u5c31\u50cf\u6211\u5011\u5728\u641c\u5c0b\u5f15\u64ce\u4e0a\u67e5\u8a62\u8cc7\u8a0a\u4e00\u6a23\uff0cRAG \u6703\u9078\u64c7\u51fa\u90a3\u4e9b\u6700\u7b26\u5408\u554f\u984c\u7684\u6bb5\u843d\u6216\u8cc7\u6599\u3002<\/li>\n<li>\u751f\u6210\u968e\u6bb5\uff1a\u63a5\u4e0b\u4f86\uff0cRAG \u6703\u6839\u64da\u5b83\u525b\u525b\u627e\u5230\u7684\u8cc7\u6599\u4f86\u751f\u6210\u56de\u7b54\uff0c\u9019\u6a23\u53ef\u4ee5\u78ba\u4fdd\u7b54\u6848\u4e0d\u50c5\u662f\u57fa\u65bc\u820a\u7684\u77e5\u8b58\uff0c\u9084\u80fd\u7d50\u5408\u5230\u6700\u65b0\u7684\u8cc7\u8a0a\u3002<\/li>\n<li>\u7d50\u5408\u6aa2\u7d22\u548c\u751f\u6210\uff1a\u9019\u5169\u6b65\u9a5f\u7684\u7d50\u5408\u5c31\u662f RAG \u7684\u4eae\u9ede\uff0c\u5b83\u4e0d\u50c5\u50c5\u4f9d\u8cf4\u5df2\u6709\u7684\u77e5\u8b58\u4f86\u751f\u6210\u5167\u5bb9\uff0c\u9084\u80fd\u7d50\u5408\u5916\u90e8\u6aa2\u7d22\u5230\u7684\u4fe1\u606f\uff0c\u505a\u5230\u66f4\u52a0\u7cbe\u6e96\u548c\u52d5\u614b\u7684\u56de\u7b54\u3002<\/li>\n<\/ul>\n<p>\u7c21\u55ae\u5730\u8aaa\uff0cRAG \u662f\u4e00\u7a2e\u7d50\u5408\u4e86\u300c\u8cc7\u6599\u6aa2\u7d22\u300d\u548c\u300c\u751f\u6210\u6a21\u578b\u300d\u7684\u6280\u8853\u3002\u4e0a\u8ff0\u5167\u5bb9\u807d\u8d77\u4f86\u4f3c\u4e4e\u5f88\u53b2\u5bb3\uff0c\u4f46\u5c0d\u65bc\u4e0d\u5c11\u4eba\u4f86\u8aaa\uff0c\u4ecd\u7136\u89ba\u5f97\u6709\u4e9b\u6a21\u7cca\u3002\u8209\u500b\u66f4\u5be6\u969b\u7684\u4f8b\u5b50\uff0c\u73fe\u5728\u5e7e\u4e4e\u6240\u6709\u7684 AI \u804a\u5929\u670d\u52d9\u90fd\u5df2\u7d93\u52a0\u5165\u4e86\u7db2\u8def\u641c\u5c0b\u529f\u80fd\uff0c\u9019\u6b63\u662f RAG 
\u6280\u8853\u7684\u4e00\u7a2e\u5c55\u73fe\u3002\u5b83\u6703\u5148\u5c07\u7528\u6236\u7684\u554f\u984c\u9001\u5230\u7db2\u8def\u4e0a\u9032\u884c\u641c\u5c0b\uff0c\u6a21\u64ec\u771f\u4eba\u7528\u6236\u5148\u7528 Google \u67e5\u8a62\u4e00\u904d\uff0c\u518d\u5f9e\u641c\u5c0b\u7d50\u679c\u4e2d\u7be9\u9078\u51fa\u6700\u76f8\u95dc\u7684\u7b54\u6848\uff0c\u4ea4\u7531\u8a9e\u8a00\u6a21\u578b\u5f59\u6574\u4e4b\u5f8c\u56de\u8986\u7d66\u7528\u6236\u3002\u9019\u6a23\u4e00\u4f86\uff0cAI \u804a\u5929\u6a5f\u5668\u4eba\u4f3c\u4e4e\u80fd\u5920\u4e0d\u65b7\u5b78\u7fd2\u65b0\u77e5\u4e26\u4e0d\u65b7\u6210\u9577\u3002<\/p>\n<p>\u90a3\u9ebc\u554f\u984c\u4f86\u4e86\uff0c\u6709\u4e0d\u5c11\u8cc7\u6599\u662f\u53ea\u80fd\u5b58\u653e\u5728\u672c\u5730\u7aef\uff0c\u5982\u4f55\u5728\u672c\u5730\u7aef\u5be6\u73fe RAG \u5462\uff1f\u7db2\u8def\u4e0a\u6709\u4e0d\u5c11\u76f8\u95dc\u6559\u5b78\uff0c\u751a\u81f3\u6709\u4e9b\u535a\u4e3b\u4fe1\u8a93\u65e6\u65e6\u5730\u8aaa\uff0cRAG \u975e\u5e38\u9069\u5408\u7528\u65bc\u90e8\u7f72\u5728\u79c1\u4eba\u4f01\u696d\uff0c\u4f5c\u70ba\u4f01\u696d\u5167\u90e8\u7684\u77e5\u8b58\u5eab\u3002\u7136\u800c\uff0c\u7576\u6211\u7df4\u7fd2 RAG \u7684\u5efa\u7f6e\u904e\u7a0b\u4e2d&nbsp;\uff0c\u7d50\u679c\u537b\u662f\u7ffb\u8eca\u9023\u9023\uff0c\u96e3\u9053\u662f\u6211\u6280\u4e0d\u5982\u4eba\u55ce\uff1f\u9084\u662f\u8aaa\u6559\u5b78\u535a\u4e3b\u5011\u4e5f\u662f\u6a19\u984c\u6d41\u91cf\u9ee8\uff1f\u7562\u7adf\uff0c\u4f01\u696d\u7684\u9700\u6c42\u548c\u500b\u4eba\u4f7f\u7528\u6a19\u6e96\u662f\u4e0d\u540c\u7684\uff0c\u5982\u679c\u56de\u7b54\u5167\u5bb9\u672a\u80fd\u9075\u5faa\u4f01\u696d\u76ee\u6a19\uff0c\u5c0f\u554f\u984c\u5c31\u53ef\u80fd\u8b8a\u6210\u5927\u554f\u984c\u3002<\/p>\n<p>\u7d93\u904e\u6578\u5468\u4ee5\u4f86\u8207 ChatGPT 
\u4e00\u8d77\u591a\u8f2a\u596e\u6230\u4e4b\u5f8c\uff0c\u5982\u4eca\u7e3d\u7b97\u662f\u6709\u4e86\u4e00\u9ede\u7709\u76ee\uff0c\u76ee\u524d\u7b97\u662f\u5f88\u63a5\u8fd1\u6210\u529f\u5427\uff01\u56e0\u6b64\uff0c\u6211\u6c7a\u5b9a\u64b0\u5beb\u9019\u7bc7\u6587\u7ae0\uff0c\u5206\u4eab\u5982\u4f55\u5728\u672c\u5730\u7aef\u642d\u5efa RAG \u61c9\u7528\u7684\u7d93\u9a57\uff0c\u4e26\u8ac7\u8ac7\u5728\u904e\u7a0b\u4e2d\u6240\u906d\u9047\u7684\u5404\u7a2e\u6311\u6230\u8207\u5fc3\u5f97\u3002<\/p>\n<p><!-- more --><\/p>\n<p><span style=\"font-size:14px\"><strong>\u76f8\u95dc\u4ee3\u78bc<\/strong><\/span><\/p>\n<p>\u5148\u8ac7\u7a0b\u5f0f\u78bc\u7684\u90e8\u5206\u3002\u7a0b\u5f0f\u78bc\u7684\u90e8\u5206\u4e26\u4e0d\u591a\uff0c\u505a\u6cd5\u8ddf\u7db2\u8def\u6559\u5b78\u4e5f\u5927\u540c\u5c0f\u7570\u3002\u9996\u5148\u662f ollama \u7684\u90e8\u5206\uff0c<a href=\"http:\/\/benjenq.pixnet.net\/blog\/post\/49551868\" target=\"_blank\">\u524d\u4e00\u7bc7<\/a>\u5c31\u804a\u904e\u642d\u5efa Ollama \u7684\u65b9\u6cd5\u8207\u5957\u4ef6\uff0c\u9019\u908a\u5c31\u4e0d\u8d05\u8ff0\u4e86\u3002\u8981\u8b93 ollama \u9a45\u52d5 LLM \u8aaa\u51fa\u672c\u5730\u8cc7\u6599\u5167\u7684\u7b54\u6848\uff0c\u804a\u5929\u7d50\u5408 RAG \u6aa2\u7d22\u672c\u5730\u8cc7\u6599\u5eab\u7684\u7a0b\u5f0f\u78bc\u5c31\u53ea\u6709\u9019\u6a23\uff1a<\/p>\n<pre>q = input('\u8f38\u5165\u63d0\u554f\uff1a')\n# \u9032\u884c\u4f7f\u7528\u8005\u67e5\u8a62\nrag = rag_result(f'{q}') # \u77e5\u8b58\u5eab\u7684 RAG \u6aa2\u7d22\u7d50\u679c\n# \u4f7f\u7528 Ollama \u56de\u7b54\nfrom ollama import chat\nstream = chat(\nmodel='phi4:14b', # RTX-3060 12G \u8dd1\u5f97\u52d5\uff0cVRAM \u4e0d\u5920\u5927\u53ef\u4ee5\u9078\u64c7\u5176\u4ed6 7b\/8b \u6216\u66f4\u5c0f\u7684\u3002\nmessages=[{'role': 'system', 'content': 
f'\u8acb\u6839\u64da\u63d0\u4f9b\u7684\u8cc7\u6599\u56de\u7b54\u3002\u5982\u679c\u8cc7\u6599\u5eab\u5167\u986f\u793a\u300c\u6c92\u6709\u7b54\u6848\u300d,\\\n\u6216\u4e0d\u80fd\u78ba\u5b9a\u662f\u7528\u6236\u8981\u7684\u7b54\u6848\uff0c\u5c31\u8aaa\u300c\u62b1\u6b49\uff0c\u6211\u4e0d\u77e5\u9053\u3002\u300d\uff0c\u4e0d\u8981\u81ea\u884c\u63a8\u7406\u3002\u63d0\u4f9b\u7684\u8cc7\u6599:\\n {rag}'},\n{'role': 'user', 'content': f'{q}'}],\nstream=True,\nkeep_alive=1\n)\nfor chunk in stream:\nprint(chunk['message']['content'], end='', flush=True)\n<\/pre>\n<p>\u5f9e\u4ee5\u4e0a\u7a0b\u5f0f\u78bc\u5c31\u80fd\u770b\u51fa RAG \u642d\u914d LLM \u5230\u5e95\u662f\u600e\u9ebc\u4e00\u56de\u4e8b\u4e86\u3002\u5f9e\u672c\u5730\u8cc7\u6599\u5eab\u4e2d\u5f97\u5230\u7684\u7b54\u6848 rag \u653e\u5165 LLM \u7684\u7cfb\u7d71\u984c\u8a5e\uff08system role \u7684 content\uff09\u5167\uff0c\u4f5c\u70ba\u4e0a\u4e0b\u6587\u672c\u8b93\u8a9e\u8a00\u6a21\u578b\u53c3\u8003\u5c31\u884c\u4e86\u3002\u5230\u4e86\u9019\u88e1\u53ef\u4ee5\u5148\u505a\u5169\u4ef6\u4e8b\uff1a<\/p>\n<ul>\n<li>rag = rag_result() \u9019\u4e00\u6bb5\u9700\u8981\u751f\u6210\u4ec0\u9ebc\u7b54\u6848\uff0c\u53ef\u4ee5\u5148\u96a8\u4fbf\u624b\u52d5\u751f\u6210\u5e7e\u500b\u7b54\u6848\u3002<\/li>\n<li>\u89c0\u5bdf ollama \u6703\u600e\u9ebc\u56de\u61c9\u3002\u4f8b\u5982 rag_result() \u56de\u61c9\u5167\u5bb9\u53ef\u4ee5\u5148\u4e82\u80e1\u8b05\u300c\u7533\u8acb\u8cb8\u6b3e\u8981\u6e96\u5099\u8eab\u5206\u8b49\u3001\u8b77\u7167\u3001\u5e33\u6236\u8cc7\u6599\u3002\u7533\u8acb\u501f\u6b3e\u8981\u6e96\u5099\u5370\u7ae0\u3001\u96d9\u8b49\u4ef6\u3001\u6236\u53e3\u540d\u7c3f\u300d\uff0c\u7136\u5f8c\u8f38\u5165\u554f\u984c\u6642\u96a8\u4fbf\u554f\uff0c\u770b\u770b ollama 
\u6703\u600e\u9ebc\u56de\u61c9\u3002<\/li>\n<\/ul>\n<p>\u9019\u88e1\u5c31\u53ef\u4ee5\u8aaa\u660e\u70ba\u4f55\u7167\u8457\u7db2\u8def\u6559\u5b78\u6c92\u554f\u984c\uff0c\u81ea\u5df1\u505a\u5c31\u7ffb\u8eca\u4e86\u3002\u56e0\u70ba\u7db2\u8def\u6559\u5b78\u901a\u5e38\u53ea\u6703\u793a\u7bc4\u5e7e\u7b46\u8cc7\u6599\u5230\u8cc7\u6599\u5eab\uff0c\u6240\u4ee5\u4e0d\u8ad6\u7528\u54ea\u4e00\u7a2e\u6aa2\u7d22\u65b9\u5f0f\uff0c\u4e00\u5b9a\u4e5f\u6703\u627e\u51fa\u90a3\u5e7e\u7b46\uff0cRAG \u6aa2\u7d22\u7bc4\u570d\u5f88\u5c0f\uff0c\u8cc7\u6599\u5eab\u547d\u4e2d\u7387\u767e\u5206\u4e4b\u767e\uff0c\u8a9e\u8a00\u6a21\u578b\u6703\u6839\u64da system role \u984c\u8a5e\u628a\u4e0d\u76f8\u5e72\u7684\u7b54\u6848\u904e\u6ffe\u6389\uff0c\u6240\u4ee5\u7d50\u679c\u4e00\u5b9a\u662f\u7b26\u5408\u9810\u671f\u7684\u3002\u7136\u800c\u73fe\u5be6\u4e2d\u7684\u8cc7\u6599\u5eab\u901a\u5e38\u90fd\u5f88\u9f90\u5927\uff0cRAG \u6aa2\u7d22\u7bc4\u570d\u5927\uff0c\u7b54\u6848\u660e\u660e\u5728\u8cc7\u6599\u5eab\u4e2d\u537b\u4e0d\u4e00\u5b9a\u6703\u88ab RAG \u6aa2\u7d22\u5230\uff0c\u6240\u4ee5\u554f\u984c\u5c31\u4f86\u4e86\u3002<\/p>\n<p>\u56e0\u6b64\uff0c\u63a5\u4e0b\u4f86\u76f4\u63a5\u9032\u5165\u5927\u578b\u8cc7\u6599\u5eab\u7684 RAG \u5be6\u6230\u74b0\u7bc0\uff0c\u624d\u80fd\u770b\u51fa RAG \u662f\u600e\u9ebc\u904b\u4f5c\u7684\u3002\u9019\u88cf\u642c\u51fa\u6574\u5957\u300c<a href=\"https:\/\/law.moj.gov.tw\/LawClass\/LawAll.aspx?pcode=C0000001\" target=\"_blank\">\u4e2d\u83ef\u6c11\u570b\u5211\u6cd5<\/a>\u300d\u4f5c\u4f8b\u5b50\u3002\u5148\u5c07\u4e0b\u8f09\u7684 pdf \u8f49\u63db\u6210\u6587\u5b57\u6a94\uff1a<\/p>\n<p>\u5b89\u88dd pdfplumber \u5957\u4ef6<\/p>\n<p>pip install pdfplumber<\/p>\n<p>PDF \u8f49\u6587\u5b57\u6a94\u7684\u7a0b\u5f0f\u78bc\uff1a<\/p>\n<pre>import pdfplumber\ndef read_pdf(file_path:str):\n'''\u8b80\u53d6 PDF \u6587\u4ef6\u5167\u5bb9'''\nwith pdfplumber.open(file_path) as pdf:\ntext = ''\nfor page in pdf.pages:\ntext += page.extract_text()\nreturn text\ntext = 
read_pdf('\u4e2d\u83ef\u6c11\u570b\u5211\u6cd5.pdf')\n<\/pre>\n<p>text \u5167\u5bb9\u662f\u4e00\u6bb5&nbsp;1,171 \u884c\u7684\u300c\u4e2d\u83ef\u6c11\u570b\u5211\u6cd5\u300d\u5168\u6587\u6587\u672c\u3002\u9019\u6642\u5019\u82e5\u7570\u60f3\u5929\u958b\u7684\u628a\u5b83\u5168\u4e1f\u5230\u7cfb\u7d71\u984c\u8a5e\u5167\uff0c\u7136\u5f8c\u554f ollama \u4e00\u4e9b\u6cd5\u5f8b\u554f\u984c\u5982\u300c<a href=\"https:\/\/udn.com\/news\/story\/7320\/8636479\" target=\"_blank\">\u8907\u5370\u5468\u6770\u502b\u6f14\u5531\u6703\u7684\u9580\u7968\u518d\u62ff\u53bb\u8ce3<\/a>\u662f\u6709\u6c92\u6709\u72af\u7f6a\uff1f\u300d\u6642\uff0c\u6703\u767c\u73fe ollama \u76f4\u63a5\u653e\u98db\u81ea\u6211\u4e86\uff0c\u4e0d\u50c5\u5b8c\u5168\u5ffd\u8996\u63d0\u4f9b\u7684\u5211\u6cd5\u5168\u6587\uff0c\u9084\u81ea\u884c\u63a8\u7406\u51fa\u7b54\u6848\uff0c\u7528\u7684\u9084\u662f\u4e0d\u77e5\u54ea\u4e00\u570b\u7684\u6cd5\u5f8b\u3002\u539f\u56e0\u662f\u5211\u6cd5\u5168\u6587\u4e1f\u5230 system role \u63d0\u8a5e\u5df2\u8d85\u51fa\u8a9e\u8a00\u6a21\u578b\u7684\u4e0a\u4e0b\u6587\u9577\u5ea6\uff0c\u672c\u4f86\u8981\u6c42\u5b83\u300c\u53ea\u80fd\u6839\u64da\u63d0\u4f9b\u7684\u5167\u5bb9\u56de\u7b54\u300d\u7684\u984c\u8a5e\u56e0\u800c\u5931\u6548\u3002<\/p>\n<p>\u9019\u6642\u5019\u5c31\u5f97\u8b93&nbsp;RAG \u767b\u5834\u4e86\u3002\u9996\u5148\u5fc5\u9808\u5c07\u6574\u90e8\u300c\u4e2d\u83ef\u6c11\u570b\u5211\u6cd5\u300d\u5207\u5272\u70ba\u591a\u500b\u6587\u672c\u5340\u584a\uff08chunk\uff09\uff0c\u5f8c\u7e8c\u518d\u900f\u904e\u6587\u672c\u6bd4\u5c0d\uff0c\u6839\u64da\u554f\u984c\u627e\u51fa\u6700\u63a5\u8fd1\u7b54\u6848\u7684\u6587\u672c\u5340\u584a\u5373\u53ef\u3002\u9019\u4e9b\u6587\u672c\u5340\u584a\u5982\u4f55\u8207\u63d0\u554f\u7522\u751f\u95dc\u806f\uff0c\u5f97\u4ef0\u8cf4\u300cembedding 
\u8a9e\u8a00\u6a21\u578b\u300d\u7684\u7406\u89e3\u529b\uff0c\u9019\u90e8\u5206\u60f3\u60f3\u5c31\u89ba\u5f97\u5f88\u7384\u3002<\/p>\n<p>\u6587\u672c\u5207\u5272\u7684\u7a0b\u5f0f\u78bc\u4e5f\u4e0d\u591a\u3002\u7136\u800c\u5207\u5272\u65b9\u6cd5\u8b93\u6211\u591a\u6b21\u7ffb\u8eca\uff0c\u6700\u5f8c\u662f\u6839\u64da\u300c\u7b2c xxx \u689d\u300d\u5167\u5bb9\u9032\u884c\u5207\u5272\uff08\u611f\u8b1d <a href=\"https:\/\/chatgpt.com\/\" target=\"_blank\">ChatGPT<\/a> \u63d0\u4f9b\u5beb\u6cd5\uff09\uff0c\u539f\u56e0\u662f\u5207\u5272\u4e4b\u5f8c\u5fc5\u9808\u78ba\u4fdd\u6bcf\u4e00\u6bb5\u5167\u5bb9\u7684\u8a9e\u610f\u5b8c\u6574\uff0c\u5728 embedding \u5411\u91cf\u5316\u4e4b\u5f8c\u624d\u80fd\u6b63\u5e38\u767c\u63ee\u4f5c\u7528\u3002<\/p>\n<pre>def chunk_content(text:str):\n'''\u4ee5 '\u7b2c xx \u689d' \u70ba\u5206\u5272\u9ede\u5207\u5272\u6587\u672c\n---\ntext: str  \u8981\u5207\u5272\u7684\u6587\u672c\u5168\u6587\\n\nreturn: List[str]  # \u5207\u5272\u5f8c\u7684\u6587\u672c\u5217\u8868\n'''\n# \u4f7f\u7528\u6b63\u5247\u8868\u9054\u5f0f\u5339\u914d '\u7b2c xx \u689d' \u7684\u6a21\u5f0f\u4f5c\u70ba\u5206\u5272\u9ede\npattern = r'(\u7b2c\\s*\\d+[-\\d]*\\s*\u689d)'  # \u9019\u500b\u6b63\u5247\u8868\u9054\u5f0f\u6703\u5339\u914d '\u7b2c 1 \u689d'\u3001'\u7b2c 3-1 \u689d' \u7b49\u6a21\u5f0f\n# \u4f7f\u7528 re.split \u9032\u884c\u5206\u5272\ntexts = re.split(pattern, text)\nresult = []\nfor text in texts:  #\u5207\u5272\u5f8c\uff0c\u7b2cxxx\u689d\u4e5f\u6703\u81ea\u6210\u4e00\u500b\u5206\u5272\u6bb5\uff0c\u6240\u4ee5\u8981\u5148\u52a0\u5165======== \u518d\u5408\u4f75\u5f8c\uff0c\u518d\u6b21\u4ee5 ======== \u5207\u5272\nif re.match(pattern, text):  # \u6aa2\u67e5\u662f\u5426\u662f\u5206\u5272\u9ede\uff08'\u7b2c xx \u689d'\uff09\nresult.append('========\\n') #\u52a0\u5165\u65b0\u7684\u5207\u5272\u6a19\u793a\nresult.append(text)\n# \u5c07\u7d50\u679c\u5408\u4f75\u6210\u5b57\u7b26\u4e32\uff0c\u5167\u5bb9\u662f\u7b2c xx \u689d\u4e4b\u524d\u90fd\u6703\u6709 ========\\n 
\u6a19\u793a\noutput_text = ''.join(result)\npattern2 = r'========\\n' # \u518d\u91dd\u5c0d '========\\n' \u5207\u5272\nchunks = re.split(pattern2, output_text)\nreturn chunks\n<\/pre>\n<p>chunk_content(text) \u7d50\u679c\u6703\u5f97\u5230\u4e00\u500b\u6587\u672c\u5340\u584a\u7684 list\uff08\u4e32\u5217\uff09\uff0c\u6bcf\u4e00\u500b\u6587\u672c\u5340\u584a\u5167\u5bb9\u662f\u5211\u6cd5\u7684\u67d0\u4e00\u689d\u689d\u6587\u3002<\/p>\n<p><img loading=\"lazy\" decoding=\"async\" alt=\"\u87a2\u5e55\u64f7\u53d6\u756b\u9762 2025-03-29 000123.png\" height=\"492\" src=\"https:\/\/pic.pimg.tw\/benjenq\/1743177702-3625475934-g.png\" title=\"\u87a2\u5e55\u64f7\u53d6\u756b\u9762 2025-03-29 000123.png\" width=\"720\"><\/p>\n<p>\u6587\u672c\u5207\u5272\u4e4b\u5f8c\uff0c\u63a5\u8457\u662f\u4e00\u6574\u5957 RAG \u7d44\u5408\u62f3\u3002<\/p>\n<p>pip \u5b89\u88dd\u76f8\u95dc\u7684\u5957\u4ef6<\/p>\n<p>pip install&nbsp;chromadb&nbsp;sentence_transformers<\/p>\n<p>\u4ee5\u4e0b\u662f RAG \u7684\u7a0b\u5f0f\u78bc\uff0c\u6574\u6bb5\u7a0b\u5f0f\u78bc\u53ef\u76f4\u63a5\u5b58\u6210\u6a94\u6848\uff0c\u5982 RAGProc.py\u3002\u96d6\u7136\u6709\u9ede\u7384\u5b78\uff0c\u4e0d\u904e\u53ea\u9700\u8981\u77e5\u9053\u600e\u9ebc\u7528\u5c31\u884c\u4e86\u3002<\/p>\n<pre>import chromadb\nfrom sentence_transformers import SentenceTransformer\nmodel = SentenceTransformer('intfloat\/multilingual-e5-large') # \u8a66\u4e86\u4e00\u5927\u5806\u7684\u6a21\u578b\uff0c\u5c31\u9019\u500b\u6700\u597d\u7528\uff0c\u4f46\u4e5f\u6700\u6d88\u8017\u8a18\u61b6\u9ad4\n# \u521d\u59cb\u5316 ChromaDB\uff08\u7d22\u5f15\u5132\u5b58\u5230\u672c\u5730\uff09\nchroma_client = chromadb.PersistentClient(path='.\/chroma_db')\ncollection = chroma_client.get_or_create_collection(name='rag_collection')\n#\u53d6\u5f97\u672c\u5730 Embedding \u5411\u91cf\uff08\u52a0\u4e0a \"passage:\" \u524d\u7db4\uff09\ndef get_embedding(text, is_query=False):\nif is_query:\ntext = 'query: ' + text\nelse:\ntext = 'passage: ' + text\nresult = 
model.encode(text,normalize_embeddings=True).tolist() #normalize_embeddings=True \u7684\u610f\u601d\u662f\u5c07\u751f\u6210\u7684\u8a9e\u53e5\u5d4c\u5165\u5411\u91cf\u9032\u884c\u6b78\u4e00\u5316\u8655\u7406\u3002\nreturn result\n# \u8655\u7406\u6587\u4ef6\u4e26\u5b58\u5165 ChromaDB\ndef process_ragdb(chunks,file_path):\nprint(f'\u5efa\u7acb [{file_path}] RAG \u7d22\u5f15...')\nfor idx, chunk in enumerate(chunks):\nembedding = get_embedding(chunk, is_query=False)  # \u6587\u4ef6\u5167\u5bb9\u52a0 \"passage:\"\ncollection.add(\nids=[f'{file_path}_{idx}'],\nembeddings=[embedding],\nmetadatas=[{'source': file_path, 'chunk_index': idx, 'text': f'{chunk}'}],\ndocuments=[f'{chunk}']\n)\nprint(f'\u6210\u529f\u5b58\u5165 {len(chunks)} \u500b chunk')\n# \u8a08\u7b97 token \u9577\u5ea6\ndef tokens_len(text:str):\ntokens = model.tokenizer.encode(text)\nreturn len(tokens)\n# \u67e5\u8a62\u6700\u76f8\u95dc\u5167\u5bb9\ndef rag_result(query, top_k=5, limit_tokens = 1024):\nquery_embedding = get_embedding(query, is_query=True)  # \u67e5\u8a62\u52a0 \"query:\"\nresults = collection.query(\nquery_embeddings=[query_embedding],\nn_results=top_k\n)\ndb = ''\nans_tokens_lens = 0\nfor document in results['documents'][0]:\ndocument_toekn_lens = tokens_len(str(document))\nif ans_tokens_lens + document_toekn_lens &gt; limit_tokens:\n#print('\u7b54\u6848\u904e\u9577\uff0c\u5f8c\u9762\u5ffd\u7565')\ncontinue\ndb = f\"{db}{document}\\n\"\nans_tokens_lens += document_toekn_lens\nif db == '':\ndb = '\u6c92\u6709\u7b54\u6848'\nreturn db.strip()\n<\/pre>\n<p>\u8aaa\u660e\u5982\u4e0b\uff1a<\/p>\n<ul>\n<li>get_embedding\uff1a\u4f7f\u7528 <a href=\"https:\/\/huggingface.co\/intfloat\/multilingual-e5-large\" target=\"_blank\">multilingual-e5-large<\/a> 
\u6a21\u578b\uff0c\u5c07\u6bcf\u4e00\u500b\u6587\u672c\u5340\u584a\u8f49\u63db\u70ba\u5411\u91cf\u6578\u64da\u3002\u8f49\u63db\u70ba\u6578\u64da\u4e4b\u5f8c\uff0c\u8a9e\u8a00\u6a21\u578b\u624d\u80fd\u8a08\u7b97\u8a9e\u610f\u7684\u76f8\u4f3c\u5ea6\uff0c\u554f\u7b54\u4e4b\u9593\u7684\u95dc\u806f\u5ea6\u7b49\uff0c\u6240\u4ee5 embedding&nbsp;\u662f RAG \u80fd\u5426\u5be6\u73fe\u529f\u80fd\u7684\u6838\u5fc3\u91cd\u9ede\uff0cLLM \u7684\u56de\u7b54\u624d\u662f\u5176\u6b21\u3002\u9019\u88e1\u4f7f\u7528&nbsp;is_query \u7528\u4f86\u5207\u63db\u6587\u672c\u5340\u584a\u52a0\u4e0a &#8216;query: &#8216; \u8207 &#8216;passage: &#8216; \u524d\u7db4\u8a5e\uff0c\u539f\u56e0\u662f&nbsp;<a href=\"https:\/\/huggingface.co\/intfloat\/multilingual-e5-large\" target=\"_blank\">multilingual-e5-large<\/a>&nbsp;\u6a21\u578b\u7684\u7279\u6709\u7528\u6cd5\uff0c\u8207\u9019\u500b\u6a21\u578b\u7684\u8a13\u7df4\u65b9\u5f0f\u6709\u95dc\uff0c\u5c11\u4e86\u524d\u7db4\u8a5e\u6703\u964d\u4f4e multilingual-e5-large \u6a21\u578b\u7684\u6027\u80fd\u8868\u73fe\uff0c\u8a73\u60c5\u53ef\u53c3\u8003\u8a72\u6a21\u578b\u7684 <a href=\"https:\/\/huggingface.co\/intfloat\/multilingual-e5-large\" target=\"_blank\">Model card<\/a> \u8aaa\u660e\u3002<\/li>\n<li>process_ragdb\uff1a\u4e3b\u8981\u662f\u8655\u7406 chunks \u6587\u672c\u5340\u584a\u96c6\uff0c\u4e5f\u5c31\u662f\u4e0a\u9762\u63d0\u5230\u7684\u5207\u5272\u4e4b\u5f8c\u7684\u6587\u672c list\uff0c\u9010\u4e00 embedding \u8f49\u63db\u6210\u5411\u91cf\u8cc7\u6599\u4e4b\u5f8c\uff0c\u5132\u5b58\u5728\u672c\u6a5f\u7aef chromadb \u5411\u91cf\u8cc7\u6599\u5eab\u5167\u3002file_path 
\u53ea\u662f\u4f5c\u70ba\u5207\u5272\u4e4b\u5f8c\u5c07\u6587\u672c\u7247\u6bb5\u6a19\u8a18\u4f86\u6e90\u800c\u5df2\uff08\u4f8b\u5982\u82e5\u8981\u52a0\u5165\u5169\u7a2e\u4ee5\u4e0a\u7684\u6cd5\u898f\uff09\uff0c\u4e0d\u5f71\u97ff\u7d50\u679c\uff0c\u6e2c\u8a66\u6642\u53ef\u4ee5\u7701\u7565\u4e0d\u7528\u3002<\/li>\n<li>tokens_len\uff1a\u8a08\u7b97\u6bcf\u4e00\u6bb5\u6587\u5b57\uff08\u5373\u6bcf\u4e00\u689d\u6cd5\u689d\uff09\u7684\u9577\u5ea6\u3002\u503c\u5f97\u6ce8\u610f\u7684\u662f\uff0cget_embedding() \u7528\u7684\u662f model.encode()\uff0c\u800c\u8a08\u7b97\u9577\u5ea6 tokens_len() \u7528\u7684\u662f model.tokenizer.encode()\uff0c\u5169\u8005\u7684\u7528\u9014\u8207\u610f\u7fa9\u5dee\u5225\u5f88\u5927\uff0c\u53ef\u4ee5\u8acb&nbsp;ChatGPT \u89e3\u91cb\u5169\u8005\u6709\u4f55\u4e0d\u540c\u3002<\/li>\n<li>rag_result\uff1a\u6839\u64da\u63d0\u554f\u5f9e\u8cc7\u6599\u5eab\u4e2d\u627e\u51fa\u53ef\u80fd\u7684\u7b54\u6848\u3002top_k \u662f\u5217\u51fa\u524d\u5e7e\u500b\u6700\u6709\u53ef\u80fd\u662f\u7b54\u6848\u7684\u6587\u672c\u5340\u584a\uff0c\u5728\u9019\u88e1\u6211\u7684\u5efa\u8b70\u662f 3 \u5c31\u6709\u4e0d\u932f\u7684\u53cd\u61c9\uff0c\u6700\u9ad8 5&nbsp;\u5c31\u884c\u4e86\uff0c\u7b54\u6848\u4e00\u591a\u53cd\u800c\u6703\u5e72\u64fe LLM \u7684\u5224\u65b7\u3002limit_tokens \u662f\u9650\u5236\u7b54\u6848\u7684\u9577\u5ea6\uff0c\u56e0\u70ba\u5982\u679c\u7b54\u6848\u592a\u9577\uff0c\u5c31\u6703\u767c\u751f\u4e0a\u9762\u8aaa\u7684\u653e\u98db\u81ea\u6211\uff0c\u9019\u500b\u6578\u503c\u8207 ollama \u4f7f\u7528\u54ea\u4e00\u500b LLM \u6709\u95dc\uff0c\u5982\u679c\u8a72 LLM \u6a21\u578b\u80fd\u63a5\u53d7\u8f03\u9577\u7684\u6587\u672c\uff0c\u6578\u5b57\u53ef\u4ee5\u52a0\u5927\u3002\u500b\u4eba\u5fc3\u5f97\u662f phi4:14b \u6a21\u578b\u5728 limit_tokens \u8d85\u904e 1,024&nbsp;\u5c31\u653e\u98db\u4e86\uff0cDeepSeek-r1:14b 
\u53ef\u4ee5\u518d\u7a0d\u5fae\u5927\u4e00\u9ede\u3002<\/li>\n<\/ul>\n<p>\u4ee5\u4e0a\u5c31\u662f\u4ee3\u78bc\u7684\u90e8\u5206\uff0c\u7d44\u5408\u8d77\u4f86\u518d\u7a0d\u52a0\u6539\u5beb\uff0c\u5982\u4e0b\uff1a<\/p>\n<pre>import pdfplumber\nimport re\ndef read_pdf(file_path:str):\n#\u70ba\u4e86\u7bc0\u7701\u7248\u9762\uff0c\u9019\u88e1\u586b\u5165\u4e0a\u9762 read_pdf \u5167\u5bb9\ndef chunk_content(text:str):\n#\u70ba\u4e86\u7bc0\u7701\u7248\u9762\uff0c\u9019\u88e1\u586b\u5165\u4e0a\u9762 chunk_content \u5167\u5bb9\nfrom RAGProc import * #\u532f\u5165\u4e0a\u9762\u7684 RAGProc.py\n# \u7b2c\u4e00\u6b21\u57f7\u884c\u6642 is_Indexed \u8acb\u6539\u70ba False \u624d\u80fd\u5efa\u7acb\u5411\u91cf\u7d22\u5f15\uff0c\u5728\u540c\u76ee\u9304\u4e0b\u6703\u7522\u751f chroma_db \u76ee\u9304\u3002\u7b2c\u4e8c\u6b21\u4ee5\u5f8c\u6539\u70ba True\u3002\n# \u5982\u679c\u8981\u91cd\u5efa\u7d22\u5f15\uff0c\u624b\u52d5\u522a\u9664 chroma_db \u76ee\u9304\uff0c\u4e26\u5c07 is_Indexed \u6539\u70ba False\u3002\nis_Indexed = True #\u5047\u8a2d\u5df2\u7d93\u5efa\u904e\u5411\u91cf\u7d22\u5f15\nif not is_Indexed:\nfile_path = '\u4e2d\u83ef\u6c11\u570b\u5211\u6cd5.pdf'\ntext = read_pdf(file_path)\ntexts = chunk_content(text) #\u5207\u5272\u6587\u672c\u53d6\u5f97\u6587\u672c list\nprocess_ragdb(texts,file_path) #\u5c07\u6587\u672c list \u8f49\u70ba\u5411\u91cf\u6578\u64da\u4e26\u5b58\u5165\u8cc7\u6599\u5eab\u4e2d\nexit(0) #\u76f4\u63a5\u8df3\u96e2\u7d50\u675f\u7a0b\u5f0f\nif __name__ == \"__main__\":\nwhile True:\nq = input('\u8f38\u5165\u63d0\u554f\uff1a')\n# \u9032\u884c\u4f7f\u7528\u8005\u67e5\u8a62\nrag = rag_result(f'{q}') # \u77e5\u8b58\u5eab\u7684\u6aa2\u7d22\u7d50\u679c\n# \u4f7f\u7528 Ollama \u56de\u7b54\nfrom ollama import chat\nstream = chat(\nmodel='phi4:14b', # \u6a21\u578b\u540d\uff0c\u53ef\u7528 ollama list \u67e5\u8a62\nmessages=[{'role': 'system', 'content': 
f'\u8acb\u6839\u64da\u63d0\u4f9b\u7684\u8cc7\u6599\u56de\u7b54\uff0c\u76f4\u63a5\u56de\u7b54\u5c31\u597d\u3002\u5982\u679c\u8cc7\u6599\u5eab\u5167\u986f\u793a\u300c\u6c92\u6709\u7b54\u6848\u300d\uff0c\\\n\u6216\u4e0d\u80fd\u78ba\u5b9a\u662f\u7528\u6236\u8981\u7684\u7b54\u6848\uff0c\u5c31\u8aaa\u300c\u62b1\u6b49\uff0c\u6211\u4e0d\u77e5\u9053\u3002\u300d\uff0c\u4e0d\u8981\u81ea\u884c\u63a8\u7406\u3002\u63d0\u4f9b\u7684\u8cc7\u6599:\\n {rag}'},\n{'role': 'user', 'content': f'{q}'}],\nstream=True,\nkeep_alive=1\n)\nfor chunk in stream:\nprint(chunk['message']['content'], end='', flush=True)\nprint('\\n')\n<\/pre>\n<p><strong><span style=\"font-size:14px\">\u6700\u7d42\u6210\u679c<\/span><\/strong><\/p>\n<p>\u63a5\u4e0b\u4f86\u53c8\u5230\u4e86\u898b\u8b49\u5947\u8e5f\u7684\u6642\u523b\u4e86! \u4e0d\u7ba1\u4fee\u4e0d\u4fee\u5f97\u597d\uff0c\u725b\u903c\u7684\u53e3\u865f\u5c11\u4e0d\u4e86\uff01\u5144\u5f1f\u5011\u642c\u8d77\u4f60\u5011\u7684\u5c0f\u677f\u51f3\uff0c\u5750\u7a69\u4e86\uff01\u775c\u958b\u4f60\u5011\u7684\u5361\u59ff\u862d\u5927\u773c\u775b\uff0c\u9192\u4f86\u5427\uff01\u89aa\u611b\u7684\uff5e\u597d&#8230;.\u958b\u6a5f\uff013\u500b\u5927\u98a8\u6247\u55da\u55da\u7684\u8f49\u8d77\u4f86\u4e86&#8230;B2&#8230;99&#8230;\u5c4f\u5e55\u6210\u529f\u9ede\u4eae\uff0c\u83ef\u78a9LOGO\u51fa\u4f86\u4e86\uff01\uff08<a href=\"https:\/\/www.youtube.com\/@fydn\" target=\"_blank\">\u4fee\u96fb\u8166\u7684\u5f35\u54e5<\/a>\uff09<\/p>\n<p><img loading=\"lazy\" decoding=\"async\" alt=\"\u87a2\u5e55\u64f7\u53d6\u756b\u9762 2025-03-29 005730.png\" height=\"395\" src=\"https:\/\/pic.pimg.tw\/benjenq\/1743181105-3167807184-g_l.png\" title=\"\u87a2\u5e55\u64f7\u53d6\u756b\u9762 2025-03-29 005730.png\" width=\"720\"><\/p>\n<p>AI \u8a8d\u70ba\u662f\u72af\u4e86\u5211\u6cd5 203 
\u689d\uff0c\u5211\u671f\u6700\u91cd\u662f\u4e00\u5e74\u4ee5\u4e0b\u3002\u8207\u539f\u59cb\u7684\u5211\u6cd5\u6587\u672c\u4f86\u6e90\u5c0d\u7167\u4e00\u4e0b\uff0c\u78ba\u5b9a\u4f86\u81ea\u65bc\u63d0\u4f9b\u7684\u5167\u5bb9\uff1a<\/p>\n<p><img loading=\"lazy\" decoding=\"async\" alt=\"\u3010\u78bc\u8fb2\u3011RAG \u61c9\u7528\uff1a\u8b93 AI \u66f4\u807d\u8a71\" height=\"737\" src=\"https:\/\/pic.pimg.tw\/benjenq\/1743181432-1557010286-g_l.png\" title=\"\u3010\u78bc\u8fb2\u3011RAG \u61c9\u7528\uff1a\u8b93 AI \u66f4\u807d\u8a71\" width=\"720\"><\/p>\n<p>\u67e5\u4e86\u4e00\u4e0b<a href=\"https:\/\/www.nownews.com\/news\/6664081\" target=\"_blank\">\u65b0\u805e<\/a>\uff0c\u6aa2\u5bdf\u5b98\u662f\u4ee5\u507d\u9020\u6587\u66f8\u7f6a\u3001\u52a0\u91cd\u8a50\u6b3a\u7f6a\u8d77\u8a34\uff0c\u800c\u4e14\u5acc\u7591\u72af\u9084\u88ab\u8acb\u5403\u96de\u9d28\u98ef\uff0c\u5211\u671f\u6700\u8f15\u90fd\u662f\u4e00\u5e74\u4ee5\u4e0a\u3002\u53ef\u60f3\u800c\u77e5\u5c46\u6642\u4e0a\u4e86\u6cd5\u5ead\uff0c\u88ab\u544a\u7684\u8faf\u8b77\u5f8b\u5e2b\u4e5f\u6703\u4ee5\u5176\u4ed6\u6cd5\u689d\u722d\u53d6\u6e1b\u5211\u6216\u812b\u7f6a\uff0c\u5230\u5e95\u72af\u4e86\u54ea\u4e00\u689d\u6cd5\u5f8b\u53ef\u80fd\u4e5f\u4e0d\u4e00\u5b9a\uff0c\u6240\u4ee5 AI \u7684\u7406\u7531\u770b\u770b\u5c31\u597d\u3002<\/p>\n<p>\u554f AI \u9019\u5169\u689d\u6cd5\u5f8b\uff0cAI \u4e5f\u7b54\u5f97\u51fa\u4f86\uff0c\u6240\u5f15\u7528\u7684\u689d\u6587\u4e5f\u662f\u6211\u5011\u63d0\u4f9b\u7684\u3002\u81f3\u65bc\u6709\u6c92\u6709\u7b54\u5c0d\uff0c\u6211\u500b\u4eba\u5df2\u7d93\u7121\u6cd5\u5206\u8fa8\uff0c\u5f97\u554f\u5c08\u696d\u6cd5\u5f8b\u4eba\u58eb\u3002<\/p>\n<p><img loading=\"lazy\" decoding=\"async\" alt=\"\u87a2\u5e55\u64f7\u53d6\u756b\u9762 2025-03-29 012240.png\" height=\"537\" src=\"https:\/\/pic.pimg.tw\/benjenq\/1743182760-4085794205-g.png\" title=\"\u87a2\u5e55\u64f7\u53d6\u756b\u9762 2025-03-29 012240.png\" width=\"720\"><\/p>\n<p>\u518d\u96a8\u4fbf\u554f\u5e7e\u500b\u554f\u984c\uff1a<\/p>\n<p><img 
loading=\"lazy\" decoding=\"async\" alt=\"\u87a2\u5e55\u64f7\u53d6\u756b\u9762 2025-03-29 011339.png\" height=\"556\" src=\"https:\/\/pic.pimg.tw\/benjenq\/1743182029-2593876467-g_l.png\" title=\"\u87a2\u5e55\u64f7\u53d6\u756b\u9762 2025-03-29 011339.png\" width=\"720\"><\/p>\n<p>\u4e0a\u9762\u7684\u63d0\u554f\u4e2d\u523b\u610f\u4ee5\u60c5\u5883\u63cf\u8ff0\u5c0f\u660e\u6216\u5c0f\u83ef\u7684\u5be6\u969b\u884c\u70ba\uff0c\u800c\u975e\u76f4\u63a5\u6d89\u53ca\u6cd5\u689d\u5167\u5bb9\uff0c\u85c9\u6b64\u89c0\u5bdf\u662f\u5426\u8207\u7b54\u6848\u7522\u751f\u95dc\u9023\uff0c\u7d50\u679c\u770b\u8d77\u4f86\u662f\u6709\u7684\u3002\u8981\u5f37\u8abf\u7684\u662f\uff0c\u6cd5\u5f8b\u554f\u984c\u9084\u662f\u5f97\u8acb\u771f\u6b63\u7684\u5c08\u5bb6\uff0c\u6240\u4ee5 AI \u7684\u610f\u898b\u770b\u770b\u5c31\u597d\u3002\u5176\u4e2d\u6709\u4e9b\u770b\u5f97\u51fa\u4f3c\u4e4e\u6709\u9ede\u9053\u7406\u4f46\u53c8\u602a\u602a\u7684\uff0c\u9019\u662f\u56e0\u70ba LLM&nbsp;\u662f\u67e5\u8a62\u5f97\u5230\u300c\u6709\u9650\u7684\u7d50\u679c\u300d\u518d\u9032\u884c\u8a9e\u610f\u63a8\u7406\uff0c\u56e0\u6b64\u771f\u6b63\u95dc\u9375\u5728\u65bc RAG \u7d22\u5f15\u8cc7\u6599\u5eab\u80fd\u5426\u6839\u64da\u63d0\u554f\u5167\u5bb9\uff0c\u67e5\u8a62\u547d\u4e2d\u66f4\u7cbe\u78ba\u7684\u7b54\u6848\u3002<\/p>\n<p>\u65e2\u7136\u8981\u73a9\u5c31\u73a9\u5927\u4e00\u9ede\u7684\u3002\u518d\u52a0\u5165<a href=\"https:\/\/law.moj.gov.tw\/LawClass\/LawAll.aspx?pcode=B0000001\" target=\"_blank\">\u6c11\u6cd5<\/a>\u8207<a href=\"https:\/\/law.moj.gov.tw\/LawClass\/LawAll.aspx?pcode=K0040012\" target=\"_blank\">\u9053\u8def\u4ea4\u901a\u7ba1\u7406\u8655\u7f70\u689d\u4f8b<\/a>\uff0c\u9023\u540c\u5211\u6cd5\u5168\u90e8\u52a0\u8d77\u4f86\u8d85\u904e\u5169\u5343\u689d\u3001\u6578\u842c\u5b57\uff0c\u51fa\u5e7e\u984c\u770b\u770b AI \u600e\u9ebc\u56de\u7b54\uff1a<\/p>\n<p><img loading=\"lazy\" decoding=\"async\" alt=\"\u87a2\u5e55\u64f7\u53d6\u756b\u9762 2025-03-29 015311.png\" height=\"466\" 
src=\"https:\/\/pic.pimg.tw\/benjenq\/1743184406-3922116609-g_l.png\" title=\"\u87a2\u5e55\u64f7\u53d6\u756b\u9762 2025-03-29 015311.png\" width=\"720\"><\/p>\n<p><img loading=\"lazy\" decoding=\"async\" alt=\"\u87a2\u5e55\u64f7\u53d6\u756b\u9762 2025-03-29 015811.png\" height=\"531\" src=\"https:\/\/pic.pimg.tw\/benjenq\/1743184701-3668598014-g.png\" title=\"\u87a2\u5e55\u64f7\u53d6\u756b\u9762 2025-03-29 015811.png\" width=\"720\"><\/p>\n<p>\u770b\u5f97\u51fa AI \u56de\u7b54\u5167\u5bb9\u6709\u6db5\u84cb\u5169\u90e8\u4ee5\u4e0a\u7684\u6cd5\u5f8b\uff08\u9152\u99d5\u6d89\u53ca\u5211\u6cd5\u8207\u4ea4\u901a\u8655\u7f70\u689d\u4f8b\uff09\u3002\u81f3\u65bc AI \u7684\u56de\u7b54\u662f\u5426\u6b63\u78ba\uff0c\u8001\u5be6\u8aaa\uff0c\u6709\u4e9b\u7b54\u6848\u770b\u8d77\u4f86\u6709\u9ede\u727d\u5f37\uff08\u53ef\u80fd\u662f\u56e0\u70ba\u6c92\u80fd\u627e\u5230\u6700\u5408\u9069\u7684\u8cc7\u6599\uff0c\u5c0e\u81f4 LLM \u63a8\u7406\u51fa\u4e00\u4e9b\u4e0d\u592a\u5408\u7406\u7684\u7b54\u6848\uff09\uff0c\u4f46\u4e5f\u6709\u4e9b\u7b54\u6848\u7684\u908f\u8f2f\u4f3c\u4e4e\u9817\u6709\u9053\u7406\u3002\u518d\u6b21\u5f37\u8abf\uff0c\u6cd5\u5f8b\u9084\u662f\u61c9\u8a72\u4ea4\u7d66\u5c08\u696d\u4eba\u58eb\u4f86\u5224\u65b7\uff0c\u6848\u4f8b\u4e5f\u4e0d\u4e00\u5b9a\u6709\u6b63\u78ba\u7b54\u6848\uff08\u4e0d\u7136\u5169\u9020\u5c31\u4e0d\u7528\u6253\u5b98\u53f8\u4e86\uff09\uff0c\u4f46\u53ef\u4ee5\u78ba\u5b9a\u7684\u662f\uff0cAI \u6240\u5f15\u7528\u7684\u6cd5\u5f8b\u689d\u6587\uff0c\u78ba\u5be6\u9075\u5b88\u5f9e\u6211\u5011\u63d0\u4f9b\u7684\u8cc7\u6599\u627e\u7b54\u6848\u7684\u6307\u4ee4\u3002<\/p>\n<p><span style=\"font-size:14px\"><strong>\u7d50\u8ad6\u8207\u5fc3\u5f97\uff1a<\/strong><\/span><\/p>\n<p data-end=\"286\" data-start=\"25\">RAG \u65b9\u6848\u662f\u6211\u7b2c\u4e00\u6b21\u5728\u91cd\u5ea6\u4f9d\u8cf4\u514d\u8cbb ChatGPT \u7684\u9805\u76ee\u4e0a\u9032\u884c\u5617\u8a66\u3002\u904e\u53bb\u9047\u5230\u554f\u984c\u6642\uff0c\u6211\u7fd2\u6163\u76f4\u63a5 Google 
\u641c\u5c0b\u7b54\u6848\uff0c\u7136\u800c\u9019\u6b21\u5982\u958b\u6587\u6240\u8a00\uff0c\u7db2\u8def\u6559\u5b78\u5728\u532f\u5165\u5927\u8cc7\u6599\u5eab\u4e4b\u5f8c\u5c31\u5931\u6548\u7ffb\u8eca\u4e86\uff0c\u6240\u4ee5\u9019\u6b21\u6211\u60f3\u770b\u770b ChatGPT \u80fd\u5e36\u6211\u8d70\u5230\u54ea\u88e1\uff0c\u9664\u975e\u5b83\u5361\u4f4f\uff0c\u7e3d\u662f\u5728\u76f8\u540c\u554f\u984c\u4e0a\u7e5e\u5708\u5708\uff0c\u6211\u624d\u6703\u4e0a\u7db2\u641c\u5c0b\u65b0\u7684\u89e3\u6cd5\u3002\u4e0a\u8ff0\u5167\u5bb9\u662f\u7d93\u904e\u5e7e\u9031\u7684\u53cd\u8986\u6e2c\u8a66\u5f8c\uff0c\u6240\u5f97\u5230\u7684\u6700\u4f73\u7d50\u679c\uff0c\u904e\u7a0b\u4e2d\u78ba\u5be6\u8e29\u4e86\u4e0d\u5c11\u5751\u3002\u5f9e\u8cc7\u6599\u5207\u5272\u65b9\u6cd5\u5230\u65b9\u6848\u9078\u64c7\uff0c\u6700\u5f8c\u5728 chromadb \u65b9\u6848\u4e2d\u9078\u64c7 embedding \u6a21\u578b\u6642\uff0cChatGPT \u63a8\u85a6\u4e86\u958b\u6e90\u7684 multilingual-e5-large\uff08<a href=\"https:\/\/ihower.tw\/blog\/archives\/12167\" target=\"_blank\">\u5728\u7e41\u4e2d embedding \u6392\u540d\u4e2d\u4e0d\u932f<\/a>\uff09\uff0c\u6700\u7d42\u5f97\u5230\u4e86\u76f8\u5c0d\u6eff\u610f\u7684\u7d50\u679c\u3002\u6b64\u5916\uff0c<a href=\"https:\/\/ollama.com\/\" target=\"_blank\">ollama<\/a> \u4e5f\u652f\u63f4 <a href=\"https:\/\/ollama.com\/search?c=embedding\" target=\"_blank\">embeddings \u6a21\u578b<\/a>\uff08<a href=\"https:\/\/ollama.com\/blog\/embedding-models\" target=\"_blank\">\u8aaa\u660e<\/a>\uff09\uff0c\u5be6\u6e2c\u642d\u914d&nbsp;<a href=\"https:\/\/ollama.com\/library\/bge-m3\" target=\"_blank\">bge-m3<\/a>&nbsp;\u5c0d\u4e2d\u6587\u7684\u6548\u679c\u4e5f\u4e0d\u932f\u3002<\/p>\n<p data-end=\"615\" 
data-start=\"288\">\u4e0d\u904e\uff0c\u9019\u4e00\u5207\u624d\u525b\u525b\u958b\u59cb\uff0c\u4ecd\u6709\u8a31\u591a\u5730\u65b9\u53ef\u4ee5\u9032\u884c\u512a\u5316\u3002\u8209\u4f8b\u4f86\u8aaa\uff0c\u7576\u6211\u8a62\u554f\u300c\u884c\u99db\u4eba\u884c\u9053\u7f70\u591a\u5c11\u300d\u6642\uff0c\u7e3d\u662f\u5f97\u5230\u932f\u8aa4\u7684\u7b54\u6848\u3002\u6b63\u78ba\u7684\u7b54\u6848\u61c9\u8a72\u662f\u300c<a href=\"https:\/\/law.moj.gov.tw\/LawClass\/LawAll.aspx?pcode=K0040012\" target=\"_blank\">\u9053\u8def\u4ea4\u901a\u7ba1\u7406\u8655\u7f70\u689d\u4f8b<\/a>&nbsp;\u7b2c 45 \u689d\u7b2c 1 \u9805\u7b2c\u516d\u6b3e\u300d\uff0c\u8abf\u67e5\u767c\u73fe\uff0c\u7b2c 45 \u689d\u7684\u5167\u5bb9\u975e\u5e38\u5197\u9577\uff0c\u8d85\u51fa\u4e86 multilingual-e5-large \u7684\u7a97\u53e3\u9577\u5ea6\uff08512 tokens\uff09\uff0c\u56e0\u6b64\u7121\u8ad6\u5982\u4f55\u90fd\u7121\u6cd5\u627e\u5230\u6b63\u78ba\u7b54\u6848\u3002\u7576\u6211\u624b\u52d5\u5c07\u7b2c 45 \u689d\u5207\u5272\u6210\u66f4\u5c0f\u7684\u6bb5\u843d\uff0c\u518d\u88dc\u5145\u76f8\u95dc\u689d\u6587\u9032\u884c embedding \u7a0b\u5e8f\uff0c\u554f\u984c\u6700\u7d42\u89e3\u6c7a\u4e86\u3002\u5f9e\u89c0\u5bdf\u4f86\u770b\uff0c\u50cf\u662f\u8f03\u9ad8\u5c64\u6b21\u7684\u6bcd\u6cd5\uff08\u5982\u6c11\u6cd5\u3001\u5211\u6cd5\u3001\u61b2\u6cd5\uff09\u8f03\u5c11\u9047\u5230\u9019\u7a2e\u60c5\u6cc1\uff0c\u53cd\u800c\u662f\u5176\u4ed6\u689d\u4f8b\u6216\u8fa6\u6cd5\u4e2d\uff0c\u689d\u6587\u5167\u5bb9\u904e\u9577\u7684\u60c5\u6cc1\u8f03\u591a\u3002\u4ee5\u53ca\u6bcf\u4e00\u7a2e\u6cd5\u5f8b\u7684\u6587\u4ef6\u67b6\u69cb\u90fd\u4e0d\u592a\u4e00\u6a23\uff08\u7de8-\u7ae0-\u7bc0-\u6b3e-\u689d\uff09\uff0c\u56e0\u6b64\uff0c\u8cc7\u6599\u5207\u5272\u65b9\u6cd5\u4ecd\u6709\u9032\u4e00\u6b65\u512a\u5316\u7684\u7a7a\u9593\u3002\u7576\u7136\u4e86\uff0c\u6cd5\u898f\u9019\u7a2e\u5bb9\u6613\u5206\u5272\u7684\u77e5\u8b58\u4f86\u7df4\u7fd2 RAG 
\u662f\u76f8\u7576\u7406\u60f3\u7684\u72c0\u6cc1\uff0c\u73fe\u5be6\u4e2d\u5404\u7a2e\u77e5\u8b58\u6587\u4ef6\u8a72\u5982\u4f55\u5206\u5272\u5c31\u662f\u4e00\u9580\u5b78\u554f\uff0c\u5426\u5247\u53ea\u80fd\u7b49\u5f85<a href=\"https:\/\/huggingface.co\/BAAI\/bge-m3\" target=\"_blank\">\u652f\u63f4\u66f4\u591a tokens<\/a> \u7684\u7e41\u4e2d embedding \u6a21\u578b\u51fa\u73fe\u3002<\/p>\n<p data-end=\"728\" data-start=\"617\">\u96a8\u8457 AI \u79d1\u6280\u7684\u767c\u5c55\uff0cLLM \u548c embedding \u6280\u8853\u4e0d\u65b7\u9032\u6b65\uff0c\u6216\u8a31\u5c07\u4f86\u5c31\u80fd\u628a\u6574\u5957\u516d\u6cd5\u5168\u66f8\u7d0d\u5165\u5176\u4e2d\uff0c\u5c46\u6642\u9019\u4e9b\u7a0b\u5f0f\u78bc\u5c31\u662f\u59a5\u59a5\u7684\u514d\u8cbb AI \u6cd5\u5f8b\u9867\u554f\uff0c\u96e3\u9053\u5f8b\u5e2b\u4e5f\u8981\u8ddf\u8457\u5931\u696d\u4e86\u55ce\uff1f\u55ef\uff5e\u6709\u5922\u6700\u7f8e\uff0c\u5e0c\u671b\u76f8\u96a8&#8230;<\/p>\n<p data-end=\"728\" data-start=\"617\">\uff08\u5ef6\u4f38\u95b1\u8b80\uff1a\u7a81\u7834 RAG \u74f6\u9838\uff01<a href=\"https:\/\/buzzorange.com\/techorange\/2025\/04\/01\/beyond-rag-search-r1-ai\/\" target=\"_blank\">Search-R1 \u76f4\u63a5\u628a\u300c\u641c\u5c0b\u5f15\u64ce\u300d\u6574\u5408\u5230\u63a8\u7406\u6a21\u578b\u4e2d<\/a>\uff09<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u5728\u4e0a\u4e00\u7bc7\u6587\u7ae0\u4e2d\u4ecb\u7d39\u4e86\u5982\u4f55\u5229\u7528 Telegram \u548c Ollama \u6253\u9020\u4e00\u500b\u4f4e\u6210\u672c\u7684 AI \u804a\u5929\u6a5f\u5668\u4eba\u3002\u96d6\u7136\u4f7f\u7528 
[&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"site-container-style":"default","site-container-layout":"default","site-sidebar-layout":"default","disable-article-header":"default","disable-site-header":"default","disable-site-footer":"default","disable-content-area-spacing":"default","footnotes":""},"categories":[56],"tags":[],"class_list":["post-3880","post","type-post","status-publish","format-standard","hentry","category-56"],"_links":{"self":[{"href":"http:\/\/benjenq.ddns.net\/blog\/wp-json\/wp\/v2\/posts\/3880","targetHints":{"allow":["GET"]}}],"collection":[{"href":"http:\/\/benjenq.ddns.net\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/benjenq.ddns.net\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/benjenq.ddns.net\/blog\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/benjenq.ddns.net\/blog\/wp-json\/wp\/v2\/comments?post=3880"}],"version-history":[{"count":0,"href":"http:\/\/benjenq.ddns.net\/blog\/wp-json\/wp\/v2\/posts\/3880\/revisions"}],"wp:attachment":[{"href":"http:\/\/benjenq.ddns.net\/blog\/wp-json\/wp\/v2\/media?parent=3880"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/benjenq.ddns.net\/blog\/wp-json\/wp\/v2\/categories?post=3880"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/benjenq.ddns.net\/blog\/wp-json\/wp\/v2\/tags?post=3880"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}