{"id":6355,"date":"2024-05-22T19:54:26","date_gmt":"2024-05-22T11:54:26","guid":{"rendered":"https:\/\/aict.nkust.edu.tw\/digitrans\/?p=6355"},"modified":"2024-12-13T20:42:50","modified_gmt":"2024-12-13T12:42:50","slug":"%e5%bf%ab%e9%80%9f%e5%b8%b6%e4%bd%a0%e7%9c%8b-phi-3-vision-%e5%be%ae%e8%bb%9f%e6%89%80%e5%87%ba%e7%9a%84%e5%a4%9a%e6%a8%a1%e6%85%8b%e5%b0%8f%e5%9e%8b%e6%96%87%e5%ad%97%e5%9c%96%e5%83%8f","status":"publish","type":"post","link":"https:\/\/aict.nkust.edu.tw\/digitrans\/?p=6355","title":{"rendered":"[\u5feb\u901f\u5e36\u4f60\u770b] Phi-3 Vision \u2014 \u5fae\u8edf\u6240\u51fa\u7684\u591a\u6a21\u614b\u5c0f\u578b\u6587\u5b57\u5716\u50cf\u958b\u6e90\u6a21\u578b"},"content":{"rendered":"\n<p class=\"wp-block-paragraph\">2024-05-22 | Simon Liu<\/p>\n\n\n\n<h1 class=\"wp-block-heading\" id=\"fbd2\">\u7c21\u4ecb<\/h1>\n\n\n\n<p class=\"wp-block-paragraph\" id=\"162c\">Phi-3 Vision-128K-Instruct \u662f\u7531 Microsoft \u958b\u767c\u7684\u591a\u6a21\u614b\u6a21\u578b\uff0c\u65bc\u4eca\uff085\u670822\u65e5\uff09\u6b63\u5f0f\u5728 Huggingface \u4e0a\u958b\u6e90\u3002\u9019\u500b\u6a21\u578b\u80fd\u540c\u6642\u8655\u7406\u6587\u672c\u548c\u5716\u50cf\u6578\u64da\uff0c\u64c1\u6709 128K \u7684\u4e0a\u4e0b\u6587\u9577\u5ea6\uff0c\u9069\u5408\u9ad8\u54c1\u8cea\u3001\u6df1\u5165\u63a8\u7406\u7684\u6578\u64da\u8655\u7406\u4efb\u52d9\u3002\u5ee3\u6cdb\u61c9\u7528\u65bc\u5716\u50cf\u7406\u89e3\u3001\u5149\u5b78\u5b57\u7b26\u8b58\u5225\uff08OCR\uff09\u3001\u5716\u8868\u548c\u8868\u683c\u89e3\u6790\u7b49\u9818\u57df\u3002<\/p>\n\n\n\n<h1 class=\"wp-block-heading\" id=\"c279\">\u4e3b\u8981\u529f\u80fd<\/h1>\n\n\n\n<ol 
class=\"wp-block-list\">\n<li><strong>\u591a\u6a21\u614b\u8655\u7406<\/strong>\uff1a\u540c\u6642\u652f\u6301\u6587\u672c\u548c\u5716\u50cf\u8f38\u5165\uff0c\u9069\u5408\u8cc7\u6e90\u6709\u9650\u7684\u74b0\u5883\u4e2d\u9ad8\u6548\u904b\u884c\u3002<\/li>\n\n\n\n<li><strong>\u4f4e\u5ef6\u9072\u5834\u666f<\/strong>\uff1a\u9069\u7528\u65bc\u9700\u8981\u5feb\u901f\u53cd\u61c9\u7684\u61c9\u7528\u3002<\/li>\n\n\n\n<li><strong>\u5716\u50cf\u7406\u89e3<\/strong>\uff1a\u64c1\u6709\u5f37\u5927\u7684\u5716\u50cf\u89e3\u6790\u80fd\u529b\uff0c\u53ef\u8655\u7406\u5404\u7a2e\u5716\u8868\u548c\u8868\u683c\u3002<\/li>\n<\/ol>\n\n\n\n<h1 class=\"wp-block-heading\" id=\"3a59\">\u6a21\u578b\u67b6\u69cb<\/h1>\n\n\n\n<p class=\"wp-block-paragraph\" id=\"1b66\">Phi-3 Vision-128K-Instruct \u5305\u542b 42 \u5104\u500b\u53c3\u6578\uff0c\u7531\u5716\u50cf\u7de8\u78bc\u5668\u3001\u9023\u63a5\u5668\u3001\u6295\u5f71\u5668\u548c Phi-3 Mini \u8a9e\u8a00\u6a21\u578b\u7d44\u6210\u3002\u8a72\u6a21\u578b\u4f7f\u7528\u591a\u9054 5000 \u5104 token \u7684\u591a\u7a2e\u985e\u578b\u5716\u7247\u53ca\u6587\u5b57\u8cc7\u6599\u9032\u884c\u8a13\u7df4\uff0c\u5305\u62ec\u56b4\u9078\u516c\u958b\u5167\u5bb9\u3001\u9ad8\u54c1\u8cea\u6559\u80b2\u8cc7\u6599\u8207\u7a0b\u5f0f\u78bc\u3001\u9ad8\u54c1\u8cea\u7684\u5716\u6587\u6574\u5408\u8cc7\u6599\u3001\u65b0\u7684\u300c\u6559\u79d1\u66f8\u7b49\u7d1a\u300d\u5408\u6210\u8cc7\u6599\u53ca\u5716\u8868\u5716\u7247\uff0c\u9084\u6709\u9ad8\u54c1\u8cea\u7684\u76e3\u7763\u5f0f\u804a\u5929\u683c\u5f0f\u8cc7\u6599\uff0c\u6db5\u84cb\u9075\u5f9e\u6307\u4ee4\u3001\u771f\u5be6\u3001\u8aa0\u5be6\u548c\u52a9\u76ca\u7b49\u4e3b\u984c\u3002\u8cc7\u6599\u8490\u96c6\u904e\u7a0b\u4e2d\u5df2\u7be9\u9078\u6389\u5305\u542b\u500b\u8cc7\u7684\u8cc7\u6599\uff0c\u4ee5\u78ba\u4fdd\u96b1\u79c1\u3002<\/p>\n\n\n\n<h1 class=\"wp-block-heading\" id=\"0b93\">\u8ca0\u8cac\u4efb\u7684 AI \u8003\u91cf<\/h1>\n\n\n\n<p class=\"wp-block-paragraph\" 
id=\"a7ae\">\u4f7f\u7528\u6a21\u578b\u6642\u61c9\u9075\u5b88\u6cd5\u5f8b\u6cd5\u898f\uff0c\u4e26\u5728\u9ad8\u98a8\u96aa\u5834\u666f\u4e2d\u9032\u884c\u5b89\u5168\u8a55\u4f30\u3002\u5efa\u8b70\u5be6\u65bd\u900f\u660e\u5ea6\u6700\u4f73\u5be6\u8e10\u4e26\u5efa\u7acb\u53cd\u994b\u6a5f\u5236\u3002<\/p>\n\n\n\n<h1 class=\"wp-block-heading\" id=\"69a4\">\u4f7f\u7528\u65b9\u6cd5<\/h1>\n\n\n\n<p class=\"wp-block-paragraph\" id=\"25b6\">\u900f\u904e&nbsp;<code>transformers<\/code>&nbsp;\u5eab\u4e2d\u7684&nbsp;<code>AutoModelForCausalLM<\/code>&nbsp;\u548c&nbsp;<code>AutoProcessor<\/code>&nbsp;\u985e\u52a0\u8f09\u548c\u904b\u884c\u6a21\u578b\uff0c\u4ee5\u4e0b\u70ba\u7bc4\u4f8b\u4ee3\u78bc\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-preformatted\">from PIL import Image<br>import requests<br>from transformers import AutoModelForCausalLM, AutoProcessor<br><br>model_id = \"microsoft\/Phi-3-vision-128k-instruct\"<br>model = AutoModelForCausalLM.from_pretrained(model_id, device_map=\"cuda\", trust_remote_code=True, torch_dtype=\"auto\")<br>processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)<br><br>messages = [<br>    {\"role\": \"user\", \"content\": \"&lt;|image_1|&gt;\\nWhat is shown in this image?\"},<br>    {\"role\": \"assistant\", \"content\": \"This chart shows the percentage of respondents agreeing with various statements about meeting preparedness.\"},<br>]<br><br>url = \"https:\/\/example.com\/image.png\"<br>image = Image.open(requests.get(url, stream=True).raw)<br>prompt = processor.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)<br>inputs = processor(prompt, [image], return_tensors=\"pt\").to(\"cuda:0\")<br><br>generation_args = {\"max_new_tokens\": 500, \"temperature\": 0.0, \"do_sample\": False}<br>generate_ids = model.generate(**inputs, eos_token_id=processor.tokenizer.eos_token_id, **generation_args)<br>generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]<br>response = 
processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]<br><br>print(response)<\/pre>\n\n\n\n<h1 class=\"wp-block-heading\" id=\"2d45\">\u8cc7\u6e90\u8207\u6280\u8853\u6587\u6a94<\/h1>\n\n\n\n<p class=\"wp-block-paragraph\" id=\"db1f\">\u66f4\u591a\u8a73\u60c5\u53ca\u6280\u8853\u6587\u6a94\u8acb\u53c3\u8003\u4ee5\u4e0b\u8cc7\u6e90\uff1a<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>Phi-3 Microsoft Blog<\/li>\n\n\n\n<li>Phi-3 Technical Report<\/li>\n\n\n\n<li>Phi-3 on Azure AI Studio<\/li>\n\n\n\n<li>Phi-3 Cookbook<\/li>\n<\/ul>\n\n\n\n<h1 class=\"wp-block-heading\" id=\"f288\">\u7d50\u8ad6<\/h1>\n\n\n\n<p class=\"wp-block-paragraph\" id=\"16df\">Phi-3 Vision-128K-Instruct \u662f\u4e00\u6b3e\u8f15\u91cf\u7d1a\u3001\u591a\u6a21\u614b\u6a21\u578b\uff0c\u5177\u5099\u9ad8\u9054 128K \u7684\u4e0a\u4e0b\u6587\u9577\u5ea6\u3002\u8a72\u6a21\u578b\u57fa\u65bc\u7d9c\u5408\u7684\u6587\u672c\u8207\u5716\u50cf\u6578\u64da\u9032\u884c\u8a13\u7df4\uff0c\u5c08\u6ce8\u65bc\u9ad8\u8cea\u91cf\u548c\u9ad8\u63a8\u7406\u5bc6\u5ea6\u7684\u6578\u64da\u3002\u9069\u7528\u65bc\u5ee3\u6cdb\u7684\u5546\u696d\u548c\u7814\u7a76\u7528\u9014\uff0c\u5c24\u5176\u662f\u5728\u8a08\u7b97\u8cc7\u6e90\u6709\u9650\u548c\u5ef6\u9072\u654f\u611f\u7684\u74b0\u5883\u4e2d\u3002Phi-3 Vision-128K-Instruct \u6a21\u578b\u5177\u6709\u5f37\u5927\u7684\u5716\u50cf\u7406\u89e3\u548c\u5149\u5b78\u5b57\u7b26\u8b58\u5225 (OCR) \u80fd\u529b\uff0c\u4e26\u63d0\u4f9b\u4e86\u5b89\u5168\u548c\u8cac\u4efbAI\u7684\u8003\u91cf\u3002\u8a72\u6a21\u578b\u4f7f\u7528\u65b9\u4fbf\uff0c\u4e26\u4e14\u5728\u5404\u7a2e\u96f6\u6a23\u672c\u57fa\u6e96\u6e2c\u8a66\u4e2d\u8868\u73fe\u512a\u7570\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\" id=\"1d30\">\u66f4\u591a\u8a73\u60c5\u8acb\u53c3\u8003\u00a0Phi-3 Vision-128K-Instruct<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u8cc7\u6599\u4f86\u6e90:<a href=\"https:\/\/medium.com\/@simon3458\/phi-3-vision-brief-introduction-de97639d4eb8\" 
data-type=\"link\" data-id=\"https:\/\/medium.com\/@simon3458\/phi-3-vision-brief-introduction-de97639d4eb8\">https:\/\/medium.com\/@simon3458\/phi-3-vision-brief-introduction-de97639d4eb8<\/a><\/p>\n","protected":false},"excerpt":{"rendered":"<p>2024-05-22 | Simon Liu \u7c21\u4ecb Phi-3 Vision-128K-Instruct \u662f\u7531 Microsoft \u958b\u767c\u7684\u591a\u6a21\u614b\u6a21\u578b\uff0c\u65bc\u4eca\uff085\u670822\u65e5\uff09\u6b63\u5f0f\u5728 Hu&hellip;<\/p>\n","protected":false},"author":9,"featured_media":6356,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"_jetpack_newsletter_access":"","_jetpack_dont_email_post_to_subs":false,"_jetpack_newsletter_tier_id":0,"_jetpack_memberships_contains_paywalled_content":false,"_jetpack_memberships_contains_paid_content":false,"footnotes":"","jetpack_post_was_ever_published":false},"categories":[579,4],"tags":[26,40],"class_list":["post-6355","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-579","category-industry-news","tag-ai","tag-40"],"gutentor_comment":0,"jetpack_featured_media_url":"https:\/\/i0.wp.com\/aict.nkust.edu.tw\/digitrans\/wp-content\/uploads\/2024\/07\/1_WlghnohYEjLfR1pIyLHpgg.webp?fit=1100%2C620&ssl=1","jetpack-related-posts":[],"jetpack_sharing_enabled":true,"_links":{"self":[{"href":"https:\/\/aict.nkust.edu.tw\/digitrans\/index.php?rest_route=\/wp\/v2\/posts\/6355","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/aict.nkust.edu.tw\/digitrans\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/aict.nkust.edu.tw\/digitrans\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/aict.nkust.edu.tw\/digitrans\/index.php?rest_route=\/wp\/v2\/users\/9"}],"replies":[{"embeddable":true,"href":"https:\/\/aict.nkust.edu.tw\/digitrans\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=6355"}],"version-history":[{"count":1,"href":"https:\/\/aict.nku
st.edu.tw\/digitrans\/index.php?rest_route=\/wp\/v2\/posts\/6355\/revisions"}],"predecessor-version":[{"id":6357,"href":"https:\/\/aict.nkust.edu.tw\/digitrans\/index.php?rest_route=\/wp\/v2\/posts\/6355\/revisions\/6357"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/aict.nkust.edu.tw\/digitrans\/index.php?rest_route=\/wp\/v2\/media\/6356"}],"wp:attachment":[{"href":"https:\/\/aict.nkust.edu.tw\/digitrans\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=6355"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/aict.nkust.edu.tw\/digitrans\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=6355"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/aict.nkust.edu.tw\/digitrans\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=6355"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}