Prechádzať zdrojové kódy

:memo: 更新 OCR 注意事项

Liang Ding 2 rokov pred
rodič
commit
f2004f2b07

+ 97 - 20
app/guide/20210808180117-6v0mkxr/20200923234011-ieuun1p/20210808180303-xaduj2o/20200924100744-br924ar.sy

@@ -6,7 +6,7 @@
 		"id": "20200924100744-br924ar",
 		"title": "Assets",
 		"type": "doc",
-		"updated": "20230117003842"
+		"updated": "20230202231916"
 	},
 	"Children": [
 		{
@@ -696,33 +696,110 @@
 				{
 					"Type": "NodeText",
 					"Data": " program needs to be manually installed first. "
-				},
-				{
-					"Type": "NodeTextMark",
-					"TextMarkType": "tag",
-					"TextMarkTextContent": "Note"
-				},
+				}
+			]
+		},
+		{
+			"ID": "20230202231728-0z5bs0m",
+			"Type": "NodeParagraph",
+			"Properties": {
+				"id": "20230202231728-0z5bs0m",
+				"updated": "20230202231731"
+			},
+			"Children": [
 				{
 					"Type": "NodeText",
-					"Data": "​ that you need to check the Chinese language pack when installing Tesseract OCR, and add the installed Tesseract-OCR directory path to the environment variable PATH, so that SiYuan can directly call the "
+					"Data": "​"
 				},
 				{
 					"Type": "NodeTextMark",
-					"TextMarkType": "code",
-					"TextMarkTextContent": "tesseract"
-				},
-				{
-					"Type": "NodeText",
-					"Data": "​ command to extract text from image OCR. If the installation is normal, you can search for "
-				},
+					"TextMarkType": "tag",
+					"TextMarkTextContent": "Note:"
+				}
+			]
+		},
+		{
+			"ID": "20230202231731-bdh7lab",
+			"Type": "NodeList",
+			"ListData": {},
+			"Properties": {
+				"id": "20230202231731-bdh7lab",
+				"updated": "20230202231916"
+			},
+			"Children": [
 				{
-					"Type": "NodeTextMark",
-					"TextMarkType": "code",
-					"TextMarkTextContent": "tesseract-ocr enabled"
+					"ID": "20230202231732-n7z8jth",
+					"Type": "NodeListItem",
+					"ListData": {
+						"BulletChar": 42,
+						"Marker": "Kg=="
+					},
+					"Properties": {
+						"id": "20230202231732-n7z8jth",
+						"updated": "20230202231916"
+					},
+					"Children": [
+						{
+							"ID": "20230202231732-f3jkj7p",
+							"Type": "NodeParagraph",
+							"Properties": {
+								"id": "20230202231732-f3jkj7p",
+								"updated": "20230202231916"
+							},
+							"Children": [
+								{
+									"Type": "NodeText",
+									"Data": "When installing Tesseract OCR, select the language packs you need and add the installed Tesseract-OCR directory path to the PATH environment variable, so that SiYuan can directly call the "
+								},
+								{
+									"Type": "NodeTextMark",
+									"TextMarkType": "code",
+									"TextMarkTextContent": "tesseract"
+								},
+								{
+									"Type": "NodeText",
+									"Data": "​ command to extract text from images via OCR. If the installation is successful, you can search for "
+								},
+								{
+									"Type": "NodeTextMark",
+									"TextMarkType": "code",
+									"TextMarkTextContent": "tesseract-ocr enabled"
+								},
+								{
+									"Type": "NodeText",
+									"Data": "​ in the kernel boot log."
+								}
+							]
+						}
+					]
 				},
 				{
-					"Type": "NodeText",
-					"Data": "​ in the kernel boot log."
+					"ID": "20230202231800-z8hswmk",
+					"Type": "NodeListItem",
+					"ListData": {
+						"BulletChar": 42,
+						"Marker": "Kg=="
+					},
+					"Properties": {
+						"id": "20230202231800-z8hswmk",
+						"updated": "20230202231819"
+					},
+					"Children": [
+						{
+							"ID": "20230202231800-c3x45ky",
+							"Type": "NodeParagraph",
+							"Properties": {
+								"id": "20230202231800-c3x45ky",
+								"updated": "20230202231819"
+							},
+							"Children": [
+								{
+									"Type": "NodeText",
+									"Data": "Do not install too many language packs; otherwise OCR will be slow or may even time out and return empty results, and it will consume excessive system resources."
+								}
+							]
+						}
+					]
 				}
 			]
 		},

+ 98 - 17
app/guide/20210808180117-czj9bvb/20200812220555-lj3enxa/20210808180321-hbvl5c2/20200915214115-42b8zma.sy

@@ -6,7 +6,7 @@
 		"id": "20200915214115-42b8zma",
 		"title": "资源文件",
 		"type": "doc",
-		"updated": "20230117003750"
+		"updated": "20230202231842"
 	},
 	"Children": [
 		{
@@ -718,33 +718,114 @@
 				{
 					"Type": "NodeText",
 					"Data": " 程序。"
-				},
-				{
-					"Type": "NodeTextMark",
-					"TextMarkType": "tag",
-					"TextMarkTextContent": "注意"
-				},
+				}
+			]
+		},
+		{
+			"ID": "20230202231304-22lvszc",
+			"Type": "NodeParagraph",
+			"Properties": {
+				"id": "20230202231304-22lvszc",
+				"updated": "20230202231309"
+			},
+			"Children": [
 				{
 					"Type": "NodeText",
-					"Data": "​在安装 Tesseract OCR 时需要勾选中文语言包,并将安装后的 Tesseract-OCR 目录路径添加到环境变量 PATH 中,这样思源才能直接调用 "
+					"Data": "​"
 				},
 				{
 					"Type": "NodeTextMark",
-					"TextMarkType": "code",
-					"TextMarkTextContent": "tesseract"
+					"TextMarkType": "tag",
+					"TextMarkTextContent": "注意"
 				},
 				{
 					"Type": "NodeText",
-					"Data": "​ 命令进行图片 OCR 提取文本。如果安装正常的话,在内核启动日志中可以搜索到 "
-				},
+					"Data": "​:"
+				}
+			]
+		},
+		{
+			"ID": "20230202231309-pcjl7c2",
+			"Type": "NodeList",
+			"ListData": {},
+			"Properties": {
+				"id": "20230202231309-pcjl7c2",
+				"updated": "20230202231842"
+			},
+			"Children": [
 				{
-					"Type": "NodeTextMark",
-					"TextMarkType": "code",
-					"TextMarkTextContent": "tesseract-ocr enabled"
+					"ID": "20230202231311-7qdk1za",
+					"Type": "NodeListItem",
+					"ListData": {
+						"BulletChar": 42,
+						"Marker": "Kg=="
+					},
+					"Properties": {
+						"id": "20230202231311-7qdk1za",
+						"updated": "20230202231842"
+					},
+					"Children": [
+						{
+							"ID": "20230202231311-n1pf7in",
+							"Type": "NodeParagraph",
+							"Properties": {
+								"id": "20230202231311-n1pf7in",
+								"updated": "20230202231842"
+							},
+							"Children": [
+								{
+									"Type": "NodeText",
+									"Data": "在安装 Tesseract OCR 时需要勾选你需要的语言包,并将安装后的 Tesseract-OCR 目录路径添加到环境变量 PATH 中,这样思源才能直接调用 "
+								},
+								{
+									"Type": "NodeTextMark",
+									"TextMarkType": "code",
+									"TextMarkTextContent": "tesseract"
+								},
+								{
+									"Type": "NodeText",
+									"Data": "​ 命令进行图片 OCR 提取文本。如果安装正常的话,在内核启动日志中可以搜索到 "
+								},
+								{
+									"Type": "NodeTextMark",
+									"TextMarkType": "code",
+									"TextMarkTextContent": "tesseract-ocr enabled"
+								},
+								{
+									"Type": "NodeText",
+									"Data": "​。"
+								}
+							]
+						}
+					]
 				},
 				{
-					"Type": "NodeText",
-					"Data": "​。"
+					"ID": "20230202231321-q1b1tza",
+					"Type": "NodeListItem",
+					"ListData": {
+						"BulletChar": 42,
+						"Marker": "Kg=="
+					},
+					"Properties": {
+						"id": "20230202231321-q1b1tza",
+						"updated": "20230202231443"
+					},
+					"Children": [
+						{
+							"ID": "20230202231321-5ugmgf0",
+							"Type": "NodeParagraph",
+							"Properties": {
+								"id": "20230202231321-5ugmgf0",
+								"updated": "20230202231443"
+							},
+							"Children": [
+								{
+									"Type": "NodeText",
+									"Data": "语言包不要安装太多,否则会导致 OCR 缓慢甚至超时返回空结果,并且占用过多的系统资源"
+								}
+							]
+						}
+					]
 				}
 			]
 		},

+ 99 - 18
app/guide/20211226090932-5lcq56f/20211226115423-d5z1joq/20211226121203-rjjngpz/20211226123038-4umgpxy.sy

@@ -5,7 +5,7 @@
 	"Properties": {
 		"id": "20211226123038-4umgpxy",
 		"title": "資料文件",
-		"updated": "20230117003908"
+		"updated": "20230202231927"
 	},
 	"Children": [
 		{
@@ -696,34 +696,115 @@
 				},
 				{
 					"Type": "NodeText",
-					"Data": " 程序。​"
-				},
-				{
-					"Type": "NodeTextMark",
-					"TextMarkType": "tag",
-					"TextMarkTextContent": "注意"
-				},
+					"Data": " 程序。"
+				}
+			]
+		},
+		{
+			"ID": "20230202231513-80u3j7f",
+			"Type": "NodeParagraph",
+			"Properties": {
+				"id": "20230202231513-80u3j7f",
+				"updated": "20230202231515"
+			},
+			"Children": [
 				{
 					"Type": "NodeText",
-					"Data": "​在安裝 Tesseract OCR 時需要勾選中文語言包,並將安裝後的 Tesseract-OCR 目錄路徑添加到環境變量 PATH 中,這樣思源才能直接調用 "
+					"Data": "​"
 				},
 				{
 					"Type": "NodeTextMark",
-					"TextMarkType": "code",
-					"TextMarkTextContent": "tesseract"
+					"TextMarkType": "tag",
+					"TextMarkTextContent": "注意"
 				},
 				{
 					"Type": "NodeText",
-					"Data": "​ 命令進行圖片 OCR 提取文本。如果安裝正常的話,在內核啟動日誌中可以搜索到 "
-				},
+					"Data": "​:"
+				}
+			]
+		},
+		{
+			"ID": "20230202231516-o6k9mj1",
+			"Type": "NodeList",
+			"ListData": {},
+			"Properties": {
+				"id": "20230202231516-o6k9mj1",
+				"updated": "20230202231927"
+			},
+			"Children": [
 				{
-					"Type": "NodeTextMark",
-					"TextMarkType": "code",
-					"TextMarkTextContent": "tesseract-ocr enabled"
+					"ID": "20230202231516-pwj2ndg",
+					"Type": "NodeListItem",
+					"ListData": {
+						"BulletChar": 42,
+						"Marker": "Kg=="
+					},
+					"Properties": {
+						"id": "20230202231516-pwj2ndg",
+						"updated": "20230202231927"
+					},
+					"Children": [
+						{
+							"ID": "20230202231516-8trf08t",
+							"Type": "NodeParagraph",
+							"Properties": {
+								"id": "20230202231516-8trf08t",
+								"updated": "20230202231927"
+							},
+							"Children": [
+								{
+									"Type": "NodeText",
+									"Data": "在安裝 Tesseract OCR 時需要勾選你需要的語言包,並將安裝後的 Tesseract-OCR 目錄路徑添加到環境變量 PATH 中,這樣思源才能直接調用 "
+								},
+								{
+									"Type": "NodeTextMark",
+									"TextMarkType": "code",
+									"TextMarkTextContent": "tesseract"
+								},
+								{
+									"Type": "NodeText",
+									"Data": "​ 命令進行圖片 OCR 提取文本。如果安裝正常的話,在內核啟動日誌中可以搜索到 "
+								},
+								{
+									"Type": "NodeTextMark",
+									"TextMarkType": "code",
+									"TextMarkTextContent": "tesseract-ocr enabled"
+								},
+								{
+									"Type": "NodeText",
+									"Data": "​。"
+								}
+							]
+						}
+					]
 				},
 				{
-					"Type": "NodeText",
-					"Data": "​。"
+					"ID": "20230202231519-x47s7he",
+					"Type": "NodeListItem",
+					"ListData": {
+						"BulletChar": 42,
+						"Marker": "Kg=="
+					},
+					"Properties": {
+						"id": "20230202231519-x47s7he",
+						"updated": "20230202231811"
+					},
+					"Children": [
+						{
+							"ID": "20230202231519-04f6dh6",
+							"Type": "NodeParagraph",
+							"Properties": {
+								"id": "20230202231519-04f6dh6",
+								"updated": "20230202231811"
+							},
+							"Children": [
+								{
+									"Type": "NodeText",
+									"Data": "語言包不要安裝太多,否則會導致 OCR 緩慢甚至超時返回空結果,並且佔用過多的系統資源"
+								}
+							]
+						}
+					]
 				}
 			]
 		},