{"id":6720,"date":"2020-05-17T20:09:21","date_gmt":"2020-05-17T12:09:21","guid":{"rendered":"http:\/\/www.brofive.org\/?p=6720"},"modified":"2020-05-17T21:07:14","modified_gmt":"2020-05-17T13:07:14","slug":"%e6%a1%8c%e9%9d%a2%e6%90%9c%e7%b4%a2%ef%bc%9arecoll%e8%bf%98%e6%98%afanytxt%ef%bc%9f%e6%8e%a8%e8%8d%90recoll","status":"publish","type":"post","link":"http:\/\/www.brofive.net\/?p=6720","title":{"rendered":"\u684c\u9762\u641c\u7d22\uff1arecoll\u8fd8\u662fAnyTxt\uff1f\u63a8\u8350recoll"},"content":{"rendered":"<p><span style=\"font-size: large;\">Everything\u662f\u4e00\u6b3e\u5fc5\u9009\u7684\u684c\u9762\u6587\u4ef6\u641c\u7d22\uff0c\u4f46\u662f\u8fd9\u4e2a\u8f6f\u4ef6\u6ca1\u6cd5\u9488\u5bf9\u5185\u5bb9\u8fdb\u884c\u68c0\u7d22\u3002\u5728Google Desktop\u505c\u7528\u4e4b\u540e, \u4e5f\u4e00\u76f4\u82e6\u82e6\u8ffd\u5bfb\u4e86\u5f88\u4e45\uff0c\u751a\u81f3\u627e\u8fc7\u4e00\u4e2a\u5f00\u6e90Lucene\u7684\u65b9\u6848\uff0c\u4f46\u662f\u8981\u81ea\u5df1\u6539\u9020\u5e76\u4e14\u5199\u7684\u524d\u7aef\u3002\u540e\u6765\u5c31\u77e5\u9053\u6709\u4e00\u4e2aLinux\u5e73\u53f0\u7684\u5f00\u6e90\u5185\u5bb9\u641c\u7d22\u5f15\u64cerecoll\uff0c\u770b\u4e86\u4e00\u4e0b\uff0c\u6b63\u662f\u8981\u627e\u7684\uff01\u4f46\u662f\u8fd9\u4e2arecoll\u9700\u8981\u79fb\u690d\u5230Windows\u624d\u80fd\u4f7f\u7528\u3002\u6709\u4e00\u4e2a\u79fb\u690d\uff0c\u4f46\u662f\u5e76\u4e0d\u5f00\u653e\uff0c\u4e8e\u662f\u5c31\u505c\u4e86\u4e00\u9635\u5b50\u6ca1\u7ba1\u3002\u540e\u6765\u6709\u4eba\u627e\u5230\u4e86\u53ef\u4ee5\u4f7f\u7528\u7684Windows\u7248\uff0c\u8d76\u5feb\u4e0b\u8f7d\u4e00\u4e2a\uff0c\u679c\u7136\u4e0d\u9519\uff01<\/span><\/p>\n<p><span style=\"font-size: large;\">\u540c\u65f6\u4e5f\u6709\u4e00\u4e2a\u5185\u5bb9\u641c\u7d22\u8f6f\u4ef6\uff0c\u53f7\u79f0Everything\u4f34\u4fa3\uff0c\u662f\u4e2a\u4e2d\u56fd\u5f00\u53d1\u8005\u7684\u4f5c\u54c1\uff0cAnyTxt\u3002\u4e5f\u5b89\u88c5\u4e0a\u6bd4\u8f83\u4e00\u4e0b\u3002<\/span><\/p>\n<p><a 
href=\"http:\/\/www.brofive.net\/wp-content\/uploads\/2020\/05\/image-3.png\"><span style=\"font-size: large;\"><img loading=\"lazy\" decoding=\"async\" style=\"display: inline; background-image: none;\" title=\"image\" src=\"http:\/\/www.brofive.net\/wp-content\/uploads\/2020\/05\/image_thumb-2.png\" alt=\"image\" width=\"132\" height=\"125\" border=\"0\" \/><\/span><\/a><\/p>\n<p><span style=\"font-size: large;\">\u4e00\u3001recoll<\/span><\/p>\n<p><span style=\"font-size: large;\">recoll\u662fJean-Francois Dockes\u7b49\u4e00\u4e9b\u5f00\u53d1\u8005\u7ef4\u62a4\u7684\u4e00\u4e2a\u684c\u9762\u641c\u7d22\u5f15\u64ce\u9879\u76ee\uff0c\u662f\u57fa\u4e8eGPLv2\u7684\u5f00\u6e90\u8f6f\u4ef6\u3002\u8fd9\u4e2a\u9879\u76ee\u5927\u7ea6\u59cb\u4e8e2012\u5e749\u670811\u65e5\u30022020\u5e745\u6708\u5206\u5e03\u7684\u7248\u672c\u662f1.27.0\uff0c\u770b\u6765\u4f9d\u7136\u662f\u4e00\u4e2a\u6d3b\u8dc3\u7684\u9879\u76ee\u3002\u4f5c\u8005\u5f00\u53d1\u4e86\u4e00\u4e2aWindows\u7684\u79fb\u690d\u7248\u672c<\/span><a title=\"https:\/\/www.lesbonscomptes.com\/recoll\/pages\/recoll-windows.html\" href=\"https:\/\/www.lesbonscomptes.com\/recoll\/pages\/recoll-windows.html\"><span style=\"font-size: large;\">https:\/\/www.lesbonscomptes.com\/recoll\/pages\/recoll-windows.html<\/span><\/a><span style=\"font-size: large;\">\uff0c\u4f46\u662f\u770b\u6765\u8fd9\u4e2a\u79fb\u690d\u5de5\u4f5c\u6bd4\u8f83\u8d39\u52b2\uff0c\u4f5c\u8005\u8981\u6c42\u6350\u6b3e\u624d\u80fd\u4e0b\u8f7d\uff085\/10\/20\u6b27\u5143\u5373\u53ef\uff09\u3002\u73b0\u5728\u7f51\u4e0a\u53ef\u4ee5\u770b\u52301.26.3\u7684Windows\u7248\u672c\u3002\u6211\u4e0b\u8f7d\u7684\u4e5f\u662f\u8fd9\u4e2a\u7248\u672c\uff0c\u4f5c\u8005\u7528\u4e86Qt\u4f5c\u4e3a\u8de8\u5e73\u53f0\u7684GUI\u7cfb\u7edf\u3002\u6837\u5b50\u548c\u539f\u751f\u7684Windows\u5e94\u7528\u6ca1\u5565\u533a\u522b\u3002<\/span><\/p>\n<p><a href=\"http:\/\/www.brofive.net\/wp-content\/uploads\/2020\/05\/image-4.png\"><span style=\"font-size: large;\"><img loading=\"lazy\" 
decoding=\"async\" style=\"display: inline; background-image: none;\" title=\"image\" src=\"http:\/\/www.brofive.net\/wp-content\/uploads\/2020\/05\/image_thumb-3.png\" alt=\"image\" width=\"861\" height=\"608\" border=\"0\" \/><\/span><\/a><\/p>\n<p><span style=\"font-size: large;\">Recoll\u7684\u4f7f\u7528\u548c\u5176\u4ed6\u7684\u641c\u7d22\u5f15\u64ce\u7c7b\u4f3c\uff0c\u9996\u5148\u9700\u8981\u5bf9\u672c\u673a\u6587\u4ef6\u505a\u7d22\u5f15\uff0c\u53ef\u4ee5\u8bbe\u7f6e\u4e0d\u7d22\u5f15\u7684\u76ee\u5f55\uff0c\u4f8b\u5982windows\uff0c\u7136\u540e\u5c31\u7b49\u5f85\u7d22\u5f15\u7ed3\u675f\u5427\u3002\u8bb0\u5f97\u5927\u7ea6\u7d22\u5f15\u4e861-2\u5929\u7684\u6837\u5b50\uff0c\u901f\u5ea6\u8fd8\u53ef\u4ee5\uff0c<strong>\u751f\u6210\u7684\u7d22\u5f15\u6587\u4ef6\u662f20GB\u5de6\u53f3<\/strong>\uff0c\u76ee\u524d\u770b\u4e0d\u4f1a\u81ea\u52a8\u66f4\u65b0\uff0c\u9700\u8981\u624b\u52a8\u624d\u884c\u3002recoll\u4f1a\u81ea\u52a8\u7d22\u5f15\u538b\u7f29\u5305\u4e2d\u7684\u6587\u4ef6\uff0c\u8fd9\u4e00\u70b9\u6bd4\u8f83\u65b9\u4fbf\u3002<\/span><\/p>\n<p><a href=\"http:\/\/www.brofive.net\/wp-content\/uploads\/2020\/05\/image-5.png\"><span style=\"font-size: large;\"><img loading=\"lazy\" decoding=\"async\" style=\"display: inline; background-image: none;\" title=\"image\" src=\"http:\/\/www.brofive.net\/wp-content\/uploads\/2020\/05\/image_thumb-4.png\" alt=\"image\" width=\"850\" height=\"600\" border=\"0\" \/><\/span><\/a><\/p>\n<p><span style=\"font-size: large;\">\u641c\u7d22\u901f\u5ea6\u5f88\u5feb\uff0c\u53ef\u4ee5\u5206\u9875\u663e\u793a\uff0c\u663e\u793a\u65b9\u5f0f\u662f\u6458\u8981+\u94fe\u63a5\u65b9\u5f0f\u3002recoll\u7684\u6587\u6863\u4e5f\u975e\u5e38\u5b8c\u6574\uff0c<\/span><a title=\"https:\/\/www.lesbonscomptes.com\/recoll\/usermanual\/\" href=\"https:\/\/www.lesbonscomptes.com\/recoll\/usermanual\/\"><span style=\"font-size: large;\">https:\/\/www.lesbonscomptes.com\/recoll\/usermanual\/<\/span><\/a><span style=\"font-size: 
large;\">\u3002<\/span><\/p>\n<p><span style=\"font-size: large;\">\u8fd9\u4e2a\u641c\u7d22\u5f15\u64ce\u8ba9\u6211\u77ac\u95f4\u627e\u5230\u4e86\u5f88\u591a\u5c18\u5c01\u5df2\u4e45\u7684\u4fe1\u606f\uff0c<strong><span style=\"color: #ff0000;\">\u91cd\u70b9\u63a8\u8350<\/span><\/strong>\u3002<\/span><\/p>\n<p><span style=\"font-size: large;\">\u00a0<\/span><span style=\"font-size: large;\">\u4e8c\u3001AnyTxt<\/span><\/p>\n<p><span style=\"font-size: large;\">AnyTxt\u662f\u4e2a\u514d\u8d39\u8f6f\u4ef6<\/span><a title=\"https:\/\/anytxt.net\/\" href=\"https:\/\/anytxt.net\/\"><span style=\"font-size: large;\">https:\/\/anytxt.net\/<\/span><\/a><span style=\"font-size: large;\">\uff0c\u5e76\u4e0d\u5f00\u6e90\u3002\u770b\u4e0a\u53bb\u4f5c\u8005\u662f\u4e2d\u56fd\u4eba\uff0c\u5927\u7ea6\u59cb\u4e8e2019\u5e745\u670823\u65e5\uff0c\u76ee\u524d\u5df2\u77e5\u5728\u6d3b\u8dc3\u66f4\u65b0\u3002\u6700\u65b0\u7684\u7248\u672c\u662f1.2.201\u3002AnyTxt\u4e5f\u662f\u57fa\u4e8eQt\u505a\u7684GUI\u754c\u9762\u3002\u662f\u81ea\u52a8\u540e\u53f0\u505a\u7d22\u5f15\u7684\uff0c\u53ef\u80fd\u662f\u91c7\u7528\u4e86\u5e38\u7528\u6587\u4ef6\u7d22\u5f15\u7684\u7b56\u7565\uff0c\uff08\u540c\u6837\u53ef\u4ee5\u8bbe\u7f6e\u4e0d\u7d22\u5f15\u76ee\u5f55\uff0c\u4f46\u91c7\u7528\u6309\u7167\u6587\u4ef6\u540e\u7f00\u7684\u65b9\u5f0f\u8bbe\u7f6e\uff0c\u800c\u4e0d\u662f\u5168\u5c40\u8bbe\u7f6e\uff0c\u8fd9\u4e00\u70b9\u4e0d\u5982recoll\uff09\u3002<\/span><\/p>\n<p><a href=\"http:\/\/www.brofive.net\/wp-content\/uploads\/2020\/05\/image-6.png\"><span style=\"font-size: large;\"><img loading=\"lazy\" decoding=\"async\" style=\"display: inline; background-image: none;\" title=\"image\" src=\"http:\/\/www.brofive.net\/wp-content\/uploads\/2020\/05\/image_thumb-5.png\" alt=\"image\" width=\"860\" height=\"661\" border=\"0\" \/><\/span><\/a><\/p>\n<p><span style=\"font-size: 
large;\">\u641c\u7d22\u7ed3\u679c\u6ca1\u6709\u663e\u793a\u6458\u8981\u3002\u8fd9\u4e00\u70b9\u4e0d\u592a\u53cb\u597d\u3002\u7d22\u5f15\u4fdd\u5b58\u5728SQLite\u6570\u636e\u5e93\u4e2d\uff0c\u8fd9\u4e2a\u8bbe\u8ba1\u7684\u7ed3\u679c\u5c31\u662f\u67e5\u8be2\u7ed3\u679c\u663e\u793a\u7684\u901f\u5ea6\u975e\u5e38\u6162\uff0c\u8981\u6392\u5e8f\u4e00\u9635\u5b50\uff0c\u6bd4\u4e0d\u4e0arecoll\u3002\u7531\u4e8e\u5229\u7528\u4e86\u6570\u636e\u5e93\uff0c\u7d22\u5f15\u53ef\u80fd\u76f8\u5bf9\u5c0f\u4e00\u4e9b\uff0c\u67e5\u770b\u4e86\u4e00\u4e0b\uff0c\u5927\u7ea62GB\u7684\u6837\u5b50\uff0c\u8fd9\u4e2a\u4e5f\u4e0d\u80fd\u8bf4\u597d\u5230\u54ea\u91cc\u53bb\uff0c\u56e0\u4e3a\u6709\u7684\u6570\u636e\u5e76\u6ca1\u6709\u88ab\u7d22\u5f15\u3002<\/span><\/p>\n<p><a href=\"http:\/\/www.brofive.net\/wp-content\/uploads\/2020\/05\/image-7.png\"><span style=\"font-size: large;\"><img loading=\"lazy\" decoding=\"async\" style=\"display: inline; background-image: none;\" title=\"image\" src=\"http:\/\/www.brofive.net\/wp-content\/uploads\/2020\/05\/image_thumb-6.png\" alt=\"image\" width=\"860\" height=\"546\" border=\"0\" \/><\/span><\/a><\/p>\n<p><strong><span style=\"color: #ff0000; font-size: large;\">\u5982\u679c\u60a8\u559c\u6b22\u50bb\u74dc\u578b\u5e94\u7528\uff0c\u7c7b\u4f3cEverything\u8fd9\u79cd\u7684\uff0c\u7528AnyTxt\u66f4\u597d\uff0c\u5982\u679c\u559c\u6b22\u66f4\u4e3a\u590d\u6742\u8bbe\u7f6e\u3001\u652f\u6301\u7684\u6587\u6863\u66f4\u591a\u3001\u6392\u5e8f\u66f4\u597d\u7684\u7cfb\u7edf\uff0c\u53e6\u5916\u5b58\u50a8\u7a7a\u95f4\u5982\u679c\u4e5f\u6bd4\u8f83\u591a\uff0c\u5efa\u8bae\u4f7f\u7528recoll<\/span><\/strong><\/p>\n<h5><span style=\"font-size: large;\">\u00a0<\/span><\/h5>\n<p><span style=\"font-size: large;\"><strong>\u76f8\u5173\u4ecb\u7ecd\u548c\u4e0b\u8f7d\u94fe\u63a5<\/strong>\uff1a<\/span><\/p>\n<p><a title=\"https:\/\/www.lesbonscomptes.com\/recoll\/index.html\" href=\"https:\/\/www.lesbonscomptes.com\/recoll\/index.html\"><span style=\"font-size: 
large;\">https:\/\/www.lesbonscomptes.com\/recoll\/index.html<\/span><\/a><\/p>\n<p><a title=\"https:\/\/www.lesbonscomptes.com\/recoll\/pages\/recoll-windows.html\" href=\"https:\/\/www.lesbonscomptes.com\/recoll\/pages\/recoll-windows.html\"><span style=\"font-size: large;\">https:\/\/www.lesbonscomptes.com\/recoll\/pages\/recoll-windows.html<\/span><\/a><\/p>\n<p><a title=\"https:\/\/gitter.im\/anytxt\/community\" href=\"https:\/\/gitter.im\/anytxt\/community\"><span style=\"font-size: large;\">https:\/\/gitter.im\/anytxt\/community<\/span><\/a><\/p>\n<p><a title=\"https:\/\/anytxt.net\/\" href=\"https:\/\/anytxt.net\/\"><span style=\"font-size: large;\">https:\/\/anytxt.net\/<\/span><\/a><\/p>\n<p><a href=\"https:\/\/sourceforge.net\/projects\/anytxt\/\"><span style=\"font-size: large;\">https:\/\/sourceforge.net\/projects\/anytxt\/<\/span><\/a><\/p>\n<p><span style=\"font-size: large;\">\u00a0<\/span><\/p>\n<p><span style=\"font-size: large;\">\uff081\uff09recoll\u652f\u6301\u7684\u6587\u6863\u7c7b\u578b<\/span><\/p>\n<h5><span style=\"font-size: large;\">File types indexed natively<\/span><\/h5>\n<ul>\n<li><span style=\"font-size: large;\">text<\/span><\/li>\n<li><span style=\"font-size: large;\">html<\/span><\/li>\n<li><span style=\"font-size: large;\">maildir, mh, and mailbox (Mozilla, Thunderbird and Evolution mail ok). <strong><em>Evolution note<\/em><\/strong>: <em>be sure to remove .cache from the <code>skippedNames<\/code> list in the GUI <code>Indexing preferences\/Local Parameters<\/code> pane if you want to index local copies of Imap mail<\/em>. 
<strong>Outlook<\/strong> archives are processed with an external helper, see further.<\/span><\/li>\n<li><span style=\"font-size: large;\">gaim and purple log files.<\/span><\/li>\n<li><span style=\"font-size: large;\">Scribus files.<\/span><\/li>\n<li><span style=\"font-size: large;\">Man pages (needs groff).<\/span><\/li>\n<li><span style=\"font-size: large;\">Mimehtml web archive format (this is based on the mail filter, which introduces some mild weirdness, but is still usable).<\/span><\/li>\n<\/ul>\n<p><span style=\"font-size: large;\">All the following need Python3 (or Python2 for older Recoll versions):<\/span><\/p>\n<ul>\n<li><span style=\"font-size: large;\">Dia diagrams.<\/span><\/li>\n<li><span style=\"font-size: large;\">Excel and Powerpoint files (pre-open-xml).<\/span><\/li>\n<li><span style=\"font-size: large;\">Tar archives. Tar file indexing is disabled by default (because tar archives don\u2019t typically contain the kind of documents that people search for), you will need to enable it explicitly, e.g., with the following in your <code>$HOME\/.recoll\/mimeconf<\/code> file:<\/span><\/li>\n<\/ul>\n<pre><span style=\"font-size: large;\">[index]\napplication\/x-tar = execm rcltar<\/span><\/pre>\n<ul>\n<li><span style=\"font-size: large;\">Zip archives.<\/span><\/li>\n<li><span style=\"font-size: large;\">Konqueror webarchive format (uses the <code>tarfile<\/code> Python standard library module).<\/span><\/li>\n<\/ul>\n<h5><span style=\"font-size: large;\">File types indexed with external helpers<\/span><\/h5>\n<h6><span style=\"font-size: large;\">The XML ones<\/span><\/h6>\n<p><span style=\"font-size: large;\">Recoll 1.26 and later process XML internally, by using the <strong>libxml2<\/strong> and <strong>libxslt<\/strong> C++ libraries. Quite a few formats also need the <strong>unzip<\/strong> command.<\/span><\/p>\n<p><span style=\"font-size: large;\">Recoll 1.25 used <strong>python3-lxml<\/strong>. 
Versions from 1.22 to 1.24 used <strong>python-libxslt<\/strong> and <strong>python-libxml2<\/strong>, Versions older than 1.22 needed the <strong>xsltproc<\/strong> command.<\/span><\/p>\n<ul>\n<li><span style=\"font-size: large;\">OpenOffice files.<\/span><\/li>\n<li><span style=\"font-size: large;\">Microsoft Office Open XML files.<\/span><\/li>\n<li><span style=\"font-size: large;\">Abiword files.<\/span><\/li>\n<li><span style=\"font-size: large;\">Kword files.<\/span><\/li>\n<li><span style=\"font-size: large;\">Fb2 ebooks.<\/span><\/li>\n<li><span style=\"font-size: large;\">SVG files.<\/span><\/li>\n<li><span style=\"font-size: large;\">Gnumeric files.<\/span><\/li>\n<li><span style=\"font-size: large;\">Okular annotations files.<\/span><\/li>\n<\/ul>\n<h6><span style=\"font-size: large;\">Other formats<\/span><\/h6>\n<p><span style=\"font-size: large;\">The following need miscellaneous helper programs to extract the document text.<\/span><\/p>\n<ul>\n<li><span style=\"font-size: large;\">PDF needs the <strong>pdftotext<\/strong> command, which comes with <\/span><a href=\"http:\/\/poppler.freedesktop.org\/\"><span style=\"font-size: large;\">poppler<\/span><\/a><span style=\"font-size: large;\">. The package name is quite often <code>poppler-utils<\/code>. <strong><em>Note<\/em><\/strong>: <em>the older pdftotext command which comes with xpdf is not compatible with Recoll<\/em>. PDF has its <\/span><a href=\"https:\/\/www.lesbonscomptes.com\/recoll\/pages\/features.html#doctypes.pdf\"><span style=\"font-size: large;\">own section<\/span><\/a><span style=\"font-size: large;\"> further, with details about OCR support and opening documents at the right page.<\/span><\/li>\n<li><span style=\"font-size: large;\">Microsoft Word is processed with <\/span><a href=\"http:\/\/www.winfield.demon.nl\/\"><span style=\"font-size: large;\">antiword<\/span><\/a><span style=\"font-size: large;\">, which is not maintained much, but keeps working. 
I maintain a very <\/span><a href=\"https:\/\/opensourceprojects.eu\/p\/antiword\/code\"><span style=\"font-size: large;\">slightly improved antiword version<\/span><\/a><span style=\"font-size: large;\">, it can extract a little extra data in some cases. It is also useful to have <\/span><a href=\"http:\/\/wvware.sourceforge.net\/\"><span style=\"font-size: large;\">wvWare<\/span><\/a><span style=\"font-size: large;\"> installed as Recoll can use it as a fallback for some files which antiword does not handle.<\/span><\/li>\n<li><span style=\"font-size: large;\">RTF files with <\/span><a href=\"http:\/\/www.gnu.org\/software\/unrtf\/unrtf.html\"><span style=\"font-size: large;\">unrtf<\/span><\/a><span style=\"font-size: large;\">. Note that up to version 0.21.3, <strong>unrtf<\/strong> mostly does not work with non western-european character sets. Many serious problems (crashes with serious security implications and infinite loops) were fixed in unrtf 0.21.8, so you really want to use this or a newer release. Building <strong>unrtf<\/strong> from source is quick and easy.<\/span><\/li>\n<li><span style=\"font-size: large;\">CHM (Microsoft help) files with Python <\/span><a href=\"http:\/\/gnochm.sourceforge.net\/pychm.html\"><span style=\"font-size: large;\">pychm<\/span><\/a><span style=\"font-size: large;\"> and <\/span><a href=\"http:\/\/www.jedrea.com\/chmlib\/\"><span style=\"font-size: large;\">chmlib<\/span><\/a><span style=\"font-size: large;\">. Recoll 1.25 and later bundle a Python3 version of the CHM package, (this is necessary because the original package was not ported to Python3).<\/span><\/li>\n<li><span style=\"font-size: large;\">EPUB files with Python and the <\/span><a href=\"http:\/\/pypi.python.org\/pypi\/epub\/\"><span style=\"font-size: large;\">epub<\/span><\/a><span style=\"font-size: large;\"> module, which is packaged on Fedora, but not Debian. 
The packaged version by the original author (0.5.2) is old and suffers from a lot of bitrot, so Recoll now bundles <\/span><a href=\"https:\/\/bitbucket.org\/arthurdarcet\/epub\"><span style=\"font-size: large;\">an unpackaged version, updated by Arthur Darcet<\/span><\/a><span style=\"font-size: large;\">.<\/span><\/li>\n<li><span style=\"font-size: large;\">Microsoft Outlook .pst and .ost files are processed with <\/span><a href=\"https:\/\/github.com\/libyal\/libpff\"><span style=\"font-size: large;\">libpff<\/span><\/a><span style=\"font-size: large;\">. We use a slightly modified version (to provide streaming output), stored in <\/span><a href=\"https:\/\/opensourceprojects.eu\/p\/libpff\/code\/\"><span style=\"font-size: large;\">this repository<\/span><\/a><\/li>\n<li><span style=\"font-size: large;\">Hancom office Hanword .hwp format for Korean text processing, using the <strong>pyhwp<\/strong> Python module. See <\/span><a href=\"https:\/\/pypi.org\/project\/pyhwp\/\"><span style=\"font-size: large;\">the module page<\/span><\/a><span style=\"font-size: large;\">. Use <code>pip3 install pyhwp<\/code> to install on Linux. This will be bundled with Recoll Windows future versions (1.26.6 and later). On Debian, you also probably want to install the <strong>fonts-nanum<\/strong> package, which is not part of the default install.<\/span><\/li>\n<li><span style=\"font-size: large;\">Wordperfect is processed with the <strong>wpd2html<\/strong> command from <\/span><a href=\"http:\/\/libwpd.sourceforge.net\/\"><span style=\"font-size: large;\">libwpd<\/span><\/a><span style=\"font-size: large;\"> package. 
On some distributions, the command may come with a package named <code>libwpd-tools<\/code> or such, not the base <code>libwpd<\/code> package.<\/span><\/li>\n<li><span style=\"font-size: large;\">djvu with <\/span><a href=\"http:\/\/djvu.sourceforge.net\/\"><span style=\"font-size: large;\">DjVuLibre<\/span><\/a><span style=\"font-size: large;\">.<\/span><\/li>\n<li><span style=\"font-size: large;\">Audio: Recoll releases 1.14 and later use a Python script based on the <\/span><a href=\"http:\/\/code.google.com\/p\/mutagen\/\"><span style=\"font-size: large;\">mutagen<\/span><\/a><span style=\"font-size: large;\"> package to extract tags for all audio types.<\/span><\/li>\n<li><span style=\"font-size: large;\">Images tags are extracted with <strong>perl<\/strong> and <\/span><a href=\"http:\/\/www.sno.phy.queensu.ca\/~phil\/exiftool\/\"><span style=\"font-size: large;\">exiftool<\/span><\/a><span style=\"font-size: large;\">.<\/span><\/li>\n<li><span style=\"font-size: large;\">GNU info files are processed with Python and the <strong>info<\/strong> command.<\/span><\/li>\n<li><span style=\"font-size: large;\">Lyx files need <strong>Lyx<\/strong> to be installed.<\/span><\/li>\n<li><span style=\"font-size: large;\">Rar archives with the Python <\/span><a href=\"http:\/\/pypi.python.org\/pypi\/rarfile\/\"><span style=\"font-size: large;\">rarfile<\/span><\/a><span style=\"font-size: large;\"> module and the <\/span><a href=\"http:\/\/www.rarlab.com\/rar_add.htm\"><span style=\"font-size: large;\">unrar<\/span><\/a><span style=\"font-size: large;\"> utility. The Python module is packaged as python3-rarfile by both Fedora and Debian. 
Note that the free version of unrar (<code>unrar-free<\/code>) fails for many files with the message &#8220;Failed the read enough data&#8221;.<\/span><\/li>\n<li><span style=\"font-size: large;\">7zip archives with the <\/span><a href=\"https:\/\/pypi.python.org\/pypi\/pylzma\"><span style=\"font-size: large;\">pylzma module<\/span><\/a><span style=\"font-size: large;\">.<\/span><\/li>\n<li><span style=\"font-size: large;\">iCalendar(.ics) files with the <\/span><a href=\"http:\/\/pypi.python.org\/pypi\/icalendar\/2.1\"><span style=\"font-size: large;\">icalendar<\/span><\/a><span style=\"font-size: large;\"> module.<\/span><\/li>\n<li><span style=\"font-size: large;\">Mozilla calendar data. See <\/span><a href=\"https:\/\/www.lesbonscomptes.com\/recoll\/faqsandhowtos\/IndexMozillaCalendari.html\"><span style=\"font-size: large;\">the Howto<\/span><\/a><span style=\"font-size: large;\"> about this.<\/span><\/li>\n<li><span style=\"font-size: large;\">Postscript with the <\/span><a href=\"http:\/\/www.gnu.org\/software\/ghostscript\/ghostscript.html\"><span style=\"font-size: large;\">ghostscript<\/span><\/a><span style=\"font-size: large;\">, <strong>ps2pdf<\/strong> command, and <strong>pdftotext<\/strong> from <\/span><a href=\"http:\/\/poppler.freedesktop.org\/\"><span style=\"font-size: large;\">poppler<\/span><\/a><span style=\"font-size: large;\">.<\/span><\/li>\n<li><span style=\"font-size: large;\">TeX with <strong>untex<\/strong>. If there is no <strong>untex<\/strong> package for your distribution, this site stores a <\/span><a href=\"https:\/\/www.lesbonscomptes.com\/recoll\/untex\/untex-1.3.jf.tar.gz\"><span style=\"font-size: large;\">source package<\/span><\/a><span style=\"font-size: large;\">, as untex has no obvious home. 
Will also work with <\/span><a href=\"http:\/\/www.cs.purdue.edu\/homes\/trinkle\/detex\/\"><span style=\"font-size: large;\">detex<\/span><\/a><span style=\"font-size: large;\"> if this is installed.<\/span><\/li>\n<li><span style=\"font-size: large;\">DVI with <\/span><a href=\"http:\/\/catdvi.sourceforge.net\/\"><span style=\"font-size: large;\">catdvi<\/span><\/a><span style=\"font-size: large;\">.<\/span><\/li>\n<li><span style=\"font-size: large;\">Midi karaoke files are processed with the Python <\/span><a href=\"http:\/\/pypi.python.org\/pypi\/midi\/0.2.1\"><span style=\"font-size: large;\">midi module<\/span><\/a><span style=\"font-size: large;\">, and some help from <\/span><a href=\"http:\/\/chardet.feedparser.org\/\"><span style=\"font-size: large;\">chardet<\/span><\/a><span style=\"font-size: large;\">. There is probably a <code>python-chardet<\/code> package for your distribution, but you will quite probably need to build the midi package. This is easy but see the <\/span><a href=\"https:\/\/www.lesbonscomptes.com\/recoll\/helpernotes.html#midi\"><span style=\"font-size: large;\">notes here<\/span><\/a><span style=\"font-size: large;\">. 
Recoll 1.24 and later bundle the midi decoding module (modified and ported to python3), and just need the standard Python &#8216;six&#8217; module and chardet.<\/span><\/li>\n<li><span style=\"font-size: large;\"><code>MediaWiki<\/code> dump files: Thomas Levine has written a handler for these, you will find it here: <\/span><a href=\"https:\/\/bitbucket.org\/tlevine\/recoll\/src\/0127be78bffdd8a294067966a3ba7b2663d7b0cf\/src\/filters\/rclmwdump?at=default&amp;fileviewer=file-view-default\"><span style=\"font-size: large;\">rclmwdump<\/span><\/a><span style=\"font-size: large;\">.<\/span><\/li>\n<\/ul>\n<p><span style=\"font-size: large;\">\u00a0<\/span><\/p>\n<p><span style=\"font-size: large;\">\uff082\uff09AnyTxt\u652f\u6301\u7684\u7c7b\u578b<\/span><\/p>\n<h5><span style=\"font-size: large;\">Formats Supported<\/span><\/h5>\n<blockquote>\n<ul>\n<li><span style=\"font-size: large;\">Plain Text Format (txt, cpp, html, etc.) <\/span><\/li>\n<li><span style=\"font-size: large;\">Microsoft Outlook (eml) <\/span><\/li>\n<li><span style=\"font-size: large;\">Microsoft Word (doc, docx) <\/span><\/li>\n<li><span style=\"font-size: large;\">Microsoft Excel (xls, xlsx) <\/span><\/li>\n<li><span style=\"font-size: large;\">Microsoft PowerPoint (ppt, pptx) <\/span><\/li>\n<li><span style=\"font-size: large;\">Portable Document Format (pdf) (beta) <\/span><\/li>\n<li><span style=\"font-size: large;\">More Document Types are coming<\/span><\/li>\n<\/ul>\n<\/blockquote>\n<h5><span style=\"font-size: large;\">More Features<\/span><\/h5>\n<blockquote>\n<ul>\n<li><span style=\"font-size: large;\">Microsoft Office (doc, xls, ppt) Supported <\/span><\/li>\n<li><span style=\"font-size: large;\">Microsoft Office 2007 (docx, xlsx, pptx, docm, xlsm, docm) Supported <\/span><\/li>\n<li><span style=\"font-size: large;\">PDF Supported(Beta) <\/span><\/li>\n<li><span style=\"font-size: large;\">Non-English document Supported <\/span><\/li>\n<li><span style=\"font-size: large;\">Full Text 
Search <\/span><\/li>\n<li><span style=\"font-size: large;\">Real Time Search (Beta) <\/span><\/li>\n<li><span style=\"font-size: large;\">SSD Optimization <\/span><\/li>\n<li><span style=\"font-size: large;\">Fast Index <\/span><\/li>\n<li><span style=\"font-size: large;\">Fast Search<\/span><\/li>\n<\/ul>\n<\/blockquote>\n","protected":false},"excerpt":{"rendered":"<p>Everything\u662f\u4e00\u6b3e\u5fc5\u9009\u7684\u684c\u9762\u6587\u4ef6\u641c\u7d22\uff0c\u4f46\u662f\u8fd9\u4e2a\u8f6f\u4ef6\u6ca1\u6cd5\u9488\u5bf9\u5185\u5bb9\u8fdb\u884c\u68c0\u7d22\u3002\u5728Google Desktop&#8230;<\/p>\n","protected":false},"author":1,"featured_media":6714,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[22,284,189,170,308],"tags":[797,799,798,796],"views":15866,"_links":{"self":[{"href":"http:\/\/www.brofive.net\/index.php?rest_route=\/wp\/v2\/posts\/6720"}],"collection":[{"href":"http:\/\/www.brofive.net\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/www.brofive.net\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/www.brofive.net\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/www.brofive.net\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=6720"}],"version-history":[{"count":3,"href":"http:\/\/www.brofive.net\/index.php?rest_route=\/wp\/v2\/posts\/6720\/revisions"}],"predecessor-version":[{"id":6723,"href":"http:\/\/www.brofive.net\/index.php?rest_route=\/wp\/v2\/posts\/6720\/revisions\/6723"}],"wp:featuredmedia":[{"embeddable":true,"href":"http:\/\/www.brofive.net\/index.php?rest_route=\/wp\/v2\/media\/6714"}],"wp:attachment":[{"href":"http:\/\/www.brofive.net\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=6720"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/www.brofive.net\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=6720"},{"taxonomy":"post_tag","embeddabl
e":true,"href":"http:\/\/www.brofive.net\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=6720"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}