{"id":22331,"date":"2014-08-05T09:19:31","date_gmt":"2014-08-05T07:19:31","guid":{"rendered":"https:\/\/mamchenkov.net\/wordpress\/?p=22331"},"modified":"2014-08-05T09:19:31","modified_gmt":"2014-08-05T07:19:31","slug":"textract-extract-text-from-any-document","status":"publish","type":"post","link":"https:\/\/mamchenkov.net\/wordpress\/2014\/08\/05\/textract-extract-text-from-any-document\/","title":{"rendered":"textract &#8211; extract text from any document"},"content":{"rendered":"<!-- google_ad_section_start -->\n<p><a href=\"http:\/\/textract.readthedocs.org\/en\/latest\/\">textract<\/a> &#8211; extract text from any document. \u00a0Currently supports\u00a0.doc, .docx, .eml, .json, .html, .pptx, .pdf, and .txt.<\/p>\n<!-- google_ad_section_end -->\n","protected":false},"excerpt":{"rendered":"<!-- google_ad_section_start -->\n<p>textract &#8211; extract text from any document. \u00a0Currently supports\u00a0.doc, .docx, .eml, .json, .html, .pptx, .pdf, and .txt.<\/p>\n<!-- google_ad_section_end -->\n","protected":false},"author":2,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"link","meta":{"_monsterinsights_skip_tracking":false,"_monsterinsights_sitenote_active":false,"_monsterinsights_sitenote_note":"","_monsterinsights_sitenote_category":0,"_jetpack_newsletter_access":"","_jetpack_dont_email_post_to_subs":false,"_jetpack_newsletter_tier_id":0,"_jetpack_memberships_contains_paywalled_content":false,"_jetpack_feature_clip_id":0,"_jetpack_memberships_contains_paid_content":false,"footnotes":"","jetpack_publicize_message":"","jetpack_publicize_feature_enabled":true,"jetpack_social_post_already_shared":true,"jetpack_social_options":{"image_generator_settings":{"template":"highway","default_image_id":0,"font":"","enabled":false},"version":2},"jetpack_post_was_ever_published":false,"_links_to":"","_links_to_target":""},"categories":[1,18,133,62],"tags":[1960,3181,3051,3313,3310,37],"keyring_services":[],"class_list":["post-22331","post","type-post","status-publish","format-link","hentry","category-general","category-programming","category-sysadmin","category-technology","tag-command-line","tag-file-formats","tag-microsoft-outlook","tag-microsoft-powerpoint","tag-microsoft-word","tag-python","post_format-post-format-link"],"aioseo_notices":[],"aioseo_head":"\n\t\t<!-- All in One SEO 4.9.8 - aioseo.com -->\n\t<meta name=\"description\" content=\"textract - extract text from any document. Currently supports .doc, .docx, .eml, .json, .html, .pptx, .pdf, and .txt.\" \/>\n\t<meta name=\"robots\" content=\"max-image-preview:large\" \/>\n\t<meta name=\"author\" content=\"Leonid Mamchenkov\"\/>\n\t<meta name=\"google-site-verification\" content=\"VHvdD0_usx1_4DzKy_QCVcICVgX2EgA2ybELT-wl7kQ\" \/>\n\t<link rel=\"canonical\" href=\"https:\/\/mamchenkov.net\/wordpress\/2014\/08\/05\/textract-extract-text-from-any-document\/\" \/>\n\t<meta name=\"generator\" content=\"All in One SEO (AIOSEO) 4.9.8\" \/>\n\t\t<meta property=\"og:locale\" content=\"en_US\" \/>\n\t\t<meta property=\"og:site_name\" content=\"Leonid Mamchenkov - Life, universe, and everything else\" \/>\n\t\t<meta property=\"og:type\" content=\"article\" \/>\n\t\t<meta property=\"og:title\" content=\"textract \u2013 extract text from any document - Leonid Mamchenkov\" \/>\n\t\t<meta property=\"og:description\" content=\"textract - extract text from any document. Currently supports .doc, .docx, .eml, .json, .html, .pptx, .pdf, and .txt.\" \/>\n\t\t<meta property=\"og:url\" content=\"https:\/\/mamchenkov.net\/wordpress\/2014\/08\/05\/textract-extract-text-from-any-document\/\" \/>\n\t\t<meta property=\"og:image\" content=\"https:\/\/mamchenkov.net\/wordpress\/wp-content\/uploads\/2026\/03\/leonid-sailing-beer.jpg\" \/>\n\t\t<meta property=\"og:image:secure_url\" content=\"https:\/\/mamchenkov.net\/wordpress\/wp-content\/uploads\/2026\/03\/leonid-sailing-beer.jpg\" \/>\n\t\t<meta property=\"og:image:width\" content=\"1024\" \/>\n\t\t<meta property=\"og:image:height\" content=\"1024\" \/>\n\t\t<meta property=\"article:published_time\" content=\"2014-08-05T07:19:31+00:00\" \/>\n\t\t<meta property=\"article:modified_time\" content=\"2014-08-05T07:19:31+00:00\" \/>\n\t\t<meta property=\"article:publisher\" content=\"https:\/\/www.facebook.com\/MamchenkovBlog\" \/>\n\t\t<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n\t\t<meta name=\"twitter:site\" content=\"@mamchenkov\" \/>\n\t\t<meta name=\"twitter:title\" content=\"textract \u2013 extract text from any document - Leonid Mamchenkov\" \/>\n\t\t<meta name=\"twitter:description\" content=\"textract - extract text from any document. Currently supports .doc, .docx, .eml, .json, .html, .pptx, .pdf, and .txt.\" \/>\n\t\t<meta name=\"twitter:creator\" content=\"@mamchenkov\" \/>\n\t\t<meta name=\"twitter:image\" content=\"https:\/\/mamchenkov.net\/wordpress\/wp-content\/uploads\/2026\/03\/leonid-sailing-beer.jpg\" \/>\n\t\t<script type=\"application\/ld+json\" class=\"aioseo-schema\">\n\t\t\t{\"@context\":\"https:\\\/\\\/schema.org\",\"@graph\":[{\"@type\":\"BlogPosting\",\"@id\":\"https:\\\/\\\/mamchenkov.net\\\/wordpress\\\/2014\\\/08\\\/05\\\/textract-extract-text-from-any-document\\\/#blogposting\",\"name\":\"textract \\u2013 extract text from any document - Leonid Mamchenkov\",\"headline\":\"textract &#8211; extract text from any document\",\"author\":{\"@id\":\"https:\\\/\\\/mamchenkov.net\\\/wordpress\\\/author\\\/leonid\\\/#author\"},\"publisher\":{\"@id\":\"https:\\\/\\\/mamchenkov.net\\\/wordpress\\\/#person\"},\"image\":{\"@type\":\"ImageObject\",\"@id\":\"https:\\\/\\\/mamchenkov.net\\\/wordpress\\\/2014\\\/08\\\/05\\\/textract-extract-text-from-any-document\\\/#articleImage\",\"url\":\"https:\\\/\\\/secure.gravatar.com\\\/avatar\\\/3cf6df002a284d78fb6e9d8222ca4d102e0832035ed6bc8447008bd234e131a4?s=96&d=identicon&r=g\",\"width\":96,\"height\":96,\"caption\":\"Leonid Mamchenkov\"},\"datePublished\":\"2014-08-05T09:19:31+02:00\",\"dateModified\":\"2014-08-05T09:19:31+02:00\",\"inLanguage\":\"en-US\",\"mainEntityOfPage\":{\"@id\":\"https:\\\/\\\/mamchenkov.net\\\/wordpress\\\/2014\\\/08\\\/05\\\/textract-extract-text-from-any-document\\\/#webpage\"},\"isPartOf\":{\"@id\":\"https:\\\/\\\/mamchenkov.net\\\/wordpress\\\/2014\\\/08\\\/05\\\/textract-extract-text-from-any-document\\\/#webpage\"},\"articleSection\":\"All, Programming, Sysadmin, Technology, command line, file formats, Microsoft Outlook, Microsoft PowerPoint, Microsoft Word, Python, Link\"},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\\\/\\\/mamchenkov.net\\\/wordpress\\\/2014\\\/08\\\/05\\\/textract-extract-text-from-any-document\\\/#breadcrumblist\",\"itemListElement\":[{\"@type\":\"ListItem\",\"@id\":\"https:\\\/\\\/mamchenkov.net\\\/wordpress#listItem\",\"position\":1,\"name\":\"Home\",\"item\":\"https:\\\/\\\/mamchenkov.net\\\/wordpress\",\"nextItem\":{\"@type\":\"ListItem\",\"@id\":\"https:\\\/\\\/mamchenkov.net\\\/wordpress\\\/category\\\/technology\\\/#listItem\",\"name\":\"Technology\"}},{\"@type\":\"ListItem\",\"@id\":\"https:\\\/\\\/mamchenkov.net\\\/wordpress\\\/category\\\/technology\\\/#listItem\",\"position\":2,\"name\":\"Technology\",\"item\":\"https:\\\/\\\/mamchenkov.net\\\/wordpress\\\/category\\\/technology\\\/\",\"nextItem\":{\"@type\":\"ListItem\",\"@id\":\"https:\\\/\\\/mamchenkov.net\\\/wordpress\\\/category\\\/technology\\\/programming\\\/#listItem\",\"name\":\"Programming\"},\"previousItem\":{\"@type\":\"ListItem\",\"@id\":\"https:\\\/\\\/mamchenkov.net\\\/wordpress#listItem\",\"name\":\"Home\"}},{\"@type\":\"ListItem\",\"@id\":\"https:\\\/\\\/mamchenkov.net\\\/wordpress\\\/category\\\/technology\\\/programming\\\/#listItem\",\"position\":3,\"name\":\"Programming\",\"item\":\"https:\\\/\\\/mamchenkov.net\\\/wordpress\\\/category\\\/technology\\\/programming\\\/\",\"nextItem\":{\"@type\":\"ListItem\",\"@id\":\"https:\\\/\\\/mamchenkov.net\\\/wordpress\\\/2014\\\/08\\\/05\\\/textract-extract-text-from-any-document\\\/#listItem\",\"name\":\"textract &#8211; extract text from any document\"},\"previousItem\":{\"@type\":\"ListItem\",\"@id\":\"https:\\\/\\\/mamchenkov.net\\\/wordpress\\\/category\\\/technology\\\/#listItem\",\"name\":\"Technology\"}},{\"@type\":\"ListItem\",\"@id\":\"https:\\\/\\\/mamchenkov.net\\\/wordpress\\\/2014\\\/08\\\/05\\\/textract-extract-text-from-any-document\\\/#listItem\",\"position\":4,\"name\":\"textract &#8211; extract text from any document\",\"previousItem\":{\"@type\":\"ListItem\",\"@id\":\"https:\\\/\\\/mamchenkov.net\\\/wordpress\\\/category\\\/technology\\\/programming\\\/#listItem\",\"name\":\"Programming\"}}]},{\"@type\":\"Person\",\"@id\":\"https:\\\/\\\/mamchenkov.net\\\/wordpress\\\/#person\",\"name\":\"Leonid Mamchenkov\",\"image\":{\"@type\":\"ImageObject\",\"@id\":\"https:\\\/\\\/mamchenkov.net\\\/wordpress\\\/2014\\\/08\\\/05\\\/textract-extract-text-from-any-document\\\/#personImage\",\"url\":\"https:\\\/\\\/secure.gravatar.com\\\/avatar\\\/3cf6df002a284d78fb6e9d8222ca4d102e0832035ed6bc8447008bd234e131a4?s=96&d=identicon&r=g\",\"width\":96,\"height\":96,\"caption\":\"Leonid Mamchenkov\"}},{\"@type\":\"Person\",\"@id\":\"https:\\\/\\\/mamchenkov.net\\\/wordpress\\\/author\\\/leonid\\\/#author\",\"url\":\"https:\\\/\\\/mamchenkov.net\\\/wordpress\\\/author\\\/leonid\\\/\",\"name\":\"Leonid Mamchenkov\",\"image\":{\"@type\":\"ImageObject\",\"@id\":\"https:\\\/\\\/mamchenkov.net\\\/wordpress\\\/2014\\\/08\\\/05\\\/textract-extract-text-from-any-document\\\/#authorImage\",\"url\":\"https:\\\/\\\/secure.gravatar.com\\\/avatar\\\/3cf6df002a284d78fb6e9d8222ca4d102e0832035ed6bc8447008bd234e131a4?s=96&d=identicon&r=g\",\"width\":96,\"height\":96,\"caption\":\"Leonid Mamchenkov\"}},{\"@type\":\"WebPage\",\"@id\":\"https:\\\/\\\/mamchenkov.net\\\/wordpress\\\/2014\\\/08\\\/05\\\/textract-extract-text-from-any-document\\\/#webpage\",\"url\":\"https:\\\/\\\/mamchenkov.net\\\/wordpress\\\/2014\\\/08\\\/05\\\/textract-extract-text-from-any-document\\\/\",\"name\":\"textract \\u2013 extract text from any document - Leonid Mamchenkov\",\"description\":\"textract - extract text from any document. Currently supports .doc, .docx, .eml, .json, .html, .pptx, .pdf, and .txt.\",\"inLanguage\":\"en-US\",\"isPartOf\":{\"@id\":\"https:\\\/\\\/mamchenkov.net\\\/wordpress\\\/#website\"},\"breadcrumb\":{\"@id\":\"https:\\\/\\\/mamchenkov.net\\\/wordpress\\\/2014\\\/08\\\/05\\\/textract-extract-text-from-any-document\\\/#breadcrumblist\"},\"author\":{\"@id\":\"https:\\\/\\\/mamchenkov.net\\\/wordpress\\\/author\\\/leonid\\\/#author\"},\"creator\":{\"@id\":\"https:\\\/\\\/mamchenkov.net\\\/wordpress\\\/author\\\/leonid\\\/#author\"},\"datePublished\":\"2014-08-05T09:19:31+02:00\",\"dateModified\":\"2014-08-05T09:19:31+02:00\"},{\"@type\":\"WebSite\",\"@id\":\"https:\\\/\\\/mamchenkov.net\\\/wordpress\\\/#website\",\"url\":\"https:\\\/\\\/mamchenkov.net\\\/wordpress\\\/\",\"name\":\"Blog of Leonid Mamchenkov\",\"description\":\"Life, universe, and everything else\",\"inLanguage\":\"en-US\",\"publisher\":{\"@id\":\"https:\\\/\\\/mamchenkov.net\\\/wordpress\\\/#person\"}}]}\n\t\t<\/script>\n\t\t<!-- All in One SEO -->\n\n","aioseo_head_json":{"title":"textract \u2013 extract text from any document - Leonid Mamchenkov","description":"textract - extract text from any document. Currently supports .doc, .docx, .eml, .json, .html, .pptx, .pdf, and .txt.","canonical_url":"https:\/\/mamchenkov.net\/wordpress\/2014\/08\/05\/textract-extract-text-from-any-document\/","robots":"max-image-preview:large","keywords":"","webmasterTools":{"google-site-verification":"VHvdD0_usx1_4DzKy_QCVcICVgX2EgA2ybELT-wl7kQ","miscellaneous":""},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"BlogPosting","@id":"https:\/\/mamchenkov.net\/wordpress\/2014\/08\/05\/textract-extract-text-from-any-document\/#blogposting","name":"textract \u2013 extract text from any document - Leonid Mamchenkov","headline":"textract &#8211; extract text from any document","author":{"@id":"https:\/\/mamchenkov.net\/wordpress\/author\/leonid\/#author"},"publisher":{"@id":"https:\/\/mamchenkov.net\/wordpress\/#person"},"image":{"@type":"ImageObject","@id":"https:\/\/mamchenkov.net\/wordpress\/2014\/08\/05\/textract-extract-text-from-any-document\/#articleImage","url":"https:\/\/secure.gravatar.com\/avatar\/3cf6df002a284d78fb6e9d8222ca4d102e0832035ed6bc8447008bd234e131a4?s=96&d=identicon&r=g","width":96,"height":96,"caption":"Leonid Mamchenkov"},"datePublished":"2014-08-05T09:19:31+02:00","dateModified":"2014-08-05T09:19:31+02:00","inLanguage":"en-US","mainEntityOfPage":{"@id":"https:\/\/mamchenkov.net\/wordpress\/2014\/08\/05\/textract-extract-text-from-any-document\/#webpage"},"isPartOf":{"@id":"https:\/\/mamchenkov.net\/wordpress\/2014\/08\/05\/textract-extract-text-from-any-document\/#webpage"},"articleSection":"All, Programming, Sysadmin, Technology, command line, file formats, Microsoft Outlook, Microsoft PowerPoint, Microsoft Word, Python, Link"},{"@type":"BreadcrumbList","@id":"https:\/\/mamchenkov.net\/wordpress\/2014\/08\/05\/textract-extract-text-from-any-document\/#breadcrumblist","itemListElement":[{"@type":"ListItem","@id":"https:\/\/mamchenkov.net\/wordpress#listItem","position":1,"name":"Home","item":"https:\/\/mamchenkov.net\/wordpress","nextItem":{"@type":"ListItem","@id":"https:\/\/mamchenkov.net\/wordpress\/category\/technology\/#listItem","name":"Technology"}},{"@type":"ListItem","@id":"https:\/\/mamchenkov.net\/wordpress\/category\/technology\/#listItem","position":2,"name":"Technology","item":"https:\/\/mamchenkov.net\/wordpress\/category\/technology\/","nextItem":{"@type":"ListItem","@id":"https:\/\/mamchenkov.net\/wordpress\/category\/technology\/programming\/#listItem","name":"Programming"},"previousItem":{"@type":"ListItem","@id":"https:\/\/mamchenkov.net\/wordpress#listItem","name":"Home"}},{"@type":"ListItem","@id":"https:\/\/mamchenkov.net\/wordpress\/category\/technology\/programming\/#listItem","position":3,"name":"Programming","item":"https:\/\/mamchenkov.net\/wordpress\/category\/technology\/programming\/","nextItem":{"@type":"ListItem","@id":"https:\/\/mamchenkov.net\/wordpress\/2014\/08\/05\/textract-extract-text-from-any-document\/#listItem","name":"textract &#8211; extract text from any document"},"previousItem":{"@type":"ListItem","@id":"https:\/\/mamchenkov.net\/wordpress\/category\/technology\/#listItem","name":"Technology"}},{"@type":"ListItem","@id":"https:\/\/mamchenkov.net\/wordpress\/2014\/08\/05\/textract-extract-text-from-any-document\/#listItem","position":4,"name":"textract &#8211; extract text from any document","previousItem":{"@type":"ListItem","@id":"https:\/\/mamchenkov.net\/wordpress\/category\/technology\/programming\/#listItem","name":"Programming"}}]},{"@type":"Person","@id":"https:\/\/mamchenkov.net\/wordpress\/#person","name":"Leonid Mamchenkov","image":{"@type":"ImageObject","@id":"https:\/\/mamchenkov.net\/wordpress\/2014\/08\/05\/textract-extract-text-from-any-document\/#personImage","url":"https:\/\/secure.gravatar.com\/avatar\/3cf6df002a284d78fb6e9d8222ca4d102e0832035ed6bc8447008bd234e131a4?s=96&d=identicon&r=g","width":96,"height":96,"caption":"Leonid Mamchenkov"}},{"@type":"Person","@id":"https:\/\/mamchenkov.net\/wordpress\/author\/leonid\/#author","url":"https:\/\/mamchenkov.net\/wordpress\/author\/leonid\/","name":"Leonid Mamchenkov","image":{"@type":"ImageObject","@id":"https:\/\/mamchenkov.net\/wordpress\/2014\/08\/05\/textract-extract-text-from-any-document\/#authorImage","url":"https:\/\/secure.gravatar.com\/avatar\/3cf6df002a284d78fb6e9d8222ca4d102e0832035ed6bc8447008bd234e131a4?s=96&d=identicon&r=g","width":96,"height":96,"caption":"Leonid Mamchenkov"}},{"@type":"WebPage","@id":"https:\/\/mamchenkov.net\/wordpress\/2014\/08\/05\/textract-extract-text-from-any-document\/#webpage","url":"https:\/\/mamchenkov.net\/wordpress\/2014\/08\/05\/textract-extract-text-from-any-document\/","name":"textract \u2013 extract text from any document - Leonid Mamchenkov","description":"textract - extract text from any document. Currently supports .doc, .docx, .eml, .json, .html, .pptx, .pdf, and .txt.","inLanguage":"en-US","isPartOf":{"@id":"https:\/\/mamchenkov.net\/wordpress\/#website"},"breadcrumb":{"@id":"https:\/\/mamchenkov.net\/wordpress\/2014\/08\/05\/textract-extract-text-from-any-document\/#breadcrumblist"},"author":{"@id":"https:\/\/mamchenkov.net\/wordpress\/author\/leonid\/#author"},"creator":{"@id":"https:\/\/mamchenkov.net\/wordpress\/author\/leonid\/#author"},"datePublished":"2014-08-05T09:19:31+02:00","dateModified":"2014-08-05T09:19:31+02:00"},{"@type":"WebSite","@id":"https:\/\/mamchenkov.net\/wordpress\/#website","url":"https:\/\/mamchenkov.net\/wordpress\/","name":"Blog of Leonid Mamchenkov","description":"Life, universe, and everything else","inLanguage":"en-US","publisher":{"@id":"https:\/\/mamchenkov.net\/wordpress\/#person"}}]},"og:locale":"en_US","og:site_name":"Leonid Mamchenkov - Life, universe, and everything else","og:type":"article","og:title":"textract \u2013 extract text from any document - Leonid Mamchenkov","og:description":"textract - extract text from any document. Currently supports .doc, .docx, .eml, .json, .html, .pptx, .pdf, and .txt.","og:url":"https:\/\/mamchenkov.net\/wordpress\/2014\/08\/05\/textract-extract-text-from-any-document\/","og:image":"https:\/\/mamchenkov.net\/wordpress\/wp-content\/uploads\/2026\/03\/leonid-sailing-beer.jpg","og:image:secure_url":"https:\/\/mamchenkov.net\/wordpress\/wp-content\/uploads\/2026\/03\/leonid-sailing-beer.jpg","og:image:width":1024,"og:image:height":1024,"article:published_time":"2014-08-05T07:19:31+00:00","article:modified_time":"2014-08-05T07:19:31+00:00","article:publisher":"https:\/\/www.facebook.com\/MamchenkovBlog","twitter:card":"summary_large_image","twitter:site":"@mamchenkov","twitter:title":"textract \u2013 extract text from any document - Leonid Mamchenkov","twitter:description":"textract - extract text from any document. Currently supports .doc, .docx, .eml, .json, .html, .pptx, .pdf, and .txt.","twitter:creator":"@mamchenkov","twitter:image":"https:\/\/mamchenkov.net\/wordpress\/wp-content\/uploads\/2026\/03\/leonid-sailing-beer.jpg"},"aioseo_meta_data":{"post_id":"22331","title":null,"description":null,"keywords":null,"keyphrases":null,"primary_term":null,"canonical_url":null,"og_title":null,"og_description":null,"og_object_type":"default","og_image_type":"default","og_image_url":null,"og_image_width":null,"og_image_height":null,"og_image_custom_url":null,"og_image_custom_fields":null,"og_video":null,"og_custom_url":null,"og_article_section":null,"og_article_tags":null,"twitter_use_og":false,"twitter_card":"default","twitter_image_type":"default","twitter_image_url":null,"twitter_image_custom_url":null,"twitter_image_custom_fields":null,"twitter_title":null,"twitter_description":null,"schema":{"blockGraphs":[],"customGraphs":[],"default":{"data":{"Article":[],"Course":[],"Dataset":[],"FAQPage":[],"Movie":[],"Person":[],"Product":[],"ProductReview":[],"Car":[],"Recipe":[],"Service":[],"SoftwareApplication":[],"WebPage":[]},"graphName":"","isEnabled":true},"graphs":[]},"schema_type":"default","schema_type_options":null,"pillar_content":false,"robots_default":true,"robots_noindex":false,"robots_noarchive":false,"robots_nosnippet":false,"robots_nofollow":false,"robots_noimageindex":false,"robots_noodp":false,"robots_notranslate":false,"robots_max_snippet":null,"robots_max_videopreview":null,"robots_max_imagepreview":"large","priority":null,"frequency":null,"local_seo":null,"breadcrumb_settings":null,"limit_modified_date":false,"ai":null,"created":"2023-07-20 02:27:38","updated":"2026-01-15 10:42:46","seo_analyzer_scan_date":null},"aioseo_breadcrumb":"<div class=\"aioseo-breadcrumbs\"><span class=\"aioseo-breadcrumb\">\n\t\t\t<a href=\"https:\/\/mamchenkov.net\/wordpress\" title=\"Home\">Home<\/a>\n\t\t<\/span><span class=\"aioseo-breadcrumb-separator\">&raquo;<\/span><span class=\"aioseo-breadcrumb\">\n\t\t\t<a href=\"https:\/\/mamchenkov.net\/wordpress\/category\/technology\/\" title=\"Technology\">Technology<\/a>\n\t\t<\/span><span class=\"aioseo-breadcrumb-separator\">&raquo;<\/span><span class=\"aioseo-breadcrumb\">\n\t\t\t<a href=\"https:\/\/mamchenkov.net\/wordpress\/category\/technology\/programming\/\" title=\"Programming\">Programming<\/a>\n\t\t<\/span><span class=\"aioseo-breadcrumb-separator\">&raquo;<\/span><span class=\"aioseo-breadcrumb\">\n\t\t\ttextract \u2013 extract text from any document\n\t\t<\/span><\/div>","aioseo_breadcrumb_json":[{"label":"Home","link":"https:\/\/mamchenkov.net\/wordpress"},{"label":"Technology","link":"https:\/\/mamchenkov.net\/wordpress\/category\/technology\/"},{"label":"Programming","link":"https:\/\/mamchenkov.net\/wordpress\/category\/technology\/programming\/"},{"label":"textract &#8211; extract text from any document","link":"https:\/\/mamchenkov.net\/wordpress\/2014\/08\/05\/textract-extract-text-from-any-document\/"}],"jetpack_publicize_connections":[],"jetpack_featured_media_url":"","jetpack-related-posts":[{"id":15565,"url":"https:\/\/mamchenkov.net\/wordpress\/2011\/09\/21\/microsoft-vulnerability-now-served-with-plain-text-files\/","url_meta":{"origin":22331,"position":0},"title":"Microsoft vulnerability, now served with plain text files","author":"Leonid Mamchenkov","date":"September 21, 2011","format":false,"excerpt":"It is the year 2011 and we learn that even opening plain text files in Microsoft Windows is not as safe as you thought. The vulnerability could allow remote code execution if a user opens a legitimate rich text format file (.rtf), text file (.txt), or Word document (.doc) that\u2026","rel":"","context":"In &quot;All&quot;","block_context":{"text":"All","link":"https:\/\/mamchenkov.net\/wordpress\/category\/general\/"},"img":{"alt_text":"","src":"","width":0,"height":0},"classes":[]},{"id":27145,"url":"https:\/\/mamchenkov.net\/wordpress\/2016\/12\/16\/json-api-no-hal\/","url_meta":{"origin":22331,"position":1},"title":"JSON API?  No &#8230; HAL!","author":"Leonid Mamchenkov","date":"December 16, 2016","format":false,"excerpt":"Wait, what? \u00a0That's exactly what I said when I read this blog post. \u00a0I am still making my way through the JSON API specification. \u00a0And now it seems I might be wasting my time, as I should be learning HAL. Whereas JSON API is almost like an \u201cORM over HTTP\u201d,\u2026","rel":"","context":"In &quot;All&quot;","block_context":{"text":"All","link":"https:\/\/mamchenkov.net\/wordpress\/category\/general\/"},"img":{"alt_text":"","src":"","width":0,"height":0},"classes":[]},{"id":28403,"url":"https:\/\/mamchenkov.net\/wordpress\/2018\/02\/19\/automerge-a-json-like-data-structure-for-concurrent-multi-user-editing\/","url_meta":{"origin":22331,"position":2},"title":"Automerge &#8211; a JSON-like data structure for concurrent multi-user editing","author":"Leonid Mamchenkov","date":"February 19, 2018","format":false,"excerpt":"Collaborative editing is a very challenging subject, technically speaking.\u00a0 The old days of users editing a file, sending it to another user, and back are long gone.\u00a0 Version control tools like git helped with tracking changes and resolving conflicts.\u00a0 But the newer generation of tools - Google Docs for example\u2026","rel":"","context":"In &quot;All&quot;","block_context":{"text":"All","link":"https:\/\/mamchenkov.net\/wordpress\/category\/general\/"},"img":{"alt_text":"","src":"","width":0,"height":0},"classes":[]},{"id":22003,"url":"https:\/\/mamchenkov.net\/wordpress\/2014\/06\/17\/jq-a-lightweight-and-flexible-command-line-json-processor\/","url_meta":{"origin":22331,"position":3},"title":"jq &#8211; a lightweight and flexible command-line JSON processor","author":"Leonid Mamchenkov","date":"June 17, 2014","format":"link","excerpt":"jq - a lightweight and flexible command-line JSON processor. jq is like sed for JSON data \u2013 you can use it to slice and filter and map and transform structured data with the same ease that sed, awk, grep and friends let you play with text.","rel":"","context":"In &quot;All&quot;","block_context":{"text":"All","link":"https:\/\/mamchenkov.net\/wordpress\/category\/general\/"},"img":{"alt_text":"","src":"","width":0,"height":0},"classes":[]},{"id":27806,"url":"https:\/\/mamchenkov.net\/wordpress\/2017\/07\/27\/public-json-apis\/","url_meta":{"origin":22331,"position":4},"title":"Public JSON APIs","author":"Leonid Mamchenkov","date":"July 27, 2017","format":false,"excerpt":"Public APIs is a\u00a0collective list of free JSON APIs for use in web development. \u00a0You can find there web services for all kind of data - anything from weather and currency exchange, through government database, to random snippets like GIF images, Lorem Ipsum text, and quotes.","rel":"","context":"In &quot;All&quot;","block_context":{"text":"All","link":"https:\/\/mamchenkov.net\/wordpress\/category\/general\/"},"img":{"alt_text":"","src":"","width":0,"height":0},"classes":[]},{"id":7494,"url":"https:\/\/mamchenkov.net\/wordpress\/2004\/05\/10\/vim-for-perl-developers\/","url_meta":{"origin":22331,"position":5},"title":"Vim for Perl developers","author":"Leonid Mamchenkov","date":"May 10, 2004","format":false,"excerpt":"This is my attempt to provide a clear and simple instructions on adopting Vim text editor for programming needs. I am using Perl as the programming language in the examples, but most of this document will apply equally for any other programming language. Update: This post was translated into Portuguese\u2026","rel":"","context":"In &quot;All&quot;","block_context":{"text":"All","link":"https:\/\/mamchenkov.net\/wordpress\/category\/general\/"},"img":{"alt_text":"","src":"","width":0,"height":0},"classes":[]}],"jetpack_sharing_enabled":true,"amp_enabled":true,"_links":{"self":[{"href":"https:\/\/mamchenkov.net\/wordpress\/wp-json\/wp\/v2\/posts\/22331","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/mamchenkov.net\/wordpress\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/mamchenkov.net\/wordpress\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/mamchenkov.net\/wordpress\/wp-json\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"https:\/\/mamchenkov.net\/wordpress\/wp-json\/wp\/v2\/comments?post=22331"}],"version-history":[{"count":0,"href":"https:\/\/mamchenkov.net\/wordpress\/wp-json\/wp\/v2\/posts\/22331\/revisions"}],"wp:attachment":[{"href":"https:\/\/mamchenkov.net\/wordpress\/wp-json\/wp\/v2\/media?parent=22331"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/mamchenkov.net\/wordpress\/wp-json\/wp\/v2\/categories?post=22331"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/mamchenkov.net\/wordpress\/wp-json\/wp\/v2\/tags?post=22331"},{"taxonomy":"keyring_services","embeddable":true,"href":"https:\/\/mamchenkov.net\/wordpress\/wp-json\/wp\/v2\/keyring_services?post=22331"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}