From 4dec09c7952a6ddc186d7682db129033ada67f38 Mon Sep 17 00:00:00 2001 From: Roland Hieber Date: Wed, 25 Dec 2013 21:47:43 +0100 Subject: [PATCH] projects: pdf highlighter --- blag/post/pdf-highlighter.mdwn | 18 ++++++++++ projects.mdwn | 3 ++ projects/pdf.lang | 63 ++++++++++++++++++++++++++++++++++ 3 files changed, 84 insertions(+) create mode 100644 blag/post/pdf-highlighter.mdwn create mode 100644 projects/pdf.lang diff --git a/blag/post/pdf-highlighter.mdwn b/blag/post/pdf-highlighter.mdwn new file mode 100644 index 0000000..4d0aad8 --- /dev/null +++ b/blag/post/pdf-highlighter.mdwn @@ -0,0 +1,18 @@ +[[!meta title="A Highlighting plugin for PDF files"]] +[[!meta author="rohieb"]] +[[!meta license="CC-BY-SA 3.0"]] +[[!img defaults size=x200]] + +In my [[last post|optimizing-xsane-s-scanned-pdfs]], I used ikiwiki’s [highlight +plugin][iwhl] to highlight PDF files. But since the underlying [highlight][] +library did not support highlighting for PDF files yet, I had to write my own +highlighting definition. Due to limitations in the library, it's not perfect; +for example, it does not highlight things inside streams, but in case you’re +interested, you can get the source: + +* [`pdf.lang`](/projects/pdf.lang) + +[iwhl]: http://ikiwiki.info/plugins/highlight/ +[highlight]: http://www.andre-simon.de/doku/highlight/en/highlight.php + +[[!tag PDF meta project highlight]] diff --git a/projects.mdwn b/projects.mdwn index 4a54865..8d22ce4 100644 --- a/projects.mdwn +++ b/projects.mdwn @@ -7,6 +7,9 @@ of them finished. 
* [[projects/geojackal]] – A geocache management database * [[projects/infopoint-html]] – A full screen web page viewer for info terminals +* [`pdf.lang`](pdf.lang) – PDF syntax description for the [highlight][] library + +[highlight]: http://www.andre-simon.de/doku/highlight/en/highlight.php ## Ideas for new software projects diff --git a/projects/pdf.lang b/projects/pdf.lang new file mode 100644 index 0000000..7ed6f64 --- /dev/null +++ b/projects/pdf.lang @@ -0,0 +1,63 @@ +-- vim: set ft=lua ts=2 sw=2 et : +-- Language definition for PDF files (syntax description for the highlight library) +-- Author: Roland Hieber +-- Date: 2013-11-22 +-- Known Bugs: +-- * Does not highlight files with classic Mac OS (CR-only) line endings +-- * Does not (yet) highlight inside streams due to limitations in the library +-- + +Description="Highlighting definitions for the Portable Document Format (PDF)" + +IgnoreCase=false + +-- File structure markers: the %PDF-x.y header, %%EOF, xref, startxref and trailer +PreProcessor={ + Prefix=[[%PDF-[0-9]\.[0-9]|%%EOF|xref|startxref|trailer]] +} + +-- Comments start with %. The negative lookahead keeps %PDF-x.y and %%EOF (file structure) from matching. +Comments={ + { Block=false, + Delimiter={ [[%(?!PDF-[0-9]\.[0-9]|%EOF)]] }, + }, +} + +-- Numbers: 0.45, +1.34, -.4, 123, 4., and so on (optional sign, optional fraction). +Digits=[[ [-+]?\.[0-9]+|[-+]?[0-9]+\.?[0-9]* ]] + +-- Strings: literal (string), <hex string>, and stream ... endstream blocks (marked Raw) +Strings={ + DelimiterPairs= { + { Open=[[ \( ]], Close=[[ \) ]] }, + { Open=[[ < ]], Close=[[ > ]] }, + { Open=[[ ^stream ]], Close=[[ ^endstream ]], Raw=true }, + } +} + +-- Note: we highlight dictionary and array syntax as "keywords", so we have to +-- include them in Identifiers. This definition basically matches the allowed +-- characters for Names. 
Also, we do not want to match Numbers, Streams, +-- References and file structure elements as identifiers (hence the negative lookahead). +Identifiers=[[ (?!%PDF-[0-9]\.[0-9]|%%EOF|xref|startxref|trailer|[0-9]+\s+[0-9]+\s+(R|obj)|[-+]?\.[0-9]+|[-+]?[0-9]+\.?[0-9]*)[^\s\[\]\(\){}<>/%]+ ]] + +Keywords={ + -- Indirect objects ("N G obj", "endobj") and references ("N G R"). NOTE(review): Group=0 presumably selects the whole match, not the (obj|R) group - confirm + { Id=1, + Regex=[[ [0-9]+\s+[0-9]+\s+(obj|R)|endobj]], + Group=0 + }, + -- Array delimiters [ ] and dictionary delimiters << >> + { Id=2, + Regex=[[ \[|\]|<<|>> ]], + }, + -- Names: a slash followed by one or more characters that are neither whitespace nor delimiters + { Id=3, + Regex=[[ /[^\s\[\]\(\){}<>/%]+ ]], + }, + -- Constants: true, false, null (matched by regex; the List form below is disabled) + { Id=4, + --List={"true", "false", "null"}, + Regex=[[ true|false|null ]], + }, +} -- 2.20.1