;;; -*- Mode: Lisp; Package: html-support -*-
;;; HTML入力支援
;;; by S.Sugiyama(2001/12/23)
#|
Copyright (C) 2001 Shinichi Sugiyama. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1 Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer as the first lines
of this file unmodified.
2 Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY Shinichi Sugiyama ``AS IS'' AND ANY EXPRESS
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL Shinichi Sugiyama BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
|#
;;; author
;;; Shinichi Sugiyama
;;; Last updated: Sat, 30 Mar 2002 21:10:14 +0900
;;; description
;;; xyzzyでHTMLを入力するときに補助的に使うと便利なライブ
;;; ラリーです。minor-modeとして作成してみました。xyzzy標準のHTMLモードまた
;;; はYMTZさんのHTML+モードなどと一緒にお使いください。といっても特にそれら
;;; の存在を前提とはしていませんので、お好きな場面でどうぞ。
;;; 現状提供する機能は次の3つです。
;;; ・HTML文字実体参照とUnicodeキャラクターの相互変換
;;; ・特殊文字の入力
;;; ・tidyを利用した構文チェック、xhtmlへの変換
;;; 詳細な説明は http://sugi.pobox.ne.jp/xyzzy/library.html を参照してください。
;;; history
;;; Sat, 30 Mar 2002 21:10:14 +0900
;;; ・latin文字の入力は iso8859-1.l という標準添付のライブラリで可能なことにいまさらながら気がついたので、こちらの同等な機能は削除。
;;; Thu, 31 Jan 2002 01:26:56 +0900
;;; ・バグフィックス。HH; という形の文字実体参照が正しく認識されていなかった。
;;; 2001-12-24
;;; ・とりあえず公開
(provide 'html-support)
(defpackage html-support
(:use "lisp" "editor")
(:nicknames "ht"))
(in-package 'html-support)
(export '(execute-tidy *tidy-command* *tidy-options*
entity-to-character character-to-entity
entity-to-character-region character-to-entity-region
html-support-mode))
(defvar *tidy-command* "tidy.exe")
(defvar *tidy-options*
"-m --char-encoding utf8 --output-xhtml true --clean true")
(defun execute-tidy ()
(interactive)
(let ((infile (make-temp-file-name))
(buffer (selected-buffer))
status)
(write-file infile t nil *encoding-utf8n*)
(setq status (execute-shell-command
(concatenate 'string *tidy-command* " " *tidy-options* " " infile)
nil "*Shell output*"))
(when (< status 2)
(save-window-excursion
(set-buffer buffer)
(kill-region (point-min) (point-max))
(insert-file infile)
(character-to-entity nil)
(goto-char (point-min))
(insert-xml-decl)
))
(delete-file infile)))
;;;
;;; 特殊文字入力
(defvar *entity-to-character-alist*
'(("nbsp" . 160)
("iexcl" . 161)
("cent" . 162)
("pound" . 163)
("curren" . 164)
("yen" . 165)
("brvbar" . 166)
("sect" . 167)
("uml" . 168)
("copy" . 169)
("ordf" . 170)
("laquo" . 171)
("not" . 172)
("shy" . 173)
("reg" . 174)
("macr" . 175)
("deg" . 176)
("plusmn" . 177)
("sup2" . 178)
("sup3" . 179)
("acute" . 180)
("micro" . 181)
("para" . 182)
("middot" . 183)
("cedil" . 184)
("sup1" . 185)
("ordm" . 186)
("raquo" . 187)
("frac14" . 188)
("frac12" . 189)
("frac34" . 190)
("iquest" . 191)
("Agrave" . 192)
("Aacute" . 193)
("Acirc" . 194)
("Atilde" . 195)
("Auml" . 196)
("Aring" . 197)
("AElig" . 198)
("Ccedil" . 199)
("Egrave" . 200)
("Eacute" . 201)
("Ecirc" . 202)
("Euml" . 203)
("Igrave" . 204)
("Iacute" . 205)
("Icirc" . 206)
("Iuml" . 207)
("ETH" . 208)
("Ntilde" . 209)
("Ograve" . 210)
("Oacute" . 211)
("Ocirc" . 212)
("Otilde" . 213)
("Ouml" . 214)
("times" . 215)
("Oslash" . 216)
("Ugrave" . 217)
("Uacute" . 218)
("Ucirc" . 219)
("Uuml" . 220)
("Yacute" . 221)
("THORN" . 222)
("szlig" . 223)
("agrave" . 224)
("aacute" . 225)
("acirc" . 226)
("atilde" . 227)
("auml" . 228)
("aring" . 229)
("aelig" . 230)
("ccedil" . 231)
("egrave" . 232)
("eacute" . 233)
("ecirc" . 234)
("euml" . 235)
("igrave" . 236)
("iacute" . 237)
("icirc" . 238)
("iuml" . 239)
("eth" . 240)
("ntilde" . 241)
("ograve" . 242)
("oacute" . 243)
("ocirc" . 244)
("otilde" . 245)
("ouml" . 246)
("divide" . 247)
("oslash" . 248)
("ugrave" . 249)
("uacute" . 250)
("ucirc" . 251)
("uuml" . 252)
("yacute" . 253)
("thorn" . 254)
("yuml" . 255)
("fnof" . 402)
("Alpha" . 913)
("Beta" . 914)
("Gamma" . 915)
("Delta" . 916)
("Epsilon" . 917)
("Zeta" . 918)
("Eta" . 919)
("Theta" . 920)
("Iota" . 921)
("Kappa" . 922)
("Lambda" . 923)
("Mu" . 924)
("Nu" . 925)
("Xi" . 926)
("Omicron" . 927)
("Pi" . 928)
("Rho" . 929)
("Sigma" . 931)
("Tau" . 932)
("Upsilon" . 933)
("Phi" . 934)
("Chi" . 935)
("Psi" . 936)
("Omega" . 937)
("alpha" . 945)
("beta" . 946)
("gamma" . 947)
("delta" . 948)
("epsilon" . 949)
("zeta" . 950)
("eta" . 951)
("theta" . 952)
("iota" . 953)
("kappa" . 954)
("lambda" . 955)
("mu" . 956)
("nu" . 957)
("xi" . 958)
("omicron" . 959)
("pi" . 960)
("rho" . 961)
("sigmaf" . 962)
("sigma" . 963)
("tau" . 964)
("upsilon" . 965)
("phi" . 966)
("chi" . 967)
("psi" . 968)
("omega" . 969)
("thetasym" . 977)
("upsih" . 978)
("piv" . 982)
("bull" . 8226)
("hellip" . 8230)
("prime" . 8242)
("Prime" . 8243)
("oline" . 8254)
("frasl" . 8260)
("weierp" . 8472)
("image" . 8465)
("real" . 8476)
("trade" . 8482)
("alefsym" . 8501)
("larr" . 8592)
("uarr" . 8593)
("rarr" . 8594)
("darr" . 8595)
("harr" . 8596)
("crarr" . 8629)
("lArr" . 8656)
("uArr" . 8657)
("rArr" . 8658)
("dArr" . 8659)
("hArr" . 8660)
("forall" . 8704)
("part" . 8706)
("exist" . 8707)
("empty" . 8709)
("nabla" . 8711)
("isin" . 8712)
("notin" . 8713)
("ni" . 8715)
("prod" . 8719)
("sum" . 8721)
("minus" . 8722)
("lowast" . 8727)
("radic" . 8730)
("prop" . 8733)
("infin" . 8734)
("ang" . 8736)
("and" . 8743)
("or" . 8744)
("cap" . 8745)
("cup" . 8746)
("int" . 8747)
("there4" . 8756)
("sim" . 8764)
("cong" . 8773)
("asymp" . 8776)
("ne" . 8800)
("equiv" . 8801)
("le" . 8804)
("ge" . 8805)
("sub" . 8834)
("sup" . 8835)
("nsub" . 8836)
("sube" . 8838)
("supe" . 8839)
("oplus" . 8853)
("otimes" . 8855)
("perp" . 8869)
("sdot" . 8901)
("lceil" . 8968)
("rceil" . 8969)
("lfloor" . 8970)
("rfloor" . 8971)
("lang" . 9001)
("rang" . 9002)
("loz" . 9674)
("spades" . 9824)
("clubs" . 9827)
("hearts" . 9829)
("diams" . 9830)
("quot" . 34)
("amp" . 38)
("lt" . 60)
("gt" . 62)
("OElig" . 338)
("oelig" . 339)
("Scaron" . 352)
("scaron" . 353)
("Yuml" . 376)
("circ" . 710)
("tilde" . 732)
("ensp" . 8194)
("emsp" . 8195)
("thinsp" . 8201)
("zwnj" . 8204)
("zwj" . 8205)
("lrm" . 8206)
("rlm" . 8207)
("ndash" . 8211)
("mdash" . 8212)
("lsquo" . 8216)
("rsquo" . 8217)
("sbquo" . 8218)
("ldquo" . 8220)
("rdquo" . 8221)
("bdquo" . 8222)
("dagger" . 8224)
("Dagger" . 8225)
("permil" . 8240)
("lsaquo" . 8249)
("rsaquo" . 8250)
("euro" . 8364)))
(defvar *html-entity-init-p* nil)
(defvar *entity-to-character-hash* nil)
(defvar *character-to-entity-hash* nil)
(defun html-entity-init ()
(unless *html-entity-init-p*
(setq *html-entity-init-p* t)
(setq *entity-to-character-hash* (make-hash-table :test #'equal))
(setq *character-to-entity-hash* (make-hash-table))
(dolist (elm *entity-to-character-alist*)
(setf (gethash (car elm) *entity-to-character-hash*) (cdr elm))
(setf (gethash (cdr elm) *character-to-entity-hash*) (car elm)))))
(defun entity-to-character-region (flag mark point)
(interactive "P\nr")
(save-restriction
(narrow-to-region mark point)
(entity-to-character flag)))
(defun character-to-entity-region (flag mark point)
(interactive "P\nr")
(save-restriction
(narrow-to-region mark point)
(character-to-entity flag)))
(defun entity-to-character (flag)
(interactive "P")
(html-entity-init)
(save-excursion
(goto-char (point-min))
(while (scan-buffer "&\\(\\w+\\|#[xX]?[0-9A-Fa-f]+\\);" :regexp t)
(let* ((key (match-string 1))
(radix 10)
(value (if (eq (char key 0) #\#)
(if (or (eq (char key 1) #\x) (eq (char key 1) #\X))
(parse-integer (substring key 2) :radix 16)
(parse-integer (substring key 1)))
(gethash key *entity-to-character-hash*))))
(if (and value
(or flag (not (member key '("quot" "amp" "lt" "gt") :test 'equal))))
(progn
(delete-region (match-beginning 0)(match-end 0))
(insert (unicode-char value)))
(goto-char (1+ (point))))))))
(defun character-to-entity(flag)
(interactive "P")
(html-entity-init)
(save-excursion
(goto-char (point-min))
(while (< (point) (point-max))
(let* ((key (char-unicode (following-char)))
(charset (iso-char-charset (following-char)))
(value (gethash key *character-to-entity-hash*)))
(or value
(member charset '(:us-ascii :jisx0201-kana :jisx0208))
(null key)
(setq value (format nil "#~D" key)))
(if (and value
(or flag (not (member key '(34 38 60 62)))))
(progn
(delete-char 1)
(insert #\& value #\;))
(goto-char (1+ (point))))))))
(defvar *html-support-mode-map*
(let ((keymap (make-sparse-keymap)))
(define-key keymap '(#\C-c #\e) #'character-to-entity)
(define-key keymap '(#\C-c #\c) #'entity-to-character)
(define-key keymap '(#\C-c #\t) #'execute-tidy)
keymap))
(defvar-local html-support-mode nil)
(defun html-support-mode (&optional (arg nil sv))
(interactive "p")
(ed::toggle-mode 'html-support-mode arg sv)
(if html-support-mode
(set-minor-mode-map *html-support-mode-map*)
(unset-minor-mode-map *html-support-mode-map*))
(update-mode-line t))
(pushnew '(html-support-mode . "HTML Support") *minor-mode-alist* :key #'car)
(defun insert-xml-decl (&optional (version "1.0") encoding)
(unless encoding
(let ((string (char-encoding-display-name (buffer-fileio-encoding)) ))
(string-match "(\\([^)]+\\))" string)
(setq encoding (match-string 1))))
(insert (format nil "\n"
version encoding)))
t