;;; w3m-rss.el --- RSS functions

;; Copyright (C) 2004, 2005 TSUCHIYA Masatoshi

;; Authors: TSUCHIYA Masatoshi
;; Keywords: w3m, WWW, hypermedia

;; This file is a part of emacs-w3m.

;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with this program; if not, you can either send email to this
;; program's maintainer or write to: The Free Software Foundation,
;; Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA.

;;; Commentary:

;; w3m-rss.el provides RSS-related functions for emacs-w3m.  For more
;; detail about emacs-w3m, see:
;;
;;    http://emacs-w3m.namazu.org/

;;; Acknowledgment:

;; I referred to functions in `sb-rss.el' to implement this module.
;; Thanks to Koichiro Ohba and NAKAJIMA Mikio.

;;; Code:

(eval-when-compile (require 'cl))

(autoload 'xml-parse-region "xml")

(eval-and-compile
  (autoload 'timezone-parse-date "timezone")
  (autoload 'timezone-parse-time "timezone")
  (autoload 'timezone-zone-to-minute "timezone"))

(eval-when-compile
  ;; Avoid warning for Emacs 19 and XEmacs.
  (unless (fboundp 'match-string-no-properties)
    (autoload 'match-string-no-properties "poe"))
  ;; Avoid warning for Emacs 19.
  (unless (fboundp 'split-string)
    (autoload 'split-string "poe")))

(defun w3m-rss-parse-date-string (date)
  "Decode DATE string written in the ISO 8601 format or the RFC822 style.
Return a list of numbers which conforms to the Emacs internal format,
or nil if DATE is nil or cannot be recognized.
Valid types in the ISO 8601 format include:

    Year:
       YYYY (eg 1997)
    Year and month:
       YYYY-MM (eg 1997-07)
    Complete date:
       YYYY-MM-DD (eg 1997-07-16)
    Complete date plus hours and minutes:
       YYYY-MM-DDThh:mmTZD (eg 1997-07-16T19:20+01:00)
    Complete date plus hours, minutes and seconds:
       YYYY-MM-DDThh:mm:ssTZD (eg 1997-07-16T19:20:30+01:00)
    Complete date plus hours, minutes, seconds and a decimal fraction
    of a second
       YYYY-MM-DDThh:mm:ss.sTZD (eg 1997-07-16T19:20:30.45+01:00)

where:
  YYYY = four-digit year
  MM   = two-digit month (01=January, etc.)
  DD   = two-digit day of month (01 through 31)
  hh   = two digits of hour (00 through 23) (am/pm NOT allowed)
  mm   = two digits of minute (00 through 59)
  ss   = two digits of second (00 through 59)
  s    = one or more digits representing a decimal fraction of a second
  TZD  = time zone designator (Z or +hh:mm or -hh:mm)

A decimal fraction of a second is accepted but discarded.  When no
time zone designator is given, the time is taken to be in UTC.

For more detail about ISO 8601 date format, see
<URL:http://www.w3.org/TR/NOTE-datetime>.

In addition to the above, it also supports the date format in the
RFC822 style which RSS 2.0 allows.  Valid types are the same as ones
which are supported by the `timezone-parse-date' function (which see).
Dates in that style which are earlier than 1970 yield nil, and ones
without a time zone are interpreted in the local time zone."
  (cond ((not date) nil)
	;; RFC822 style: a number surrounded by spaces (day or year).
	((string-match " [0-9]+ " date)
	 (let* ((vector (timezone-parse-date date))
		(year (string-to-number (aref vector 0)))
		(zone (aref vector 4))
		time)
	   (when (>= year 1970)
	     (setq time (timezone-parse-time (aref vector 3)))
	     (encode-time
	      (string-to-number (aref time 2))
	      (string-to-number (aref time 1))
	      (string-to-number (aref time 0))
	      (string-to-number (aref vector 2))
	      (string-to-number (aref vector 1))
	      year
	      ;; `timezone-parse-date' yields the zone as a string such
	      ;; as "+0900" or "JST", but a string given to
	      ;; `encode-time' is taken as a TZ rule, not as an offset.
	      ;; Convert it to seconds east of UTC; keep nil (meaning
	      ;; local time) when DATE carries no zone at all.
	      (and zone
		   (* 60 (timezone-zone-to-minute zone)))))))
	;; ISO 8601 (W3C-DTF) style.  Match groups:
	;;  1 year, 3 month, 5 day, 7 hour, 8 minute, 10 second,
	;;  12 zone sign, 13 zone hours, 14 zone minutes.
	((string-match "\
\\([0-9][0-9][0-9][0-9]\\)\\(-\\([0-9][0-9]\\)\\)?\\(-\\([0-9][0-9]\\)\\)?\
T?\\(\\([0-9][0-9]\\):\\([0-9][0-9]\\)\\(:\\([.0-9]+\\)\\)?\\)?\
\\(\\([-+]\\)\\([0-9][0-9]\\):?\\([0-9][0-9]\\)\\|Z\\)?"
		       date)
	 (labels ((substr (n default)
			  ;; Return match group N as an integer, or
			  ;; DEFAULT if the group did not match.  The
			  ;; value is truncated because the seconds
			  ;; field may have a fraction and not every
			  ;; `encode-time' accepts a float.
			  (if (match-beginning n)
			      (truncate
			       (string-to-number
				(match-string-no-properties n date)))
			    default)))
	   (encode-time
	    (substr 10 0) ;; seconds
	    (substr 8 0)  ;; minute
	    (substr 7 0)  ;; hour
	    (substr 5 1)  ;; day
	    (substr 3 1)  ;; month
	    (substr 1 0)  ;; year
	    ;; Zone offset in seconds east of UTC; "Z" or no designator
	    ;; means UTC.
	    (if (match-beginning 12)
		(let ((offset (+ (* 3600 (substr 13 0))
				 (* 60 (substr 14 0)))))
		  (if (string= (match-string-no-properties 12 date) "-")
		      (- offset)
		    offset))
	      0))))))

(defun w3m-rss-find-el (tag data)
  "Return a list of all elements named TAG found in DATA.
DATA is a list of parsed XML nodes; each node is expected to look like
\(NAME ATTRIBUTES CHILDREN...), presumably as produced by
`xml-parse-region'.  Nested elements are searched recursively and the
matches are returned in document order.  Return nil if DATA is not a
list or nothing matches.
Careful with this on large documents!"
  (let (found)
    (when (listp data)
      (dolist (bit data)
	;; Skip text nodes (strings) and anything else that is not a
	;; cons with a non-nil car.
	(when (car-safe bit)
	  (when (equal tag (car bit))
	    (setq found (nconc found (list bit))))
	  (setq found
		(nconc found
		       (w3m-rss-find-el
			tag
			;; If the third element is itself a list of
			;; nodes (or nil), descend into it; otherwise
			;; the children are everything after the
			;; attributes.  The `stringp' test is required
			;; because `car-safe' of a string is nil, which
			;; also satisfies `listp'.
			(if (and (listp (car-safe (caddr bit)))
				 (not (stringp (caddr bit))))
			    (caddr bit)
			  (cddr bit))))))))
    found))

(defun w3m-rss-get-namespace-prefix (el uri)
  "Return the namespace prefix which EL declares for URI.
EL is a list whose first element is a parsed element; the attribute
list of that element is searched for an attribute whose value is the
string URI.  If that attribute has a name of the form xmlns:PREFIX,
return \"PREFIX:\".  If the name has no colon (the default namespace),
return an empty string.  Return nil if no attribute has URI as its
value.
See http://feeds.archive.org/validator/docs/howto/declare_namespaces.html
for more RSS namespaces."
  (let* ((prefix (car (rassoc uri (cadar el))))
	 (nslist (when prefix
		   (split-string (symbol-name prefix) ":")))
	 (count (length nslist))
	 (ns (cond ((= count 1)	; no prefix given
		    "")
		   ((= count 2)	; extract prefix
		    (cadr nslist)))))
    (if (and ns (not (equal ns "")))
	(concat ns ":")
      ns)))

(provide 'w3m-rss)

;;; w3m-rss.el ends here