[Date Index][Thread Index]
[Date Prev][Date Next][Thread Prev][Thread Next]

Re: [wml] [LONG][RFC] WML 2.0.0, quotes and tags



On Sat, 20 May 2000, Tobias Oetiker wrote:

> Today you sent me mail regarding [wml] [LONG][RFC] WML 2.0.0, quotes and tags:
> 
> *> 4. What must be changed?
> *> =-=-=-=-=-=-=-=-=-=-=-=-
> *> 
> *> I agree with your bug reports, it is a very bad idea to have different
> *> behaviours depending whether tags are defined or not.
> *> 
> *> Of course it is easy to consider that unknown tags are simple tags, this
> *> will fix
> *>     <ifeq 0 0 <img src="foo.png" alt="">>
> *> 
> *> But it breaks
> *>     <: print "<img src=\"" . $file . "\" alt=\"" . $alt ."\">"; :>
> *> 
> *> And even worse, consider
> *>     <: print <<EOT;
> *>     Hey, how to finish this pseudo EOT tag?
> *>     EOT
> *>     :>
> *> 
> *> In fact i focused my view on these last topics, to ease mixing ePerl and
> *> mp4h, but i did not realize that this syntax was confusing.
> *> Now that the problem is clearly (?) expressed, it should be solved
> *> quickly ;-)
> 
> the problem in my eyes is that you try to anticipate what will be
> done in a later pass with the input ... and this will always be a
> problem ... (I give that it is possible to anticipate some problems
> and behave in a way to make them simpler ...) 
>
> As for above problems I guess the behaviour of mp4h should be to
> only parse VALID structures and leave everything which is invalid
> alone ... For bad cases like the one above it is up to the author
> to make sure things work (this can be confusing ...)

But what do you call a valid structure?
I stated in WML 2.0.0 that unknown tags must not be parsed; this is why
   <ifeq 0 0 <img src=foo.html>>
is broken.

The patch below changes this behaviour: an unknown tag is treated as a
single tag.

As you suggested, ePerl code must now be fixed to protect '<', any ideas
are welcome.

--- wml_backend/p2_mp4h/src/macro.c	2000/05/06 22:13:09	1.27
+++ wml_backend/p2_mp4h/src/macro.c	2000/05/21 22:34:17
@@ -34,8 +34,8 @@
 #include "mp4h.h"
 
 static void expand_macro __P ((symbol *, read_type));
+static void unexpand_macro __P ((char *, read_type));
 static void expand_token __P ((struct obstack *, read_type, token_type, token_data *));
-static void collect_body __P ((symbol *, struct obstack *, struct obstack *));
 
 /* Current recursion level in expand_macro ().  */
 int expansion_level = 0;
@@ -102,14 +102,14 @@
          If another character is found, this string is not a
          macro, it could be ePerl delimiters.  */
 
-      if (! IS_ALPHA (*text))
+      if (! IS_ALPHA (*text) && *text != '/')
         {
           shipout_text (obs, TOKEN_DATA_TEXT (td), strlen (TOKEN_DATA_TEXT (td)));
           break;
         }
       sym = lookup_symbol (text, SYMBOL_LOOKUP);
       if (sym == NULL || SYMBOL_TYPE (sym) == TOKEN_VOID)
-        shipout_text (obs, TOKEN_DATA_TEXT (td), strlen (TOKEN_DATA_TEXT (td)));
+        unexpand_macro (text, expansion);
       else
         expand_macro (sym, expansion);
       break;
@@ -245,8 +245,8 @@
 `-------------------------------------------------------------------------*/
 
 static boolean
-collect_arguments (symbol *sym, read_type expansion, struct obstack *argptr,
-                   struct obstack *arguments)
+collect_arguments (char *symbol_name, read_type expansion,
+                   struct obstack *argptr, struct obstack *arguments)
 {
   int ch;
   token_data td;
@@ -256,7 +256,7 @@
   char last_char = ' ';
 
   TOKEN_DATA_TYPE (&td) = TOKEN_TEXT;
-  TOKEN_DATA_TEXT (&td) = SYMBOL_NAME (sym);
+  TOKEN_DATA_TEXT (&td) = symbol_name;
   tdp = (token_data *) obstack_copy (arguments, (voidstar) &td, sizeof (td));
   obstack_grow (argptr, (voidstar) &tdp, sizeof (tdp));
 
@@ -275,7 +275,7 @@
             {
               MP4HERROR ((EXIT_FAILURE, 0,
                 _("ERROR:%s:%d: EOF when reading argument of the `%s' tag"),
-                     CURRENT_FILE_LINE, SYMBOL_NAME(sym)));
+                     CURRENT_FILE_LINE, symbol_name));
             }
           tdp = (token_data *)
             obstack_copy (arguments, (voidstar) &td, sizeof (td));
@@ -293,7 +293,7 @@
     {
       MP4HERROR ((warning_status, 0,
         _("INTERNAL ERROR: Bad tag expression in `%s'"),
-             CURRENT_FILE_LINE, SYMBOL_NAME (sym)));
+             CURRENT_FILE_LINE, symbol_name));
     }
   return (last_char == '/');
 }
@@ -387,7 +387,7 @@
             {
               newsym = lookup_symbol (text, SYMBOL_LOOKUP);
               if (newsym == NULL || SYMBOL_TYPE (newsym) == TOKEN_VOID)
-                shipout_text (bodyptr, TOKEN_DATA_TEXT (&td), strlen (TOKEN_DATA_TEXT (&td)));
+                unexpand_macro (text, READ_ATTR_ASIS);
               else
                 expand_macro (newsym, READ_ATTR_ASIS);
             }
@@ -488,7 +488,8 @@
   else
     attr_expansion = READ_ATTRIBUTE;
 
-  slash = collect_arguments (sym, attr_expansion, &argptr, &arguments);
+  slash = collect_arguments (SYMBOL_NAME (sym), attr_expansion, &argptr,
+                             &arguments);
   argc = obstack_object_size (&argptr) / sizeof (token_data *);
 
   if (SYMBOL_CONTAINER (sym))
@@ -570,4 +571,56 @@
 
   if (expansion_level == 0)
     clear_tag_attr ();
+}
+
+/*-------------------------------------------------------------------------.
+| This macro reads attributes without expanding macro.  It is useful to    |
+| print unknown tags.                                                      |
+`-------------------------------------------------------------------------*/
+
+static void
+unexpand_macro (char *name, read_type expansion)
+{
+  struct obstack arguments, argptr;
+  token_data **argv;
+  int argc, i;
+  struct obstack *obs_expansion;
+  const char *expanded;
+  read_type attr_expansion;
+  char *symbol_name;
+
+  symbol_name = xstrdup (name);
+
+  obstack_init (&arguments);
+  obstack_init (&argptr);
+
+  if (expansion == READ_ATTR_ASIS || expansion == READ_BODY
+          || expansion == READ_ATTR_VERB)
+    attr_expansion = READ_ATTR_ASIS;
+  else
+    attr_expansion = READ_ATTRIBUTE;
+
+  (void) collect_arguments (symbol_name, attr_expansion, &argptr,
+                             &arguments);
+  argc = obstack_object_size (&argptr) / sizeof (token_data *);
+  argv = (token_data **) obstack_finish (&argptr);
+
+  obs_expansion = push_string_init ();
+  obstack_1grow (obs_expansion, '<');
+  shipout_string (obs_expansion, symbol_name, 0);
+
+  for (i = 1; i < argc; i++)
+    {
+      obstack_1grow (obs_expansion, ' ');
+      shipout_string (obs_expansion, TOKEN_DATA_TEXT (argv[i]), 0);
+    }
+  obstack_1grow (obs_expansion, '>');
+
+  /*  Input must not be rescanned, so expansion is set to READ_BODY.  */
+  expanded = push_string_finish (READ_BODY);
+
+  obstack_free (&arguments, NULL);
+  obstack_free (&argptr, NULL);
+
+  xfree (symbol_name);
 }

-- 
Denis Barbier
WML Maintainer

______________________________________________________________________
Website META Language (WML)                www.engelschall.com/sw/wml/
Official Support Mailing List                   sw-wml@engelschall.com
Automated List Manager                       majordomo@engelschall.com