Beefy Boxes and Bandwidth Generously Provided by pair Networks
more useful options
 
PerlMonks  

ctags: better Perl support

by dmitri (Priest)
on Apr 01, 2005 at 21:29 UTC ( [id://444294]=sourcecode: print w/replies, xml ) Need Help??
Category: Programming
Author/Contact Info Dmitri Tikhonov dtikhonov@yahoo.com
Description: This patch is against ctags 5.5.4. It introduces several very neat features for ctags's perl parser.

test.pl should show the improvements of the patched parser. To generate tags for an exception class, ctags should be run like this:

ctags --language-specific:perl=exception=My::Exception::Class

There can be more than one --language-specific:perl=exception options.

Short description of perl functionality:

  • reintroduced package tags (why are they 'useless?' (see NEWS in the distribution tarball))
  • working label tags (original 5.5.4 misidentifies ABC::xyz() function call as ABC label)
  • support for user-defined exception definitions tags
  • "sub xyz {" (in the enclosed test.pl) generates "xyz", "ABC::xyz", and "ABC->xyz" subroutine tags instead of a single "xyz" tag

In order to support language-specific options, I modified options parser by adding processLangSpecificOption() function. Now each parser can specify its own option parser callback. The change is backward-compatible, as the parser_opts is set to NULL by default.

This is an update to an older patch against 5.5.2: 326112.

Test program:
use My::Exception::Class (
    'My::Exception::BadInput' => {
        isa => 'My::Exception',
    },
);

package ABC;

use constant PACKAGE_CONSTANT => 'abc';

sub xyz {
    print "hello\n";
}

package main;
use ABC;

ABC::xyz();

if (1) {
    goto XYZ;
}

XYZ:
    print "OK";
    My::Exception::BadInput->throw(message => ABC::PACKAGE_CONSTANT);

The patch:

Index: options.c
===================================================================
RCS file: /cvsroot/ThirdParty/ctags/options.c,v
retrieving revision 1.1.1.2
retrieving revision 1.3
diff -u -r1.1.1.2 -r1.3
--- options.c    31 Mar 2005 22:12:49 -0000    1.1.1.2
+++ options.c    31 Mar 2005 22:17:59 -0000    1.3
@@ -1433,6 +1433,29 @@
     return found;
 }
 
+static boolean processLangSpecificOption (
+        const char *const option, const char *const parameter)
+{
+    boolean handled = FALSE;
+
+    if (option == strstr(option, "language-specific:")) {
+        const char *language = option + sizeof("language-specific:") 
+- 1;
+        parserSpecificOption parser = getLanguageSpecificOptionParser
+(language);
+
+        if (!parser)
+            error(FATAL, "No option parser could be found for languag
+e '%s'",
+                    language);
+
+        if (0 != parser(parameter))
+            error(FATAL, "Apparently, %s option parser does not like 
+"
+                  "this argument: '%s'", language, parameter);
+
+        handled = TRUE;
+    }
+
+    return handled;
+}
+
 static boolean getBooleanOption (
     const char *const option, const char *const parameter)
 {
@@ -1488,6 +1511,8 @@
     ;
     else if (processRegexOption (option, parameter))
     ;
+    else if (processLangSpecificOption (option, parameter))
+        ;
 #ifndef RECURSE_SUPPORTED
     else if (strcmp (option, "recurse") == 0)
     error (WARNING, "%s option not supported on this host", option);
Index: parse.c
===================================================================
RCS file: /cvsroot/ThirdParty/ctags/parse.c,v
retrieving revision 1.1.1.2
retrieving revision 1.3
diff -u -r1.1.1.2 -r1.3
--- parse.c    31 Mar 2005 22:12:49 -0000    1.1.1.2
+++ parse.c    31 Mar 2005 22:17:59 -0000    1.3
@@ -64,6 +64,7 @@
 {
     parserDefinition* result = xCalloc (1, parserDefinition);
     result->name = eStrdup (name);
+    result->parser_opts = NULL;     /* Backward-compatible */
     return result;
 }
 
@@ -506,6 +507,21 @@
     printKinds (language, FALSE);
 }
 
+extern parserSpecificOption getLanguageSpecificOptionParser (
+        const char *language)
+{
+    int i;
+
+    if (!language)
+        return NULL;
+    
+    for (i = 0; i < LanguageCount; ++i)
+        if (0 == strcasecmp(language, LanguageTable[i]->name))
+            return LanguageTable[i]->parser_opts;
+
+    return NULL;
+}
+
 static void printMaps (const langType language)
 {
     const parserDefinition* lang;
Index: parse.h
===================================================================
RCS file: /cvsroot/ThirdParty/ctags/parse.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2
--- parse.h    24 Jan 2005 17:51:42 -0000    1.1.1.1
+++ parse.h    10 Feb 2005 23:34:12 -0000    1.2
@@ -35,6 +35,8 @@
 typedef void (*simpleParser) (void);
 typedef boolean (*rescanParser) (const unsigned int passCount);
 typedef void (*parserInitialize) (langType language);
+/* Return 0 on success, -1 on failure */
+typedef int (*parserSpecificOption) (const char *option);
 
 typedef struct sKindOption {
     boolean enabled;            /* are tags for kind enabled? */
@@ -54,6 +56,7 @@
     simpleParser parser;        /* simple parser (common case) */
     rescanParser parser2;        /* rescanning parser (unusual case) 
+*/
     boolean regex;            /* is this a regex parser? */
+    parserSpecificOption parser_opts;   /* parser-specific options pa
+rser */
 
     /* used internally */
     unsigned int id;            /* id assigned to language */
@@ -122,6 +125,7 @@
 extern void printRegexKinds (const langType language, boolean indent)
+;
 extern void freeRegexResources (void);
 extern void checkRegex (void);
+extern parserSpecificOption getLanguageSpecificOptionParser (const ch
+ar *language);
 
 #endif    /* _PARSE_H */
 
Index: perl.c
===================================================================
RCS file: /cvsroot/ThirdParty/ctags/perl.c,v
retrieving revision 1.1.1.2
retrieving revision 1.7
diff -u -r1.1.1.2 -r1.7
--- perl.c    31 Mar 2005 22:12:49 -0000    1.1.1.2
+++ perl.c    11 Feb 2005 00:13:01 -0000    1.7
@@ -1,5 +1,5 @@
 /*
-*   $Id: perl.c,v 1.13 2004/03/13 21:51:07 darren Exp $
+*   $Id: perl.c,v 1.8 2003/04/01 04:55:27 darren Exp $
 *
 *   Copyright (c) 2000-2003, Darren Hiebert
 *
@@ -15,69 +15,189 @@
 */
 #include "general.h"    /* must always come first */
 
+#include <ctype.h>
 #include <string.h>
 
-#include "options.h"
 #include "read.h"
-#include "routines.h"
 #include "vstring.h"
 
 /*
 *   DATA DEFINITIONS
 */
 typedef enum {
-    K_NONE = -1,
+    K_SUBROUTINE,
+    K_PACKAGE,
     K_CONSTANT,
     K_LABEL,
-    K_SUBROUTINE
+    K_EXCEPTION,
 } perlKind;
 
 static kindOption PerlKinds [] = {
+    { TRUE, 's', "subroutine", "subroutines" },
+    { TRUE, 'p', "package",    "packages" },
     { TRUE, 'c', "constant",   "constants" },
     { TRUE, 'l', "label",      "labels" },
-    { TRUE, 's', "subroutine", "subroutines" }
+    { TRUE, 'e', "exception",  "exceptions" },
 };
 
-/*
-*   FUNCTION DEFINITIONS
-*/
+static struct {
+    char str[1024];
+    int set;
+} package = { "", 0 };
+
+struct exception_node {
+    const char *name;
+    struct exception_node *next;
+};
+
+typedef struct exception_node exception_node_t;
+
+static exception_node_t *exceptions = NULL;
+
+#define NOLABEL_STR "{(;="
+#define LABEL_STR ":"
+
 
-static boolean isIdentifier1 (int c)
+#define SKIP_WHITESPACE(cp) \
+    do { \
+        while (*cp && isspace(*cp)) \
+            ++cp; \
+    } while (!*cp && NULL != (cp = fileReadLine()))
+
+
+const unsigned char *
+create_exception_class_tags (const unsigned char *cp)
 {
-    return (boolean) (isalpha (c) || c == '_');
+    vString *name;
+    const unsigned char *s;
+
+    do {
+        SKIP_WHITESPACE(cp);
+        if (!cp)
+            return NULL;
+
+START_EXC:
+        /* First, try to detect end of exception list */
+        if (')' == *cp) {
+            SKIP_WHITESPACE(cp);
+            if (cp && ';' == *cp)
+                return cp + 1;
+            else
+                return cp;      /* Should be NULL */
+        }
+
+        /* Second, find exception name */
+        if ('\'' == *cp || '"' == *cp)
+            ++cp;
+        s = cp;
+
+        while (*cp && (isalnum(*cp) || ':' == *cp))
+            ++cp;
+
+        name = vStringNew();
+        vStringNCatS(name, s, cp - s);
+        makeSimpleTag(name, PerlKinds, K_EXCEPTION);
+        vStringDelete(name);
+
+        /* Third, skip exception value */
+        do {
+            while (*cp && '}' != *cp)
+                ++cp;
+            if ('}' == *cp) {
+                ++cp;
+                SKIP_WHITESPACE(cp);
+                if (!cp)
+                    return NULL;
+                if (',' == *cp) {
+                    ++cp;
+                    break;
+                }
+                else if (')' == *cp)
+                    goto START_EXC;
+            } else if ('\0' == *cp) {
+                cp = fileReadLine();
+            }
+        } while (cp);
+    } while (1);
+
+    return cp;      /* Should never reach here... */
 }
 
-static boolean isIdentifier (int c)
+
+static int
+push_exception (const char *name)
 {
-    return (boolean) (isalnum (c) || c == '_');
+    exception_node_t *node, *ptr;
+
+    if (!name)
+        return -1;
+
+    node = malloc(sizeof(exception_node_t));
+    node->name = name;
+    node->next = NULL;
+
+    if (exceptions) {
+        for (ptr = exceptions; ptr->next; ptr = ptr->next)
+            ;
+        ptr->next = node;
+    } else {
+        exceptions = node;
+    }
+
+    return 0;
 }
 
-static boolean isPodWord (const char *word)
+
+static int
+match_exception (const unsigned char *cp)
 {
-    boolean result = FALSE;
-    if (isalpha (*word))
-    {
-    const char *const pods [] = {
-        "head1", "head2", "head3", "head4", "over", "item", "back",
-        "pod", "begin", "end", "for"
-    };
-    const size_t count = sizeof (pods) / sizeof (pods [0]);
-    const char *white = strpbrk (word, " \t");
-    const size_t len = (white!=NULL) ? (size_t)(white-word) : strlen 
+(word);
-    char *const id = (char*) eMalloc (len + 1);
-    size_t i;
-    strncpy (id, word, len);
-    id [len] = '\0';
-    for (i = 0  ;  i < count  &&  ! result  ;  ++i)
-    {
-        if (strcmp (id, pods [i]) == 0)
-        result = TRUE;
-    }
-    eFree (id);
+    char *name;
+    int len = 0;
+    exception_node_t *node;
+
+    for (name = (char *) cp; *name; ++name) {
+        if (!(isalnum(*name) || ':' == *name))
+            break;
+        ++len;
     }
-    return result;
+
+    if (!len)
+        return 0;
+
+    name = malloc(len + 1);
+    strncpy(name, cp, len);
+    name[len] = '\0';
+
+    for (node = exceptions; node; node = node->next) {
+        if (0 == strcmp(node->name, name)) {
+            free(name);
+            return len;
+        }
+    }
+
+    free(name);
+    return 0;
 }
 
+
+static int
+parsePerlOption (const char *option)
+{
+    if (!option)
+        return -1;
+
+    if (option == strstr(option, "exception=")) {
+        const char *name = option + sizeof("exception=") - 1;
+        if (push_exception(name))
+            return -1;
+    } else {
+        return -1;
+    }
+
+    return 0;
+}
+
+
 /* Algorithm adapted from from GNU etags.
  * Perl support by Bart Robinson <lomew@cs.utah.edu>
  * Perl sub names: look for /^ [ \t\n]sub [ \t\n]+ [^ \t\n{ (]+/
@@ -85,16 +205,16 @@
 static void findPerlTags (void)
 {
     vString *name = vStringNew ();
-    vString *package = NULL;
     boolean skipPodDoc = FALSE;
     const unsigned char *line;
+    perlKind kind;
+
+    package.set = 0;
 
     while ((line = fileReadLine ()) != NULL)
     {
-    boolean spaceRequired = FALSE;
-    boolean qualified = FALSE;
-    const unsigned char *cp = line;
-    perlKind kind = K_NONE;
+        const unsigned char *cp = line, *aux;
+        int sub = 0, use = 0, lbl = 0;  /* This will save up strcmps 
+later */
 
     if (skipPodDoc)
     {
@@ -104,7 +224,8 @@
     }
     else if (line [0] == '=')
     {
-        skipPodDoc = isPodWord ((const char*)line + 1);
+        skipPodDoc = (boolean) (strncmp (
+            (const char*) line + 1, "cut", (size_t) 3) != 0);
         continue;
     }
     else if (strcmp ((const char*) line, "__DATA__") == 0)
@@ -117,86 +238,102 @@
     while (isspace (*cp))
         cp++;
 
-    if (strncmp((const char*) cp, "sub", (size_t) 3) == 0)
+        /* Try to find a label here */
+        for (aux = cp; *aux && isalnum(*aux); ++aux)
+            ;
+        if (aux > cp) {
+            while (isspace(*aux))
+                ++aux;
+            if (':' == *aux && (aux[1] ? ':' != aux[1] : 1))
+                lbl = 1;
+        }
+
+        if (lbl                                                      
+        ||
+            (strncmp((const char*) cp, "sub", (size_t) 3) == 0 && (su
+b = 1)) ||
+            (strncmp((const char*) cp, "use", (size_t) 3) == 0 && (us
+e = 1)) ||
+        strncmp((const char*) cp, "package", (size_t) 7) == 0)
     {
-        cp += 3;
-        kind = K_SUBROUTINE;
-        spaceRequired = TRUE;
-        qualified = TRUE;
-    }
-    else if (strncmp((const char*) cp, "use", (size_t) 3) == 0)
-    {
-        cp += 3;
-        if (!isspace(*cp))
-        continue;
-        while (*cp && isspace (*cp))
-        ++cp;
-        if (strncmp((const char*) cp, "constant", (size_t) 8) != 0)
-        continue;
-        cp += 8;
-        kind = K_CONSTANT;
-        spaceRequired = TRUE;
-        qualified = TRUE;
-    }
-    else if (strncmp((const char*) cp, "package", (size_t) 7) == 0)
-    {
-        cp += 7;
-        if (package == NULL)
-        package = vStringNew ();
-        else
-        vStringClear (package);
-        while (isspace (*cp))
-        cp++;
-        while ((int) *cp != ';'  &&  !isspace ((int) *cp))
+            if (sub)
         {
-        vStringPut (package, (int) *cp);
-        cp++;
-        }
-        vStringCatS (package, "::");
-    }
-    else
-    {
-        if (isIdentifier1 (*cp))
-        {
-        const unsigned char *p = cp;
-        while (isIdentifier (*p))
-            ++p;
-        if ((int) *p == ':')
-            kind = K_LABEL;
+            cp += 3;
+        kind = K_SUBROUTINE;
+            } else if (use) {
+                size_t exc_len;
+
+                cp += 3;
+                if (!isspace(*cp))
+                    continue;
+                while (*cp && isspace(*cp))
+                    ++cp;
+                if (!strncmp((const char*) cp, "constant", (size_t) 8
+)) {
+                    cp += 8;
+                    kind = K_CONSTANT;
+                } else if ((exc_len = match_exception(cp)) > 0) {
+                    cp += exc_len;
+                    SKIP_WHITESPACE(cp);
+                    if (!cp)
+                        break;  /* EOF */
+                    if ('(' != *cp)
+                        continue;
+
+                    cp = create_exception_class_tags(cp + 1);
+                    continue;
+                } else {
+                    continue;
+                }
+            } else if (lbl) {
+                kind = K_LABEL;
+        } else {
+            cp += 7;
+        kind = K_PACKAGE;
         }
-    }
-    if (kind != K_NONE)
-    {
-        if (spaceRequired && !isspace (*cp))
+
+            /* This check is only performed if not a label */
+            if (!(lbl || isspace(*cp))) /* woops, not followed by a s
+pace */
             continue;
 
         while (isspace (*cp))
         cp++;
-        while (isIdentifier (*cp))
+        while (! isspace ((int) *cp) && *cp != '\0' &&
+                   strchr ((lbl ? LABEL_STR : NOLABEL_STR), (int) *cp
+) == NULL)
         {
         vStringPut (name, (int) *cp);
         cp++;
         }
         vStringTerminate (name);
-        if (vStringLength (name) > 0)
-        {
+
+            if (vStringLength (name) > 0) {
         makeSimpleTag (name, PerlKinds, kind);
-        if (Option.include.qualifiedTags && qualified &&
-            package != NULL  && vStringLength (package) > 0)
-        {
-            vString *const qualifiedName = vStringNew ();
-            vStringCopy (qualifiedName, package);
-            vStringCat (qualifiedName, name);
-            makeSimpleTag (qualifiedName, PerlKinds, kind);
-            vStringDelete (qualifiedName);
-        }
-        }
+                if (K_PACKAGE == kind &&
+                    vStringLength(name) < sizeof(package.str))
+                {
+                    strcpy(package.str, vStringValue(name));
+                    package.set = 1;
+                } else if ((K_SUBROUTINE == kind || K_CONSTANT == kin
+d)
+                           && package.set)
+                {
+                    /* Make full subroutine tag, e.g.  Net::NAD::new(
+) */
+                    vString *fname = vStringNew();
+                    vStringCatS(fname, package.str);
+                    vStringCatS(fname, "::");
+                    vStringCatS(fname, vStringValue(name));
+                    makeSimpleTag(fname, PerlKinds, kind);
+                    vStringDelete(fname);
+
+                    /* Make method tag, e.g.  Net::NAD->new() */
+                    fname = vStringNew();
+                    vStringCatS(fname, package.str);
+                    vStringCatS(fname, "->");
+                    vStringCatS(fname, vStringValue(name));
+                    makeSimpleTag(fname, PerlKinds, kind);
+                    vStringDelete(fname);
+                }
+            }
+
         vStringClear (name);
     }
     }
     vStringDelete (name);
-    if (package != NULL)
-    vStringDelete (package);
 }
 
 extern parserDefinition* PerlParser (void)
@@ -207,6 +344,7 @@
     def->kindCount  = KIND_COUNT (PerlKinds);
     def->extensions = extensions;
     def->parser     = findPerlTags;
+    def->parser_opts= parsePerlOption;
     return def;
 }

Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Node Status?
node history
Node Type: sourcecode [id://444294]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others imbibing at the Monastery: (3)
As of 2024-03-19 08:10 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found