/* gri3-wm/src/config_parser.c */

/*
 * vim:ts=4:sw=4:expandtab
 *
 * i3 - an improved dynamic tiling window manager
 * © 2009 Michael Stapelberg and contributors (see also: LICENSE)
 *
 * config_parser.c: hand-written parser to parse configuration directives.
 *
 * See also src/commands_parser.c for rationale on why we use a custom parser.
 *
 * This parser works VERY MUCH like src/commands_parser.c, so read that first.
 * The differences are:
 *
 * 1. config_parser supports the 'number' token type (in addition to 'word' and
 *    'string'). Numbers are referred to using &num (like $str).
 *
 * 2. Criteria are not executed immediately, they are just stored.
 *
 * 3. config_parser recognizes \n and \r as 'end' token, while commands_parser
 *    ignores them.
 *
 * 4. config_parser skips the current line on invalid inputs and follows the
 *    nearest <error> token.
 *
 */
#include "all.h"

#include <fcntl.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#include <xcb/xcb_xrm.h>

/* Macros to make the YAJL API a bit easier to use. Both expand to calls on
 * command_output.json_gen.
 *
 * y(x, ...)  expands to yajl_gen_<x>(command_output.json_gen, ...).
 * ystr(str)  emits 'str' as a string via yajl_gen_string(). The argument is
 *            expanded twice (once for the pointer, once for strlen), so it
 *            must not have side effects. It is parenthesized before the cast
 *            so that expressions such as 'cond ? a : b' are cast as a whole. */
#define y(x, ...) yajl_gen_##x(command_output.json_gen, ##__VA_ARGS__)
#define ystr(str) yajl_gen_string(command_output.json_gen, (unsigned char *)(str), strlen(str))

/* X resource database handle. parse_file() frees it (if set) and resets it to
 * NULL before loading the configuration. */
xcb_xrm_database_t *database = NULL;

#ifndef TEST_PARSER
/* PID slot handed to start_nagbar() by start_config_error_nagbar(). */
pid_t config_error_nagbar_pid = -1;
/* Parsing context which parse_file() passes to
 * check_for_duplicate_bindings(). */
static struct context *context;
#endif

/*******************************************************************************
 * The data structures used for parsing. Essentially the current state and a
 * list of tokens for that state.
 *
 * The GENERATED_* files are generated by generate-commands-parser.pl with the
 * input parser-specs/configs.spec.
 ******************************************************************************/

#include "GENERATED_config_enums.h"

/* One entry of a state's token table: what input to accept and which state to
 * enter once it has been accepted. */
typedef struct token {
    /* What to match. A leading single quote marks a literal, which
     * parse_config compares case-insensitively; otherwise parse_config checks
     * for the names "number", "string", "word", "line", "end" and "error". */
    char *name;
    /* Key under which the matched value is pushed onto the stack, or NULL if
     * the value is not stored. */
    char *identifier;
    /* This might be __CALL */
    cmdp_state next_state;
    union {
        /* Handed to GENERATED_call() when next_state is __CALL. */
        uint16_t call_identifier;
    } extra;
} cmdp_token;

/* The token table of a single parser state; parse_config looks it up as
 * tokens[state]. */
typedef struct tokenptr {
    /* The tokens which are tried, in order, while in this state. */
    cmdp_token *array;
    /* Number of entries in 'array'. */
    int n;
} cmdp_token_ptr;

#include "GENERATED_config_tokens.h"

/*******************************************************************************
 * The (small) stack where identified literals are stored during the parsing
 * of a single command (like $workspace).
 ******************************************************************************/

/* One slot of the literal stack. A slot counts as free while 'identifier' is
 * NULL. */
struct stack_entry {
    /* Just a pointer, not dynamically allocated. */
    const char *identifier;
    /* Selects which member of 'val' is in use. */
    enum {
        STACK_STR = 0,
        STACK_LONG = 1,
    } type;
    union {
        /* Heap-allocated by push_string(), released by clear_stack(). */
        char *str;
        /* Stored by push_long(). */
        long num;
    } val;
};

/* 10 entries should be enough for everybody. The push functions terminate the
 * process when no free slot is left. */
static struct stack_entry stack[10];

/*
 * Stores a copy of 'str' on the stack under 'identifier'.
 *
 * The slots are scanned front to back. The first free slot takes the new
 * entry; if a slot with the same identifier is encountered first, the new
 * value is appended to the existing one, separated by a comma.
 *
 * Terminates the process if neither a free nor a matching slot exists.
 *
 */
static void push_string(const char *identifier, const char *str) {
    const size_t slots = sizeof(stack) / sizeof(stack[0]);

    for (size_t idx = 0; idx < slots; idx++) {
        if (stack[idx].identifier == NULL) {
            /* Free slot: start a new entry. */
            stack[idx].identifier = identifier;
            stack[idx].val.str = sstrdup(str);
            stack[idx].type = STACK_STR;
            return;
        }

        if (strcmp(stack[idx].identifier, identifier) == 0) {
            /* Same identifier again: build "<old>,<new>" and drop the old
             * string. */
            char *old_value = stack[idx].val.str;
            sasprintf(&(stack[idx].val.str), "%s,%s", old_value, str);
            free(old_value);
            return;
        }
    }

    /* Every slot is taken by a different identifier. This points to a bug
     * in the parser or to a specification with more than 10 identified
     * tokens in one command. */
    fprintf(stderr, "BUG: config_parser stack full. This means either a bug "
                    "in the code, or a new command which contains more than "
                    "10 identified tokens.\n");
    exit(EXIT_FAILURE);
}

/*
 * Stores the number 'num' on the stack under 'identifier', using the first
 * free slot. Unlike push_string(), an existing entry with the same identifier
 * is not looked for.
 *
 * Terminates the process if no free slot exists.
 *
 */
static void push_long(const char *identifier, long num) {
    const size_t slots = sizeof(stack) / sizeof(stack[0]);
    size_t idx = 0;

    /* Advance to the first slot which is not in use. */
    while (idx < slots && stack[idx].identifier != NULL)
        idx++;

    if (idx == slots) {
        /* No free slot. This points to a bug in the parser or to a
         * specification with more than 10 identified tokens in one
         * command. */
        fprintf(stderr, "BUG: config_parser stack full. This means either a bug "
                        "in the code, or a new command which contains more than "
                        "10 identified tokens.\n");
        exit(EXIT_FAILURE);
    }

    stack[idx].identifier = identifier;
    stack[idx].val.num = num;
    stack[idx].type = STACK_LONG;
}

/*
 * Returns the string stored under 'identifier', or NULL if there is no such
 * entry. The search stops at the first free slot. The entry's type is not
 * checked.
 *
 */
static const char *get_string(const char *identifier) {
    const size_t slots = sizeof(stack) / sizeof(stack[0]);

    for (size_t idx = 0; idx < slots && stack[idx].identifier != NULL; idx++) {
        if (strcmp(identifier, stack[idx].identifier) == 0)
            return stack[idx].val.str;
    }

    return NULL;
}

/*
 * Returns the number stored under 'identifier', or 0 if there is no such
 * entry. The search stops at the first free slot. The entry's type is not
 * checked.
 *
 */
static long get_long(const char *identifier) {
    const size_t slots = sizeof(stack) / sizeof(stack[0]);
    size_t idx = 0;

    while (idx < slots && stack[idx].identifier != NULL) {
        if (strcmp(identifier, stack[idx].identifier) == 0)
            return stack[idx].val.num;
        idx++;
    }

    return 0;
}

/*
 * Empties all stack slots: strings owned by STACK_STR entries are freed, then
 * the identifier and the value of every slot are reset. The 'type' field is
 * left as it is.
 *
 */
static void clear_stack(void) {
    const size_t slots = sizeof(stack) / sizeof(stack[0]);

    for (size_t idx = 0; idx < slots; idx++) {
        /* Only string entries own heap memory; free(NULL) is a no-op for
         * everything else. */
        char *owned = (stack[idx].type == STACK_STR ? stack[idx].val.str : NULL);
        free(owned);

        stack[idx].identifier = NULL;
        stack[idx].val.str = NULL;
        stack[idx].val.num = 0;
    }
}

/*******************************************************************************
 * The parser itself.
 ******************************************************************************/

/* The state the parser is currently in; selects the token table via
 * tokens[state]. */
static cmdp_state state;
/* Criteria storage which parse_config hands to cfg_criteria_init(). */
static Match current_match;
/* Result of the most recent GENERATED_call(); next_state() reads the state to
 * continue with from it. */
static struct ConfigResultIR subcommand_output;
/* Result of parse_config(); holds the JSON generator and is what
 * parse_config() returns a pointer to. */
static struct ConfigResultIR command_output;

/* A list which contains the states that lead to the current state, e.g.
 * INITIAL, WORKSPACE_LAYOUT.
 * When jumping back to INITIAL, statelist_idx will simply be set to 1
 * (likewise for other states, e.g. MODE or BAR).
 * This list is used to process the nearest error token. */
static cmdp_state statelist[10] = {INITIAL};
/* NB: statelist_idx points to where the next entry will be inserted */
static int statelist_idx = 1;

#include "GENERATED_config_call.h"

/*
 * Performs the state transition which belongs to 'token'.
 *
 * If the token's next state is __CALL, GENERATED_call() is invoked with the
 * token's call identifier and the state to continue with is taken from
 * subcommand_output; the stack is cleared afterwards. The stack is also
 * cleared whenever the parser arrives in INITIAL.
 *
 * The new state is then recorded in statelist: if it is already part of the
 * list, the list is cut back to that entry, otherwise it is appended.
 * Terminates the process if the list has no room left for a new entry.
 *
 */
static void next_state(const cmdp_token *token) {
    cmdp_state target = token->next_state;

    if (token->next_state == __CALL) {
        subcommand_output.json_gen = command_output.json_gen;
        GENERATED_call(token->extra.call_identifier, &subcommand_output);
        target = subcommand_output.next_state;
        clear_stack();
    }

    state = target;
    if (state == INITIAL) {
        clear_stack();
    }

    /* See if we are jumping back to a state in which we were in previously
     * (statelist contains INITIAL) and just move statelist_idx accordingly. */
    for (int i = 0; i < statelist_idx; i++) {
        if (statelist[i] != target)
            continue;
        statelist_idx = i + 1;
        return;
    }

    /* Otherwise, the state is new and we add it to the list. Refuse to write
     * past the end of the array, like the stack does when it is full. */
    if (statelist_idx >= (int)(sizeof(statelist) / sizeof(statelist[0]))) {
        fprintf(stderr, "BUG: config_parser state list full. This means either "
                        "a bug in the code, or a specification which nests "
                        "more states than the state list can hold.\n");
        exit(EXIT_FAILURE);
    }
    statelist[statelist_idx++] = target;
}

/*
 * Scans backwards from 'walk' for a line terminator (\r or \n), never going
 * before 'beginning'.
 *
 * Returns a pointer to the byte after the terminator which was found, or
 * 'beginning' if the scan reached the start of the input. If 'walk' itself
 * points at a terminator, the result is walk + 1.
 *
 */
static const char *start_of_line(const char *walk, const char *beginning) {
    const char *cursor = walk;

    for (; cursor >= beginning; cursor--) {
        if (*cursor == '\n' || *cursor == '\r')
            break;
    }

    return cursor + 1;
}

/*
 * Returns a copy of the text starting at 'start', cut off at the first \n
 * (the \n itself is not part of the result). Without a \n, the copy contains
 * everything up to the end of the string.
 *
 * The caller has to free() the result.
 *
 */
static char *single_line(const char *start) {
    char *copy = sstrdup(start);

    /* strcspn() yields the index of the first \n, or the index of the
     * terminating 0-byte if there is none, so this write is always in
     * bounds. */
    copy[strcspn(copy, "\n")] = '\0';

    return copy;
}

/*
 * Runs the token state machine over the configuration text in 'input'.
 *
 * The whole input is first written to the debug log, one line per message.
 * Then, at every position, the tokens of the current state are tried in table
 * order; the first one which matches advances 'walk' and triggers
 * next_state(). If no token matches, the error is logged together with up to
 * two lines of context before and after, context->has_errors is set, an error
 * map is appended to the JSON reply, the rest of the line is skipped and
 * parsing continues via the nearest <error> token found in statelist.
 *
 * Note that the 'context' parameter shadows the file-level variable of the
 * same name.
 *
 * Returns a pointer to the file-level command_output. Its json_gen is
 * allocated here using yajl_gen_alloc() and is not freed by this function.
 *
 */
struct ConfigResultIR *parse_config(const char *input, struct context *context) {
    /* Dump the entire config file into the debug log. We cannot just use
     * DLOG("%s", input); because one log message must not exceed 4 KiB. */
    const char *dumpwalk = input;
    int linecnt = 1;
    while (*dumpwalk != '\0') {
        char *next_nl = strchr(dumpwalk, '\n');
        if (next_nl != NULL) {
            DLOG("CONFIG(line %3d): %.*s\n", linecnt, (int)(next_nl - dumpwalk), dumpwalk);
            dumpwalk = next_nl + 1;
        } else {
            DLOG("CONFIG(line %3d): %s\n", linecnt, dumpwalk);
            break;
        }
        linecnt++;
    }
    /* Start every run from a clean state machine. */
    state = INITIAL;
    statelist_idx = 1;

    /* A YAJL JSON generator used for formatting replies. */
    command_output.json_gen = yajl_gen_alloc(NULL);

    y(array_open);

    const char *walk = input;
    const size_t len = strlen(input);
    int c;
    const cmdp_token *token;
    bool token_handled;
    /* linecnt was used for the dump above, restart counting for parsing. */
    linecnt = 1;

// TODO: make this testable
#ifndef TEST_PARSER
    cfg_criteria_init(&current_match, &subcommand_output, INITIAL);
#endif

    /* The "<=" operator is intentional: We also handle the terminating 0-byte
     * explicitly by looking for an 'end' token. */
    while ((size_t)(walk - input) <= len) {
        /* Skip whitespace before every token, newlines are relevant since they
         * separate configuration directives. (The check for the 0-byte is
         * redundant: a 0-byte is neither a space nor a tab.) */
        while ((*walk == ' ' || *walk == '\t') && *walk != '\0')
            walk++;

        //printf("remaining input: %s\n", walk);

        /* Try the tokens of the current state in order, first match wins. */
        cmdp_token_ptr *ptr = &(tokens[state]);
        token_handled = false;
        for (c = 0; c < ptr->n; c++) {
            token = &(ptr->array[c]);

            /* A literal. The name carries a leading single quote which is not
             * part of the text to match; the comparison ignores case. */
            if (token->name[0] == '\'') {
                if (strncasecmp(walk, token->name + 1, strlen(token->name) - 1) == 0) {
                    if (token->identifier != NULL)
                        push_string(token->identifier, token->name + 1);
                    walk += strlen(token->name) - 1;
                    next_state(token);
                    token_handled = true;
                    break;
                }
                continue;
            }

            if (strcmp(token->name, "number") == 0) {
                /* Handle numbers. We only accept decimal numbers for now. */
                char *end = NULL;
                errno = 0;
                long int num = strtol(walk, &end, 10);
                /* Out-of-range values and conversion errors do not match. */
                if ((errno == ERANGE && (num == LONG_MIN || num == LONG_MAX)) ||
                    (errno != 0 && num == 0))
                    continue;

                /* No valid numbers found */
                if (end == walk)
                    continue;

                if (token->identifier != NULL)
                    push_long(token->identifier, num);

                /* Set walk to the first non-number character */
                walk = end;
                next_state(token);
                token_handled = true;
                break;
            }

            if (strcmp(token->name, "string") == 0 ||
                strcmp(token->name, "word") == 0) {
                const char *beginning = walk;
                /* Handle quoted strings (or words). */
                if (*walk == '"') {
                    beginning++;
                    walk++;
                    /* A double quote which directly follows a backslash does
                     * not terminate the quoted text. */
                    while (*walk != '\0' && (*walk != '"' || *(walk - 1) == '\\'))
                        walk++;
                } else {
                    /* The first character of the name tells "string" and
                     * "word" apart: an unquoted string runs until the end of
                     * the line. */
                    if (token->name[0] == 's') {
                        while (*walk != '\0' && *walk != '\r' && *walk != '\n')
                            walk++;
                    } else {
                        /* For a word, the delimiters are white space (' ' or
                         * '\t'), closing square bracket (]), comma (,) and
                         * semicolon (;). */
                        while (*walk != ' ' && *walk != '\t' &&
                               *walk != ']' && *walk != ',' &&
                               *walk != ';' && *walk != '\r' &&
                               *walk != '\n' && *walk != '\0')
                            walk++;
                    }
                }
                /* An empty match is not a match: fall through to the
                 * remaining checks for this token. */
                if (walk != beginning) {
                    char *str = scalloc(walk - beginning + 1, 1);
                    /* We copy manually to handle escaping of characters. */
                    int inpos, outpos;
                    for (inpos = 0, outpos = 0;
                         inpos < (walk - beginning);
                         inpos++, outpos++) {
                        /* We only handle escaped double quotes to not break
                         * backwards compatibility with people using \w in
                         * regular expressions etc. */
                        if (beginning[inpos] == '\\' && beginning[inpos + 1] == '"')
                            inpos++;
                        str[outpos] = beginning[inpos];
                    }
                    if (token->identifier)
                        push_string(token->identifier, str);
                    free(str);
                    /* If we are at the end of a quoted string, skip the ending
                     * double quote. */
                    if (*walk == '"')
                        walk++;
                    next_state(token);
                    token_handled = true;
                    break;
                }
            }

            /* "line" always matches: it consumes the rest of the line
             * including the byte which terminates it. */
            if (strcmp(token->name, "line") == 0) {
                while (*walk != '\0' && *walk != '\n' && *walk != '\r')
                    walk++;
                next_state(token);
                token_handled = true;
                linecnt++;
                walk++;
                break;
            }

            if (strcmp(token->name, "end") == 0) {
                //printf("checking for end: *%s*\n", walk);
                if (*walk == '\0' || *walk == '\n' || *walk == '\r') {
                    next_state(token);
                    token_handled = true;
                    /* To make sure we start with an appropriate matching
                     * datastructure for commands which do *not* specify any
                     * criteria, we re-initialize the criteria system after
                     * every command. */
// TODO: make this testable
#ifndef TEST_PARSER
                    cfg_criteria_init(&current_match, &subcommand_output, INITIAL);
#endif
                    linecnt++;
                    /* Step over the terminator. For the 0-byte, this moves
                     * walk past 'len' and thereby ends the outer loop. */
                    walk++;
                    break;
                }
            }
        }

        if (!token_handled) {
            /* Figure out how much memory we will need to fill in the names of
             * all tokens afterwards. */
            int tokenlen = 0;
            for (c = 0; c < ptr->n; c++)
                tokenlen += strlen(ptr->array[c].name) + strlen("'', ");

            /* Build up a decent error message. We include the problem, the
             * full input, and underline the position where the parser
             * currently is. */
            char *errormessage;
            char *possible_tokens = smalloc(tokenlen + 1);
            char *tokenwalk = possible_tokens;
            for (c = 0; c < ptr->n; c++) {
                token = &(ptr->array[c]);
                if (token->name[0] == '\'') {
                    /* A literal is copied to the error message enclosed with
                     * single quotes. */
                    *tokenwalk++ = '\'';
                    strcpy(tokenwalk, token->name + 1);
                    tokenwalk += strlen(token->name + 1);
                    *tokenwalk++ = '\'';
                } else {
                    /* Skip error tokens in error messages, they are used
                     * internally only and might confuse users. */
                    if (strcmp(token->name, "error") == 0)
                        continue;
                    /* Any other token is copied to the error message enclosed
                     * with angle brackets. */
                    *tokenwalk++ = '<';
                    strcpy(tokenwalk, token->name);
                    tokenwalk += strlen(token->name);
                    *tokenwalk++ = '>';
                }
                /* Separate from the following table entry. */
                if (c < (ptr->n - 1)) {
                    *tokenwalk++ = ',';
                    *tokenwalk++ = ' ';
                }
            }
            *tokenwalk = '\0';
            sasprintf(&errormessage, "Expected one of these tokens: %s",
                      possible_tokens);
            free(possible_tokens);

            /* Go back to the beginning of the line */
            const char *error_line = start_of_line(walk, input);

            /* Contains the same amount of characters as 'input' has, but with
             * the unparseable part highlighted using ^ characters. Tabs are
             * kept as tabs in front of the marker. */
            char *position = scalloc(strlen(error_line) + 1, 1);
            const char *copywalk;
            for (copywalk = error_line;
                 *copywalk != '\n' && *copywalk != '\r' && *copywalk != '\0';
                 copywalk++)
                position[(copywalk - error_line)] = (copywalk >= walk ? '^' : (*copywalk == '\t' ? '\t' : ' '));
            position[(copywalk - error_line)] = '\0';

            ELOG("CONFIG: %s\n", errormessage);
            ELOG("CONFIG: (in file %s)\n", context->filename);
            char *error_copy = single_line(error_line);

            /* Print context lines *before* the error, if any. */
            if (linecnt > 1) {
                const char *context_p1_start = start_of_line(error_line - 2, input);
                char *context_p1_line = single_line(context_p1_start);
                if (linecnt > 2) {
                    const char *context_p2_start = start_of_line(context_p1_start - 2, input);
                    char *context_p2_line = single_line(context_p2_start);
                    ELOG("CONFIG: Line %3d: %s\n", linecnt - 2, context_p2_line);
                    free(context_p2_line);
                }
                ELOG("CONFIG: Line %3d: %s\n", linecnt - 1, context_p1_line);
                free(context_p1_line);
            }
            ELOG("CONFIG: Line %3d: %s\n", linecnt, error_copy);
            ELOG("CONFIG:           %s\n", position);
            free(error_copy);
            /* Print context lines *after* the error, if any. */
            for (int i = 0; i < 2; i++) {
                char *error_line_end = strchr(error_line, '\n');
                if (error_line_end != NULL && *(error_line_end + 1) != '\0') {
                    error_line = error_line_end + 1;
                    error_copy = single_line(error_line);
                    ELOG("CONFIG: Line %3d: %s\n", linecnt + i + 1, error_copy);
                    free(error_copy);
                }
            }

            context->has_errors = true;

            /* Format this error message as a JSON reply. */
            y(map_open);
            ystr("success");
            y(bool, false);
            /* We set parse_error to true to distinguish this from other
             * errors. i3-nagbar is spawned upon keypresses only for parser
             * errors. */
            ystr("parse_error");
            y(bool, true);
            ystr("error");
            ystr(errormessage);
            ystr("input");
            ystr(input);
            ystr("errorposition");
            ystr(position);
            y(map_close);

            /* Skip the rest of this line, but continue parsing. walk ends up
             * on the \n or, if there is none, one byte past the terminating
             * 0-byte (which ends the outer loop). */
            while ((size_t)(walk - input) <= len && *walk != '\n')
                walk++;

            free(position);
            free(errormessage);
            clear_stack();

            /* To figure out in which state to go (e.g. MODE or INITIAL),
             * we find the nearest state which contains an <error> token
             * and follow that one. */
            bool error_token_found = false;
            for (int i = statelist_idx - 1; (i >= 0) && !error_token_found; i--) {
                cmdp_token_ptr *errptr = &(tokens[statelist[i]]);
                for (int j = 0; j < errptr->n; j++) {
                    if (strcmp(errptr->array[j].name, "error") != 0)
                        continue;
                    next_state(&(errptr->array[j]));
                    error_token_found = true;
                    break;
                }
            }

            assert(error_token_found);
        }
    }

    y(array_close);

    return &command_output;
}

/**
 * Launch nagbar to indicate errors in the configuration file.
 *
 * The bar always offers an "edit config" button which opens 'configpath' and
 * reloads afterwards. If errorfilename is set, a second button shows that
 * file in a pager. 'has_errors' selects between the error and the warning
 * wording/type.
 */
void start_config_error_nagbar(const char *configpath, bool has_errors) {
    char *editaction = NULL;
    char *pageraction = NULL;

    sasprintf(&editaction, "i3-sensible-editor \"%s\" && i3-msg reload\n", configpath);
    /* Only build the pager command if there is a file to show: passing a NULL
     * pointer for %s is undefined behavior. */
    if (errorfilename != NULL)
        sasprintf(&pageraction, "i3-sensible-pager \"%s\"\n", errorfilename);

    char *argv[] = {
        NULL, /* will be replaced by the executable path */
        "-f",
        (config.font.pattern ? config.font.pattern : "fixed"),
        "-t",
        (has_errors ? "error" : "warning"),
        "-m",
        (has_errors ? "You have an error in your i3 config file!" : "Your config is outdated. Please fix the warnings to make sure everything works."),
        "-b",
        "edit config",
        editaction,
        /* Without an error file, this NULL terminates the argument list
         * early and the two entries below are never looked at. */
        (errorfilename ? "-b" : NULL),
        (has_errors ? "show errors" : "show warnings"),
        pageraction,
        NULL};

    start_nagbar(&config_error_nagbar_pid, argv);
    free(editaction);
    free(pageraction);
}

/*
 * Loads the configuration file 'f': drops a previously loaded X resource
 * database, hands the file to scm_c_primitive_load(), then extracts the
 * workspace names from the bindings and checks the bindings for duplicates.
 *
 * Always returns true.
 *
 */
bool parse_file(const char *f) {
    if (database != NULL) {
        xcb_xrm_database_free(database);
        /* Explicitly set the database to NULL again in case the config gets reloaded. */
        database = NULL;
    }

    // A possibility is to call a function that must be defined inside
    // the config
    scm_c_primitive_load(f);
    extract_workspace_names_from_bindings();

    /* Nothing else in this file assigns the file-level context, so it would
     * be NULL here. Provide a zeroed context for the duration of the check so
     * that the callee has a valid object to record its findings in. */
    context = scalloc(1, sizeof(*context));
    context->filename = f;
    check_for_duplicate_bindings(context);
    free(context);
    context = NULL;

    return true;
}