Date: February 17, 2025
Writing a syntax highlighter
I have my own tool to convert Markdown text to plain HTML blog pages,
which I am very proud of. The only thing it was lacking for my use case was a
syntax highlighter for the code that I share, which I have been planning to implement for months.
Implementation
The current implementation does a simple regex-style replace of each keyword, making use of the <font> tag in HTML.
I have only added C support for now, since it's the only language I have touched for months now.
There are some flaws with this implementation, which you will be able to see below; they can be fixed by
handling edge cases (a lot of edge cases) and by implementing a tiny lexer.
Code and example 😀:
/**
 * C programming language
 *
 * Each entry pairs the text to look for with the colour placed in the
 * <font color="..."> tag that wraps it.
 *
 * Notes for maintainers:
 *  - The highlighter scans this table from the top and uses the FIRST entry
 *    that is a prefix of the input, so the text of an entry must appear only
 *    once; a later duplicate can never be selected.
 *  - A trailing space inside the text is part of the match: "int " only
 *    matches "int" followed by a space.
 **/
static struct keyword c_keywords[] = {
  /* Preprocessor directives */
  { "#include", "#E91E63" },
  { "#define ", "#E91E63" },

  /* Control flow */
  { "for ", "#D84315" },
  { "while ", "#D84315" },
  { "do ", "#D84315" },
  { "break", "#D84315" },
  { "if ", "#D84315" },
  { "else ", "#D84315" },
  { "switch ", "#D84315" },
  { "continue", "#D84315" },
  { "return ", "#D84315" },

  /* Built-in types and type modifiers */
  { "int ", "#6A1B9A" },
  { "char ", "#6A1B9A" },
  { "float ", "#6A1B9A" },
  { "double ", "#6A1B9A" },
  { "long ", "#6A1B9A" },
  { "short ", "#6A1B9A" },
  { "unsigned ", "#6A1B9A" },
  { "signed ", "#6A1B9A" },
  { "void ", "#6A1B9A" },

  /* Aggregate and enumeration introducers */
  { "struct ", "#1565C0" },
  { "union ", "#1565C0" },
  { "enum ", "#1565C0" },

  /* Miscellaneous keywords */
  { "sizeof", "#D84315" },
  { "typedef ", "#D84315" },
};
/*
 * Append LEN bytes from SRC to the growable buffer *BUF.
 *
 * *SIZE is the current capacity of *BUF and *COUNT the number of bytes
 * already used.  The buffer is enlarged (by doubling) whenever the new data
 * plus one spare byte for a terminating NUL would not fit.
 *
 * Returns 0 on success.  Returns -1 if the buffer cannot be enlarged; in that
 * case *BUF is left untouched and is still owned by the caller.
 */
static int
highlight_append (char **buf, size_t *size, size_t *count,
                  const char *src, size_t len)
{
  size_t needed;

  /* Refuse requests whose total size would wrap around size_t.  */
  if (len >= (size_t) -1 - *count)
    return -1;

  /* +1 keeps room for the NUL the caller writes when it is done.  */
  needed = *count + len + 1;

  if (needed > *size)
    {
      size_t new_size = *size;
      char *tmp;

      while (new_size < needed)
        {
          if (new_size > (size_t) -1 / 2)
            return -1;
          new_size *= 2;
        }

      /* Keep the old pointer until realloc is known to have succeeded.  */
      tmp = realloc (*buf, new_size);
      if (tmp == NULL)
        return -1;

      *buf = tmp;
      *size = new_size;
    }

  memcpy (*buf + *count, src, len);
  *count += len;
  return 0;
}

/*
 * Produce an HTML-highlighted copy of the source text in CODEBLK.
 *
 * The input is scanned left to right:
 *  - whitespace is copied through unchanged;
 *  - if the text at the current position starts with one of the N_KEYWORDS
 *    entries of KEYWORDS (first matching entry wins), that keyword is emitted
 *    as <font color="COLOR">KEYWORD</font> and the scan skips past it;
 *  - otherwise the character is looked up with get_char_index(); if it has an
 *    entry in `chars` the replacement string from that entry is emitted,
 *    else the character is copied as-is.
 *
 * Returns a newly allocated NUL-terminated string that the caller must
 * free(), or NULL if CODEBLK is NULL or memory could not be allocated.
 */
static char *
highlight_keywords (char *codeblk,
                    struct keyword keywords[],
                    int n_keywords)
{
  size_t size = 1000;   /* initial capacity; grown on demand */
  size_t count = 0;     /* bytes written so far */
  char *highlighted;
  const char *ptr;

  if (codeblk == NULL)
    return NULL;

  highlighted = malloc (size);
  if (highlighted == NULL)
    return NULL;

  ptr = codeblk;
  while (*ptr != '\0')
    {
      int match;

      /* <ctype.h> functions require a value representable as unsigned char. */
      if (isspace ((unsigned char) *ptr))
        {
          if (highlight_append (&highlighted, &size, &count, ptr, 1) != 0)
            goto fail;
          ptr++;
          continue;
        }

      /* Find the first keyword that is a prefix of the remaining input.  */
      for (match = 0; match < n_keywords; match++)
        {
          const char *keyword = keywords[match].str;
          size_t keyword_len = strlen (keyword);

          /* An empty keyword would match everywhere without consuming any
             input, so it is never treated as a match.  */
          if (keyword_len != 0 && strncmp (ptr, keyword, keyword_len) == 0)
            break;
        }

      if (match < n_keywords)
        {
          const char *parts[5] = {
            "<font color=\"", NULL, "\">",
            NULL,
            "</font>"
          };
          size_t k;

          parts[1] = keywords[match].color;
          parts[3] = keywords[match].str;

          for (k = 0; k < sizeof parts / sizeof parts[0]; k++)
            {
              if (highlight_append (&highlighted, &size, &count,
                                    parts[k], strlen (parts[k])) != 0)
                goto fail;
            }

          ptr += strlen (keywords[match].str);
        }
      else /* Not a keyword */
        {
          unsigned int index = get_char_index (*ptr);

          if (index != N_CHARS)
            {
              /* Character has a replacement string in the `chars` table.  */
              const char *replacement = chars[index].str;

              if (highlight_append (&highlighted, &size, &count,
                                    replacement, strlen (replacement)) != 0)
                goto fail;
            }
          else
            {
              if (highlight_append (&highlighted, &size, &count, ptr, 1) != 0)
                goto fail;
            }
          ptr++;
        }
    }

  /* highlight_append always leaves one spare byte for this terminator.  */
  highlighted[count] = '\0';
  return highlighted;

fail:
  free (highlighted);
  return NULL;
}
TODO
- Highlight comments (because they are for reading)
- Highlight numbers (binary/hex/float/real)
- Handle precedence
In short: implement a tiny lexer