|
@@ -0,0 +1,145 @@
|
|
|
+/*
|
|
|
+ * Transliterate UTF-8 Russian input into all possible Latin
|
|
|
+ * spellings; see the hardcoded table below.
|
|
|
+ * Data is taken from stdin and is returned to stdout.
|
|
|
+ *
|
|
|
+ * Licensed under GPL-3.
|
|
|
+ */
|
|
|
+
|
|
|
+#include <stdio.h>
|
|
|
+#include <stdlib.h>
|
|
|
+#include <string.h>
|
|
|
+
|
|
|
#define ALPHA_CNT 33
#define TRANSL_CNT 6

// Conversion table, one row per lowercase Russian letter.
// Column 0 is the UTF-8 encoded letter (matched as a 2-byte sequence),
// column 1 is its primary Latin spelling; both must always be non-NULL.
// Columns 2 and up hold alternative spellings. A row with fewer than
// TRANSL_CNT entries ends at the first NULL.
const char *const ct[ALPHA_CNT][TRANSL_CNT]={
	{"а", "a", NULL},
	{"б", "b", NULL},
	{"в", "v", NULL},
	{"г", "g", "h", NULL},
	{"д", "d", NULL},
	{"е", "e", "je", "ye", NULL},
	{"ё", "e", "jo", "yo", NULL},
	{"ж", "j", "z", NULL},
	{"з", "z", NULL},
	{"и", "i", NULL},
	{"й", "j", "i", NULL},
	{"к", "k", NULL},
	{"л", "l", NULL},
	{"м", "m", NULL},
	{"н", "n", NULL},
	{"о", "o", NULL},
	{"п", "p", NULL},
	{"р", "r", "p", NULL},
	{"с", "s", "c", NULL},
	{"т", "t", NULL},
	{"у", "u", "y", NULL},
	{"ф", "f", NULL},
	{"х", "h", "x", NULL},
	{"ц", "ts", "s", NULL},
	{"ч", "ch", "4", NULL},
	{"ш", "sh", NULL},
	{"щ", "sh", "sh'", "sh`", NULL},
	{"ъ", "b", "'b", "`b", "", NULL},
	{"ы", "y", "bl", "b1", NULL},
	{"ь", "b", "", NULL},
	{"э", "e", "3", NULL},
	{"ю", "ju", "yu", "u", NULL},
	{"я", "ya", "ja", NULL},
};

// Report an allocation failure and terminate the program.
static void die_oom(void)
{
	perror("convline");
	exit(EXIT_FAILURE);
}

// Return the capacity, reached by repeatedly doubling cap, that can hold
// `used` bytes already written plus `extra` new bytes plus a terminating NUL.
// Returns cap unchanged when it is already large enough.
static size_t grown_capacity(size_t cap, size_t used, size_t extra)
{
	const size_t need = used + extra + 1;

	if (cap == 0) {
		cap = 1;
	}
	while (cap < need) {
		if (cap > (size_t)-1 / 2) {
			return need; // doubling would overflow size_t
		}
		cap *= 2;
	}
	return cap;
}

// Make sure buf (capacity *cap) has room for `extra` bytes plus a NUL after
// offset `used`, reallocating if necessary. Updates *cap and returns the
// (possibly moved) buffer. Terminates the program when memory runs out.
static char *reserve(char *buf, size_t *cap, size_t used, size_t extra)
{
	const size_t want = grown_capacity(*cap, used, extra);

	if (buf == NULL || want != *cap) {
		char *tmp = realloc(buf, want);
		if (tmp == NULL) {
			free(buf);
			die_oom();
		}
		buf = tmp;
		*cap = want;
	}
	return buf;
}

// Return the index of the table row whose key equals the two bytes at s,
// or -1 when s does not start with a known letter.
static int find_letter(const char *s)
{
	for (int i = 0; i < ALPHA_CNT; i++) {
		if (strncmp(s, ct[i][0], 2) == 0) {
			return i;
		}
	}
	return -1;
}

// Print every transliteration of one input line, one variant per output line.
//
// in        - input text; scanning stops at '\0', '\n' or '\r'
// out       - heap buffer holding the output built so far; this function
//             takes ownership of it and frees it before returning
// out_start - number of bytes of `out` already filled (they need not be
//             NUL-terminated)
// out_len   - allocated size of `out` in bytes
//
// The input is scanned forward only. For every letter that has alternative
// spellings, the output prefix is copied into a fresh buffer and the rest of
// the line is processed recursively for that alternative (similar to
// fork + exec); afterwards this call continues with the primary spelling.
// Variants using alternatives are therefore printed before the variant
// using the primary spelling. Bytes not found in the table are copied as is.
void convline(char *in, char *out, size_t out_start, size_t out_len)
{
	while (*in != '\0' && *in != '\n' && *in != '\r') {
		const int row = find_letter(in);

		if (row < 0) {
			// non-convertible single byte: just copy it
			out = reserve(out, &out_len, out_start, 1);
			out[out_start++] = *in++;
			continue;
		}

		// branch off one recursive expansion per alternative spelling;
		// the bound is tested first so a full row is never overrun
		for (int j = 2; j < TRANSL_CNT && ct[row][j] != NULL; j++) {
			const char *alt = ct[row][j];
			const size_t alt_len = strlen(alt);
			const size_t branch_len = grown_capacity(out_len, out_start, alt_len);
			char *branch = malloc(branch_len);

			if (branch == NULL) {
				free(out);
				die_oom();
			}
			// copy only the bytes written so far: `out` is not a
			// NUL-terminated string at this point
			if (out_start > 0) {
				memcpy(branch, out, out_start);
			}
			memcpy(branch + out_start, alt, alt_len + 1);
			// the callee prints and frees the branch buffer
			convline(in + 2, branch, out_start + alt_len, branch_len);
		}

		// continue this call with the primary spelling
		const char *primary = ct[row][1];
		const size_t tr_len = strlen(primary);

		out = reserve(out, &out_len, out_start, tr_len);
		memcpy(out + out_start, primary, tr_len + 1);
		out_start += tr_len;
		in += 2; // every table key is two bytes long
	}

	// processing finished, output the result
	out = reserve(out, &out_len, out_start, 0);
	out[out_start] = '\0';
	puts(out);
	free(out);
}
|
|
|
+
|
|
|
// Read lines from stdin and print all transliterations of each line to
// stdout. Returns EXIT_SUCCESS on end of input, EXIT_FAILURE when memory
// cannot be allocated or reading stdin fails.
//
// NOTE: getline() is a POSIX function; when building in a strict ISO C mode
// it has to be enabled with _POSIX_C_SOURCE >= 200809L.
int main(void)
{
	char *line = NULL;         // input line buffer, managed by getline()
	size_t len = 0;            // allocated size of `line`
	int status = EXIT_SUCCESS;

	while (getline(&line, &len, stdin) != -1) {
		// Precreate the output buffer with a sane size: the capacity of
		// the input buffer. convline() grows it when needed and frees it
		// after printing, so it must not be freed here.
		char *out = malloc(len);

		if (out == NULL) {
			perror("malloc");
			status = EXIT_FAILURE;
			break;
		}

		// recursive line conversion
		convline(line, out, 0, len);
	}

	// getline() returns -1 both on end of file and on a read error
	if (ferror(stdin)) {
		perror("getline");
		status = EXIT_FAILURE;
	}

	free(line);
	return status;
}
|