Code Review Asked by TajyMany on October 27, 2021
I’m working on a bot that can use a wordlist to play hangman optimally as fast as possible. With the current implementation, the simulateGame
function, which plays a whole game against the bot, takes ~500-700 microseconds to execute for the words I’ve tested, like "fatherhood", "esoteric", "joyousnesses", etc.
Wordlist can be found here, and the SIMDStarterKit.h
header can be found here.
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <strings.h>
#include <sys/time.h>
#include "SIMDStarterKit.h"
typedef struct {
char **buffer;
int length;
int capacity;
} StringArray;
StringArray newStringArray() {
StringArray array;
array.buffer = malloc(sizeof(char **));
array.length = 0;
array.capacity = 1;
return array;
}
void appendToStringArray(StringArray *array, char *string) {
if (array->length == array->capacity) {
int newCapacity = array->capacity * 2;
array->buffer = realloc(array->buffer, sizeof(char **) * newCapacity);
array->capacity = newCapacity;
}
array->buffer[array->length++] = string;
}
typedef struct {
StringArray *arrays;
int *lengths;
} IndexedWordlist;
IndexedWordlist indexWordlist(StringArray wordlist) {
int minLength = INT_MAX;
int maxLength = INT_MIN;
for (int i = 0; i < wordlist.length; i++) {
int len = (int) strlen(wordlist.buffer[i]);
if (len > maxLength)
maxLength = len;
else if (len < minLength)
minLength = len;
}
int lists = (maxLength - minLength) + 1;
StringArray *wordlists = malloc(sizeof(StringArray) * lists);
for (int i = 0; i < lists; i++)
wordlists[i] = newStringArray();
for (int i = 0; i < wordlist.length; i++) {
int len = (int) strlen(wordlist.buffer[i]);
int lenIdx = len - minLength;
appendToStringArray(wordlists + lenIdx, wordlist.buffer[i]);
}
IndexedWordlist indexedWordlist;
indexedWordlist.arrays = wordlists;
indexedWordlist.lengths = malloc(sizeof(int) * lists);
for (int i = 0; i < lists; i++) {
indexedWordlist.lengths[i] = i + minLength;
}
return indexedWordlist;
}
char mostLikelyCharacter(char *query, char *noCount, char *forbidden, StringArray wordlist) {
int len = (int) strlen(query);
int *globalCounts = aligned_alloc(MEMORY_ALIGNMENT, 128 * sizeof(int));
int *localCounts = aligned_alloc(MEMORY_ALIGNMENT, 128 * sizeof(int));
long *largeLocalCounts = (long *) localCounts;
for (int i = 0; i < 128; i++) {
globalCounts[i] = 0;
localCounts[i] = 0;
}
for (int i = 0; i < wordlist.length; i++) {
char *string = wordlist.buffer[i];
for (int j = 0; j < len; j++) {
char predCharacter = string[j];
char realCharacter = query[j];
if (realCharacter == '_') {
if (forbidden[predCharacter] == 1)
goto forbiddenWord;
} else {
if (predCharacter != realCharacter)
goto forbiddenWord;
}
if (noCount[predCharacter] == 1)
goto skipCount;
localCounts[predCharacter] += 1;
skipCount:;
}
for (int j = 0; j < 128; j += VECTOR_SIZE) {
SIMDi global = Load(&globalCounts[j]);
SIMDi local = Load(&localCounts[j]);
SIMDi added = Add(global, local);
Store(globalCounts + j, added);
}
forbiddenWord:
for (int j = 0; j < 128 / 2; j++)
largeLocalCounts[j] = 0;
}
char bestCharacter = '';
int bestCount = 0;
for (int i = 0; i < 128; i++) {
if (globalCounts[i] > bestCount) {
bestCount = globalCounts[i];
bestCharacter = (char) i;
}
}
free(globalCounts);
free(localCounts);
return bestCharacter;
}
char *fgetsCopy(char *src) {
int len = (int) strlen(src);
char *newBuffer = malloc(sizeof(char) * len);
memcpy(newBuffer, src, len - 1);
newBuffer[len - 1] = '';
return newBuffer;
}
StringArray loadWordlist() {
FILE* file = fopen("/Users/tanmaybakshi/wordlist.txt", "r");
char line[256];
StringArray words = newStringArray();
while (fgets(line, sizeof(line), file)) {
appendToStringArray(&words, fgetsCopy(line));
}
return words;
}
char stringsEqual(char *a, char *b) {
for (int i = 0; i < strlen(a); i++)
if (a[i] != b[i])
return 0;
return 1;
}
void simulateGame(char *word, IndexedWordlist indexedWordlist) {
int i = 0;
StringArray wordlist;
while (1) {
if (indexedWordlist.lengths[i] == strlen(word)) {
wordlist = indexedWordlist.arrays[i];
break;
}
i++;
}
char *forbidden = calloc(128, sizeof(char));
char *noCount = calloc(128, sizeof(char));
char *query = malloc(sizeof(char) * strlen(word) + 1);
for (int i = 0; i < strlen(word); i++)
query[i] = '_';
query[strlen(word)] = '';
while (stringsEqual(query, word) == 0) {
char result = mostLikelyCharacter(query, noCount, forbidden, wordlist);
char correct = 0;
for (int i = 0; i < strlen(word); i++) {
if (word[i] == result) {
correct = 1;
query[i] = result;
}
}
if (correct == 0) {
printf("Incorrect response! (%.1s)n", &result);
forbidden[result] = 1;
} else {
printf("Correct response! (%.1s)n", &result);
printf("%sn", query);
noCount[result] = 1;
}
}
free(forbidden);
free(noCount);
free(query);
}
long long timems() {
struct timeval tv;
gettimeofday(&tv, NULL);
return (tv.tv_sec) * 1000 + (tv.tv_usec) / 1000;
}
int main() {
StringArray wordlist = loadWordlist();
IndexedWordlist indexedWordlist = indexWordlist(wordlist);
long long start = timems();
for (int i = 0; i < 5000; i++)
simulateGame("esoteric", indexedWordlist);
long long end = timems();
printf("Time: %lldn", end - start);
for (int i = 0; i < wordlist.length; i++) {
free(wordlist.buffer[i]);
}
free(wordlist.buffer);
free(indexedWordlist.lengths);
return 0;
}
The SIMD code has type errors.
The problem is currently a bunch of floats are read, assigned to a SIMDi
anyway, added as floats (and remember, these were integers, reinterpreted as floats), assigned to a SIMDi
again, and then stored as floats (into an array of integers). The type warnings are not just noise, it's also actually wrong: integers are being reinterpreted as floats and have floating point addition applied to them. That sort of works for certain ranges of integers that after reinterpretation correspond to subnormal floats (though on some processors adding subnormals is very slow, for example Intel Atom and AMD Bulldozer), so this can fly under the radar for a while, until it can't .. and anyway, even if the right result comes out, this is still an unnecessary level of disregard for types.
"SIMDStarterKit.h" apparently does not have integer loads.. that's a bit odd. Actually I would question why this header is used at all. The official intrinsics are admittedly gross-looking (or to be generous, they're an acquired taste), but at least they are standard. Load
could do just about anything, who knows? Whereas what _mm256_load_ps
(or preferably _mm256_load_si256
, there are no floats in this program) does is right on the tin.
Keeping "SIMDStarterKit.h", the cast functions can be used to remove a couple of warnings, and some manual pointer casting needs to happen, for example:
for (int j = 0; j < 128; j += VECTOR_SIZE) {
SIMDi global = CastToInt(Load((float*)&globalCounts[j]));
SIMDi local = CastToInt(Load((float*)&localCounts[j]));
SIMDi added = Addi(global, local);
Store((float*)&globalCounts[j], CastToFloat(added));
}
With that, and also #include <string.h>
(not the same as strings.h
), the code can compile without warnings, and the SIMD addition does the right thing.
By the way, writing this with loop with explicit SIMD is not really necessary, it gets autovectorized anyway.
Answered by harold on October 27, 2021
There is no need to implement stringEqual
yourself. Use strcmp(a, b) == 0
instead.
For all variables that contain memory sizes, you should use size_t
instead of int
.
In C, the function strlen
is terribly slow since it has to scan the entire string for the terminating ' '
. Never use strlen
in a loop for the same string.
Bug: as soon as your word list contains non-ASCII words, your code runs into undefined behavior (array index out of bounds). The code should be able to work for Arabic and Cyrillic as well. I don't know how Koreans play Hangman, that might be interesting as well.
Answered by Roland Illig on October 27, 2021
The only obvious thing I see is that your realloc
machinery is too complicated given the task at hand. Your entire word file is only 530 KB. You could easily iterate over the entire file to count newlines and get the exact array length you need, and this would be done quite quickly.
If alignment of the resulting strings is crucial, you'd want to keep your current method of malloc
ing a copy of every single line. If not, there is a much simpler solution: read the entire file into memory in one big blob, null out the newlines, and then form a secondary array of pointers into that blob.
Answered by Reinderien on October 27, 2021
Get help from others!
Recent Questions
Recent Answers
© 2024 TransWikia.com. All rights reserved. Sites we Love: PCI Database, UKBizDB, Menu Kuliner, Sharing RPP