13-11-2011, 01:21 PM
Another of my once-in-a-while blog posts that could maybe be a tutorial, but instead of writing text for it I comment up the code extensively. This example demonstrates reading a text file of just about any size into an array of lines, which can then be manipulated by the user in any way desired.
It demonstrates the use of fgets, errno/strerror, as well as C memory allocation -- both malloc and realloc -- and the use of a dynamic array of pointers. It could be used as the basis for a text editor.
Code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
/*
    Read one line of arbitrary length from fp.

    Returns a newly allocated, NUL-terminated string holding the line
    with its trailing newline removed. The caller owns the memory and
    must free() it. Returns NULL if fp is NULL or if nothing could be
    read (end of file, or a read error before any data arrived).

    A final line that is not terminated by a newline is still returned.

    If memory cannot be allocated, the partial line is freed, fp is
    closed, a message is written to stderr and the process exits
    with status -1.
*/
char *readLine(FILE *fp)
{
    if (!fp)
        return NULL;

    /*
        Our temporary buffer, initialized to all 0s.
        Note I'm making this artificially small to
        demonstrate/test the algorithm. In a real-world
        application you would use the BUFSIZ macro, which
        is part of stdio:
        char buffer[BUFSIZ] = { 0 };
    */
    char buffer[16] = { 0 };

    /* The line which we will return to the caller */
    char *line = NULL;

    /* Number of characters stored in line so far, not counting the
       terminator. Tracking it avoids re-scanning line with strlen
       every time another chunk is appended. */
    size_t lineLen = 0;

    /*
        fgets reads at most sizeof(buffer) - 1 characters and keeps the
        terminating newline if it fits. If the line is simply "hello"
        and the buffer is longer than 6 characters, the buffer will hold
            'h', 'e', 'l', 'l', 'o', '\n', '\0'
        If the buffer is insufficient, say 5 characters, it will hold
            'h', 'e', 'l', 'l', '\0'
        and we know we did not get the whole line, so we go around
        again for the next chunk.
    */
    while (fgets(buffer, sizeof(buffer), fp) != NULL)
    {
        size_t len = strlen(buffer);
        int done = 0;

        /* The len > 0 test matters: if the data read starts with a
           NUL byte, strlen reports 0 and buffer[len - 1] would index
           before the start of the array. */
        if (len > 0 && buffer[len - 1] == '\n')
        {
            /* We've got the entire line; overwrite the newline with
               a terminator so it is not part of the result. */
            buffer[--len] = '\0';
            done = 1;
        }

        /*
            Grow the line to hold what we already have, this chunk and
            the terminator. realloc(NULL, n) behaves like malloc(n), so
            the first chunk needs no special case. The result goes into
            a temporary so that on failure we still hold the pointer to
            the previously allocated memory and can free it.
        */
        char *temp = realloc(line, lineLen + len + 1);
        if (!temp)
        {
            /* Pick the message before freeing line */
            const char *message = line ? "Could not reallocate memory!\n"
                                       : "Could not allocate memory!\n";
            /* Die immediately, cleaning up first */
            free(line);
            fclose(fp);
            fprintf(stderr, "%s", message);
            exit(-1);
        }
        line = temp;

        /* Append the chunk, including its terminator, at the end */
        strcpy(line + lineLen, buffer);
        lineLen += len;

        if (done)
            break;
    }

    /* NULL if the very first fgets failed; otherwise the line, which
       may be a final line with no trailing newline. */
    return line;
}
/*
    Prompt for a file path, read the whole file into a dynamically
    grown array of lines using readLine, print the lines in reverse
    order, and release all memory.

    Returns 0 on success. Exits with status -1 if the path cannot be
    read from stdin, the file cannot be opened, or memory runs out.
*/
int main(void)
{
    char fileToRead[4096] = { 0 };
    printf("Enter path of file to read: ");
    if (fgets(fileToRead, sizeof(fileToRead), stdin) == NULL)
    {
        fprintf(stderr, "Invalid input!\n");
        exit(-1);
    }

    /* Remove the newline from the received data. strcspn gives the
       index of the first '\n', or the string length if there is none,
       so input that ends without a newline is left intact instead of
       losing its last character. */
    fileToRead[strcspn(fileToRead, "\n")] = '\0';

    /* Try to open the file */
    FILE *fp = fopen(fileToRead, "r");
    if (!fp)
    {
        fprintf(stderr, "Unable to open %s: %s\n",
                fileToRead, strerror(errno));
        exit(-1);
    }

    /* Our array of lines is actually a pointer to an array of character
       pointers, one for each line, so keep track of the number of lines. */
    char **lines = NULL;
    size_t lineCount = 0;
    char *line = NULL;

    while ((line = readLine(fp)) != NULL)
    {
        /* Make room for one more pointer. The parentheses matter:
           we need (lineCount + 1) elements, not lineCount elements
           plus a single byte. realloc failure means we need to clean
           up the previously created memory, so we use a temporary to
           hold the returned value, keeping the old pointer valid. */
        char **temp = realloc(lines, sizeof(*lines) * (lineCount + 1));
        if (!temp)
        {
            fprintf(stderr, "Unable to realloc for lines!\n");
            /* Cleanup the line we just read, which is not yet stored
               in the array, and then the existing memory */
            free(line);
            for (size_t i = 0; i < lineCount; ++i)
            {
                free(lines[i]);
            }
            free(lines);
            fclose(fp);
            exit(-1);
        }

        /* Assign our lines variable to point to the reallocated memory */
        lines = temp;

        /* Set our newly allocated pointer to point to the line returned */
        lines[lineCount++] = line;
    }

    /* Print our lines -- backwards for fun -- and clean up. Counting
       down with "j-- > 0" keeps the index a size_t and is safe when
       lineCount is 0. */
    for (size_t j = lineCount; j-- > 0; )
    {
        printf("%s\n", lines[j]);
        /* Free the line's memory allocated in the readLine function */
        free(lines[j]);
    }

    /* Free the array that held the lines */
    free(lines);
    fclose(fp);
    return 0;
}