1
2
3
4
5
6
7
8
9
10
11
12
13#include <stdio.h>
14#include <stdlib.h>
15#include <sys/types.h>
16#include <sys/stat.h>
17#include <fcntl.h>
18#include <unistd.h>
19#include <string.h>
20
/* One distinct block of input text together with its occurrence count. */
struct block_list {
	char *txt;	/* heap copy of the block, NUL-terminated */
	int len, num;	/* len: bytes in txt (without the NUL); num: times seen */
};
26
27
/* Table of the blocks recorded so far. */
static struct block_list *list;
/* list_size: entries of 'list' in use; max_size: entries allocated. */
static int list_size, max_size;

/* Externally visible pointer; the code in this file never touches it. */
struct block_list *block_head;
33
/*
 * Read one block of text from fin into buf.
 *
 * A block is a run of lines ended by an empty line. The empty line is
 * consumed but not counted in the result. A final block that reaches end
 * of input without a trailing empty line is returned as well, instead of
 * being dropped.
 *
 * buf:      destination buffer
 * buf_size: size of buf in bytes
 * fin:      stream to read from
 *
 * Returns the number of bytes in the block (0 when the first line read is
 * empty), or -1 when no block can be delivered: end of input with nothing
 * pending, a read error, or a block that does not fit in buf. The text is
 * not NUL-terminated at the returned length when an empty line ended the
 * block, so callers must rely on the returned length.
 */
int read_block(char *buf, int buf_size, FILE *fin)
{
	char *curr = buf, *const buf_end = buf + buf_size;

	/* fgets() needs room for at least one character plus the NUL. */
	while (buf_end - curr > 1) {
		if (!fgets(curr, buf_end - curr, fin)) {
			/* Clean EOF with data pending: deliver the last block. */
			if (curr > buf && !ferror(fin))
				return curr - buf;
			return -1;
		}
		if (*curr == '\n')
			return curr - buf;
		curr += strlen(curr);
	}

	/* Buffer exhausted before the block ended. */
	return -1;
}
46
47static int compare_txt(const void *p1, const void *p2)
48{
49 const struct block_list *l1 = p1, *l2 = p2;
50
51 return strcmp(l1->txt, l2->txt);
52}
53
54static int compare_num(const void *p1, const void *p2)
55{
56 const struct block_list *l1 = p1, *l2 = p2;
57
58 return l2->num - l1->num;
59}
60
61static void add_list(char *buf, int len)
62{
63 if (list_size != 0 &&
64 len == list[list_size-1].len &&
65 memcmp(buf, list[list_size-1].txt, len) == 0) {
66 list[list_size-1].num++;
67 return;
68 }
69 if (list_size == max_size) {
70 printf("max_size too small??\n");
71 exit(1);
72 }
73 list[list_size].txt = malloc(len+1);
74 list[list_size].len = len;
75 list[list_size].num = 1;
76 memcpy(list[list_size].txt, buf, len);
77 list[list_size].txt[len] = 0;
78 list_size++;
79 if (list_size % 1000 == 0) {
80 printf("loaded %d\r", list_size);
81 fflush(stdout);
82 }
83}
84
85#define BUF_SIZE (128 * 1024)
86
87int main(int argc, char **argv)
88{
89 FILE *fin, *fout;
90 char *buf;
91 int ret, i, count;
92 struct block_list *list2;
93 struct stat st;
94
95 if (argc < 3) {
96 printf("Usage: ./program <input> <output>\n");
97 perror("open: ");
98 exit(1);
99 }
100
101 fin = fopen(argv[1], "r");
102 fout = fopen(argv[2], "w");
103 if (!fin || !fout) {
104 printf("Usage: ./program <input> <output>\n");
105 perror("open: ");
106 exit(1);
107 }
108
109 fstat(fileno(fin), &st);
110 max_size = st.st_size / 100;
111
112 list = malloc(max_size * sizeof(*list));
113 buf = malloc(BUF_SIZE);
114 if (!list || !buf) {
115 printf("Out of memory\n");
116 exit(1);
117 }
118
119 for ( ; ; ) {
120 ret = read_block(buf, BUF_SIZE, fin);
121 if (ret < 0)
122 break;
123
124 add_list(buf, ret);
125 }
126
127 printf("loaded %d\n", list_size);
128
129 printf("sorting ....\n");
130
131 qsort(list, list_size, sizeof(list[0]), compare_txt);
132
133 list2 = malloc(sizeof(*list) * list_size);
134
135 printf("culling\n");
136
137 for (i = count = 0; i < list_size; i++) {
138 if (count == 0 ||
139 strcmp(list2[count-1].txt, list[i].txt) != 0) {
140 list2[count++] = list[i];
141 } else {
142 list2[count-1].num += list[i].num;
143 }
144 }
145
146 qsort(list2, count, sizeof(list[0]), compare_num);
147
148 for (i = 0; i < count; i++)
149 fprintf(fout, "%d times:\n%s\n", list2[i].num, list2[i].txt);
150
151 return 0;
152}
153