gSort1400

gSort1400 has been rigorously tested and accredited by the Royal Mail

gSort1400 takes a CSV file of addresses and sorts them into Royal Mail distribution centre order, inserting a dummy address inbetween. It also outputs a computer planning report, which gives statistics about the sort, and a line listing, which lists all of the distribution centres by code and name, how many items are going there, and how many mailbags.

If enough items go to one distribution centre (say 25 or more), they are bagged up and you are rewarded with a discount on the postal cost ("directs"). If there are less than this total, the region's distribution centres are lumped together and a smaller discount is rewarded ("residues"). The program has to check for town names where the postcode cannot be matched, and assign the address to a residue if possible.

The whole thing revolves around a custom designed hash table and algorithm, linking each postcode to a temporary file for each distribution centre.

I hand coded the entire database in C - you might call it 'bespoke'. I'd never used a linked list before and had not got my head fully around pointers. I handled all of the linked lists with my own code, mainly using while loops, and in the end I bubble sorted a linked list and made another go backwards so now I understand pointers intimately!

/*

gSort1400

(c) July 2007 R Geleit

*/

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <kernel.h>

#define BOBDIR_SSCLIST "<gSort$Dir>.RMdata.MSORTB/DAT"
#define BOBDIR_MSORTA "<gSort$Dir>.RMdata.MSORTA/DAT"
#define BOBDIR_MSORTC "<gSort$Dir>.RMdata.MSORTC/DAT"

#define LABEL_HEADER label_header
#define COMPANY company

#define BOBDIR_MSORTB "<gSort$Dir>.RMdata.MSORTB/DAT"
#define BOBDIR_LINELIST "<gSortTarget$Path>linelist"
#define BOBDIR_REPORT "<gSortTarget$Path>report"

#define BOBDIR_CSV "<gSortTarget$Path>addresses"
#define BOBDIR_RESULTS "<gSortTarget$Path>outfile"
#define BOBDIR_TMP "<gSortTemp$Path>tmp"

#define DUMPFILE "<gSort$Dir>.dumpfile"

#define BUFFSIZE 1024
#define MANYINBAG manyinbag
#define SSTRINGLEN 12
#define FNAMESIZE 32
#define DIRECTLIMIT direct_limit

char main_buffer[BUFFSIZE];                                                    // for line read from address csv file
char pcode_buffer[BUFFSIZE];                                                   // holds postcode from file
char add1buffer[BUFFSIZE];                                                     // holds primary town name
char add2buffer[BUFFSIZE];                                                     // holds 2nd town name
char msortb_buffer[BUFFSIZE];

char *label_header;
char *company;
char *jobref;
char *size_format;
char *service_class;
int weight=0;
int manyinbag=0;
int no_of_fields;                                                              // no of fields in csv file
int postcode_field;                                                            // "POSTCODE" field (from 0)
int direct_limit=0;                                                            // 5 or 25
int towns_matched=0;                                                           // if no postcode match then...
int full_p_count=0;                                                            // no of full postcodes
int out_p_count=0;                                                             // no of outward only pcodes

FILE *dumpfile;

typedef enum {FALSE,TRUE} boolean;                                             // TRUE or FALSE
typedef enum {DIRECT,RESIDUE} dr;                                                 
typedef enum {SPC,NUM,ALPH,OTHER} aornum;                                      // for postcode level

typedef enum {CANTOPENSSC,MEMORYFULL,CANTREADSSCFILE,CANTFINDSSC,CANTOPENCSV,CANTREADCSVFILE,CANTFINDPOSTCODEFIELD,CANTOPENWRACK,
CANTWRITEWRACK,CANTOPENCSV2,CANTREADCSVFILE2,CANTOPENOUTFILE,CANTOPENTMP,CANTREADAPPFILE,CANTOPENTMPFILE2,CANTOPENTMP2,
CANTOPENTMPFILE3,CANTOPENOUTFILE2,CANTOPENMSORTA,CANTREADMSORTA,BADPCODE,CANTOPENSTATS,CANTOPENMSORTC,CANTREADMSORTC,
TOWNTOOLONG,CANTREADMSORTB,CANTOPENMSORTB,SSCNOTINMSORTB,CANTOPENLLIST,CANTOPENLNLIST,CANTOPENREPORT} errors; struct SSCdata { // info on SSC int SSC; // SSC char SSCfilename[FNAMESIZE]; // filename of temp file boolean file_virgin; // file previously opened? int tally; // no of addresses in file } detritus; // for undecodeable postcodes struct SSCptr_list { // linked list of all SSCdata structures int SSC; // SSC no struct SSCdata *SSCptr; // pointer to SSCdata struct SSCptr_list *next_rec; // next record in linked list }; struct SSCptr_list *SSCptr_list_first=NULL; // initiate linked list of SSCdata struct open_files_list { // linked list of open files struct SSCdata *SSCptr; // pointer to file info structure boolean direct_or_done; // =TRUE if final tally>=DIRECTLIMIT or Resid done struct open_files_list *next_rec; // next record in linked list }; struct open_files_list *files_list_first=NULL; // initiate linked list of open files struct pcode { int first; int second; int third; int fourth; int f_o_r; }; struct town_list { // MSORTC data char town[11]; int SSC; struct town_list *next_rec; } *first_town=NULL; struct label_list { // list of SSCs and quantity for line listing int SSC; int many_in_SSC; dr D_or_R; struct label_list *next_rec; struct label_list *prev_rec; }; struct label_list *label_list_first; struct label_list *label_list_last; struct f_o_r_list { struct SSCdata *SSCdata_ptr[10]; }; struct f_o_r_list *pcode_table[26][36][37][37]; void createSSCleaves(void); // allocate SSC structs and add to linked list void read_msorta(); void read_msortc(); void mailsort(); // perform sort of lines in labels.csv void sort_file_list(); boolean beginning(void); boolean middle(struct open_files_list *this); boolean ending(struct open_files_list *this); void output_stats(); // save data for line listing & labels void label_directs(); // sets up open_files_list->direct void output_directs(); // set up output file and append direct tmps & SSC void output_residues(); // collect residues together & append to outfile void delete_tmp(); // removes SSC files int get_no_of_fields(); // how many lines in address int get_postcode_field(); // which line in address is POSTCODE void postcode_field_write(); // prepare ssc record in main_buffer void append_tmp_file(); // appends x lines of one file to another void delete_tmp(); // removes SSC files void get_pcode(); // move postcode to start of main_buffer void get_towns(); void to_upper(char *string); int town_scan(char *string); aornum isnum(char); void reverse_labels(); void output_linelisting(); void output_report(); void err(errors); // error handler void display_array(); /***************************** * main * *****************************/ int main(int argc,char *argv[]) { int t; for(t=0;t<argc;t++) { if(argv[t][0]=='-') { switch(argv[t][1]) { case 'm': manyinbag=atoi(argv[t]+2); break; case 'h': label_header=argv[t]+2; break; case 'n': company=argv[t]+2; break; case 'l': direct_limit=atoi(argv[t]+2); break; case 'w': weight=atoi(argv[t]+2); break; case 'j': jobref=argv[t]+2; break; case 's': service_class=argv[t]+2; break; case 'f': size_format=argv[t]+2; break; } } } createSSCleaves(); read_msorta(); //printf("msorta read\n"); read_msortc(); //printf("msortc read\n"); //display_array(); //dumpfile=fopen(DUMPFILE,"w"); mailsort(); sort_file_list(); //fclose(dumpfile); label_directs(); output_directs(); output_residues(); delete_tmp(); reverse_labels(); //printf("before report\n"); output_report(); //printf("after report\n"); output_linelisting(); //printf("after LL (end)\n"); _kernel_oscli("set gSort$Error None"); return(0); } /***************************** * createSSCleaves * *****************************/ void createSSCleaves(void) // read in SSC data, malloc and list them { int SSCff; FILE *SSClist; // input file - SSClist char SSCbuffer[BUFFSIZE]; // holds a line of SSClist char *msortc_ptr; struct SSCdata *this_SSC; // points to SSC structure struct SSCptr_list *this_listed_SSC; // linked list of SSC structures detritus.SSC=0; strcpy(detritus.SSCfilename,"<gSortTemp$Path>SSC0"); detritus.file_virgin=TRUE; detritus.tally=0; if((SSClist=fopen(BOBDIR_SSCLIST,"r"))==NULL) err(CANTOPENSSC); // open SSClist while(!feof(SSClist)) // till eof { if((fgets(SSCbuffer,sizeof(SSCbuffer),SSClist))==NULL) err(CANTREADSSCFILE); // read line of SSClist to buffer sscanf(SSCbuffer+1,"%d",&SSCff); // decode SSC if(SSCbuffer[0]=='3') break; if((this_SSC=malloc(sizeof(struct SSCdata)))==NULL) err(MEMORYFULL); // allocate SSCleaf this_SSC->SSC=SSCff; sprintf(this_SSC->SSCfilename,"<gSortTemp$Path>SSC%d",SSCff); this_SSC->file_virgin=TRUE; this_SSC->tally=0; /* add to linked list*/ // maintain list of SSCleafs if((this_listed_SSC=malloc(sizeof(struct SSCptr_list)))==NULL) err(MEMORYFULL); // allocate list of SSCleafs this_listed_SSC->SSC=SSCff; // initiate structure this_listed_SSC->SSCptr=this_SSC; this_listed_SSC->next_rec=SSCptr_list_first; SSCptr_list_first=this_listed_SSC; } fclose(SSClist); // close SSClist // if((SSClist=fopen(BOBDIR_MSORTC,"r"))==NULL) err(CANTOPENMSORTC); // open residues list for(;;) // till eof { msortc_ptr=fgets(SSCbuffer,sizeof(SSCbuffer),SSClist); // read line of residues to buffer if(msortc_ptr==NULL && !feof(SSClist)) err(CANTREADMSORTC); else if(msortc_ptr==NULL) break; sscanf(SSCbuffer+11,"%d",&SSCff); // decode SSC SSCff=SSCff*100; //printf("%d\n",SSCff); if((this_SSC=malloc(sizeof(struct SSCdata)))==NULL) err(MEMORYFULL); // allocate SSCleaf this_SSC->SSC=SSCff; sprintf(this_SSC->SSCfilename,"<gSortTemp$Path>SSC%d",SSCff); this_SSC->file_virgin=TRUE; this_SSC->tally=0; /* add to linked list*/ // maintain list of SSCleafs if((this_listed_SSC=malloc(sizeof(struct SSCptr_list)))==NULL) err(MEMORYFULL); // allocate list of SSCleafs this_listed_SSC->SSC=SSCff; // initiate structure this_listed_SSC->SSCptr=this_SSC; this_listed_SSC->next_rec=SSCptr_list_first; SSCptr_list_first=this_listed_SSC; } fclose(SSClist); // close residue list // } /***************************** * read_msorta * *****************************/ void read_msorta() { FILE *msorta_file; struct pcode this_pcode; struct SSCptr_list *current_ptr; struct f_o_r_list *this_element; char t,*msorta_ptr; int this_SSC,i; if((msorta_file=fopen(BOBDIR_MSORTA,"r"))==NULL) err(CANTOPENMSORTA); for(;;) { msorta_ptr=fgets(main_buffer,sizeof(main_buffer),msorta_file); // read line and check for eof (CR at eof) if(msorta_ptr==NULL && !feof(msorta_file)) err(CANTREADMSORTA); else if(msorta_ptr==NULL) break; this_SSC=atoi(main_buffer+6); // SSC from msorta this_pcode.first=(int)main_buffer[1]-65; // first part of postcode this_pcode.second=-1; // to trap non alphanumeric this_pcode.third=-1; this_pcode.fourth=-1; t=(char)main_buffer[2]; // second if(t>='A' && t<='Z') this_pcode.second=(int)t-55; if(t>='0' && t<='9') this_pcode.second=(int)t-48; t=(char)main_buffer[3]; // third if(t==' ') this_pcode.third=36; if(t>='A' && t<='Z') this_pcode.third=(int)t-55; if(t>='0' && t<='9') this_pcode.third=(int)t-48; t=(char)main_buffer[4]; // fourth if(t==' ') this_pcode.fourth=36; if(t>='A' && t<='Z') this_pcode.fourth=(int)t-55; if(t>='0' && t<='9') this_pcode.fourth=(int)t-48; this_pcode.f_o_r=(int)main_buffer[5]-48; // first number in right half of postcode if(this_pcode.first>25 || this_pcode.first <0 || this_pcode.second>35 || this_pcode.second <0 || this_pcode.third>36 || this_pcode.third <0 || this_pcode.fourth>36 || this_pcode.fourth <0 || this_pcode.f_o_r>9 || this_pcode.f_o_r <0) err(BADPCODE); // validate this_element=pcode_table[this_pcode.first][this_pcode.second][this_pcode.third][this_pcode.fourth]; // ptr to array of SSC if(this_element==NULL) // then malloc array of *SSC file data { if((this_element=malloc(sizeof(struct f_o_r_list)))==NULL) err(MEMORYFULL); for(i=0;i<10;i++) // NULLify array this_element->SSCdata_ptr[i]=NULL; } current_ptr=SSCptr_list_first; // search for SSCleaf of SSC from file while(current_ptr!=NULL) { if(current_ptr->SSC==this_SSC) break; current_ptr=current_ptr->next_rec; } if(current_ptr==NULL) err(CANTFINDSSC); // end of list and cant find SSC this_element->SSCdata_ptr[this_pcode.f_o_r]=current_ptr->SSCptr; // point array element t SSC file struct pcode_table[this_pcode.first][this_pcode.second][this_pcode.third][this_pcode.fourth]=this_element; // register in table } fclose(msorta_file); // done } /***************************** * read_msortc * *****************************/ void read_msortc() { FILE *msortc_file; char *msortc_ptr; struct town_list *this_town; int i; if((msortc_file=fopen(BOBDIR_MSORTC,"r"))==NULL) err(CANTOPENMSORTC); //dumpfile=fopen(DUMPFILE,"w"); for(;;) { msortc_ptr=fgets(main_buffer,sizeof(main_buffer),msortc_file); // read line and check for eof (CR at eof) if(msortc_ptr==NULL && !feof(msortc_file)) err(CANTREADMSORTC); else if(msortc_ptr==NULL) break; if((this_town=malloc(sizeof(struct town_list)))==NULL) err(MEMORYFULL); this_town->SSC=atoi(main_buffer+11)*100; for(i=0;i<10;i++) // copy town to list this_town->town[i]=main_buffer[i+1]; this_town->town[10]=' '; for(i=10;i>0;i--) // find terminating space if(this_town->town[i]!=' ') break; this_town->town[i+1]=0; // and replace with \0 //fprintf(dumpfile,"%sX\n",this_town->town); this_town->next_rec=first_town; // link list first_town=this_town; } fclose(msortc_file); //fclose(dumpfile); //this_town=first_town; //for(i=0;i<5;i++) //{ //printf("%sX%d\n",this_town->town,this_town->SSC); //this_town=this_town->next_rec; //} //exit(0); } /***************************** * mailsort * *****************************/ /***************************** * sort_file_list * *****************************/ void sort_file_list() { boolean swapped; struct open_files_list *this, *p1, *p2, *p3, *p4; if(files_list_first!=NULL) { if(files_list_first->next_rec!=NULL) { if(files_list_first->next_rec->next_rec==NULL) // only two items in list { if(files_list_first->SSCptr->SSC>files_list_first->next_rec->SSCptr->SSC) // *** { p1=files_list_first->next_rec; p1->next_rec=files_list_first; files_list_first->next_rec=NULL; files_list_first=p1; } } else if(files_list_first->next_rec->next_rec->next_rec==NULL) // only three items in list { do { swapped=beginning(); this=files_list_first->next_rec; swapped|=ending(this); } while(swapped==TRUE); } else // lots in the middle { do { swapped=beginning(); this=files_list_first; while(this->next_rec->next_rec->next_rec!=NULL) { swapped|=middle(this); this=this->next_rec; } swapped|=ending(this); } while(swapped==TRUE); } } } } boolean beginning(void) { boolean swapped; struct open_files_list *p1,*p2; swapped=FALSE; if(files_list_first->SSCptr->SSC>files_list_first->next_rec->SSCptr->SSC) // *** { p1=files_list_first->next_rec; p2=files_list_first->next_rec->next_rec; p1->next_rec=files_list_first; files_list_first->next_rec=p2; files_list_first=p1; swapped=TRUE; } return(swapped); } boolean middle(struct open_files_list *this) { boolean swapped; struct open_files_list *p1,*p2,*p3; swapped=FALSE; if(this->next_rec->SSCptr->SSC>this->next_rec->next_rec->SSCptr->SSC) // *** { p1=this->next_rec; p2=this->next_rec->next_rec; p3=this->next_rec->next_rec->next_rec; this->next_rec=p2; p2->next_rec=p1; p1->next_rec=p3; swapped=TRUE; } return(swapped); } boolean ending(struct open_files_list *this) { boolean swapped; struct open_files_list *p1,*p2; swapped=FALSE; if(this->next_rec->SSCptr->SSC>this->next_rec->next_rec->SSCptr->SSC) // *** { p1=this->next_rec; p2=this->next_rec->next_rec; this->next_rec=p2; p2->next_rec=p1; p1->next_rec=NULL; swapped=TRUE; } return(swapped); } /***************************** * isnum * *****************************/ aornum isnum(char candidate) { if(candidate>='0' && candidate<='9') return(NUM); if(candidate>='A' && candidate<='Z') return(ALPH); if(candidate==' ') return(SPC); return(OTHER); } /***************************** * get_pcode * *****************************/ /***************************** * get_towns * *****************************/ /***************************** * town_scan * *****************************/ /***************************** * to_upper * *****************************/ void to_upper(char *string) { int i,length; char letter; length=strlen(string); if(length>BUFFSIZE) err(TOWNTOOLONG); for(i=0;i<length;i++) { letter=string[i]; if(letter>='a' && letter<='z') string[i]=letter&223; } } /***************************** * get_no_of_fields * *****************************/ int get_no_of_fields() // returns no of fields in first line of CSV { int i,fields=0; for(i=0;main_buffer[i]!='\0';i++) if(main_buffer[i]==',') fields++; return(fields); } /***************************** * get_postcode_field * *****************************/ int get_postcode_field() // returns field in 1st line cont POSTCODE (from 0) { int field=0; char *postcode_start,*i; postcode_start=strstr(main_buffer,"POSTCODE"); if(postcode_start==NULL) err(CANTFINDPOSTCODEFIELD); for(i=main_buffer;i<postcode_start;i++) { if(*i==',') field++; // " in first line of file? *** } return(field); } /***************************** * label_directs * *****************************/ void label_directs() // sets direct flag in open files list { struct open_files_list *this_rec; this_rec=files_list_first; while(this_rec!=NULL) { if(this_rec->SSCptr->tally>=DIRECTLIMIT && this_rec->SSCptr->SSC%100!=0) this_rec->direct_or_done=TRUE; else this_rec->direct_or_done=FALSE; this_rec=this_rec->next_rec; } } /***************************** * output_directs * *****************************/ /***************************** * output_residues * *****************************/ /***************************** * postcode_field_write * *****************************/ void postcode_field_write(char *SSC_str) // creates id label in main_buffer { int i,j,diff; for(i=0;i<postcode_field;i++) main_buffer[i]=','; // *** "xxx"? main_buffer[i]='\0'; strcat(main_buffer,SSC_str); i=strlen(main_buffer); diff=no_of_fields-postcode_field; for(j=0;j<diff;i++,j++) main_buffer[i]=','; main_buffer[i++]='\n'; main_buffer[i]='\0'; } /***************************** * append_tmp_file * not used? *****************************/ /***************************** * delete_tmp * *****************************/ void delete_tmp() { struct open_files_list *current_file; current_file=files_list_first; while(current_file!=NULL) { remove(current_file->SSCptr->SSCfilename); current_file=current_file->next_rec; } remove("<gSortTemp$Path>tmp"); } /***************************** * reverse_labels * *****************************/ void reverse_labels() { struct label_list *this_label; label_list_first->prev_rec=NULL; this_label=label_list_first; while(this_label->next_rec!=NULL) { this_label->next_rec->prev_rec=this_label; this_label=this_label->next_rec; } label_list_last=this_label; } /***************************** * output_linelisting * *****************************/ /***************************** * output_report * *****************************/ /***************************** * err * *****************************/ void err(errors errno) { //fprintf(stderr,"%s\n",main_buffer); // *** switch(errno) { case CANTOPENSSC: _kernel_oscli("set gSort$Error Can't open SSClist"); break; case MEMORYFULL: _kernel_oscli("set gSort$Error Insufficient memory"); break; case CANTREADSSCFILE: _kernel_oscli("set gSort$Error Can't read from SSC file"); break; case CANTFINDSSC: _kernel_oscli("set gSort$Error Can't find SSC in linked list"); break; case CANTOPENCSV: _kernel_oscli("set gSort$Error Can't open labels.csv file"); break; case CANTREADCSVFILE: _kernel_oscli("set gSort$Error Can't read addresses file"); break; case CANTFINDPOSTCODEFIELD: _kernel_oscli("set gSort$Error Can't identify POSTCODE field in first line of addresses file"); break; case CANTOPENWRACK: _kernel_oscli("set gSort$Error Can't open wrack file"); break; case CANTWRITEWRACK: _kernel_oscli("set gSort$Error Can't write to wrack file"); break; case CANTOPENCSV2: _kernel_oscli("set gSort$Error Can't open CSV file in concat"); break; case CANTREADCSVFILE2: _kernel_oscli("set gSort$Error Can't read CSV file in concat"); break; case CANTOPENOUTFILE: _kernel_oscli("set gSort$Error Can't open outfile in concat"); break; case CANTOPENTMP: _kernel_oscli("set gSort$Error Can't open SSCfile for concat"); break; case CANTREADAPPFILE: _kernel_oscli("set gSort$Error Can't open SSCfile for appending"); break; case CANTOPENTMPFILE2: _kernel_oscli("set gSort$Error Can't open tmp file in residues"); break; case CANTOPENTMP2: _kernel_oscli("set gSort$Error Can't open SSC (R) file in residues"); break; case CANTOPENTMPFILE3: _kernel_oscli("set gSort$Error Can't open tmp file for reading in residues"); break; case CANTOPENOUTFILE2: _kernel_oscli("set gSort$Error Can't open main outfile file in residues"); break; case CANTOPENMSORTA: _kernel_oscli("set gSort$Error Can't open MSORTA"); break; case CANTREADMSORTA: _kernel_oscli("set gSort$Error Can't read MSORTA"); break; case CANTOPENMSORTC: _kernel_oscli("set gSort$Error Can't open MSORTC"); break; case CANTREADMSORTC: _kernel_oscli("set gSort$Error Can't read MSORTC"); break; case BADPCODE: _kernel_oscli("set gSort$Error Illegal postcode in MSORTA"); break; case CANTOPENSTATS: _kernel_oscli("set gSort$Error Can't open stats file"); break; case TOWNTOOLONG: _kernel_oscli("set gSort$Error to_upper() failed - string too long"); break; case CANTREADMSORTB: _kernel_oscli("set gSort$Error Can't read from MSORTB file"); break; case CANTOPENMSORTB: _kernel_oscli("set gSort$Error Can't open MSORTB file"); break; case SSCNOTINMSORTB: _kernel_oscli("set gSort$Error Can't find SSC in MSORTB file"); break; case CANTOPENLLIST: _kernel_oscli("set gSort$Error Can't open labellist file for output"); break; case CANTOPENLNLIST: _kernel_oscli("set gSort$Error Can't open linelist file for output"); break; case CANTOPENREPORT: _kernel_oscli("set gSort$Error Can't open report file for output"); break; } delete_tmp(); exit(errno); } /********************************************************** diagnostics ***********************************************************/ void display_array() { int a,b,c,d; FILE *dump; dump=fopen(DUMPFILE,"w"); for(a=0;a<26;a++) { for(b=0;b<36;b++) { for(c=0;c<37;c++) { for(d=0;d<37;d++) { fprintf(dump,"%p\n",pcode_table[a][b][c][d]); } } } } fclose(dump); }