Windows下运行的Deatable Model学习用的learn_cpp.docx_第1页
Windows下运行的Deatable Model学习用的learn_cpp.docx_第2页
Windows下运行的Deatable Model学习用的learn_cpp.docx_第3页
Windows下运行的Deatable Model学习用的learn_cpp.docx_第4页
Windows下运行的Deatable Model学习用的learn_cpp.docx_第5页
已阅读5页,还剩22页未读 继续免费阅读

下载本文档

版权说明:本文档由用户提供并上传,收益归属内容提供方,若内容存在侵权,请进行举报或认领

文档简介

#include #include #include #include #include #include #include #include /#include stdafx.husing namespace std;/* * Optimize LSVM objective function via gradient descent. * * We use an adaptive cache mechanism. After a negative example * scores beyond the margin multiple times it is removed from the * training set for a fixed number of iterations. */ Data File Format/ EXAMPLE*/ / EXAMPLE:/ long label ints/ blocks int/ dim int/ DATAblocks/ DATA:/ block label float/ block data floats/ Internal Binary Format/ len int (byte length of EXAMPLE)/ EXAMPLE / unique flag byte/ number of iterations/*#ifndef DRAND48_H #define DRAND48_H #include */ /#define m 0x100000000LL /#define a 0x5DEECE66DLL /static unsigned long long seed = 1;/#endif/#define Infinity 1.0+308#define ITER 10e6/ minimum # of iterations before termination#define MIN_ITER 5e6/ convergence threshold#define DELTA_STOP 0.9995/ number of times in a row the convergence threshold/ must be reached before stopping#define STOP_COUNT 5/ small cache parameters#define INCACHE 25#define MINWAIT (INCACHE+25)#define REGFREQ 20/ error checking#define check(e) (e ? (void)0 : (printf(%s:%u error: %sn%sn, _FILE_, _LINE_, #e, strerror(errno), exit(1)/ number of non-zero blocks in example ex#define NUM_NONZERO(ex) (int *)ex)labelsize+1)/ float pointer to data segment of example ex#define EX_DATA(ex) (float *)(ex + sizeof(int)*(labelsize+3)/ class label (+1 or -1) for the example#define LABEL(ex) (int *)ex)1)/ block label (converted to 0-based index)#define BLOCK_IDX(data) (int)data0)-1)/ set to 0 to use max-component L2 regularization/ set to 1 to use full model L2 regularization#define FULL_L2 0#define MNWZ 0x100000000 #define ANWZ 0x5DEECE66D #define CNWZ 0xB16 #define INFINITY 0xFFFFFFFFFint labelsize;int dim;static unsigned long long seed = 1;double drand48(void) seed = (ANWZ * seed + CNWZ) & 0xFFFFFFFFFFFFLL; unsigned int x = seed 16; return (double)x / (double)MNWZ); /static unsigned long long seed = 1;void srand48(unsigned int i) seed = (long long int)i) 16) | rand(); / comparison function for sorting examples int comp(const void *a, const void *b) / sort by extended label first, and whole example second. int c = memcmp(*(char *)a) + sizeof(int), *(char *)b) + sizeof(int), labelsize*sizeof(int); if (c) return c; / labels are the same int alen = *(int *)a); int blen = *(int *)b); if (alen = blen) return memcmp(*(char *)a) + sizeof(int), *(char *)b) + sizeof(int), alen); return (alen blen) ? -1 : 1);/ a collapsed example is a sequence of examplesstruct collapsed char *seq; int num;/ the two node types in an AND/OR treeenum node_type OR, AND ;/ set of collapsed examplesstruct data collapsed *x; int num; int numblocks; int numcomponents; int *blocksizes; int *componentsizes; int *componentblocks; float *regmult; float *learnmult;/ seed the random number generator with an arbitrary (fixed) valuevoid seed_rand() srand48(3);/srand(3);static inline double min(double x, double y) return (x = y ? x : y); static inline double max(double x, double y) return (x = y ? y : x); / compute the score of an examplestatic inline double ex_score(const char *ex, data X, double *w) double val = 0.0; float *data = EX_DATA(ex); int blocks = NUM_NONZERO(ex); for (int j = 0; j blocks; j+) int b = BLOCK_IDX(data); data+; double blockval = 0; for (int k = 0; k X.blocksizesb; k+) blockval += wbk * datak; data += X.blocksizesb; val += blockval; return val;/ return the value of the object function./ out0 : loss on negative examples/ out1 : loss on positive examples/ out2 : regularization terms valuedouble compute_loss(double out3, double C, double J, data X, double *w) double loss = 0.0;#if FULL_L2 / compute |w|2 for (int j = 0; j X.numblocks; j+) for (int k = 0; k X.blocksizesj; k+) loss += wjk * wjk * X.regmultj; #else / compute max norm2 component for (int c = 0; c X.numcomponents; c+) double val = 0; for (int i = 0; i X.componentsizesc; i+) int b = X.componentblocksci; double blockval = 0; for (int k = 0; k loss) loss = val; #endif loss *= 0.5; / record the regularization term out2 = loss; / compute loss from the training data for (int l = 0; l = 1; l+) / which label subset to look at: -1 or 1 int subset = (l*2)-1; double subsetloss = 0.0; for (int i = 0; i X.num; i+) collapsed x = X.xi; / only consider examples in the target subset char *ptr = x.seq0; if (LABEL(ptr) != subset) continue; / compute max over latent placements int M = -1; double V = -INFINITY; /double V = -NWZ; for (int m = 0; m V) M = m; V = val; / compute loss on max ptr = x.seqM; int label = LABEL(ptr); double mult = C * (label = 1 ? J : 1); subsetloss += mult * max(0.0, 1.0-label*V); loss += subsetloss; outl = subsetloss; return loss;/ gradient descentvoid gd(double C, double J, data X, double *w, double *lb, char *logdir, char *logtag) ofstream logfile; string filepath = string(logdir) + /learnlog/ + string(logtag) + .log; /*char* filepath; strcat(filepath,logdir); strcat(filepath,/learnlog/); strcat(filepath,logtag); strcat(filepath,/log);*/ logfile.open(filepath.c_str(); /logfile.open(filepath); logfile.precision(14); logfile.setf(ios:fixed, ios:floatfield); int num = X.num; / state for random permutations int *perm = (int *)malloc(sizeof(int)*X.num); check(perm != NULL); / state for small cache int *W = (int *)malloc(sizeof(int)*num); check(W != NULL); for (int j = 0; j num; j+) Wj = INCACHE; double prev_loss = 1E9; bool converged = false; int stop_count = 0; int t = 0; while (t ITER & !converged) / pick random permutation for (int i = 0; i num; i+) permi = i; for (int swapi = 0; swapi num; swapi+) int swapj = (int)(drand48()*(num-swapi) + swapi; /int swapj = (int)(rand()*(num-swapi) + swapi; int tmp = permswapi; permswapi = permswapj; permswapj = tmp; / count number of examples in the small cache int cnum = 0; for (int i = 0; i num; i+) if (Wi = INCACHE)cnum+; int numupdated = 0; for (int swapi = 0; swapi INCACHE) Wi-;continue; collapsed x = X.xi; / learning rate double T = min(ITER/2.0, t + 10000.0); double rateX = cnum * C / T; t+; if (t % 100000 = 0) double info3; double loss = compute_loss(info, C, J, X, w); double delta = 1.0 - (fabs(prev_loss - loss) / loss); logfile t t loss t delta = DELTA_STOP & t = MIN_ITER) stop_count+; if (stop_count STOP_COUNT) converged = true; else if (stop_count 0) stop_count = 0; prev_loss = loss; printf(r%7.2f% of max # iterations (delta = %.5f; stop count = %d), 100*double(t)/double(ITER), max(delta, 0.0), STOP_COUNT - stop_count + 1);fflush(stdout); if (converged) break; / compute max over latent placements int M = -1; double V = -INFINITY; /double V = -NWZ; for (int m = 0; m V) M = m; V = val; char *ptr = x.seqM; int label = LABEL(ptr); if (label * V 1.0) numupdated+;Wi = 0;float *data = EX_DATA(ptr);int blocks = NUM_NONZERO(ptr);for (int j = 0; j 0 ? J : -1) * rateX * X.learnmultb; data+; for (int k = 0; k X.blocksizesb; k+) wbk += mult * datak; data += X.blocksizesb; else if (Wi = INCACHE) Wi = MINWAIT + (int)(drand48()*50); /Wi = MINWAIT + (int)(rand()*50);else Wi+; / periodically regularize the model if (t % REGFREQ = 0) / apply lowerbounds for (int j = 0; j X.numblocks; j+) for (int k = 0; k X.blocksizesj; k+) wjk = max(wjk, lbjk); double rateR = 1.0 / T;#if FULL_L2 / update model for (int j = 0; j X.numblocks; j+) double mult = rateR * X.regmultj * X.learnmultj; mult = pow(1-mult), REGFREQ); for (int k = 0; k X.blocksizesj; k+) wjk = mult * wjk; #else / assume simple mixture model int maxc = 0; double bestval = 0; for (int c = 0; c X.numcomponents; c+) double val = 0; for (int i = 0; i X.componentsizesc; i+) int b = X.componentblocksci; double blockval = 0; for (int k = 0; k bestval) maxc = c; bestval = val; for (int i = 0; i X.componentsizesmaxc; i+) int b = X.componentblocksmaxci; double mult = rateR * X.regmultb * X.learnmultb; mult = pow(1-mult), REGFREQ); for (int k = 0; k X.blocksizesb; k+) wbk = mult * wbk; #endif if (converged) printf(nTermination criteria reached after %d iterations.n, t); else printf(nMax iteration count reached.n, t); free(perm); free(W); logfile.close();/ score examplesdouble *score(data X, char *examples, int num, double *w) double *s = (double *)malloc(sizeof(double)*num); check(s != NULL); for (int i = 0; i num; i+) si = ex_score(examplesi, X, w); return s; / merge examples with identical labelsvoid collapse(data *X, char *examples, int num) collapsed *x = (collapsed *)malloc(sizeof(collapsed)*num); check(x != NULL); int i = 0; x0.seq = examples; x0.num = 1; for (int j = 1; j x = x; X-num = i+1; int main(int argc, char *argv) seed_rand(); int count; data X; / command line arguments check(argc = 12); double C = atof(argv1); double J = atof(argv2); char *hdrfile = argv3; char *datfile = argv4; char *modfile = argv5; char *inffile = argv6; char *lobfile = argv7; char *cmpfile = argv8; char *objfile = argv9; char *logdir = argv10; char *logtag = argv11; / read header file FILE *f = fopen(hdrfile, rb); check(f != NULL); int header3; count = fread(header, sizeof(int), 3, f); check(count = 3); int num = header0; labelsize = header1; X.numblocks = header2; X.blocksizes = (int *)malloc(X.numblocks*sizeof(int); count = fread(X.blocksizes, sizeof(int), X.numblocks, f); check(count = X.numblocks); X.regmult = (float *)malloc(sizeof(float)*X.numblocks); check(X.regmult != NULL); count = fread(X.regmult, sizeof(float), X.numblocks, f); check(count = X.numblocks); X.learnmult = (float *)malloc(sizeof(float)*X.numblocks); check(X.learnmult != NULL); count = fread(X.learnmult, sizeof(float), X.numblocks, f); check(count = X.numblocks); check(num != 0); fclose(f); printf(%d examples with label size %d and %d blocksn, num, labelsize, X.numblocks); printf(block size, regularization multiplier, learning rate multipliern); dim = 0; for (int i = 0; i X.numblocks; i+) dim += X.blocksizesi; printf(%d, %.2f, %.2fn, X.blocksizesi, X.regmulti, X.learnmulti); / read component info file / format: #components #blocks blk1 . blk#blocks#components f = fopen(cmpfile, rb); count = fread(&X.numcomponents, sizeof(int), 1, f); check(count = 1); printf(the model has %d componentsn, X.numcomponents); X.componentblocks = (int *)malloc(X.numcomponents*sizeof(int *); X.componentsizes = (int *)malloc(X.numcomponents*sizeof(int); for (int i = 0; i X.numcomponents; i+) count = fread(&X.componentsizesi, sizeof(int), 1, f); check(count = 1); printf(component %d has %d blocks:, i, X.componentsizesi); X.componentblocksi = (int *)malloc(X.componentsizesi*sizeof(int); count = fread(X.componentblocksi, sizeof(int), X.componentsizesi, f); check(count = X.componentsizesi); for (int j = 0; j X.componentsizesi; j+) printf( %d, X.componentblocksij); printf(n); fclose(f); / read examples f = fopen(datfile, rb); check(f != NULL); printf(Reading examplesn); char *examples = (char *)malloc(num*sizeof(char *); check(examples != NULL); for (int i = 0; i num; i+) / we use an extra byte in the end of each example to mark unique / we use an extra int at the start of each example to store the / examples byte length (excluding unique flag and this int) /int buflabelsize+2;int *buf = new intlabelsize+2; count = fread(buf, sizeof(int), labelsize+2, f); check(count = labelsize+2); / byte length of an examples data segment int len = sizeof(int)*(labelsize+2) + sizeof(float)*buflabelsize+1; / memory for data, an initial integer, and a final byte examplesi = (char *)malloc(sizeof(int)+len+1); check(examplesi != NULL); / set data segments byte length (int *)examplesi)0 = len; / set the unique flag to zero examplesisizeof(int)+len = 0; / copy label data into example for (int j = 0; j labelsize+2; j+) (int *)examplesi)j+1 = bufj; / read the rest of the data segment into the example count = fread(examplesi+sizeof(int)*(labelsize+3), 1, len-sizeof(int)*(labelsize+2), f); check(count = len-sizeof(int)*(labelsize+2);delete buf; fclose(f); printf(donen); / sort printf(Sorting examplesn); char *sorted = (char *)malloc(num*sizeof(char *); check(sorted != NULL); memcpy(sorted, examples, num*sizeof(char *); qsort(sorted, num, sizeof(char *), comp); printf(donen); / find unique examples int i = 0; int len = *(int *)sorted0); sorted0sizeof(int)+len = 1; for (int j = 1; j num; j+) int alen = *(int *)sortedi); int blen = *(int *)sortedj); if (alen != blen | memcmp(sortedi + sizeof(int), sortedj + sizeof(int), alen) i+; sortedi = sortedj; sortedisizeof(int)+blen = 1; int num_unique = i+1; printf(%d unique examplesn, num_unique); / collapse examples collapse(&X, sorted, num_unique); printf(%d collapsed examplesn, X.num); / initial model double *w = (double *)malloc(sizeof(double *)*X.numblocks); check(w != NULL); f = fopen(modfile, rb); for (int i = 0; i X.numblocks; i+) wi = (double *)malloc(sizeof(double)*X.blocksizesi); check(wi != NULL); count = fread(wi, sizeof(double), X.blocksizesi, f); check(count = X.blocksizesi); fclose(f); /

温馨提示

  • 1. 本站所有资源如无特殊说明,都需要本地电脑安装OFFICE2007和PDF阅读器。图纸软件为CAD,CAXA,PROE,UG,SolidWorks等.压缩文件请下载最新的WinRAR软件解压。
  • 2. 本站的文档不包含任何第三方提供的附件图纸等,如果需要附件,请联系上传者。文件的所有权益归上传用户所有。
  • 3. 本站RAR压缩包中若带图纸,网页内容里面会有图纸预览,若没有图纸预览就没有图纸。
  • 4. 未经权益所有人同意不得将文件中的内容挪作商业或盈利用途。
  • 5. 人人文库网仅提供信息存储空间,仅对用户上传内容的表现方式做保护处理,对用户上传分享的文档内容本身不做任何修改或编辑,并不能对任何下载内容负责。
  • 6. 下载文件中如有侵权或不适当内容,请与我们联系,我们立即纠正。
  • 7. 本站不保证下载资源的准确性、安全性和完整性, 同时也不承担用户因使用这些下载资源对自己和他人造成任何形式的伤害或损失。

评论

0/150

提交评论