Search This Blog

Tuesday, August 3, 2010

sqlite3 and the World Ocean Database, continued

I got things sped up quite a bit by directly querying the database file I had created (see my last post) and by returning the query results as an array of watercolumn structures instead of writing them to disk and then reading them in again in my svp_tool.

Here's the bit of code that deals with building the query:

int wod_extract_profiles (WOD_Reader * wod_reader)
{
    int rc;
    char *zErrMsg = 0;

    int i;
    int offset;

    char month_list[50];
    char instrument_list[50];
    char sql_string[500];

    memset(month_list,0,50);
    memset(instrument_list,0,50);
    memset(sql_string,0,500);

    // There must be easier ways to do this than the way I'm doing it
    month_list[0]='(';
    offset = 1;
    for (i=1;i<=12;i++)
    {
        if (!wod_reader->query_bounds.month[i]) continue;
        sprintf(&month_list[offset],"%d,",i);
        offset = strlen(month_list);
    }
    month_list[offset-1]=')';
    // There must be easier ways to do this than the way I'm doing it
    instrument_list[0]='(';
    offset = 1;
    for (i=1;i<=WOD_NUM_TYPES;i++)
    {
        if (!wod_reader->query_bounds.instrument[i]) continue;
        sprintf(&instrument_list[offset],"%d,",i);
        offset = strlen(instrument_list);
    }
    instrument_list[offset-1]=')';

    // Check some of the query bounds before we start
    if (wod_reader->query_bounds.longitude_west > 180) wod_reader->query_bounds.longitude_west -= 360;
    if (wod_reader->query_bounds.longitude_east > 180) wod_reader->query_bounds.longitude_east -= 360;

    // Form the query: select records based on geo/time bounds and then sort in such a way that
    // the hits return in WOD filename order to minimize jumping around between files
    sprintf(sql_string,"SELECT * FROM tbl1 WHERE latitude > %.3f AND latitude < %.3f AND longitude > %.3f AND longitude < %.3f AND month in %s AND instrument in %s ORDER BY instrument, observed, quadrant, wmo_lat, wmo_lon, year, month, day;",
        wod_reader->query_bounds.latitude_south, wod_reader->query_bounds.latitude_north,
        wod_reader->query_bounds.longitude_west, wod_reader->query_bounds.longitude_east,month_list,instrument_list);

    if (wod_reader->debug) printf("Doing SQL string %s\n",sql_string);

    printf("Doing WOD query...patience\n");

    // Do the query and deal with the results
    rc = sqlite3_exec(wod_reader->db, sql_string, wod_query_callback, wod_reader, &zErrMsg);
    if (rc) error("SQL error: %s",zErrMsg);

    return(0);
}


The callback function (wod_query_callback) in the sqlite3_exec call deals with the query results one at a time.  Here'swhat's going on in there:

static int wod_query_callback(void * reader_in, int argc, char **argv, char **azColName)
{
    int i;
    int iend;

    int got_new_file;

    WOD_Reader *wod_reader;

    char * inname = NULL;

    int file_offset, num_bytes;

    int instrument, observed, quadrant, wmo_lat, wmo_lon;

    char type_lowercase[4];
    char type_uppercase[4];

    memset(type_uppercase,0,4);
    memset(type_lowercase,0,4);

    wod_reader = (WOD_Reader *) reader_in;

    got_new_file = 0;

    // Parse the output record from the query
    for(i=0; i
    {
        if (wod_reader->debug) printf("%s = %s\n", azColName[i], argv[i] ? argv[i] : "NULL");

        if (!strcmp(azColName[i],"instrument"))
        {
            instrument = atoi(argv[i]);
            if (instrument != wod_reader->instrument) got_new_file++;
        }
        else if (!strcmp(azColName[i],"observed"))
        {
            observed = atoi(argv[i]);
            if (observed != wod_reader->observed) got_new_file++;
        }
        else if (!strcmp(azColName[i],"quadrant"))
        {
            quadrant = atoi(argv[i]);
            if (quadrant != wod_reader->quadrant) got_new_file++;
        }
        else if (!strcmp(azColName[i],"wmo_lat"))
        {
            wmo_lat = atoi(argv[i]);
            if (wmo_lat != wod_reader->wmo_lat) got_new_file++;
        }
        else if (!strcmp(azColName[i],"wmo_lon"))
        {
            wmo_lon = atoi(argv[i]);
            if (wmo_lon != wod_reader->wmo_lon) got_new_file++;
        }
        else if (!strcmp(azColName[i],"file_offset"))
        {
            file_offset = atoi(argv[i]);
        }
        else if (!strcmp(azColName[i],"num_bytes"))
        {
            num_bytes = atoi(argv[i]);
        }
    }

    if (got_new_file)
    {
        if (wod_reader->debug) printf("Need to load a new WOD file!!\n");

        // Got a new file, so close the old one...
        wod_close_file(wod_reader);

        memcpy(type_uppercase,wod_data_types[instrument],3);
        type_lowercase[0] = tolower(type_uppercase[0]);
        type_lowercase[1] = tolower(type_uppercase[1]);
        type_lowercase[2] = tolower(type_uppercase[2]);

        // Build up the filename
        inname = calloc(strlen(wod_reader->wod_path)+50,1);
        memset(inname,0,strlen(wod_reader->wod_path)+50);

        // this the type of path that we're trying to construct: ctd/OBS/CTDO7807.gz
        sprintf(inname,"%s/%s/OBS/%sO%d%d%0.2d.gz",
        wod_reader->wod_path,type_lowercase,type_uppercase,quadrant,wmo_lat,wmo_lon);

        if (wod_open_file(wod_reader,inname))
        {
            sprintf(inname,"%s/%s/OBS/%sO%d%d%0.2d",wod_reader->wod_path,type_lowercase,type_uppercase,
                quadrant,wmo_lat,wmo_lon);
            if (wod_open_file(wod_reader,inname))
            {
                printf("Can't open file %s ... non-existant??\n",inname);
                free(inname);
                inname = NULL;
                return(1);
            }
        }

        if (inname) free(inname);
        inname = NULL;
    }

    if (wod_reader->debug) printf("%s: fseeking to %d for %d bytes\n",wod_reader->fname, file_offset,num_bytes);

    // Seek to the right spot in the file
    fseek(wod_reader->fp,file_offset,SEEK_SET);

    // Now read the profile at that location
    if ((iend = oclread (wod_reader)) == -1)
    {
        wod_close_file(wod_reader);
        return(1);
    }

    // Convert the cast to OMG SVP format, this updates the extracted_watercolumns array
    wod2svp(wod_reader);

    return 0;
}



Now I've just got to build a query building utility in the graphical end that lets the user select instrument, month and/or other fields.  Geographic bounds are currently selected by lasso in the map viewer and the month defaults to the current month.

I'm also planning on speeding up the oclread function (was delivered with the free NODC code that came with the WOD), right now it's reading things a single field at a time and I think it can be sped up a bit by reading the entire cast into memory and then parsing it from there.

jb

No comments:

Post a Comment