diff --git a/misc/import_memberdb.js b/misc/import_memberdb.js index a8e5aa90230cb230d603432ea4c639a4b06e173e..33de5d96b2057adc19b1011de4e70b171be5d602 100755 --- a/misc/import_memberdb.js +++ b/misc/import_memberdb.js @@ -3,30 +3,58 @@ /* script to import users from memberdb format (MySQL) to mongoose for testing */ const pg = require('pg'); -var mongoose = require('mongoose'); -var conf = require('./pg.json'); -var Member = require('../models/memberSchema'); +const mongoose = require('mongoose'); +const conf = require('./pg.json'); +const Member = require('../models/memberSchema'); +const TLA = require('../models/tlaSchema'); +const Fuse = require('fuse.js'); +const tlaParse = require('./tlaParser'); -/* specify stuff via environment variables +/* DEPRECATED: specify stuff via environment variables PGUSER=uccmemberdb PGHOST=localhost PGPASSWORD=[redacted] PGDATABASE=uccmemberdb_2018 PGPORT=5432 node misc/import_memberdb.js */ + const pgclient = new pg.Client(conf); -pgclient.connect(); +console.log("Connecting to memberdb..."); -console.log("waiting for pgclient to connect"); +pgclient.connect(function (err) { + if (err) { + console.warn(err); + console.warn("could not connect to memberdb!"); + process.exit(1); + } else { + console.log("memberdb connected OK"); + } +}); +console.log("Connecting to mongodb..."); mongoose.connect('mongodb://localhost/uccportal-dev'); var db = mongoose.connection; db.on('error', console.error.bind(console, 'db connection error:')); db.once('open', function() { - console.log("Connected to db"); + console.log("Connected to mongodb."); + TLA.remove({}, function (err) { + if (err) { + console.warn(err); + } + TLA.insertMany(tlas, function (err) { + if (err) { + console.warn(err); + console.warn("could not update TLAs in database"); + } + }); + }); Member.remove({}, function (err) { - console.log(err); + if (err) { + console.log(err); + process.exit(1); + } pgclient.query('SELECT * FROM memberdb_member', processOldMembers); }); }); -console.log("waiting for mongoose to open"); +const tlafile = process.argv[2]; +var tlas = tlaParse(tlafile); /** old memberdb schema from postgresql: CREATE TABLE memberdb_member ( @@ -40,9 +68,7 @@ CREATE TABLE memberdb_member ( student_no character varying(20) NOT NULL, date_of_birth date, signed_up date DEFAULT '2018-02-23'::date NOT NULL -); -*/ - +); */ function processOldMembers(err, res) { var newMembers = []; if (err) { @@ -51,20 +77,75 @@ function processOldMembers(err, res) { console.log("Dumping " + res.rows.length + " rows..."); for (var i = 0; i < res.rows.length; i++) { if (res.rows[i].username) { - console.log("user: " + res.rows[i].username); + // console.log("user: " + res.rows[i].username); } else { - console.log("new user: " + res.rows[i].real_name); + // console.log("new user: " + res.rows[i].real_name); } newMembers.push(convertMember(res.rows[i])); } Member.insertMany(newMembers, function (err, docs) { - console.log("Done."); + console.log("Done, found " + newMembers.length + " members."); pgclient.end(); - Member.find().exec(verifyMongoMembers); + Member.find({}).exec(verifyMongoMembers); + TLA.find({}).exec(function (err, res) { + if (err) { + console.warn(err); + } else { + console.log(res.length + " TLAs imported into DB."); + } + }); }); } }; +function matchTLAs(members) { + // clone original TLA array so we can remove TLAs we find and end up with a list of unclaimed TLAs. + var tlasUnused = tlas.slice(0); + + // Fuse doesn't like to search across multiple fields so we have to combine firstname and lastname again to make it work. + var fixedTLAs = []; + for (var i = 0; i < tlas.length; i++) { + fixedTLAs.push({ + name: tlas[i].firstname + " " + tlas[i].lastname, + tla: tlas[i].tla, + }); + } + + var options = { + shouldSort: true, + threshold: 0.3, + location: 0, + distance: 100, + maxPatternLength: 50, + minMatchCharLength: 1, + keys: [ + "name" + ] + }; + var fuse = new Fuse(fixedTLAs, options); // "list" is the item array + + // now loop through members and find their TLAs + for (var mi = 0; mi < members.length; mi++) { + var m = members[mi]; + var results = fuse.search(m.firstname + " " + m.lastname); + // console.log("searching for tlas for " + m.firstname + " " + m.lastname); + if (results.length > 0) { + console.log(m.firstname + " has " + results.length + " tlas, using [" + results[0].tla + "]"); + // Some false positives: only use the first TLA. + m.tlas = [results[0].tla]; + var usedId = tlasUnused.indexOf(results[0].tla); + tlasUnused.splice(usedId, usedId + 1); + m.save(); + } + if (results.length == 0) { + // console.log(m.firstname + " has no TLA."); + } + } + + console.log(tlas.length - tlasUnused.length + " TLAs assigned to members, total " + tlas.length + ", unclaimed " + (tlasUnused.length)); + return members; +} + function convertMember(row) { var renewtype, is_student; switch (row.membership_type) { @@ -82,7 +163,7 @@ function convertMember(row) { break; } - return { + var m = { firstname: row.real_name, lastname: "", is_student: is_student, @@ -92,13 +173,15 @@ function convertMember(row) { phone: row.phone_number, birthdate: row.date_of_birth, signupdate: row.signed_up, + username: row.username, renewals: [ { renewtype: renewtype, date: row.signed_up } ], - tlas: [ "???" ] + tlas: [] }; + return m; } function verifyMongoMembers(err, res) { - console.log(res[0]); console.log("Currently " + res.length + " members in mongodb."); + matchTLAs(res); db.close(); } \ No newline at end of file diff --git a/misc/tlaParser.js b/misc/tlaParser.js new file mode 100644 index 0000000000000000000000000000000000000000..e923f00589c654b584e8fcdf51528949b4cc317a --- /dev/null +++ b/misc/tlaParser.js @@ -0,0 +1,54 @@ +const fs = require('fs'); + +module.exports = function parseTLAs(tlafile) { + console.log("Loading TLAs from '" + tlafile + "'..."); + var tlas = []; + try { + var tlalines = fs.readFileSync(tlafile, "utf8").split("\n"); + for (var i = 0; i < tlalines.length; i++) { + /* comments start with 3 spaces, don't ask me why... */ + if (tlalines[i].startsWith(" ", 0) || tlalines[i].length < 3) { + continue; + } + var s = tlalines[i].split(/ _:_ /, 2); + + var n; + if (s[1].includes("\b")) { + // split name by backspace character and fix nastiness + n = s[1].replace(/.[\b]/, "|").trim().replace(/^\|/, "").trim().split("|", 2); + + if (n[0] == "" || n[1] == "" || n.length < 2) { + // backspace character is at start or end, try splitting by space + n = s[1].split(" "); // try splitting by space + } + } else { // No backspaces, split by space. + n = s[1].split(" "); + } + + // TLAs are stored in a terrible format, so I might have to guess that people typically only have one first name. + if (n.length > 2) { + n = [n[0], n.slice(1).join(" ") ]; + } else if (n.length != 2) { + // last resort, pretend we only got first name (probably for something like [TLA] ???????) + n = [n[0], "??????????"]; + } + + var tla = { + firstname: n[0].trim(), + lastname: n[1], + tla: s[0] + }; + tlas.push(tla); + console.log(tla.firstname + " / " + tla.lastname + " / " + tla.tla); + // process.stdout.write("[" + s[0] + "] " + ((tlas.length % 16 == 0) ? "\n" : "")); + } + // process.stdout.write("\n"); + } catch (err) { + console.warn("Error parsing TLA data!"); + console.warn(err); + process.exit(1); + } + + console.log('Finished parsing TLAs, got ' + tlas.length + ' different TLAs.'); + return tlas; +} \ No newline at end of file diff --git a/models/memberSchema.js b/models/memberSchema.js index 57c82ac043a8390ee3e8a5c723e67ee3f47502ce..e00c595972894d57020c34671f02b29e6ad5c6de 100644 --- a/models/memberSchema.js +++ b/models/memberSchema.js @@ -1,5 +1,5 @@ 'use strict'; -var mongoose = require('mongoose'); +const mongoose = require('mongoose'); // Here we define the schema used by the model for gumby documents var memberSchema = mongoose.Schema({ diff --git a/models/tlaSchema.js b/models/tlaSchema.js new file mode 100644 index 0000000000000000000000000000000000000000..8770569d4771fbf463646d04dd0f4c21c347cea4 --- /dev/null +++ b/models/tlaSchema.js @@ -0,0 +1,9 @@ +const mongoose = require('mongoose'); + +var tlaSchema = mongoose.Schema({ + tla: String, // Three Letter Acronym + firstname: String, // Name associated with TLA (even if a member, yes I know it's duplicated but TLAs are weird.) + lastname: String, +}); + +module.exports = mongoose.model('TLA', tlaSchema); \ No newline at end of file