Archive for category Uncategorized
URL encode / decode in JavaScript
Posted by jens in Uncategorized on February 26th, 2010
Decoding and Encoding URLs in JavaScript should be a pretty easy thing to do especially since all browsers still have the functionality built-in. Interestingly no browser allows the JavaScript runtime to use this feature. So I had to write it for myself.
The code I came up with is far from perfect but it worked for me. To decode an URL use url_decode(url) and to reverse it just call the utf16to8 function. The rest does your browser for you.
function url_decode(str){
var hex = /^[0-9a-fA-F]{2}/;
var out='';
var arr = str.split('%');
if(arr.length<2) return str;
for(var i=0;i<arr.length;i++)
{
/*look for hex values */
if(hex.exec(arr[i])) {
out += String.fromCharCode(parseInt(arr[i].substring(0,2),16))+arr[i].substring(2,arr[i].length);
} else { if(i==0) out+=arr[i]; else out+='%'+arr[i];
}
}
return utf8to16(out);
}
function utf16to8(str) {
var out, i, len, c;
out = "";
len = str.length;
for(i = 0; i < len; i++) {
c = str.charCodeAt(i);
if ((c >= 0x0001) && (c <= 0x007F)) {
out += str.charAt(i);
} else if (c > 0x07FF) {
out += String.fromCharCode(0xE0 | ((c >> 12) & 0x0F));
out += String.fromCharCode(0x80 | ((c >> 6) & 0x3F));
out += String.fromCharCode(0x80 | ((c >> 0) & 0x3F));
} else {
out += String.fromCharCode(0xC0 | ((c >> 6) & 0x1F));
out += String.fromCharCode(0x80 | ((c >> 0) & 0x3F));
}
}
return out;
}
function utf8to16(str) {
var out, i, len, c;
var char2, char3;
out = "";
len = str.length;
i = 0;
while(i < len) {
c = str.charCodeAt(i++);
switch(c >> 4)
{
case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
// 0xxxxxxx
out += str.charAt(i-1);
break;
case 12: case 13:
// 110x xxxx 10xx xxxx
char2 = str.charCodeAt(i++);
out += String.fromCharCode(((c & 0x1F) << 6) | (char2 & 0x3F));
break;
case 14:
// 1110 xxxx 10xx xxxx 10xx xxxx
char2 = str.charCodeAt(i++);
char3 = str.charCodeAt(i++);
out += String.fromCharCode(((c & 0x0F) << 12) |
((char2 & 0x3F) << 6) |
((char3 & 0x3F) << 0));
break;
}
}
return out;
}
tracking virtual links with google analytics
Posted by jens in Uncategorized on January 4th, 2010
Tracking dynamic sites is sometimes a bit tricky. Typically tracking systems are specialized in tracking page views. More sophisticated system have there own way of tracking custom event (like shown here).
Unfortunately I needed to track clicks on a HTML canvas. To make these clicks visible to a tracking system, I wanted to transform each click to virtual URL. That way I could use Google analytics not only for tracking but also for popularity statistics of certain content.
The script for doing so is actually pretty simple.
function trace(url){
var tracker = _gat._getTracker("UA-XXXXXXX-X");
tracker._trackPageview(url);
}
Now every time I need to track something I call this function with a custom build URL.
searching for hash strings in postgres
Posted by jens in Uncategorized on September 23rd, 2009
For one of my projects a have a database which has a rather large table consisting of just an url and a corresponding id. For performance reasons I added a md5 column which hashes the url. With this column it should be a lot faster to look up an url.
CREATE TABLE pages ( id bigint NOT NULL, url character varying(255), md5 character(32), CONSTRAINT pages_pkey PRIMARY KEY (id) )
The faster lookup should mainly be possible through the shorter column length (and therefore smaller index). Actually I don’t know if the fixed width is good or bad here, but hashes usually don’t vary in length. After creating this table I added a B-Tree unique Index to the md5 column to enable a fast lookup.
After a while a noticed a rather high CPU load on lookups for this table so I tried to analyze the problem. First I tried the obvious through psql.
cloud=# explain analyze select * from pages where md5 ='abc';
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------
Index Scan using i_pages_md5 on pages (cost=0.00..8.50 rows=1 width=166) (actual time=0.046..0.046 rows=0 loops=1)
Index Cond: (md5 = 'abc'::bpchar)
Total runtime: 0.157 ms
(3 rows)
As the explain shows all works perfectly fine and lookups shouldn’t be a problem. So there had to be something different what was going on.
After that I tried the same select with an actual md5.
cloud=# explain analyze select * from pages where md5 = md5('abc');
QUERY PLAN
--------------------------------------------------------------------------------------------------------------
Seq Scan on pages (cost=0.00..32017.63 rows=3994 width=166) (actual time=1203.699..1203.699 rows=0 loops=1)
Filter: ((md5)::text = '900150983cd24fb0d6963f7d28e17f72'::text)
Total runtime: 1203.769 ms
(3 rows)
Now you can see the plan does change quite a lot. I have a full table scan instead of an index scan. You can also see that the query time increases nearly by factor ten thousand.
The reason for this dramatic change is a simple type mismatch. For whatever reason the md5 function will be evaluated to a string of the type text. To create a match with the column md5 all values had to be casted to that type. The side effect of this is that the index can no longer be used, because it is of the wrong type.
To solve this I just had to cast the result of the md5 function back to something that is compatible with the index type. In my case I used a fixed width character field which is represented in the database as bpchar (blank padded character). So after modifying the query to the following I was back on index usage.
cloud=# explain analyze select * from pages where md5 = md5('abc')::bpchar;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------
Index Scan using i_pages_md5 on pages (cost=0.00..8.50 rows=1 width=166) (actual time=0.141..0.141 rows=0 loops=1)
Index Cond: (md5 = '900150983cd24fb0d6963f7d28e17f72'::bpchar)
Total runtime: 0.199 ms
(3 rows)