# STL Set Operations

A summary with code samples of the set operations that come with STL: union, intersection, symmetrical difference and set difference.

For consistency, the two sets of integer vectors used in each example are the same and are:

```vals1 = { 1, 2, 4 }
vals2 = { 1, 2, 5, 7 };
```

Unions

The union of two sets is formed by the elements that are present in either one of the sets, or in both.

```#include <iostream>
#include <algorithm>
#include <iterator>
#include <string>
#include <vector>

typedef std::vector<int>::iterator iter;

template<typename T, typename InputIterator>
void Print(std::ostream& ostr,
InputIterator itbegin,
InputIterator itend,
const std::string& delimiter)
{
std::copy(itbegin,
itend,
std::ostream_iterator<T>(ostr, delimiter.c_str()));
}

int main()
{
// Initialise sample data sets and vectors
int vals1[] = { 1, 2, 4 };
int vals2[] = { 1, 2, 5, 7 };
int size1 = sizeof( vals1 ) / sizeof ( vals1[ 0 ] );
int size2 = sizeof( vals2 ) / sizeof ( vals2[ 0 ] );

std::vector<int> v1( vals1, vals1 + size1 );
std::vector<int> v2( vals2, vals2 + size2 );
std::vector<int> v( size1 + size2 );
std::vector<int>::iterator it_union;
std::vector<int>::iterator it;

// Sort the vectors (essential)
std::sort( v1.begin(), v1.end() );
std::sort( v2.begin(), v2.end() );

// Find the union of the two vector sets: elements present in either
// of the sets or both
it_union = set_union ( v1.begin(), v1.end(), v2.begin(), v2.end(), v.begin());
int union_size = int( it_union - v.begin() );
std::cout << "Union size = " << union_size << std::endl;

// Print the union
Print<int, iter>( std::cout, v.begin(), it_union, " " );

return 0;
}
```

Output:

Union size = 5
1 2 4 5 7

Intersections

Intersections consist of elements present in both sets at the same time.

```#include <iostream>
#include <algorithm>
#include <iterator>
#include <string>
#include <vector>

typedef std::vector<int>::iterator iter;

template<typename T, typename InputIterator>
void Print(std::ostream& ostr,
InputIterator itbegin,
InputIterator itend,
const std::string& delimiter)
{
std::copy(itbegin,
itend,
std::ostream_iterator<T>(ostr, delimiter.c_str()));
}

int main()
{
// Initialise sample data sets and vectors
int vals1[] = { 1, 2, 4 };
int vals2[] = { 1, 2, 5, 7 };
int size1 = sizeof( vals1 ) / sizeof ( vals1[ 0 ] );
int size2 = sizeof( vals2 ) / sizeof ( vals2[ 0 ] );

std::vector<int> v1( vals1, vals1 + size1 );
std::vector<int> v2( vals2, vals2 + size2 );
std::vector<int> v( size1 + size2 );
std::vector<int>::iterator it_intersect;
std::vector<int>::iterator it;

// Sort the vectors (essential)
std::sort( v1.begin(), v1.end() );
std::sort( v2.begin(), v2.end() );

// Find the intersection of the two vector sets: elements present in both sets
it_intersect = set_intersection( v1.begin(), v1.end(), v2.begin(), v2.end(), v.begin());
int intersect_size = int( it_intersect - v.begin() );
std::cout << "Intersection size = " << intersect_size << std::endl;

// Print the intersection
Print<int, iter>( std::cout, v.begin(), it_intersect, " " );

return 0;
}
```

Output:

Intersection size = 2
1 2

Symmetric Differences

The set of elements that are present in one of the sets, but not in the other.

```#include <iostream>
#include <algorithm>
#include <iterator>
#include <string>
#include <vector>

typedef std::vector<int>::iterator iter;

template<typename T, typename InputIterator>
void Print(std::ostream& ostr,
InputIterator itbegin,
InputIterator itend,
const std::string& delimiter)
{
std::copy(itbegin,
itend,
std::ostream_iterator<T>(ostr, delimiter.c_str()));
}

int main()
{
// Initialise sample data sets and vectors
int vals1[] = { 1, 2, 4 };
int vals2[] = { 1, 2, 5, 7 };
int size1 = sizeof( vals1 ) / sizeof ( vals1[ 0 ] );
int size2 = sizeof( vals2 ) / sizeof ( vals2[ 0 ] );

std::vector<int> v1( vals1, vals1 + size1 );
std::vector<int> v2( vals2, vals2 + size2 );
std::vector<int> v( size1 + size2 );
std::vector<int>::iterator it_symm_diff;
std::vector<int>::iterator it;

// Sort the vectors (essential)
std::sort( v1.begin(), v1.end() );
std::sort( v2.begin(), v2.end() );

// The symmetric difference of two sets is formed by the elements that are present in one
// of the sets, but not in the other.
it_symm_diff = set_symmetric_difference( v1.begin(), v1.end(), v2.begin(), v2.end(), v.begin());
int symm_diff_size = int( it_symm_diff - v.begin() );
std::cout << "Symmetric difference size = " << symm_diff_size << std::endl;

// Print the symmetric difference
Print<int, iter>( std::cout, v.begin(), it_symm_diff, " " );

return 0;
}
```

Output:

Symmetric difference size = 3
4 5 7

Set Differences

The elements that present in the first set, but not in the second one.

```#include <iostream>
#include <algorithm>
#include <iterator>
#include <string>
#include <vector>

typedef std::vector<int>::iterator iter;

template<typename T, typename InputIterator>
void Print(std::ostream& ostr,
InputIterator itbegin,
InputIterator itend,
const std::string& delimiter)
{
std::copy(itbegin,
itend,
std::ostream_iterator<T>(ostr, delimiter.c_str()));
}

int main()
{
// Initialise sample data sets and vectors
int vals1[] = { 1, 2, 4 };
int vals2[] = { 1, 2, 5, 7 };
int size1 = sizeof( vals1 ) / sizeof ( vals1[ 0 ] );
int size2 = sizeof( vals2 ) / sizeof ( vals2[ 0 ] );

std::vector<int> v1( vals1, vals1 + size1 );
std::vector<int> v2( vals2, vals2 + size2 );
std::vector<int> v( size1 + size2 );
std::vector<int>::iterator it_set_diff;
std::vector<int>::iterator it;

// Sort the vectors (essential)
std::sort( v1.begin(), v1.end() );
std::sort( v2.begin(), v2.end() );

// The set difference of two sets is formed by the elements that are present in the first
// sets, but not in the second.
it_set_diff = set_difference( v1.begin(), v1.end(), v2.begin(), v2.end(), v.begin());
int set_diff_size = int( it_set_diff - v.begin() );
std::cout << "Set difference size = " << set_diff_size << std::endl;

// Print the set difference
Print<int, iter>( std::cout, v.begin(), it_set_diff, " " );

return 0;
}
```

Output:

Set difference size = 1
4

Practical example: using union / set intersection to calculate the Jaccard Index

The Jaccard index is a means of measuring the similarity and/or diversity of sample sets:

```#include <iostream>
#include <vector>
#include <algorithm>
#include <iterator>
#include <string>

class Token
{
private:
std::string str;

public:
Token() {}
Token( std::string val ) : str( val ) {}
std::string Word() const
{ return str; }
};

struct Ascending
{
bool operator() ( Token& start, Token& end )
{
return start.Word() < end.Word();
}
};

int main()
{
std::string str1[] = { "The", "crazy", "cat", "sat", "on",  "the", "mat" };
std::string str2[] = { "The", "cat",   "sat", "on",  "the", "red", "mat" };

std::vector<Token>::iterator tok_intersect, tok_union;

int size1 = sizeof( str1 ) / sizeof( str1[ 0 ] );
int size2 = sizeof( str2 ) / sizeof( str2[ 0 ] );
std::vector<Token> tokens1( str1, str1 + size1 );
std::vector<Token> tokens2( str2, str2 + size2 );

std::vector<Token> tokens( size1 + size2 );

std::sort( tokens1.begin(), tokens1.end(), Ascending() );
std::sort( tokens2.begin(), tokens2.end(), Ascending() );

tok_intersect = std::set_intersection( tokens1.begin(),
tokens1.end(),
tokens2.begin(),
tokens2.end(),
tokens.begin(),
Ascending() );

int intersect_size = int( tok_intersect - tokens.begin() );

tok_union = std::set_union ( tokens1.begin(),
tokens1.end(),
tokens2.begin(),
tokens2.end(),
tokens.begin(),
Ascending() );

int union_size = int( tok_union - tokens.begin() );

double JaccardIndex = (double) intersect_size / (double) union_size;

return 0;
}
```

In this example, the size of the union set is 8, since there are 8 words in either one of the sets, or in both: “The”, “crazy”, “cat”, “sat”, “on”, “the”, “red”, “mat”. The intersection size is 6 since there are 6 words present in both sets at the same time: “The”, “cat”, “sat”, “on”, “the”, “mat”. Thus the Jaccard Index is calculated as 6 / 8 = 0.75.