Implementing intersection, union, and complement of data with Redis

2022-08-11 · 750 reads

Tip: This article has not been updated for more than 672 days; please check whether its content is still valid!

Today, let's simulate a scenario: we have several text files locally, and each file stores a large number of 32-character strings that serve as unique user identifiers, one user per line. If we have a very large number of users every day, our work may require computing the intersection, union, or complement (difference) of these user sets. The simplest way is to use Java collections such as HashSet. However, this approach has a limitation: the JVM usually runs with a limited amount of memory, so if the whole calculation is performed in JVM memory, it can easily cause an OOM (out-of-memory) error. This article introduces a more scalable way to perform these operations: computing the intersection, union, and complement of the data with Redis.

Environment

Redis version: 6.0.6; Jedis version: 4.2.2; Hutool utility library version: 5.8.0.M3

Pom file:

 <dependencies>
     <!-- Jedis: Redis client for Java -->
     <dependency>
         <groupId>redis.clients</groupId>
         <artifactId>jedis</artifactId>
         <version>4.2.2</version>
     </dependency>
     <!-- Hutool: general-purpose utility library (file and digest helpers) -->
     <dependency>
         <groupId>cn.hutool</groupId>
         <artifactId>hutool-all</artifactId>
         <version>5.8.0.M3</version>
     </dependency>
 </dependencies>

Intersection, union, and complement calculation

Initialize constants

 /**
  * Constants for the Redis set-calculation example: the local file paths and the Redis keys
  * used for the two input sets and the three result sets (difference, intersection, union).
  *
  * <p>Every field is {@code final} so a path or key cannot be reassigned at runtime, and the
  * private constructor prevents this holder of static members from being instantiated.
  */
 public class RedisCalculateUtils {
     /** Path of the first input file. */
     static final String oneFileString = "/Users/tmp/test-1.txt";
     /** Path of the second input file. */
     static final String twoFileString = "/Users/tmp/test-2.txt";
     /** Path of the file that receives the difference result. */
     static final String diffFileString = "/Users/tmp/diff-test.txt";
     /** Path of the file that receives the intersection result. */
     static final String interFileString = "/Users/tmp/inter-test.txt";
     /** Path of the file that receives the union result. */
     static final String unionFileString = "/Users/tmp/union-test.txt";

     /** Redis key of the set built from the first input file. */
     static final String oneFileCacheKey = "oneFile";
     /** Redis key of the set built from the second input file. */
     static final String twoFileCacheKey = "twoFile";
     /** Redis key of the difference result set. */
     static final String diffFileCacheKey = "diffFile";
     /** Redis key of the intersection result set. */
     static final String interFileCacheKey = "interFile";
     /** Redis key of the union result set. */
     static final String unionFileCacheKey = "unionFile";

     /** Not instantiable: this class only holds static members. */
     private RedisCalculateUtils() {
     }
 }

Initialize data to the specified file

 /** *Initialize the data and write it to the file */ public static void writeFile() {         File oneFile = new File(oneFileString);         List<String> fs = new ArrayList<>(10000);         for (int i = 10000; i < 15000; i++) {             String s = SecureUtil.md5(String.valueOf(i));             fs.add(s);         }         FileUtil.writeUtf8Lines(fs, oneFile);         File twoFile = new File(twoFileString);         fs.clear();         for (int i = 12000; i < 20000; i++) {             String s = SecureUtil.md5(String.valueOf(i));             fs.add(s);         }         FileUtil.writeUtf8Lines(fs, twoFile);     }

Write the specified file to Redis

 /** *Read file data and write it to Redis */ public static void writeCache() {     try(Jedis jedis = new Jedis("127.0.0.1", 6379)) {         Pipeline p = jedis.pipelined();         List<String> oneFileStringList = FileUtil.readLines(oneFileString, "UTF-8");         for (String s : oneFileStringList) {             p.sadd(oneFileCacheKey, s);         }         p.sync();         List<String> twoFileStringList = FileUtil.readLines(twoFileString, "UTF-8");         for (String s : twoFileStringList) {             p.sadd(twoFileCacheKey, s);         }         p.sync();     } catch (Exception e) {         throw new RuntimeException(e);     } }

Calculation of difference set

     /** *The difference between the set corresponding to oneKey and the set corresponding to twoKey is written into threeKey *@ param oneKey Set Key before the difference set *@ param twoKey Set Key after the difference set *@ param threeKey Set Key of difference set result      */     public static void diff(String oneKey, String twoKey, String threeKey) {         try(Jedis jedis = new Jedis("127.0.0.1", 6379)) {             long result = jedis.sdiffstore(threeKey, oneKey, twoKey); System. out. println ("Number of difference sets between oneKey and twoKey:"+result);         } catch (Exception e) {             throw new RuntimeException(e);         }     }

Write the difference calculation result to the specified file

     /** *Write the calculated difference set data to the specified file      */     public static void writeDiffToFile() {         File diffFile = new File(diffFileString);         try(Jedis jedis = new Jedis("127.0.0.1", 6379)) {             Set<String> result = jedis.smembers(diffFileCacheKey);             FileUtil.writeUtf8Lines(result, diffFile);         } catch (Exception e) {             throw new RuntimeException(e);         }     }

Calculation of intersection

 /**      * *@ param cacheKeyArray intersection set Key *@ param destinationKey Intersection Collection Result Key      */     public static void inter(String[] cacheKeyArray, String destinationKey) {         try(Jedis jedis = new Jedis("127.0.0.1", 6379)) {             long result = jedis.sinterstore(destinationKey, cacheKeyArray); System. out. println ("Number of intersections of cacheKeyArray:"+result);         } catch (Exception e) {             throw new RuntimeException(e);         }     }

The intersection calculation result is written to the specified file

 /** *Writes the calculated intersection data to the specified file  */ public static void writeInterToFile() { File interFile = new File(interFileString); try(Jedis jedis = new Jedis("127.0.0.1", 6379)) { Set<String> result = jedis.smembers(interFileCacheKey); FileUtil.writeUtf8Lines(result, interFile); } catch (Exception e) { throw new RuntimeException(e); } }

Calculation of union

     /** *Calculate the union of multiple keys and write it to a new key *@ param cacheKeyArray Key of union set *@ param destinationKey Key written by union result      */      public static void union(String[] cacheKeyArray, String destinationKey) {          try(Jedis jedis = new Jedis("127.0.0.1", 6379)) {              long result = jedis.sunionstore(destinationKey, cacheKeyArray); System. out. println ("Number of union sets of cacheKeyArray:"+result);          } catch (Exception e) {              throw new RuntimeException(e);          }      }

Write union calculation results to the specified file

 /** *Writes the calculated union data to the specified file  */ public static void writeUnionToFile() {  File unionFile = new File(unionFileString);  try(Jedis jedis = new Jedis("127.0.0.1", 6379)) {  Set<String> result = jedis.smembers(unionFileCacheKey);  FileUtil.writeUtf8Lines(result, unionFile);  } catch (Exception e) {  throw new RuntimeException(e);  }  }

Redis Command Description

SDIFFSTORE destination key [key …]

For example:

 key1 = {a,b,c,d}
 key2 = {c}
 key3 = {a,c,e}
 SDIFF key1 key2 key3 = {b,d}

The SDIFFSTORE command is similar to SDIFF, except that instead of returning the result set to the client, it saves the result to the destination set.

If the destination collection already exists, it will be overwritten.

Return value

Number of members in the result set

SINTERSTORE destination key [key …]

For example:

 key1 = {a,b,c,d}
 key2 = {c}
 key3 = {a,c,e}
 SINTER key1 key2 key3 = {c}

The SINTERSTORE command is similar to the SINTER command. The difference is that it does not directly return a result set, but saves the results in the destination set.

If the destination collection exists, it will be overwritten.

Return value

Number of members in the result set

SUNIONSTORE destination key [key …]

For example:

 key1 = {a,b,c,d}
 key2 = {c}
 key3 = {a,c,e}
 SUNION key1 key2 key3 = {a,b,c,d,e}

The function of the SUNIONSTORE command is similar to SUNION. The difference is that the result set is not returned, but stored in destination.

If the destination already exists, it will be overwritten.

Return value

Number of members in the result set

Reference: https://www.redis.com.cn/set.html